1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2013 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16   |         Rui Hirokawa <hirokawa@php.net>                              |
17   +----------------------------------------------------------------------+
18 */
19
20/* $Id$ */
21
22/*
23 * PHP 4 Multibyte String module "mbstring"
24 *
25 * History:
26 *   2000.5.19  Release php-4.0RC2_jstring-1.0
27 *   2001.4.1   Release php4_jstring-1.0.91
28 *   2001.4.30  Release php4_jstring-1.1 (contribute to The PHP Group)
29 *   2001.5.1   Renamed from jstring to mbstring (hirokawa@php.net)
30 */
31
32/*
33 * PHP3 Internationalization support program.
34 *
35 * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36 * All rights reserved.
37 *
38 * See README_PHP3-i18n-ja for more detail.
39 *
40 * Authors:
41 *    Hironori Sato <satoh@jpnnet.com>
42 *    Shigeru Kanemoto <sgk@happysize.co.jp>
43 *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44 *    Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45 */
46
47/* {{{ includes */
48#ifdef HAVE_CONFIG_H
49#include "config.h"
50#endif
51
52#include "php.h"
53#include "php_ini.h"
54#include "php_variables.h"
55#include "mbstring.h"
56#include "ext/standard/php_string.h"
57#include "ext/standard/php_mail.h"
58#include "ext/standard/exec.h"
59#include "ext/standard/php_smart_str.h"
60#include "ext/standard/url.h"
61#include "main/php_output.h"
62#include "ext/standard/info.h"
63
64#include "libmbfl/mbfl/mbfl_allocators.h"
65
66#include "php_variables.h"
67#include "php_globals.h"
68#include "rfc1867.h"
69#include "php_content_types.h"
70#include "SAPI.h"
71#include "php_unicode.h"
72#include "TSRM.h"
73
74#include "mb_gpc.h"
75
76#if HAVE_MBREGEX
77#include "php_mbregex.h"
78#endif
79
80#ifdef ZEND_MULTIBYTE
81#include "zend_multibyte.h"
82#endif /* ZEND_MULTIBYTE */
83
84#if HAVE_ONIG
85#include "php_onig_compat.h"
86#include <oniguruma.h>
87#undef UChar
88#elif HAVE_PCRE || HAVE_BUNDLED_PCRE
89#include "ext/pcre/php_pcre.h"
90#endif
91/* }}} */
92
93#if HAVE_MBSTRING
94
95/* {{{ prototypes */
/* Declares the storage for this extension's module globals. */
96ZEND_DECLARE_MODULE_GLOBALS(mbstring)
97
/* Globals constructor/destructor; both are wired into mbstring_module_entry
 * via PHP_GINIT(mbstring) / PHP_GSHUTDOWN(mbstring). */
98static PHP_GINIT_FUNCTION(mbstring);
99static PHP_GSHUTDOWN_FUNCTION(mbstring);
100
/* Helpers declared only when ZEND_MULTIBYTE is defined. Their definitions are
 * not in this part of the file; going by the names they cover an odd-length
 * check, conversion, detection and installing the Zend encoding - confirm at
 * the definitions. */
101#ifdef ZEND_MULTIBYTE
102static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
103static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC);
104static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC);
105static int php_mb_set_zend_encoding(TSRMLS_D);
106#endif
107/* }}} */
108
109/* {{{ php_mb_default_identify_list */
110typedef struct _php_mb_nls_ident_list {
111    enum mbfl_no_language lang;
112    const enum mbfl_no_encoding* list;
113    int list_size;
114} php_mb_nls_ident_list;
115
116static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
117    mbfl_no_encoding_ascii,
118    mbfl_no_encoding_jis,
119    mbfl_no_encoding_utf8,
120    mbfl_no_encoding_euc_jp,
121    mbfl_no_encoding_sjis
122};
123
124static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
125    mbfl_no_encoding_ascii,
126    mbfl_no_encoding_utf8,
127    mbfl_no_encoding_euc_cn,
128    mbfl_no_encoding_cp936
129};
130
131static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
132    mbfl_no_encoding_ascii,
133    mbfl_no_encoding_utf8,
134    mbfl_no_encoding_euc_tw,
135    mbfl_no_encoding_big5
136};
137
138static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
139    mbfl_no_encoding_ascii,
140    mbfl_no_encoding_utf8,
141    mbfl_no_encoding_euc_kr,
142    mbfl_no_encoding_uhc
143};
144
145static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
146    mbfl_no_encoding_ascii,
147    mbfl_no_encoding_utf8,
148    mbfl_no_encoding_koi8r,
149    mbfl_no_encoding_cp1251,
150    mbfl_no_encoding_cp866
151};
152
153static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
154    mbfl_no_encoding_ascii,
155    mbfl_no_encoding_utf8,
156    mbfl_no_encoding_armscii8
157};
158
159static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
160    mbfl_no_encoding_ascii,
161    mbfl_no_encoding_utf8,
162    mbfl_no_encoding_cp1254,
163    mbfl_no_encoding_8859_9
164};
165
166static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
167    mbfl_no_encoding_ascii,
168    mbfl_no_encoding_utf8,
169    mbfl_no_encoding_koi8u
170};
171
172static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
173    mbfl_no_encoding_ascii,
174    mbfl_no_encoding_utf8
175};
176
177
178static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
179    { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
180    { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
181    { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
182    { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
183    { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
184    { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
185    { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
186    { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
187    { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
188};
189
190/* }}} */
191
192/* {{{ mb_overload_def mb_ovld[] */
/* Function-overload table. Each row carries an MB_OVERLOAD_* group flag
 * followed by three names: a standard function, its mb_* counterpart and an
 * "mb_orig_*" name (presumably the alias under which the original is kept -
 * struct mb_overload_def is declared outside this file, confirm there).
 * The regex rows exist only when HAVE_MBREGEX is set; the all-zero row
 * terminates the table. */
193static const struct mb_overload_def mb_ovld[] = {
194    {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
195    {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
196    {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
197    {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
198    {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
199    {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
200    {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
201    {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
202    {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
203    {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
204    {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
205    {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
206    {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
207#if HAVE_MBREGEX
208    {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
209    {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
210    {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
211    {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
212    {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
213#endif
214    {0, NULL, NULL, NULL}
215};
216/* }}} */
217
218/* {{{ arginfo */
/* Argument descriptors, one arginfo_<function> per exported function; they
 * are attached to the functions in mbstring_functions[] via PHP_FE().
 * Each ZEND_ARG_INFO names one parameter. Its first argument is presumably
 * the pass-by-reference flag (it is 1 only for output-style parameters such
 * as "result" and "registers"), and the last argument of
 * ZEND_BEGIN_ARG_INFO_EX is presumably the number of required arguments -
 * confirm both against zend_API.h. */

/* --- configuration getters/setters --- */
219ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
220    ZEND_ARG_INFO(0, language)
221ZEND_END_ARG_INFO()
222
223ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
224    ZEND_ARG_INFO(0, encoding)
225ZEND_END_ARG_INFO()
226
227ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
228    ZEND_ARG_INFO(0, type)
229ZEND_END_ARG_INFO()
230
231ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
232    ZEND_ARG_INFO(0, encoding)
233ZEND_END_ARG_INFO()
234
235ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
236    ZEND_ARG_INFO(0, encoding)
237ZEND_END_ARG_INFO()
238
239ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
240    ZEND_ARG_INFO(0, substchar)
241ZEND_END_ARG_INFO()
242
243ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
244    ZEND_ARG_INFO(0, encoding)
245ZEND_END_ARG_INFO()
246
247ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
248    ZEND_ARG_INFO(0, encoded_string)
249    ZEND_ARG_INFO(1, result)
250ZEND_END_ARG_INFO()
251
252ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
253    ZEND_ARG_INFO(0, contents)
254    ZEND_ARG_INFO(0, status)
255ZEND_END_ARG_INFO()
256
/* --- length, search and substring functions --- */
257ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
258    ZEND_ARG_INFO(0, str)
259    ZEND_ARG_INFO(0, encoding)
260ZEND_END_ARG_INFO()
261
262ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
263    ZEND_ARG_INFO(0, haystack)
264    ZEND_ARG_INFO(0, needle)
265    ZEND_ARG_INFO(0, offset)
266    ZEND_ARG_INFO(0, encoding)
267ZEND_END_ARG_INFO()
268
269ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
270    ZEND_ARG_INFO(0, haystack)
271    ZEND_ARG_INFO(0, needle)
272    ZEND_ARG_INFO(0, offset)
273    ZEND_ARG_INFO(0, encoding)
274ZEND_END_ARG_INFO()
275
276ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
277    ZEND_ARG_INFO(0, haystack)
278    ZEND_ARG_INFO(0, needle)
279    ZEND_ARG_INFO(0, offset)
280    ZEND_ARG_INFO(0, encoding)
281ZEND_END_ARG_INFO()
282
283ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
284    ZEND_ARG_INFO(0, haystack)
285    ZEND_ARG_INFO(0, needle)
286    ZEND_ARG_INFO(0, offset)
287    ZEND_ARG_INFO(0, encoding)
288ZEND_END_ARG_INFO()
289
290ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
291    ZEND_ARG_INFO(0, haystack)
292    ZEND_ARG_INFO(0, needle)
293    ZEND_ARG_INFO(0, part)
294    ZEND_ARG_INFO(0, encoding)
295ZEND_END_ARG_INFO()
296
297ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
298    ZEND_ARG_INFO(0, haystack)
299    ZEND_ARG_INFO(0, needle)
300    ZEND_ARG_INFO(0, part)
301    ZEND_ARG_INFO(0, encoding)
302ZEND_END_ARG_INFO()
303
304ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
305    ZEND_ARG_INFO(0, haystack)
306    ZEND_ARG_INFO(0, needle)
307    ZEND_ARG_INFO(0, part)
308    ZEND_ARG_INFO(0, encoding)
309ZEND_END_ARG_INFO()
310
311ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
312    ZEND_ARG_INFO(0, haystack)
313    ZEND_ARG_INFO(0, needle)
314    ZEND_ARG_INFO(0, part)
315    ZEND_ARG_INFO(0, encoding)
316ZEND_END_ARG_INFO()
317
318ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
319    ZEND_ARG_INFO(0, haystack)
320    ZEND_ARG_INFO(0, needle)
321    ZEND_ARG_INFO(0, encoding)
322ZEND_END_ARG_INFO()
323
324ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
325    ZEND_ARG_INFO(0, str)
326    ZEND_ARG_INFO(0, start)
327    ZEND_ARG_INFO(0, length)
328    ZEND_ARG_INFO(0, encoding)
329ZEND_END_ARG_INFO()
330
331ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
332    ZEND_ARG_INFO(0, str)
333    ZEND_ARG_INFO(0, start)
334    ZEND_ARG_INFO(0, length)
335    ZEND_ARG_INFO(0, encoding)
336ZEND_END_ARG_INFO()
337
338ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
339    ZEND_ARG_INFO(0, str)
340    ZEND_ARG_INFO(0, encoding)
341ZEND_END_ARG_INFO()
342
343ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
344    ZEND_ARG_INFO(0, str)
345    ZEND_ARG_INFO(0, start)
346    ZEND_ARG_INFO(0, width)
347    ZEND_ARG_INFO(0, trimmarker)
348    ZEND_ARG_INFO(0, encoding)
349ZEND_END_ARG_INFO()
350
/* --- conversion, case mapping, detection and encoding/decoding --- */
351ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
352    ZEND_ARG_INFO(0, str)
353    ZEND_ARG_INFO(0, to)
354    ZEND_ARG_INFO(0, from)
355ZEND_END_ARG_INFO()
356
357ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
358    ZEND_ARG_INFO(0, sourcestring)
359    ZEND_ARG_INFO(0, mode)
360    ZEND_ARG_INFO(0, encoding)
361ZEND_END_ARG_INFO()
362
363ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
364    ZEND_ARG_INFO(0, sourcestring)
365    ZEND_ARG_INFO(0, encoding)
366ZEND_END_ARG_INFO()
367
368ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
369    ZEND_ARG_INFO(0, sourcestring)
370    ZEND_ARG_INFO(0, encoding)
371ZEND_END_ARG_INFO()
372
373ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
374    ZEND_ARG_INFO(0, str)
375    ZEND_ARG_INFO(0, encoding_list)
376    ZEND_ARG_INFO(0, strict)
377ZEND_END_ARG_INFO()
378
379ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
380ZEND_END_ARG_INFO()
381
382ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
383    ZEND_ARG_INFO(0, encoding)
384ZEND_END_ARG_INFO()
385
386ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
387    ZEND_ARG_INFO(0, str)
388    ZEND_ARG_INFO(0, charset)
389    ZEND_ARG_INFO(0, transfer)
390    ZEND_ARG_INFO(0, linefeed)
391    ZEND_ARG_INFO(0, indent)
392ZEND_END_ARG_INFO()
393
394ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
395    ZEND_ARG_INFO(0, string)
396ZEND_END_ARG_INFO()
397
398ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
399    ZEND_ARG_INFO(0, str)
400    ZEND_ARG_INFO(0, option)
401    ZEND_ARG_INFO(0, encoding)
402ZEND_END_ARG_INFO()
403
/* NOTE(review): the only descriptor here whose second macro argument is 1;
 * presumably it makes the trailing variadic arguments by-reference - confirm
 * against zend_API.h. */
404ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 1, 0, 3)
405    ZEND_ARG_INFO(0, to)
406    ZEND_ARG_INFO(0, from)
407    ZEND_ARG_INFO(1, ...)
408ZEND_END_ARG_INFO()
409
410ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
411    ZEND_ARG_INFO(0, string)
412    ZEND_ARG_INFO(0, convmap)
413    ZEND_ARG_INFO(0, encoding)
414ZEND_END_ARG_INFO()
415
416ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
417    ZEND_ARG_INFO(0, string)
418    ZEND_ARG_INFO(0, convmap)
419    ZEND_ARG_INFO(0, encoding)
420ZEND_END_ARG_INFO()
421
/* --- mail, introspection and validation --- */
422ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
423    ZEND_ARG_INFO(0, to)
424    ZEND_ARG_INFO(0, subject)
425    ZEND_ARG_INFO(0, message)
426    ZEND_ARG_INFO(0, additional_headers)
427    ZEND_ARG_INFO(0, additional_parameters)
428ZEND_END_ARG_INFO()
429
430ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
431    ZEND_ARG_INFO(0, type)
432ZEND_END_ARG_INFO()
433
434ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
435    ZEND_ARG_INFO(0, var)
436    ZEND_ARG_INFO(0, encoding)
437ZEND_END_ARG_INFO()
438
/* --- regex functions. None of these descriptors is referenced directly in
 * mbstring_functions[]; presumably PHP_MBREGEX_FUNCTION_ENTRIES uses them -
 * confirm in php_mbregex.h. --- */
439ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
440    ZEND_ARG_INFO(0, encoding)
441ZEND_END_ARG_INFO()
442
443ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
444    ZEND_ARG_INFO(0, pattern)
445    ZEND_ARG_INFO(0, string)
446    ZEND_ARG_INFO(1, registers)
447ZEND_END_ARG_INFO()
448
449ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
450    ZEND_ARG_INFO(0, pattern)
451    ZEND_ARG_INFO(0, string)
452    ZEND_ARG_INFO(1, registers)
453ZEND_END_ARG_INFO()
454
455ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
456    ZEND_ARG_INFO(0, pattern)
457    ZEND_ARG_INFO(0, replacement)
458    ZEND_ARG_INFO(0, string)
459    ZEND_ARG_INFO(0, option)
460ZEND_END_ARG_INFO()
461
/* NOTE(review): unlike arginfo_mb_ereg_replace, no "option" parameter is
 * declared here - verify against the mb_eregi_replace implementation. */
462ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
463    ZEND_ARG_INFO(0, pattern)
464    ZEND_ARG_INFO(0, replacement)
465    ZEND_ARG_INFO(0, string)
466ZEND_END_ARG_INFO()
467
468ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
469    ZEND_ARG_INFO(0, pattern)
470    ZEND_ARG_INFO(0, string)
471    ZEND_ARG_INFO(0, limit)
472ZEND_END_ARG_INFO()
473
474ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
475    ZEND_ARG_INFO(0, pattern)
476    ZEND_ARG_INFO(0, string)
477    ZEND_ARG_INFO(0, option)
478ZEND_END_ARG_INFO()
479
480ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
481    ZEND_ARG_INFO(0, pattern)
482    ZEND_ARG_INFO(0, option)
483ZEND_END_ARG_INFO()
484
485ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
486    ZEND_ARG_INFO(0, pattern)
487    ZEND_ARG_INFO(0, option)
488ZEND_END_ARG_INFO()
489
490ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
491    ZEND_ARG_INFO(0, pattern)
492    ZEND_ARG_INFO(0, option)
493ZEND_END_ARG_INFO()
494
495ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
496    ZEND_ARG_INFO(0, string)
497    ZEND_ARG_INFO(0, pattern)
498    ZEND_ARG_INFO(0, option)
499ZEND_END_ARG_INFO()
500
501ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
502ZEND_END_ARG_INFO()
503
504ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
505ZEND_END_ARG_INFO()
506
507ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
508    ZEND_ARG_INFO(0, position)
509ZEND_END_ARG_INFO()
510
511ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
512    ZEND_ARG_INFO(0, options)
513ZEND_END_ARG_INFO()
514/* }}} */
515
516/* {{{ zend_function_entry mbstring_functions[] */
/* Function table of the extension, referenced from mbstring_module_entry.
 * Each PHP_FE() row pairs a function with the arginfo descriptor of the same
 * name. The regex entries are spliced in through
 * PHP_MBREGEX_FUNCTION_ENTRIES only when HAVE_MBREGEX is set, and PHP_FE_END
 * closes the table. */
517const zend_function_entry mbstring_functions[] = {
518    PHP_FE(mb_convert_case,         arginfo_mb_convert_case)
519    PHP_FE(mb_strtoupper,           arginfo_mb_strtoupper)
520    PHP_FE(mb_strtolower,           arginfo_mb_strtolower)
521    PHP_FE(mb_language,             arginfo_mb_language)
522    PHP_FE(mb_internal_encoding,    arginfo_mb_internal_encoding)
523    PHP_FE(mb_http_input,           arginfo_mb_http_input)
524    PHP_FE(mb_http_output,          arginfo_mb_http_output)
525    PHP_FE(mb_detect_order,         arginfo_mb_detect_order)
526    PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
527    PHP_FE(mb_parse_str,            arginfo_mb_parse_str)
528    PHP_FE(mb_output_handler,       arginfo_mb_output_handler)
529    PHP_FE(mb_preferred_mime_name,  arginfo_mb_preferred_mime_name)
530    PHP_FE(mb_strlen,               arginfo_mb_strlen)
531    PHP_FE(mb_strpos,               arginfo_mb_strpos)
532    PHP_FE(mb_strrpos,              arginfo_mb_strrpos)
533    PHP_FE(mb_stripos,              arginfo_mb_stripos)
534    PHP_FE(mb_strripos,             arginfo_mb_strripos)
535    PHP_FE(mb_strstr,               arginfo_mb_strstr)
536    PHP_FE(mb_strrchr,              arginfo_mb_strrchr)
537    PHP_FE(mb_stristr,              arginfo_mb_stristr)
538    PHP_FE(mb_strrichr,             arginfo_mb_strrichr)
539    PHP_FE(mb_substr_count,         arginfo_mb_substr_count)
540    PHP_FE(mb_substr,               arginfo_mb_substr)
541    PHP_FE(mb_strcut,               arginfo_mb_strcut)
542    PHP_FE(mb_strwidth,             arginfo_mb_strwidth)
543    PHP_FE(mb_strimwidth,           arginfo_mb_strimwidth)
544    PHP_FE(mb_convert_encoding,     arginfo_mb_convert_encoding)
545    PHP_FE(mb_detect_encoding,      arginfo_mb_detect_encoding)
546    PHP_FE(mb_list_encodings,       arginfo_mb_list_encodings)
547    PHP_FE(mb_encoding_aliases,     arginfo_mb_encoding_aliases)
548    PHP_FE(mb_convert_kana,         arginfo_mb_convert_kana)
549    PHP_FE(mb_encode_mimeheader,    arginfo_mb_encode_mimeheader)
550    PHP_FE(mb_decode_mimeheader,    arginfo_mb_decode_mimeheader)
551    PHP_FE(mb_convert_variables,    arginfo_mb_convert_variables)
552    PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
553    PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
554    PHP_FE(mb_send_mail,            arginfo_mb_send_mail)
555    PHP_FE(mb_get_info,             arginfo_mb_get_info)
556    PHP_FE(mb_check_encoding,       arginfo_mb_check_encoding)
557#if HAVE_MBREGEX
558    PHP_MBREGEX_FUNCTION_ENTRIES
559#endif
560    PHP_FE_END
561};
562/* }}} */
563
564/* {{{ zend_module_entry mbstring_module_entry */
/* Module descriptor for the "mbstring" extension. It names the function
 * table, the module/request startup and shutdown hooks, the info hook, and
 * the module globals together with their constructor and destructor. */
565zend_module_entry mbstring_module_entry = {
566    STANDARD_MODULE_HEADER,
567    "mbstring",
568    mbstring_functions,
569    PHP_MINIT(mbstring),
570    PHP_MSHUTDOWN(mbstring),
571    PHP_RINIT(mbstring),
572    PHP_RSHUTDOWN(mbstring),
573    PHP_MINFO(mbstring),
    /* No explicit version string is supplied for this module. */
574    NO_VERSION_YET,
575    PHP_MODULE_GLOBALS(mbstring),
576    PHP_GINIT(mbstring),
577    PHP_GSHUTDOWN(mbstring),
    /* Unused slot; presumably the post-deactivate hook - confirm against
     * zend_modules.h. */
578    NULL,
579    STANDARD_MODULE_PROPERTIES_EX
580};
581/* }}} */
582
583/* {{{ static sapi_post_entry php_post_entries[] */
/* POST content-type table using the stock handlers: form data is read with
 * sapi_read_standard_form_data and handled by php_std_post_handler;
 * multipart bodies go to rfc1867_post_handler. The all-NULL row terminates
 * the table. Where the table is registered is not visible in this part of
 * the file. */
584static sapi_post_entry php_post_entries[] = {
585    { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
586    { MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
587    { NULL, 0, NULL, NULL }
588};
589/* }}} */
590
/* Emitted only when COMPILE_DL_MBSTRING is defined (presumably the
 * shared-object build, where the loader needs a get_module entry point -
 * confirm against the build configuration). */
591#ifdef COMPILE_DL_MBSTRING
592ZEND_GET_MODULE(mbstring)
593#endif
594
595/* {{{ allocators */
596static void *_php_mb_allocators_malloc(unsigned int sz)
597{
598    return emalloc(sz);
599}
600
601static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
602{
603    return erealloc(ptr, sz);
604}
605
606static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
607{
608    return ecalloc(nelems, szelem);
609}
610
611static void _php_mb_allocators_free(void *ptr)
612{
613    efree(ptr);
614}
615
616static void *_php_mb_allocators_pmalloc(unsigned int sz)
617{
618    return pemalloc(sz, 1);
619}
620
621static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
622{
623    return perealloc(ptr, sz, 1);
624}
625
626static void _php_mb_allocators_pfree(void *ptr)
627{
628    pefree(ptr, 1);
629}
630
631static mbfl_allocators _php_mb_allocators = {
632    _php_mb_allocators_malloc,
633    _php_mb_allocators_realloc,
634    _php_mb_allocators_calloc,
635    _php_mb_allocators_free,
636    _php_mb_allocators_pmalloc,
637    _php_mb_allocators_prealloc,
638    _php_mb_allocators_pfree
639};
640/* }}} */
641
642/* {{{ static sapi_post_entry mbstr_post_entries[] */
/* POST content-type table that routes form data to php_mb_post_handler
 * instead of the stock php_std_post_handler used in php_post_entries; the
 * multipart row is the same. The all-NULL row terminates the table. Where
 * the table is registered is not visible in this part of the file. */
643static sapi_post_entry mbstr_post_entries[] = {
644    { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
645    { MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
646    { NULL, 0, NULL, NULL }
647};
648/* }}} */
649
650/* {{{ static int php_mb_parse_encoding_list()
651 *  Return 0 if input contains any illegal encoding, otherwise 1.
652 *  Even if any illegal encoding is detected the result may contain a list
653 *  of parsed encodings.
654 */
655static int
656php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
657{
658    int n, l, size, bauto, ret = 1;
659    char *p, *p1, *p2, *endp, *tmpstr;
660    enum mbfl_no_encoding no_encoding;
661    enum mbfl_no_encoding *src, *entry, *list;
662
663    list = NULL;
664    if (value == NULL || value_length <= 0) {
665        if (return_list) {
666            *return_list = NULL;
667        }
668        if (return_size) {
669            *return_size = 0;
670        }
671        return 0;
672    } else {
673        enum mbfl_no_encoding *identify_list;
674        int identify_list_size;
675
676        identify_list = MBSTRG(default_detect_order_list);
677        identify_list_size = MBSTRG(default_detect_order_list_size);
678
679        /* copy the value string for work */
680        if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
681            tmpstr = (char *)estrndup(value+1, value_length-2);
682            value_length -= 2;
683        }
684        else
685            tmpstr = (char *)estrndup(value, value_length);
686        if (tmpstr == NULL) {
687            return 0;
688        }
689        /* count the number of listed encoding names */
690        endp = tmpstr + value_length;
691        n = 1;
692        p1 = tmpstr;
693        while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
694            p1 = p2 + 1;
695            n++;
696        }
697        size = n + identify_list_size;
698        /* make list */
699        list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
700        if (list != NULL) {
701            entry = list;
702            n = 0;
703            bauto = 0;
704            p1 = tmpstr;
705            do {
706                p2 = p = php_memnstr(p1, ",", 1, endp);
707                if (p == NULL) {
708                    p = endp;
709                }
710                *p = '\0';
711                /* trim spaces */
712                while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
713                    p1++;
714                }
715                p--;
716                while (p > p1 && (*p == ' ' || *p == '\t')) {
717                    *p = '\0';
718                    p--;
719                }
720                /* convert to the encoding number and check encoding */
721                if (strcasecmp(p1, "auto") == 0) {
722                    if (!bauto) {
723                        bauto = 1;
724                        l = identify_list_size;
725                        src = identify_list;
726                        while (l > 0) {
727                            *entry++ = *src++;
728                            l--;
729                            n++;
730                        }
731                    }
732                } else {
733                    no_encoding = mbfl_name2no_encoding(p1);
734                    if (no_encoding != mbfl_no_encoding_invalid) {
735                        *entry++ = no_encoding;
736                        n++;
737                    } else {
738                        ret = 0;
739                    }
740                }
741                p1 = p2 + 1;
742            } while (n < size && p2 != NULL);
743            if (n > 0) {
744                if (return_list) {
745                    *return_list = list;
746                } else {
747                    pefree(list, persistent);
748                }
749            } else {
750                pefree(list, persistent);
751                if (return_list) {
752                    *return_list = NULL;
753                }
754                ret = 0;
755            }
756            if (return_size) {
757                *return_size = n;
758            }
759        } else {
760            if (return_list) {
761                *return_list = NULL;
762            }
763            if (return_size) {
764                *return_size = 0;
765            }
766            ret = 0;
767        }
768        efree(tmpstr);
769    }
770
771    return ret;
772}
773/* }}} */
774
775/* {{{ MBSTRING_API php_mb_check_encoding_list */
776MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
777    return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
778}
779/* }}} */
780
781/* {{{ static int php_mb_parse_encoding_array()
782 *  Return 0 if input contains any illegal encoding, otherwise 1.
783 *  Even if any illegal encoding is detected the result may contain a list
784 *  of parsed encodings.
785 */
786static int
787php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
788{
789    zval **hash_entry;
790    HashTable *target_hash;
791    int i, n, l, size, bauto,ret = 1;
792    enum mbfl_no_encoding no_encoding;
793    enum mbfl_no_encoding *src, *list, *entry;
794
795    list = NULL;
796    if (Z_TYPE_P(array) == IS_ARRAY) {
797        enum mbfl_no_encoding *identify_list;
798        int identify_list_size;
799
800        identify_list = MBSTRG(default_detect_order_list);
801        identify_list_size = MBSTRG(default_detect_order_list_size);
802
803        target_hash = Z_ARRVAL_P(array);
804        zend_hash_internal_pointer_reset(target_hash);
805        i = zend_hash_num_elements(target_hash);
806        size = i + identify_list_size;
807        list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
808        if (list != NULL) {
809            entry = list;
810            bauto = 0;
811            n = 0;
812            while (i > 0) {
813                if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
814                    break;
815                }
816                convert_to_string_ex(hash_entry);
817                if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
818                    if (!bauto) {
819                        bauto = 1;
820                        l = identify_list_size;
821                        src = identify_list;
822                        while (l > 0) {
823                            *entry++ = *src++;
824                            l--;
825                            n++;
826                        }
827                    }
828                } else {
829                    no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
830                    if (no_encoding != mbfl_no_encoding_invalid) {
831                        *entry++ = no_encoding;
832                        n++;
833                    } else {
834                        ret = 0;
835                    }
836                }
837                zend_hash_move_forward(target_hash);
838                i--;
839            }
840            if (n > 0) {
841                if (return_list) {
842                    *return_list = list;
843                } else {
844                    pefree(list, persistent);
845                }
846            } else {
847                pefree(list, persistent);
848                if (return_list) {
849                    *return_list = NULL;
850                }
851                ret = 0;
852            }
853            if (return_size) {
854                *return_size = n;
855            }
856        } else {
857            if (return_list) {
858                *return_list = NULL;
859            }
860            if (return_size) {
861                *return_size = 0;
862            }
863            ret = 0;
864        }
865    }
866
867    return ret;
868}
869/* }}} */
870
/* Forward declarations of the opaque regex wrappers. The definitions that
 * follow are selected by HAVE_ONIG, or else by HAVE_PCRE / HAVE_BUNDLED_PCRE.
 * NOTE(review): when none of those macros is set, no definition is provided
 * in the visible code. */
871static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
872static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
873static void _php_mb_free_regex(void *opaque);
874
875#if HAVE_ONIG
876/* {{{ _php_mb_compile_regex */
877static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
878{
879    php_mb_regex_t *retval;
880    OnigErrorInfo err_info;
881    int err_code;
882
883    if ((err_code = onig_new(&retval,
884            (const OnigUChar *)pattern,
885            (const OnigUChar *)pattern + strlen(pattern),
886            ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
887            ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
888        OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
889        onig_error_code_to_str(err_str, err_code, err_info);
890        php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
891        retval = NULL;
892    }
893    return retval;
894}
895/* }}} */
896
897/* {{{ _php_mb_match_regex */
898static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
899{
900    return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
901            (const OnigUChar*)str + str_len, (const OnigUChar *)str,
902            (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
903}
904/* }}} */
905
906/* {{{ _php_mb_free_regex */
907static void _php_mb_free_regex(void *opaque)
908{
909    onig_free((php_mb_regex_t *)opaque);
910}
911/* }}} */
912#elif HAVE_PCRE || HAVE_BUNDLED_PCRE
913/* {{{ _php_mb_compile_regex */
914static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
915{
916    pcre *retval;
917    const char *err_str;
918    int err_offset;
919
920    if (!(retval = pcre_compile(pattern,
921            PCRE_CASELESS, &err_str, &err_offset, NULL))) {
922        php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
923    }
924    return retval;
925}
926/* }}} */
927
928/* {{{ _php_mb_match_regex */
929static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
930{
931    return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
932            0, NULL, 0) >= 0;
933}
934/* }}} */
935
936/* {{{ _php_mb_free_regex */
/* Releases a pattern previously returned by _php_mb_compile_regex()
 * (PCRE build). */
static void _php_mb_free_regex(void *opaque)
{
    void *compiled = opaque;

    pcre_free(compiled);
}
941/* }}} */
942#endif
943
944/* {{{ php_mb_nls_get_default_detect_order_list */
945static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
946{
947    size_t i;
948
949    *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
950    *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
951
952    for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
953        if (php_mb_default_identify_list[i].lang == lang) {
954            *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
955            *plist_size = php_mb_default_identify_list[i].list_size;
956            return 1;
957        }
958    }
959    return 0;
960}
961/* }}} */
962
963/* {{{ php.ini directive handler */
964/* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
965static PHP_INI_MH(OnUpdate_mbstring_language)
966{
967    enum mbfl_no_language no_language;
968
969    no_language = mbfl_name2no_language(new_value);
970    if (no_language == mbfl_no_language_invalid) {
971        MBSTRG(language) = mbfl_no_language_neutral;
972        return FAILURE;
973    }
974    MBSTRG(language) = no_language;
975    php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
976    return SUCCESS;
977}
978/* }}} */
979
980/* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
981static PHP_INI_MH(OnUpdate_mbstring_detect_order)
982{
983    enum mbfl_no_encoding *list;
984    int size;
985
986    if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
987        if (MBSTRG(detect_order_list)) {
988            free(MBSTRG(detect_order_list));
989        }
990        MBSTRG(detect_order_list) = list;
991        MBSTRG(detect_order_list_size) = size;
992    } else {
993        if (MBSTRG(detect_order_list)) {
994            free(MBSTRG(detect_order_list));
995            MBSTRG(detect_order_list) = NULL;
996        }
997        return FAILURE;
998    }
999
1000    return SUCCESS;
1001}
1002/* }}} */
1003
1004/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
1005static PHP_INI_MH(OnUpdate_mbstring_http_input)
1006{
1007    enum mbfl_no_encoding *list;
1008    int size;
1009
1010    if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1011        if (MBSTRG(http_input_list)) {
1012            free(MBSTRG(http_input_list));
1013        }
1014        MBSTRG(http_input_list) = list;
1015        MBSTRG(http_input_list_size) = size;
1016    } else {
1017        if (MBSTRG(http_input_list)) {
1018            free(MBSTRG(http_input_list));
1019            MBSTRG(http_input_list) = NULL;
1020        }
1021        MBSTRG(http_input_list_size) = 0;
1022        return FAILURE;
1023    }
1024
1025    return SUCCESS;
1026}
1027/* }}} */
1028
1029/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
1030static PHP_INI_MH(OnUpdate_mbstring_http_output)
1031{
1032    enum mbfl_no_encoding no_encoding;
1033
1034    no_encoding = mbfl_name2no_encoding(new_value);
1035    if (no_encoding != mbfl_no_encoding_invalid) {
1036        MBSTRG(http_output_encoding) = no_encoding;
1037        MBSTRG(current_http_output_encoding) = no_encoding;
1038    } else {
1039        MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
1040        MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
1041        if (new_value != NULL && new_value_length > 0) {
1042            return FAILURE;
1043        }
1044    }
1045
1046    return SUCCESS;
1047}
1048/* }}} */
1049
1050/* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
1051int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
1052{
1053    enum mbfl_no_encoding no_encoding;
1054    const char *enc_name = NULL;
1055    uint enc_name_len = 0;
1056
1057    no_encoding = new_value ? mbfl_name2no_encoding(new_value):
1058                mbfl_no_encoding_invalid;
1059    if (no_encoding != mbfl_no_encoding_invalid) {
1060        enc_name = new_value;
1061        enc_name_len = new_value_length;
1062    } else {
1063        switch (MBSTRG(language)) {
1064            case mbfl_no_language_uni:
1065                enc_name = "UTF-8";
1066                enc_name_len = sizeof("UTF-8") - 1;
1067                break;
1068            case mbfl_no_language_japanese:
1069                enc_name = "EUC-JP";
1070                enc_name_len = sizeof("EUC-JP") - 1;
1071                break;
1072            case mbfl_no_language_korean:
1073                enc_name = "EUC-KR";
1074                enc_name_len = sizeof("EUC-KR") - 1;
1075                break;
1076            case mbfl_no_language_simplified_chinese:
1077                enc_name = "EUC-CN";
1078                enc_name_len = sizeof("EUC-CN") - 1;
1079                break;
1080            case mbfl_no_language_traditional_chinese:
1081                enc_name = "EUC-TW";
1082                enc_name_len = sizeof("EUC-TW") - 1;
1083                break;
1084            case mbfl_no_language_russian:
1085                enc_name = "KOI8-R";
1086                enc_name_len = sizeof("KOI8-R") - 1;
1087                break;
1088            case mbfl_no_language_german:
1089                enc_name = "ISO-8859-15";
1090                enc_name_len = sizeof("ISO-8859-15") - 1;
1091                break;
1092            case mbfl_no_language_armenian:
1093                enc_name = "ArmSCII-8";
1094                enc_name_len = sizeof("ArmSCII-8") - 1;
1095                break;
1096            case mbfl_no_language_turkish:
1097                enc_name = "ISO-8859-9";
1098                enc_name_len = sizeof("ISO-8859-9") - 1;
1099                break;
1100            default:
1101                enc_name = "ISO-8859-1";
1102                enc_name_len = sizeof("ISO-8859-1") - 1;
1103                break;
1104        }
1105        no_encoding = mbfl_name2no_encoding(enc_name);
1106    }
1107    MBSTRG(internal_encoding) = no_encoding;
1108    MBSTRG(current_internal_encoding) = no_encoding;
1109#if HAVE_MBREGEX
1110    {
1111        const char *enc_name = new_value;
1112        if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
1113            /* falls back to EUC-JP if an unknown encoding name is given */
1114            enc_name = "EUC-JP";
1115            php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
1116        }
1117        php_mb_regex_set_mbctype(new_value TSRMLS_CC);
1118    }
1119#endif
1120    return SUCCESS;
1121}
1122/* }}} */
1123
1124/* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
1125static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1126{
1127    if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
1128            || stage == PHP_INI_STAGE_RUNTIME) {
1129        return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1130    } else {
1131        /* the corresponding mbstring globals needs to be set according to the
1132         * ini value in the later stage because it never falls back to the
1133         * default value if 1. no value for mbstring.internal_encoding is given,
1134         * 2. mbstring.language directive is processed in per-dir or runtime
1135         * context and 3. call to the handler for mbstring.language is done
1136         * after mbstring.internal_encoding is handled. */
1137        return SUCCESS;
1138    }
1139}
1140/* }}} */
1141
1142#ifdef ZEND_MULTIBYTE
1143/* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */
1144static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
1145{
1146    int *list, size;
1147
1148    if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1149        if (MBSTRG(script_encoding_list) != NULL) {
1150            free(MBSTRG(script_encoding_list));
1151        }
1152        MBSTRG(script_encoding_list) = list;
1153        MBSTRG(script_encoding_list_size) = size;
1154    } else {
1155        if (MBSTRG(script_encoding_list) != NULL) {
1156            free(MBSTRG(script_encoding_list));
1157        }
1158        MBSTRG(script_encoding_list) = NULL;
1159        MBSTRG(script_encoding_list_size) = 0;
1160        return FAILURE;
1161    }
1162
1163    return SUCCESS;
1164}
1165/* }}} */
1166#endif /* ZEND_MULTIBYTE */
1167
1168/* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
1169static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1170{
1171    int c;
1172    char *endptr = NULL;
1173
1174    if (new_value != NULL) {
1175        if (strcasecmp("none", new_value) == 0) {
1176            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1177            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1178        } else if (strcasecmp("long", new_value) == 0) {
1179            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1180            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1181        } else if (strcasecmp("entity", new_value) == 0) {
1182            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1183            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1184        } else {
1185            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1186            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1187            if (new_value_length >0) {
1188                c = strtol(new_value, &endptr, 0);
1189                if (*endptr == '\0') {
1190                    MBSTRG(filter_illegal_substchar) = c;
1191                    MBSTRG(current_filter_illegal_substchar) = c;
1192                }
1193            }
1194        }
1195    } else {
1196        MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1197        MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1198        MBSTRG(filter_illegal_substchar) = 0x3f;    /* '?' */
1199        MBSTRG(current_filter_illegal_substchar) = 0x3f;    /* '?' */
1200    }
1201
1202    return SUCCESS;
1203}
1204/* }}} */
1205
1206/* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
1207static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1208{
1209    if (new_value == NULL) {
1210       return FAILURE;
1211    }
1212
1213    OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1214
1215    if (MBSTRG(encoding_translation)) {
1216        sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
1217        sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1218    } else {
1219        sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
1220        sapi_register_post_entries(php_post_entries TSRMLS_CC);
1221    }
1222
1223    return SUCCESS;
1224}
1225/* }}} */
1226
1227/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */
1228static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1229{
1230    zval tmp;
1231    void *re = NULL;
1232
1233    if (!new_value) {
1234        new_value = entry->orig_value;
1235        new_value_length = entry->orig_value_length;
1236    }
1237    php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
1238
1239    if (Z_STRLEN(tmp) > 0) {
1240        if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
1241            zval_dtor(&tmp);
1242            return FAILURE;
1243        }
1244    }
1245
1246    if (MBSTRG(http_output_conv_mimetypes)) {
1247        _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1248    }
1249
1250    MBSTRG(http_output_conv_mimetypes) = re;
1251
1252    zval_dtor(&tmp);
1253    return SUCCESS;
1254}
1255/* }}} */
1256/* }}} */
1257
1258/* {{{ php.ini directive registration */
/* Registration table for the mbstring.* ini directives. Each entry gives the
 * directive name, its default value, the contexts in which it may be changed
 * and the update handler. STD_* entries additionally name the field of
 * zend_mbstring_globals that the handler writes to. */
PHP_INI_BEGIN()
    PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
    PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
    PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
    PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
    PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
#ifdef ZEND_MULTIBYTE
    PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
#endif /* ZEND_MULTIBYTE */
    PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
    /* func_overload can only be changed at system level. */
    STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
    PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

    /* The custom handler also swaps the SAPI post entries. */
    STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
        PHP_INI_SYSTEM | PHP_INI_PERDIR,
        OnUpdate_mbstring_encoding_translation,
        encoding_translation, zend_mbstring_globals, mbstring_globals)
    /* The default pattern matches text/... and application/xhtml+xml. */
    PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
        "^(text/|application/xhtml\\+xml)",
        PHP_INI_ALL,
        OnUpdate_mbstring_http_output_conv_mimetypes)

    /* NOTE(review): declared as a boolean directive but updated through
     * OnUpdateLong; presumably strict_detection is a long - confirm against
     * the globals declaration. */
    STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
        PHP_INI_ALL,
        OnUpdateLong,
        strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
1286/* }}} */
1287
1288/* {{{ module global initialize handler */
1289static PHP_GINIT_FUNCTION(mbstring)
1290{
1291    mbstring_globals->language = mbfl_no_language_uni;
1292    mbstring_globals->internal_encoding = mbfl_no_encoding_invalid;
1293    mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1294#ifdef ZEND_MULTIBYTE
1295    mbstring_globals->script_encoding_list = NULL;
1296    mbstring_globals->script_encoding_list_size = 0;
1297#endif /* ZEND_MULTIBYTE */
1298    mbstring_globals->http_output_encoding = mbfl_no_encoding_pass;
1299    mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass;
1300    mbstring_globals->http_input_identify = mbfl_no_encoding_invalid;
1301    mbstring_globals->http_input_identify_get = mbfl_no_encoding_invalid;
1302    mbstring_globals->http_input_identify_post = mbfl_no_encoding_invalid;
1303    mbstring_globals->http_input_identify_cookie = mbfl_no_encoding_invalid;
1304    mbstring_globals->http_input_identify_string = mbfl_no_encoding_invalid;
1305    mbstring_globals->http_input_list = NULL;
1306    mbstring_globals->http_input_list_size = 0;
1307    mbstring_globals->detect_order_list = NULL;
1308    mbstring_globals->detect_order_list_size = 0;
1309    mbstring_globals->current_detect_order_list = NULL;
1310    mbstring_globals->current_detect_order_list_size = 0;
1311    mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1312    mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1313    mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1314    mbstring_globals->filter_illegal_substchar = 0x3f;  /* '?' */
1315    mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1316    mbstring_globals->current_filter_illegal_substchar = 0x3f;  /* '?' */
1317    mbstring_globals->illegalchars = 0;
1318    mbstring_globals->func_overload = 0;
1319    mbstring_globals->encoding_translation = 0;
1320    mbstring_globals->strict_detection = 0;
1321    mbstring_globals->outconv = NULL;
1322    mbstring_globals->http_output_conv_mimetypes = NULL;
1323#if HAVE_MBREGEX
1324    mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
1325#endif
1326}
1327/* }}} */
1328
1329/* {{{ PHP_GSHUTDOWN_FUNCTION */
1330static PHP_GSHUTDOWN_FUNCTION(mbstring)
1331{
1332    if (mbstring_globals->http_input_list) {
1333        free(mbstring_globals->http_input_list);
1334    }
1335#ifdef ZEND_MULTIBYTE
1336    if (mbstring_globals->script_encoding_list) {
1337        free(mbstring_globals->script_encoding_list);
1338    }
1339#endif /* ZEND_MULTIBYTE */
1340    if (mbstring_globals->detect_order_list) {
1341        free(mbstring_globals->detect_order_list);
1342    }
1343    if (mbstring_globals->http_output_conv_mimetypes) {
1344        _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1345    }
1346#if HAVE_MBREGEX
1347    php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
1348#endif
1349}
1350/* }}} */
1351
1352/* {{{ PHP_MINIT_FUNCTION(mbstring) */
/* Module startup: installs the libmbfl allocators, registers the ini
 * entries, the treat-data handler, the post entries (when encoding
 * translation is enabled) and the MB_* constants, then starts the mbregex
 * sub-module if it is compiled in. Always returns SUCCESS. */
PHP_MINIT_FUNCTION(mbstring)
{
    /* Point libmbfl at the allocator table defined for PHP. */
    __mbfl_allocators = &_php_mb_allocators;

    REGISTER_INI_ENTRIES();

    /* This is a global handler. Should not be set in a per-request handler. */
    sapi_register_treat_data(mbstr_treat_data);

    /* Post handlers are stored in the thread-local context. */
    if (MBSTRG(encoding_translation)) {
        sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
    }

    /* Bit flags accepted by mbstring.func_overload. */
    REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);

    /* Case conversion modes. */
    REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);

#if HAVE_MBREGEX
    PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
    return SUCCESS;
}
1380/* }}} */
1381
1382/* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
1383PHP_MSHUTDOWN_FUNCTION(mbstring)
1384{
1385    UNREGISTER_INI_ENTRIES();
1386
1387#if HAVE_MBREGEX
1388    PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1389#endif
1390
1391    return SUCCESS;
1392}
1393/* }}} */
1394
1395/* {{{ PHP_RINIT_FUNCTION(mbstring) */
1396PHP_RINIT_FUNCTION(mbstring)
1397{
1398    int n;
1399    enum mbfl_no_encoding *list=NULL, *entry;
1400    zend_function *func, *orig;
1401    const struct mb_overload_def *p;
1402
1403    MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1404    MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1405    MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1406    MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1407
1408    MBSTRG(illegalchars) = 0;
1409
1410    n = 0;
1411    if (MBSTRG(detect_order_list)) {
1412        list = MBSTRG(detect_order_list);
1413        n = MBSTRG(detect_order_list_size);
1414    }
1415    if (n <= 0) {
1416        list = MBSTRG(default_detect_order_list);
1417        n = MBSTRG(default_detect_order_list_size);
1418    }
1419    entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0);
1420    MBSTRG(current_detect_order_list) = entry;
1421    MBSTRG(current_detect_order_list_size) = n;
1422    while (n > 0) {
1423        *entry++ = *list++;
1424        n--;
1425    }
1426
1427    /* override original function. */
1428    if (MBSTRG(func_overload)){
1429        p = &(mb_ovld[0]);
1430
1431        while (p->type > 0) {
1432            if ((MBSTRG(func_overload) & p->type) == p->type &&
1433                zend_hash_find(EG(function_table), p->save_func,
1434                    strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
1435
1436                zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
1437
1438                if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
1439                    php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1440                    return FAILURE;
1441                } else {
1442                    zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
1443
1444                    if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
1445                        NULL) == FAILURE) {
1446                        php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1447                        return FAILURE;
1448                    }
1449                }
1450            }
1451            p++;
1452        }
1453    }
1454#if HAVE_MBREGEX
1455    PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1456#endif
1457#ifdef ZEND_MULTIBYTE
1458    zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC);
1459    php_mb_set_zend_encoding(TSRMLS_C);
1460#endif /* ZEND_MULTIBYTE */
1461
1462    return SUCCESS;
1463}
1464/* }}} */
1465
1466/* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
1467PHP_RSHUTDOWN_FUNCTION(mbstring)
1468{
1469    const struct mb_overload_def *p;
1470    zend_function *orig;
1471
1472    if (MBSTRG(current_detect_order_list) != NULL) {
1473        efree(MBSTRG(current_detect_order_list));
1474        MBSTRG(current_detect_order_list) = NULL;
1475        MBSTRG(current_detect_order_list_size) = 0;
1476    }
1477    if (MBSTRG(outconv) != NULL) {
1478        MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1479        mbfl_buffer_converter_delete(MBSTRG(outconv));
1480        MBSTRG(outconv) = NULL;
1481    }
1482
1483    /* clear http input identification. */
1484    MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
1485    MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
1486    MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
1487    MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
1488    MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
1489
1490    /*  clear overloaded function. */
1491    if (MBSTRG(func_overload)){
1492        p = &(mb_ovld[0]);
1493        while (p->type > 0) {
1494            if ((MBSTRG(func_overload) & p->type) == p->type &&
1495                zend_hash_find(EG(function_table), p->save_func,
1496                               strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
1497
1498                zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
1499                zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
1500            }
1501            p++;
1502        }
1503    }
1504
1505#if HAVE_MBREGEX
1506    PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1507#endif
1508
1509    return SUCCESS;
1510}
1511/* }}} */
1512
1513/* {{{ PHP_MINFO_FUNCTION(mbstring) */
1514PHP_MINFO_FUNCTION(mbstring)
1515{
1516    php_info_print_table_start();
1517    php_info_print_table_row(2, "Multibyte Support", "enabled");
1518    php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1519    php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1520    php_info_print_table_end();
1521
1522    php_info_print_table_start();
1523    php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1524    php_info_print_table_end();
1525
1526#if HAVE_MBREGEX
1527    PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1528#endif
1529
1530    DISPLAY_INI_ENTRIES();
1531}
1532/* }}} */
1533
1534/* {{{ proto string mb_language([string language])
1535   Sets the current language or Returns the current language as a string */
1536PHP_FUNCTION(mb_language)
1537{
1538    char *name = NULL;
1539    int name_len = 0;
1540
1541    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1542        return;
1543    }
1544    if (name == NULL) {
1545        RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
1546    } else {
1547        if (FAILURE == zend_alter_ini_entry(
1548                "mbstring.language", sizeof("mbstring.language"),
1549                name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1550            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
1551            RETVAL_FALSE;
1552        } else {
1553            RETVAL_TRUE;
1554        }
1555    }
1556}
1557/* }}} */
1558
1559/* {{{ proto string mb_internal_encoding([string encoding])
1560   Sets the current internal encoding or Returns the current internal encoding as a string */
1561PHP_FUNCTION(mb_internal_encoding)
1562{
1563    char *name = NULL;
1564    int name_len;
1565    enum mbfl_no_encoding no_encoding;
1566
1567    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1568        RETURN_FALSE;
1569    }
1570    if (name == NULL) {
1571        name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
1572        if (name != NULL) {
1573            RETURN_STRING(name, 1);
1574        } else {
1575            RETURN_FALSE;
1576        }
1577    } else {
1578        no_encoding = mbfl_name2no_encoding(name);
1579        if (no_encoding == mbfl_no_encoding_invalid) {
1580            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1581            RETURN_FALSE;
1582        } else {
1583            MBSTRG(current_internal_encoding) = no_encoding;
1584#ifdef ZEND_MULTIBYTE
1585            /* TODO: make independent from mbstring.encoding_translation? */
1586            if (MBSTRG(encoding_translation)) {
1587                zend_multibyte_set_internal_encoding(name TSRMLS_CC);
1588            }
1589#endif /* ZEND_MULTIBYTE */
1590            RETURN_TRUE;
1591        }
1592    }
1593}
1594/* }}} */
1595
1596/* {{{ proto mixed mb_http_input([string type])
1597   Returns the input encoding */
1598PHP_FUNCTION(mb_http_input)
1599{
1600    char *typ = NULL;
1601    int typ_len;
1602    int retname, n;
1603    char *name, *list, *temp;
1604    enum mbfl_no_encoding *entry;
1605    enum mbfl_no_encoding result = mbfl_no_encoding_invalid;
1606
1607    retname = 1;
1608    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
1609        RETURN_FALSE;
1610    }
1611    if (typ == NULL) {
1612        result = MBSTRG(http_input_identify);
1613    } else {
1614        switch (*typ) {
1615        case 'G':
1616        case 'g':
1617            result = MBSTRG(http_input_identify_get);
1618            break;
1619        case 'P':
1620        case 'p':
1621            result = MBSTRG(http_input_identify_post);
1622            break;
1623        case 'C':
1624        case 'c':
1625            result = MBSTRG(http_input_identify_cookie);
1626            break;
1627        case 'S':
1628        case 's':
1629            result = MBSTRG(http_input_identify_string);
1630            break;
1631        case 'I':
1632        case 'i':
1633            array_init(return_value);
1634            entry = MBSTRG(http_input_list);
1635            n = MBSTRG(http_input_list_size);
1636            while (n > 0) {
1637                name = (char *)mbfl_no_encoding2name(*entry);
1638                if (name) {
1639                    add_next_index_string(return_value, name, 1);
1640                }
1641                entry++;
1642                n--;
1643            }
1644            retname = 0;
1645            break;
1646        case 'L':
1647        case 'l':
1648            entry = MBSTRG(http_input_list);
1649            n = MBSTRG(http_input_list_size);
1650            list = NULL;
1651            while (n > 0) {
1652                name = (char *)mbfl_no_encoding2name(*entry);
1653                if (name) {
1654                    if (list) {
1655                        temp = list;
1656                        spprintf(&list, 0, "%s,%s", temp, name);
1657                        efree(temp);
1658                        if (!list) {
1659                            break;
1660                        }
1661                    } else {
1662                        list = estrdup(name);
1663                    }
1664                }
1665                entry++;
1666                n--;
1667            }
1668            if (!list) {
1669                RETURN_FALSE;
1670            }
1671            RETVAL_STRING(list, 0);
1672            retname = 0;
1673            break;
1674        default:
1675            result = MBSTRG(http_input_identify);
1676            break;
1677        }
1678    }
1679
1680    if (retname) {
1681        if (result != mbfl_no_encoding_invalid &&
1682            (name = (char *)mbfl_no_encoding2name(result)) != NULL) {
1683            RETVAL_STRING(name, 1);
1684        } else {
1685            RETVAL_FALSE;
1686        }
1687    }
1688}
1689/* }}} */
1690
1691/* {{{ proto string mb_http_output([string encoding])
1692   Sets the current output_encoding or returns the current output_encoding as a string */
1693PHP_FUNCTION(mb_http_output)
1694{
1695    char *name = NULL;
1696    int name_len;
1697    enum mbfl_no_encoding no_encoding;
1698
1699    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
1700        RETURN_FALSE;
1701    }
1702
1703    if (name == NULL) {
1704        name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding));
1705        if (name != NULL) {
1706            RETURN_STRING(name, 1);
1707        } else {
1708            RETURN_FALSE;
1709        }
1710    } else {
1711        no_encoding = mbfl_name2no_encoding(name);
1712        if (no_encoding == mbfl_no_encoding_invalid) {
1713            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1714            RETURN_FALSE;
1715        } else {
1716            MBSTRG(current_http_output_encoding) = no_encoding;
1717            RETURN_TRUE;
1718        }
1719    }
1720}
1721/* }}} */
1722
1723/* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1724   Sets the current detect_order or Return the current detect_order as a array */
1725PHP_FUNCTION(mb_detect_order)
1726{
1727    zval **arg1 = NULL;
1728    int n, size;
1729    enum mbfl_no_encoding *list, *entry;
1730    char *name;
1731
1732    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1733        return;
1734    }
1735
1736    if (!arg1) {
1737        array_init(return_value);
1738        entry = MBSTRG(current_detect_order_list);
1739        n = MBSTRG(current_detect_order_list_size);
1740        while (n > 0) {
1741            name = (char *)mbfl_no_encoding2name(*entry);
1742            if (name) {
1743                add_next_index_string(return_value, name, 1);
1744            }
1745            entry++;
1746            n--;
1747        }
1748    } else {
1749        list = NULL;
1750        size = 0;
1751        switch (Z_TYPE_PP(arg1)) {
1752        case IS_ARRAY:
1753            if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
1754                if (list) {
1755                    efree(list);
1756                }
1757                RETURN_FALSE;
1758            }
1759            break;
1760        default:
1761            convert_to_string_ex(arg1);
1762            if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
1763                if (list) {
1764                    efree(list);
1765                }
1766                RETURN_FALSE;
1767            }
1768            break;
1769        }
1770
1771        if (list == NULL) {
1772            RETURN_FALSE;
1773        }
1774
1775        if (MBSTRG(current_detect_order_list)) {
1776            efree(MBSTRG(current_detect_order_list));
1777        }
1778        MBSTRG(current_detect_order_list) = list;
1779        MBSTRG(current_detect_order_list_size) = size;
1780        RETURN_TRUE;
1781    }
1782}
1783/* }}} */
1784
1785/* {{{ proto mixed mb_substitute_character([mixed substchar])
1786   Sets the current substitute_character or returns the current substitute_character */
1787PHP_FUNCTION(mb_substitute_character)
1788{
1789    zval **arg1 = NULL;
1790
1791    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1792        return;
1793    }
1794
1795    if (!arg1) {
1796        if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1797            RETURN_STRING("none", 1);
1798        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1799            RETURN_STRING("long", 1);
1800        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1801            RETURN_STRING("entity", 1);
1802        } else {
1803            RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1804        }
1805    } else {
1806        RETVAL_TRUE;
1807
1808        switch (Z_TYPE_PP(arg1)) {
1809        case IS_STRING:
1810            if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1811                MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1812            } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1813                MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1814            } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1815                MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1816            } else {
1817                convert_to_long_ex(arg1);
1818
1819                if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1820                    MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1821                    MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1822                } else {
1823                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1824                    RETURN_FALSE;
1825                }
1826            }
1827            break;
1828        default:
1829            convert_to_long_ex(arg1);
1830            if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1831                MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1832                MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1833            } else {
1834                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1835                RETURN_FALSE;
1836            }
1837            break;
1838        }
1839    }
1840}
1841/* }}} */
1842
1843/* {{{ proto string mb_preferred_mime_name(string encoding)
1844   Return the preferred MIME name (charset) as a string */
1845PHP_FUNCTION(mb_preferred_mime_name)
1846{
1847    enum mbfl_no_encoding no_encoding;
1848    char *name = NULL;
1849    int name_len;
1850
1851    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
1852        return;
1853    } else {
1854        no_encoding = mbfl_name2no_encoding(name);
1855        if (no_encoding == mbfl_no_encoding_invalid) {
1856            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1857            RETVAL_FALSE;
1858        } else {
1859            const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
1860            if (preferred_name == NULL || *preferred_name == '\0') {
1861                php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
1862                RETVAL_FALSE;
1863            } else {
1864                RETVAL_STRING((char *)preferred_name, 1);
1865            }
1866        }
1867    }
1868}
1869/* }}} */
1870
/* Byte-range predicates; each evaluates to 1 or 0. Note that the argument is
 * evaluated more than once.
 * IS_SJIS1(c): c is within 0x81..0x9f or 0xe0..0xf5.
 * IS_SJIS2(c): c is within 0x40..0x7e or 0x80..0xfc.
 * Going by the names, these are presumably the first-byte and second-byte
 * ranges of a double-byte SJIS character. */
#define IS_SJIS1(c) \
    (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) \
    (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
1873
1874/* {{{ proto bool mb_parse_str(string encoded_string [, array result])
1875   Parses GET/POST/COOKIE data and sets global variables */
1876PHP_FUNCTION(mb_parse_str)
1877{
1878    zval *track_vars_array = NULL;
1879    char *encstr = NULL;
1880    int encstr_len;
1881    php_mb_encoding_handler_info_t info;
1882    enum mbfl_no_encoding detected;
1883
1884    track_vars_array = NULL;
1885    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
1886        return;
1887    }
1888
1889    /* Clear out the array */
1890    if (track_vars_array != NULL) {
1891        zval_dtor(track_vars_array);
1892        array_init(track_vars_array);
1893    }
1894
1895    encstr = estrndup(encstr, encstr_len);
1896
1897    info.data_type              = PARSE_STRING;
1898    info.separator              = PG(arg_separator).input;
1899    info.force_register_globals = (track_vars_array == NULL);
1900    info.report_errors          = 1;
1901    info.to_encoding            = MBSTRG(current_internal_encoding);
1902    info.to_language            = MBSTRG(language);
1903    info.from_encodings         = MBSTRG(http_input_list);
1904    info.num_from_encodings     = MBSTRG(http_input_list_size);
1905    info.from_language          = MBSTRG(language);
1906
1907    detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
1908
1909    MBSTRG(http_input_identify) = detected;
1910
1911    RETVAL_BOOL(detected != mbfl_no_encoding_invalid);
1912
1913    if (encstr != NULL) efree(encstr);
1914}
1915/* }}} */
1916
1917/* {{{ proto string mb_output_handler(string contents, int status)
1918   Returns string in output buffer converted to the http_output encoding */
1919PHP_FUNCTION(mb_output_handler)
1920{
1921    char *arg_string;
1922    int arg_string_len;
1923    long arg_status;
1924    mbfl_string string, result;
1925    const char *charset;
1926    char *p;
1927    enum mbfl_no_encoding encoding;
1928    int last_feed, len;
1929    unsigned char send_text_mimetype = 0;
1930    char *s, *mimetype = NULL;
1931
1932    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
1933        return;
1934    }
1935
1936    encoding = MBSTRG(current_http_output_encoding);
1937
1938    /* start phase only */
1939    if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
1940        /* delete the converter just in case. */
1941        if (MBSTRG(outconv)) {
1942            MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1943            mbfl_buffer_converter_delete(MBSTRG(outconv));
1944            MBSTRG(outconv) = NULL;
1945        }
1946        if (encoding == mbfl_no_encoding_pass) {
1947            RETURN_STRINGL(arg_string, arg_string_len, 1);
1948        }
1949
1950        /* analyze mime type */
1951        if (SG(sapi_headers).mimetype &&
1952            _php_mb_match_regex(
1953                MBSTRG(http_output_conv_mimetypes),
1954                SG(sapi_headers).mimetype,
1955                strlen(SG(sapi_headers).mimetype))) {
1956            if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
1957                mimetype = estrdup(SG(sapi_headers).mimetype);
1958            } else {
1959                mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
1960            }
1961            send_text_mimetype = 1;
1962        } else if (SG(sapi_headers).send_default_content_type) {
1963            mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
1964        }
1965
1966        /* if content-type is not yet set, set it and activate the converter */
1967        if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
1968            charset = mbfl_no2preferred_mime_name(encoding);
1969            if (charset) {
1970                len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
1971                if (sapi_add_header(p, len, 0) != FAILURE) {
1972                    SG(sapi_headers).send_default_content_type = 0;
1973                }
1974            }
1975            /* activate the converter */
1976            MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
1977            if (send_text_mimetype){
1978                efree(mimetype);
1979            }
1980        }
1981    }
1982
1983    /* just return if the converter is not activated. */
1984    if (MBSTRG(outconv) == NULL) {
1985        RETURN_STRINGL(arg_string, arg_string_len, 1);
1986    }
1987
1988    /* flag */
1989    last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
1990    /* mode */
1991    mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
1992    mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
1993
1994    /* feed the string */
1995    mbfl_string_init(&string);
1996    string.no_language = MBSTRG(language);
1997    string.no_encoding = MBSTRG(current_internal_encoding);
1998    string.val = (unsigned char *)arg_string;
1999    string.len = arg_string_len;
2000    mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2001    if (last_feed) {
2002        mbfl_buffer_converter_flush(MBSTRG(outconv));
2003    }
2004    /* get the converter output, and return it */
2005    mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2006    RETVAL_STRINGL((char *)result.val, result.len, 0);      /* the string is already strdup()'ed */
2007
2008    /* delete the converter if it is the last feed. */
2009    if (last_feed) {
2010        MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2011        mbfl_buffer_converter_delete(MBSTRG(outconv));
2012        MBSTRG(outconv) = NULL;
2013    }
2014}
2015/* }}} */
2016
2017/* {{{ proto int mb_strlen(string str [, string encoding])
2018   Get character numbers of a string */
2019PHP_FUNCTION(mb_strlen)
2020{
2021    int n;
2022    mbfl_string string;
2023    char *enc_name = NULL;
2024    int enc_name_len;
2025
2026    mbfl_string_init(&string);
2027
2028    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2029        RETURN_FALSE;
2030    }
2031
2032    string.no_language = MBSTRG(language);
2033    if (enc_name == NULL) {
2034        string.no_encoding = MBSTRG(current_internal_encoding);
2035    } else {
2036        string.no_encoding = mbfl_name2no_encoding(enc_name);
2037        if (string.no_encoding == mbfl_no_encoding_invalid) {
2038            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2039            RETURN_FALSE;
2040        }
2041    }
2042
2043    n = mbfl_strlen(&string);
2044    if (n >= 0) {
2045        RETVAL_LONG(n);
2046    } else {
2047        RETVAL_FALSE;
2048    }
2049}
2050/* }}} */
2051
2052/* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2053   Find position of first occurrence of a string within another */
2054PHP_FUNCTION(mb_strpos)
2055{
2056    int n, reverse = 0;
2057    long offset;
2058    mbfl_string haystack, needle;
2059    char *enc_name = NULL;
2060    int enc_name_len;
2061
2062    mbfl_string_init(&haystack);
2063    mbfl_string_init(&needle);
2064    haystack.no_language = MBSTRG(language);
2065    haystack.no_encoding = MBSTRG(current_internal_encoding);
2066    needle.no_language = MBSTRG(language);
2067    needle.no_encoding = MBSTRG(current_internal_encoding);
2068    offset = 0;
2069
2070    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2071        RETURN_FALSE;
2072    }
2073
2074    if (enc_name != NULL) {
2075        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2076        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2077            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2078            RETURN_FALSE;
2079        }
2080    }
2081
2082    if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2083        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
2084        RETURN_FALSE;
2085    }
2086    if (needle.len == 0) {
2087        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2088        RETURN_FALSE;
2089    }
2090
2091    n = mbfl_strpos(&haystack, &needle, offset, reverse);
2092    if (n >= 0) {
2093        RETVAL_LONG(n);
2094    } else {
2095        switch (-n) {
2096        case 1:
2097            break;
2098        case 2:
2099            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
2100            break;
2101        case 4:
2102            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
2103            break;
2104        case 8:
2105            php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
2106            break;
2107        default:
2108            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
2109            break;
2110        }
2111        RETVAL_FALSE;
2112    }
2113}
2114/* }}} */
2115
2116/* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2117   Find position of last occurrence of a string within another */
2118PHP_FUNCTION(mb_strrpos)
2119{
2120    int n;
2121    mbfl_string haystack, needle;
2122    char *enc_name = NULL;
2123    int enc_name_len;
2124    zval **zoffset = NULL;
2125    long offset = 0, str_flg;
2126    char *enc_name2 = NULL;
2127    int enc_name_len2;
2128
2129    mbfl_string_init(&haystack);
2130    mbfl_string_init(&needle);
2131    haystack.no_language = MBSTRG(language);
2132    haystack.no_encoding = MBSTRG(current_internal_encoding);
2133    needle.no_language = MBSTRG(language);
2134    needle.no_encoding = MBSTRG(current_internal_encoding);
2135
2136    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2137        RETURN_FALSE;
2138    }
2139
2140    if (zoffset) {
2141        if (Z_TYPE_PP(zoffset) == IS_STRING) {
2142            enc_name2     = Z_STRVAL_PP(zoffset);
2143            enc_name_len2 = Z_STRLEN_PP(zoffset);
2144            str_flg       = 1;
2145
2146            if (enc_name2 != NULL) {
2147                switch (*enc_name2) {
2148                case '0':
2149                case '1':
2150                case '2':
2151                case '3':
2152                case '4':
2153                case '5':
2154                case '6':
2155                case '7':
2156                case '8':
2157                case '9':
2158                case ' ':
2159                case '-':
2160                case '.':
2161                    break;
2162                default :
2163                    str_flg = 0;
2164                    break;
2165                }
2166            }
2167
2168            if (str_flg) {
2169                convert_to_long_ex(zoffset);
2170                offset   = Z_LVAL_PP(zoffset);
2171            } else {
2172                enc_name     = enc_name2;
2173                enc_name_len = enc_name_len2;
2174            }
2175        } else {
2176            convert_to_long_ex(zoffset);
2177            offset = Z_LVAL_PP(zoffset);
2178        }
2179    }
2180
2181    if (enc_name != NULL) {
2182        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2183        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2184            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2185            RETURN_FALSE;
2186        }
2187    }
2188
2189    if (haystack.len <= 0) {
2190        RETURN_FALSE;
2191    }
2192    if (needle.len <= 0) {
2193        RETURN_FALSE;
2194    }
2195
2196    {
2197        int haystack_char_len = mbfl_strlen(&haystack);
2198        if ((offset > 0 && offset > haystack_char_len) ||
2199            (offset < 0 && -offset > haystack_char_len)) {
2200            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
2201            RETURN_FALSE;
2202        }
2203    }
2204
2205    n = mbfl_strpos(&haystack, &needle, offset, 1);
2206    if (n >= 0) {
2207        RETVAL_LONG(n);
2208    } else {
2209        RETVAL_FALSE;
2210    }
2211}
2212/* }}} */
2213
2214/* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2215   Finds position of first occurrence of a string within another, case insensitive */
2216PHP_FUNCTION(mb_stripos)
2217{
2218    int n;
2219    long offset;
2220    mbfl_string haystack, needle;
2221    char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2222    int from_encoding_len;
2223    n = -1;
2224    offset = 0;
2225
2226    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2227        RETURN_FALSE;
2228    }
2229    if (needle.len == 0) {
2230        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2231        RETURN_FALSE;
2232    }
2233    n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2234
2235    if (n >= 0) {
2236        RETVAL_LONG(n);
2237    } else {
2238        RETVAL_FALSE;
2239    }
2240}
2241/* }}} */
2242
2243/* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2244   Finds position of last occurrence of a string within another, case insensitive */
2245PHP_FUNCTION(mb_strripos)
2246{
2247    int n;
2248    long offset;
2249    mbfl_string haystack, needle;
2250    const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2251    int from_encoding_len;
2252    n = -1;
2253    offset = 0;
2254
2255    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2256        RETURN_FALSE;
2257    }
2258
2259    n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2260
2261    if (n >= 0) {
2262        RETVAL_LONG(n);
2263    } else {
2264        RETVAL_FALSE;
2265    }
2266}
2267/* }}} */
2268
2269/* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2270   Finds first occurrence of a string within another */
2271PHP_FUNCTION(mb_strstr)
2272{
2273    int n, len, mblen;
2274    mbfl_string haystack, needle, result, *ret = NULL;
2275    char *enc_name = NULL;
2276    int enc_name_len;
2277    zend_bool part = 0;
2278
2279    mbfl_string_init(&haystack);
2280    mbfl_string_init(&needle);
2281    haystack.no_language = MBSTRG(language);
2282    haystack.no_encoding = MBSTRG(current_internal_encoding);
2283    needle.no_language = MBSTRG(language);
2284    needle.no_encoding = MBSTRG(current_internal_encoding);
2285
2286    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2287        RETURN_FALSE;
2288    }
2289
2290    if (enc_name != NULL) {
2291        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2292        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2293            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2294            RETURN_FALSE;
2295        }
2296    }
2297
2298    if (needle.len <= 0) {
2299        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2300        RETURN_FALSE;
2301    }
2302    n = mbfl_strpos(&haystack, &needle, 0, 0);
2303    if (n >= 0) {
2304        mblen = mbfl_strlen(&haystack);
2305        if (part) {
2306            ret = mbfl_substr(&haystack, &result, 0, n);
2307            if (ret != NULL) {
2308                RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2309            } else {
2310                RETVAL_FALSE;
2311            }
2312        } else {
2313            len = (mblen - n);
2314            ret = mbfl_substr(&haystack, &result, n, len);
2315            if (ret != NULL) {
2316                RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2317            } else {
2318                RETVAL_FALSE;
2319            }
2320        }
2321    } else {
2322        RETVAL_FALSE;
2323    }
2324}
2325/* }}} */
2326
2327/* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2328   Finds the last occurrence of a character in a string within another */
2329PHP_FUNCTION(mb_strrchr)
2330{
2331    int n, len, mblen;
2332    mbfl_string haystack, needle, result, *ret = NULL;
2333    char *enc_name = NULL;
2334    int enc_name_len;
2335    zend_bool part = 0;
2336
2337    mbfl_string_init(&haystack);
2338    mbfl_string_init(&needle);
2339    haystack.no_language = MBSTRG(language);
2340    haystack.no_encoding = MBSTRG(current_internal_encoding);
2341    needle.no_language = MBSTRG(language);
2342    needle.no_encoding = MBSTRG(current_internal_encoding);
2343
2344    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2345        RETURN_FALSE;
2346    }
2347
2348    if (enc_name != NULL) {
2349        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2350        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2351            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2352            RETURN_FALSE;
2353        }
2354    }
2355
2356    if (haystack.len <= 0) {
2357        RETURN_FALSE;
2358    }
2359    if (needle.len <= 0) {
2360        RETURN_FALSE;
2361    }
2362    n = mbfl_strpos(&haystack, &needle, 0, 1);
2363    if (n >= 0) {
2364        mblen = mbfl_strlen(&haystack);
2365        if (part) {
2366            ret = mbfl_substr(&haystack, &result, 0, n);
2367            if (ret != NULL) {
2368                RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2369            } else {
2370                RETVAL_FALSE;
2371            }
2372        } else {
2373            len = (mblen - n);
2374            ret = mbfl_substr(&haystack, &result, n, len);
2375            if (ret != NULL) {
2376                RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2377            } else {
2378                RETVAL_FALSE;
2379            }
2380        }
2381    } else {
2382        RETVAL_FALSE;
2383    }
2384}
2385/* }}} */
2386
2387/* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2388   Finds first occurrence of a string within another, case insensitive */
2389PHP_FUNCTION(mb_stristr)
2390{
2391    zend_bool part = 0;
2392    unsigned int from_encoding_len, len, mblen;
2393    int n;
2394    mbfl_string haystack, needle, result, *ret = NULL;
2395    const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2396    mbfl_string_init(&haystack);
2397    mbfl_string_init(&needle);
2398    haystack.no_language = MBSTRG(language);
2399    haystack.no_encoding = MBSTRG(current_internal_encoding);
2400    needle.no_language = MBSTRG(language);
2401    needle.no_encoding = MBSTRG(current_internal_encoding);
2402
2403
2404    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2405        RETURN_FALSE;
2406    }
2407
2408    if (!needle.len) {
2409        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2410        RETURN_FALSE;
2411    }
2412
2413    haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2414    if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2415        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2416        RETURN_FALSE;
2417    }
2418
2419    n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2420
2421    if (n <0) {
2422        RETURN_FALSE;
2423    }
2424
2425    mblen = mbfl_strlen(&haystack);
2426
2427    if (part) {
2428        ret = mbfl_substr(&haystack, &result, 0, n);
2429        if (ret != NULL) {
2430            RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2431        } else {
2432            RETVAL_FALSE;
2433        }
2434    } else {
2435        len = (mblen - n);
2436        ret = mbfl_substr(&haystack, &result, n, len);
2437        if (ret != NULL) {
2438            RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2439        } else {
2440            RETVAL_FALSE;
2441        }
2442    }
2443}
2444/* }}} */
2445
2446/* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2447   Finds the last occurrence of a character in a string within another, case insensitive */
2448PHP_FUNCTION(mb_strrichr)
2449{
2450    zend_bool part = 0;
2451    int n, from_encoding_len, len, mblen;
2452    mbfl_string haystack, needle, result, *ret = NULL;
2453    char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2454    mbfl_string_init(&haystack);
2455    mbfl_string_init(&needle);
2456    haystack.no_language = MBSTRG(language);
2457    haystack.no_encoding = MBSTRG(current_internal_encoding);
2458    needle.no_language = MBSTRG(language);
2459    needle.no_encoding = MBSTRG(current_internal_encoding);
2460
2461
2462    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2463        RETURN_FALSE;
2464    }
2465
2466    haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2467    if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2468        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2469        RETURN_FALSE;
2470    }
2471
2472    n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2473
2474    if (n <0) {
2475        RETURN_FALSE;
2476    }
2477
2478    mblen = mbfl_strlen(&haystack);
2479
2480    if (part) {
2481        ret = mbfl_substr(&haystack, &result, 0, n);
2482        if (ret != NULL) {
2483            RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2484        } else {
2485            RETVAL_FALSE;
2486        }
2487    } else {
2488        len = (mblen - n);
2489        ret = mbfl_substr(&haystack, &result, n, len);
2490        if (ret != NULL) {
2491            RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2492        } else {
2493            RETVAL_FALSE;
2494        }
2495    }
2496}
2497/* }}} */
2498
2499/* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2500   Count the number of substring occurrences */
2501PHP_FUNCTION(mb_substr_count)
2502{
2503    int n;
2504    mbfl_string haystack, needle;
2505    char *enc_name = NULL;
2506    int enc_name_len;
2507
2508    mbfl_string_init(&haystack);
2509    mbfl_string_init(&needle);
2510    haystack.no_language = MBSTRG(language);
2511    haystack.no_encoding = MBSTRG(current_internal_encoding);
2512    needle.no_language = MBSTRG(language);
2513    needle.no_encoding = MBSTRG(current_internal_encoding);
2514
2515    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2516        return;
2517    }
2518
2519    if (enc_name != NULL) {
2520        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2521        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2522            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2523            RETURN_FALSE;
2524        }
2525    }
2526
2527    if (needle.len <= 0) {
2528        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
2529        RETURN_FALSE;
2530    }
2531
2532    n = mbfl_substr_count(&haystack, &needle);
2533    if (n >= 0) {
2534        RETVAL_LONG(n);
2535    } else {
2536        RETVAL_FALSE;
2537    }
2538}
2539/* }}} */
2540
2541/* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2542   Returns part of a string */
2543PHP_FUNCTION(mb_substr)
2544{
2545    size_t argc = ZEND_NUM_ARGS();
2546    char *str, *encoding;
2547    long from, len;
2548    int mblen, str_len, encoding_len;
2549    mbfl_string string, result, *ret;
2550
2551    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", &str, &str_len, &from, &len, &encoding, &encoding_len) == FAILURE) {
2552        return;
2553    }
2554
2555    mbfl_string_init(&string);
2556    string.no_language = MBSTRG(language);
2557    string.no_encoding = MBSTRG(current_internal_encoding);
2558
2559    if (argc == 4) {
2560        string.no_encoding = mbfl_name2no_encoding(encoding);
2561        if (string.no_encoding == mbfl_no_encoding_invalid) {
2562            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2563            RETURN_FALSE;
2564        }
2565    }
2566
2567    string.val = (unsigned char *)str;
2568    string.len = str_len;
2569
2570    if (argc < 3) {
2571        len = str_len;
2572    }
2573
2574    /* measures length */
2575    mblen = 0;
2576    if (from < 0 || len < 0) {
2577        mblen = mbfl_strlen(&string);
2578    }
2579
2580    /* if "from" position is negative, count start position from the end
2581     * of the string
2582     */
2583    if (from < 0) {
2584        from = mblen + from;
2585        if (from < 0) {
2586            from = 0;
2587        }
2588    }
2589
2590    /* if "length" position is negative, set it to the length
2591     * needed to stop that many chars from the end of the string
2592     */
2593    if (len < 0) {
2594        len = (mblen - from) + len;
2595        if (len < 0) {
2596            len = 0;
2597        }
2598    }
2599
2600    if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2601        && (from >= mbfl_strlen(&string))) {
2602        RETURN_FALSE;
2603    }
2604
2605    ret = mbfl_substr(&string, &result, from, len);
2606    if (NULL == ret) {
2607        RETURN_FALSE;
2608    }
2609
2610    RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2611}
2612/* }}} */
2613
2614/* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2615   Returns part of a string */
2616PHP_FUNCTION(mb_strcut)
2617{
2618    size_t argc = ZEND_NUM_ARGS();
2619    char *encoding;
2620    long from, len;
2621    int encoding_len;
2622    mbfl_string string, result, *ret;
2623
2624    mbfl_string_init(&string);
2625    string.no_language = MBSTRG(language);
2626    string.no_encoding = MBSTRG(current_internal_encoding);
2627
2628    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) {
2629        return;
2630    }
2631
2632    if (argc == 4) {
2633        string.no_encoding = mbfl_name2no_encoding(encoding);
2634        if (string.no_encoding == mbfl_no_encoding_invalid) {
2635            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2636            RETURN_FALSE;
2637        }
2638    }
2639
2640    if (argc < 3) {
2641        len = string.len;
2642    }
2643
2644    /* if "from" position is negative, count start position from the end
2645     * of the string
2646     */
2647    if (from < 0) {
2648        from = string.len + from;
2649        if (from < 0) {
2650            from = 0;
2651        }
2652    }
2653
2654    /* if "length" position is negative, set it to the length
2655     * needed to stop that many chars from the end of the string
2656     */
2657    if (len < 0) {
2658        len = (string.len - from) + len;
2659        if (len < 0) {
2660            len = 0;
2661        }
2662    }
2663
2664    if ((unsigned int)from > string.len) {
2665        RETURN_FALSE;
2666    }
2667
2668    ret = mbfl_strcut(&string, &result, from, len);
2669    if (ret == NULL) {
2670        RETURN_FALSE;
2671    }
2672
2673    RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2674}
2675/* }}} */
2676
2677/* {{{ proto int mb_strwidth(string str [, string encoding])
2678   Gets terminal width of a string */
2679PHP_FUNCTION(mb_strwidth)
2680{
2681    int n;
2682    mbfl_string string;
2683    char *enc_name = NULL;
2684    int enc_name_len;
2685
2686    mbfl_string_init(&string);
2687
2688    string.no_language = MBSTRG(language);
2689    string.no_encoding = MBSTRG(current_internal_encoding);
2690
2691    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2692        return;
2693    }
2694
2695    if (enc_name != NULL) {
2696        string.no_encoding = mbfl_name2no_encoding(enc_name);
2697        if (string.no_encoding == mbfl_no_encoding_invalid) {
2698            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2699            RETURN_FALSE;
2700        }
2701    }
2702
2703    n = mbfl_strwidth(&string);
2704    if (n >= 0) {
2705        RETVAL_LONG(n);
2706    } else {
2707        RETVAL_FALSE;
2708    }
2709}
2710/* }}} */
2711
2712/* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2713   Trim the string in terminal width */
2714PHP_FUNCTION(mb_strimwidth)
2715{
2716    char *str, *trimmarker, *encoding;
2717    long from, width;
2718    int str_len, trimmarker_len, encoding_len;
2719    mbfl_string string, result, marker, *ret;
2720
2721    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2722        return;
2723    }
2724
2725    mbfl_string_init(&string);
2726    mbfl_string_init(&marker);
2727    string.no_language = MBSTRG(language);
2728    string.no_encoding = MBSTRG(current_internal_encoding);
2729    marker.no_language = MBSTRG(language);
2730    marker.no_encoding = MBSTRG(current_internal_encoding);
2731    marker.val = NULL;
2732    marker.len = 0;
2733
2734    if (ZEND_NUM_ARGS() == 5) {
2735        string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2736        if (string.no_encoding == mbfl_no_encoding_invalid) {
2737            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2738            RETURN_FALSE;
2739        }
2740    }
2741
2742    string.val = (unsigned char *)str;
2743    string.len = str_len;
2744
2745    if (from < 0 || from > str_len) {
2746        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
2747        RETURN_FALSE;
2748    }
2749
2750    if (width < 0) {
2751        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
2752        RETURN_FALSE;
2753    }
2754
2755    if (ZEND_NUM_ARGS() >= 4) {
2756        marker.val = (unsigned char *)trimmarker;
2757        marker.len = trimmarker_len;
2758    }
2759
2760    ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2761
2762    if (ret == NULL) {
2763        RETURN_FALSE;
2764    }
2765
2766    RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2767}
2768/* }}} */
2769
2770/* {{{ MBSTRING_API char *php_mb_convert_encoding() */
2771MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
2772{
2773    mbfl_string string, result, *ret;
2774    enum mbfl_no_encoding from_encoding, to_encoding;
2775    mbfl_buffer_converter *convd;
2776    int size, *list;
2777    char *output=NULL;
2778
2779    if (output_len) {
2780        *output_len = 0;
2781    }
2782    if (!input) {
2783        return NULL;
2784    }
2785    /* new encoding */
2786    if (_to_encoding && strlen(_to_encoding)) {
2787        to_encoding = mbfl_name2no_encoding(_to_encoding);
2788        if (to_encoding == mbfl_no_encoding_invalid) {
2789            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
2790            return NULL;
2791        }
2792    } else {
2793        to_encoding = MBSTRG(current_internal_encoding);
2794    }
2795
2796    /* initialize string */
2797    mbfl_string_init(&string);
2798    mbfl_string_init(&result);
2799    from_encoding = MBSTRG(current_internal_encoding);
2800    string.no_encoding = from_encoding;
2801    string.no_language = MBSTRG(language);
2802    string.val = (unsigned char *)input;
2803    string.len = length;
2804
2805    /* pre-conversion encoding */
2806    if (_from_encodings) {
2807        list = NULL;
2808        size = 0;
2809        php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
2810        if (size == 1) {
2811            from_encoding = *list;
2812            string.no_encoding = from_encoding;
2813        } else if (size > 1) {
2814            /* auto detect */
2815            from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection));
2816            if (from_encoding != mbfl_no_encoding_invalid) {
2817                string.no_encoding = from_encoding;
2818            } else {
2819                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
2820                from_encoding = mbfl_no_encoding_pass;
2821                to_encoding = from_encoding;
2822                string.no_encoding = from_encoding;
2823            }
2824        } else {
2825            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
2826        }
2827        if (list != NULL) {
2828            efree((void *)list);
2829        }
2830    }
2831
2832    /* initialize converter */
2833    convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
2834    if (convd == NULL) {
2835        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
2836        return NULL;
2837    }
2838    mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
2839    mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
2840
2841    /* do it */
2842    ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
2843    if (ret) {
2844        if (output_len) {
2845            *output_len = ret->len;
2846        }
2847        output = (char *)ret->val;
2848    }
2849
2850    MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
2851    mbfl_buffer_converter_delete(convd);
2852    return output;
2853}
2854/* }}} */
2855
2856/* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
2857   Returns converted string in desired encoding */
2858PHP_FUNCTION(mb_convert_encoding)
2859{
2860    char *arg_str, *arg_new;
2861    int str_len, new_len;
2862    zval *arg_old;
2863    int i;
2864    size_t size, l, n;
2865    char *_from_encodings = NULL, *ret, *s_free = NULL;
2866
2867    zval **hash_entry;
2868    HashTable *target_hash;
2869
2870    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
2871        return;
2872    }
2873
2874    if (ZEND_NUM_ARGS() == 3) {
2875        switch (Z_TYPE_P(arg_old)) {
2876        case IS_ARRAY:
2877            target_hash = Z_ARRVAL_P(arg_old);
2878            zend_hash_internal_pointer_reset(target_hash);
2879            i = zend_hash_num_elements(target_hash);
2880            _from_encodings = NULL;
2881
2882            while (i > 0) {
2883                if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
2884                    break;
2885                }
2886
2887                convert_to_string_ex(hash_entry);
2888
2889                if ( _from_encodings) {
2890                    l = strlen(_from_encodings);
2891                    n = strlen(Z_STRVAL_PP(hash_entry));
2892                    _from_encodings = erealloc(_from_encodings, l+n+2);
2893                    strcpy(_from_encodings+l, ",");
2894                    strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
2895                } else {
2896                    _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
2897                }
2898
2899                zend_hash_move_forward(target_hash);
2900                i--;
2901            }
2902
2903            if (_from_encodings != NULL && !strlen(_from_encodings)) {
2904                efree(_from_encodings);
2905                _from_encodings = NULL;
2906            }
2907            s_free = _from_encodings;
2908            break;
2909        default:
2910            convert_to_string(arg_old);
2911            _from_encodings = Z_STRVAL_P(arg_old);
2912            break;
2913        }
2914    }
2915
2916    /* new encoding */
2917    ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
2918    if (ret != NULL) {
2919        RETVAL_STRINGL(ret, size, 0);       /* the string is already strdup()'ed */
2920    } else {
2921        RETVAL_FALSE;
2922    }
2923
2924    if ( s_free) {
2925        efree(s_free);
2926    }
2927}
2928/* }}} */
2929
2930/* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
2931   Returns a case-folded version of sourcestring */
2932PHP_FUNCTION(mb_convert_case)
2933{
2934    char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2935    int str_len, from_encoding_len;
2936    long case_mode = 0;
2937    char *newstr;
2938    size_t ret_len;
2939
2940    RETVAL_FALSE;
2941    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
2942                &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
2943        RETURN_FALSE;
2944
2945    newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
2946
2947    if (newstr) {
2948        RETVAL_STRINGL(newstr, ret_len, 0);
2949    }
2950}
2951/* }}} */
2952
2953/* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
 *  Returns an uppercased version of sourcestring
2955 */
2956PHP_FUNCTION(mb_strtoupper)
2957{
2958    char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2959    int str_len, from_encoding_len;
2960    char *newstr;
2961    size_t ret_len;
2962
2963    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
2964                &from_encoding, &from_encoding_len) == FAILURE) {
2965        return;
2966    }
2967    newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
2968
2969    if (newstr) {
2970        RETURN_STRINGL(newstr, ret_len, 0);
2971    }
2972    RETURN_FALSE;
2973}
2974/* }}} */
2975
2976/* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
2977 *  Returns a lowercased version of sourcestring
2978 */
2979PHP_FUNCTION(mb_strtolower)
2980{
2981    char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2982    int str_len, from_encoding_len;
2983    char *newstr;
2984    size_t ret_len;
2985
2986    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
2987                &from_encoding, &from_encoding_len) == FAILURE) {
2988        return;
2989    }
2990    newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
2991
2992    if (newstr) {
2993        RETURN_STRINGL(newstr, ret_len, 0);
2994    }
2995    RETURN_FALSE;
2996}
2997/* }}} */
2998
2999/* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
   Returns the detected encoding of the given string (as a string) */
3001PHP_FUNCTION(mb_detect_encoding)
3002{
3003    char *str;
3004    int str_len;
3005    zend_bool strict=0;
3006    zval *encoding_list;
3007
3008    mbfl_string string;
3009    const char *ret;
3010    enum mbfl_no_encoding *elist;
3011    int size, *list;
3012
3013    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3014        return;
3015    }
3016
3017    /* make encoding list */
3018    list = NULL;
3019    size = 0;
3020    if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) {
3021        switch (Z_TYPE_P(encoding_list)) {
3022        case IS_ARRAY:
3023            if (!php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
3024                if (list) {
3025                    efree(list);
3026                    list = NULL;
3027                    size = 0;
3028                }
3029            }
3030            break;
3031        default:
3032            convert_to_string(encoding_list);
3033            if (!php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
3034                if (list) {
3035                    efree(list);
3036                    list = NULL;
3037                    size = 0;
3038                }
3039            }
3040            break;
3041        }
3042        if (size <= 0) {
3043            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
3044        }
3045    }
3046
3047    if (ZEND_NUM_ARGS() < 3) {
3048        strict = (zend_bool)MBSTRG(strict_detection);
3049    }
3050
3051    if (size > 0 && list != NULL) {
3052        elist = list;
3053    } else {
3054        elist = MBSTRG(current_detect_order_list);
3055        size = MBSTRG(current_detect_order_list_size);
3056    }
3057
3058    mbfl_string_init(&string);
3059    string.no_language = MBSTRG(language);
3060    string.val = (unsigned char *)str;
3061    string.len = str_len;
3062    ret = mbfl_identify_encoding_name(&string, elist, size, strict);
3063
3064    if (list != NULL) {
3065        efree((void *)list);
3066    }
3067
3068    if (ret == NULL) {
3069        RETURN_FALSE;
3070    }
3071
3072    RETVAL_STRING((char *)ret, 1);
3073}
3074/* }}} */
3075
3076/* {{{ proto mixed mb_list_encodings()
3077   Returns an array of all supported entity encodings */
3078PHP_FUNCTION(mb_list_encodings)
3079{
3080    const mbfl_encoding **encodings;
3081    const mbfl_encoding *encoding;
3082    int i;
3083
3084    array_init(return_value);
3085    i = 0;
3086    encodings = mbfl_get_supported_encodings();
3087    while ((encoding = encodings[i++]) != NULL) {
3088        add_next_index_string(return_value, (char *) encoding->name, 1);
3089    }
3090}
3091/* }}} */
3092
3093/* {{{ proto array mb_encoding_aliases(string encoding)
3094   Returns an array of the aliases of a given encoding name */
3095PHP_FUNCTION(mb_encoding_aliases)
3096{
3097    const mbfl_encoding *encoding;
3098    char *name = NULL;
3099    int name_len;
3100
3101    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
3102        RETURN_FALSE;
3103    }
3104
3105    encoding = mbfl_name2encoding(name);
3106    if (!encoding) {
3107        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
3108        RETURN_FALSE;
3109    }
3110
3111    array_init(return_value);
3112    if (encoding->aliases != NULL) {
3113        const char **alias;
3114        for (alias = *encoding->aliases; *alias; ++alias) {
3115            add_next_index_string(return_value, (char *)*alias, 1);
3116        }
3117    }
3118}
3119/* }}} */
3120
3121/* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3122   Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
3123PHP_FUNCTION(mb_encode_mimeheader)
3124{
3125    enum mbfl_no_encoding charset, transenc;
3126    mbfl_string  string, result, *ret;
3127    char *charset_name = NULL;
3128    int charset_name_len;
3129    char *trans_enc_name = NULL;
3130    int trans_enc_name_len;
3131    char *linefeed = "\r\n";
3132    int linefeed_len;
3133    long indent = 0;
3134
3135    mbfl_string_init(&string);
3136    string.no_language = MBSTRG(language);
3137    string.no_encoding = MBSTRG(current_internal_encoding);
3138
3139    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3140        return;
3141    }
3142
3143    charset = mbfl_no_encoding_pass;
3144    transenc = mbfl_no_encoding_base64;
3145
3146    if (charset_name != NULL) {
3147        charset = mbfl_name2no_encoding(charset_name);
3148        if (charset == mbfl_no_encoding_invalid) {
3149            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3150            RETURN_FALSE;
3151        }
3152    } else {
3153        const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3154        if (lang != NULL) {
3155            charset = lang->mail_charset;
3156            transenc = lang->mail_header_encoding;
3157        }
3158    }
3159
3160    if (trans_enc_name != NULL) {
3161        if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3162            transenc = mbfl_no_encoding_base64;
3163        } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3164            transenc = mbfl_no_encoding_qprint;
3165        }
3166    }
3167
3168    mbfl_string_init(&result);
3169    ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3170    if (ret != NULL) {
3171        RETVAL_STRINGL((char *)ret->val, ret->len, 0)   /* the string is already strdup()'ed */
3172    } else {
3173        RETVAL_FALSE;
3174    }
3175}
3176/* }}} */
3177
3178/* {{{ proto string mb_decode_mimeheader(string string)
3179   Decodes the MIME "encoded-word" in the string */
3180PHP_FUNCTION(mb_decode_mimeheader)
3181{
3182    mbfl_string string, result, *ret;
3183
3184    mbfl_string_init(&string);
3185    string.no_language = MBSTRG(language);
3186    string.no_encoding = MBSTRG(current_internal_encoding);
3187
3188    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
3189        return;
3190    }
3191
3192    mbfl_string_init(&result);
3193    ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
3194    if (ret != NULL) {
3195        RETVAL_STRINGL((char *)ret->val, ret->len, 0)   /* the string is already strdup()'ed */
3196    } else {
3197        RETVAL_FALSE;
3198    }
3199}
3200/* }}} */
3201
3202/* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3203   Conversion between full-width character and half-width character (Japanese) */
3204PHP_FUNCTION(mb_convert_kana)
3205{
3206    int opt, i;
3207    mbfl_string string, result, *ret;
3208    char *optstr = NULL;
3209    int optstr_len;
3210    char *encname = NULL;
3211    int encname_len;
3212
3213    mbfl_string_init(&string);
3214    string.no_language = MBSTRG(language);
3215    string.no_encoding = MBSTRG(current_internal_encoding);
3216
3217    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3218        return;
3219    }
3220
3221    /* option */
3222    if (optstr != NULL) {
3223        char *p = optstr;
3224        int n = optstr_len;
3225        i = 0;
3226        opt = 0;
3227        while (i < n) {
3228            i++;
3229            switch (*p++) {
3230            case 'A':
3231                opt |= 0x1;
3232                break;
3233            case 'a':
3234                opt |= 0x10;
3235                break;
3236            case 'R':
3237                opt |= 0x2;
3238                break;
3239            case 'r':
3240                opt |= 0x20;
3241                break;
3242            case 'N':
3243                opt |= 0x4;
3244                break;
3245            case 'n':
3246                opt |= 0x40;
3247                break;
3248            case 'S':
3249                opt |= 0x8;
3250                break;
3251            case 's':
3252                opt |= 0x80;
3253                break;
3254            case 'K':
3255                opt |= 0x100;
3256                break;
3257            case 'k':
3258                opt |= 0x1000;
3259                break;
3260            case 'H':
3261                opt |= 0x200;
3262                break;
3263            case 'h':
3264                opt |= 0x2000;
3265                break;
3266            case 'V':
3267                opt |= 0x800;
3268                break;
3269            case 'C':
3270                opt |= 0x10000;
3271                break;
3272            case 'c':
3273                opt |= 0x20000;
3274                break;
3275            case 'M':
3276                opt |= 0x100000;
3277                break;
3278            case 'm':
3279                opt |= 0x200000;
3280                break;
3281            }
3282        }
3283    } else {
3284        opt = 0x900;
3285    }
3286
3287    /* encoding */
3288    if (encname != NULL) {
3289        string.no_encoding = mbfl_name2no_encoding(encname);
3290        if (string.no_encoding == mbfl_no_encoding_invalid) {
3291            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
3292            RETURN_FALSE;
3293        }
3294    }
3295
3296    ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3297    if (ret != NULL) {
3298        RETVAL_STRINGL((char *)ret->val, ret->len, 0);      /* the string is already strdup()'ed */
3299    } else {
3300        RETVAL_FALSE;
3301    }
3302}
3303/* }}} */
3304
/* Initial capacity, and growth increment, of the zval stack that
 * mb_convert_variables() uses while walking nested arrays and objects. */
#define PHP_MBSTR_STACK_BLOCK_SIZE 32
3306
3307/* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3308   Converts the string resource in variables to desired encoding */
3309PHP_FUNCTION(mb_convert_variables)
3310{
3311    zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
3312    HashTable *target_hash;
3313    mbfl_string string, result, *ret;
3314    enum mbfl_no_encoding from_encoding, to_encoding;
3315    mbfl_encoding_detector *identd;
3316    mbfl_buffer_converter *convd;
3317    int n, to_enc_len, argc, stack_level, stack_max, elistsz;
3318    enum mbfl_no_encoding *elist;
3319    char *name, *to_enc;
3320    void *ptmp;
3321
3322    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3323        return;
3324    }
3325
3326    /* new encoding */
3327    to_encoding = mbfl_name2no_encoding(to_enc);
3328    if (to_encoding == mbfl_no_encoding_invalid) {
3329        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3330        efree(args);
3331        RETURN_FALSE;
3332    }
3333
3334    /* initialize string */
3335    mbfl_string_init(&string);
3336    mbfl_string_init(&result);
3337    from_encoding = MBSTRG(current_internal_encoding);
3338    string.no_encoding = from_encoding;
3339    string.no_language = MBSTRG(language);
3340
3341    /* pre-conversion encoding */
3342    elist = NULL;
3343    elistsz = 0;
3344    switch (Z_TYPE_PP(zfrom_enc)) {
3345    case IS_ARRAY:
3346        php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
3347        break;
3348    default:
3349        convert_to_string_ex(zfrom_enc);
3350        php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
3351        break;
3352    }
3353    if (elistsz <= 0) {
3354        from_encoding = mbfl_no_encoding_pass;
3355    } else if (elistsz == 1) {
3356        from_encoding = *elist;
3357    } else {
3358        /* auto detect */
3359        from_encoding = mbfl_no_encoding_invalid;
3360        stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3361        stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3362        stack_level = 0;
3363        identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
3364        if (identd != NULL) {
3365            n = 0;
3366            while (n < argc || stack_level > 0) {
3367                if (stack_level <= 0) {
3368                    var = args[n++];
3369                    if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3370                        target_hash = HASH_OF(*var);
3371                        if (target_hash != NULL) {
3372                            zend_hash_internal_pointer_reset(target_hash);
3373                        }
3374                    }
3375                } else {
3376                    stack_level--;
3377                    var = stack[stack_level];
3378                }
3379                if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3380                    target_hash = HASH_OF(*var);
3381                    if (target_hash != NULL) {
3382                        while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3383                            zend_hash_move_forward(target_hash);
3384                            if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3385                                if (stack_level >= stack_max) {
3386                                    stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3387                                    ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3388                                    stack = (zval ***)ptmp;
3389                                }
3390                                stack[stack_level] = var;
3391                                stack_level++;
3392                                var = hash_entry;
3393                                target_hash = HASH_OF(*var);
3394                                if (target_hash != NULL) {
3395                                    zend_hash_internal_pointer_reset(target_hash);
3396                                    continue;
3397                                }
3398                            } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3399                                string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3400                                string.len = Z_STRLEN_PP(hash_entry);
3401                                if (mbfl_encoding_detector_feed(identd, &string)) {
3402                                    goto detect_end;        /* complete detecting */
3403                                }
3404                            }
3405                        }
3406                    }
3407                } else if (Z_TYPE_PP(var) == IS_STRING) {
3408                    string.val = (unsigned char *)Z_STRVAL_PP(var);
3409                    string.len = Z_STRLEN_PP(var);
3410                    if (mbfl_encoding_detector_feed(identd, &string)) {
3411                        goto detect_end;        /* complete detecting */
3412                    }
3413                }
3414            }
3415detect_end:
3416            from_encoding = mbfl_encoding_detector_judge(identd);
3417            mbfl_encoding_detector_delete(identd);
3418        }
3419        efree(stack);
3420
3421        if (from_encoding == mbfl_no_encoding_invalid) {
3422            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
3423            from_encoding = mbfl_no_encoding_pass;
3424        }
3425    }
3426    if (elist != NULL) {
3427        efree((void *)elist);
3428    }
3429    /* create converter */
3430    convd = NULL;
3431    if (from_encoding != mbfl_no_encoding_pass) {
3432        convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
3433        if (convd == NULL) {
3434            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
3435            RETURN_FALSE;
3436        }
3437        mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3438        mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3439    }
3440
3441    /* convert */
3442    if (convd != NULL) {
3443        stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3444        stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3445        stack_level = 0;
3446        n = 0;
3447        while (n < argc || stack_level > 0) {
3448            if (stack_level <= 0) {
3449                var = args[n++];
3450                if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3451                    target_hash = HASH_OF(*var);
3452                    if (target_hash != NULL) {
3453                        zend_hash_internal_pointer_reset(target_hash);
3454                    }
3455                }
3456            } else {
3457                stack_level--;
3458                var = stack[stack_level];
3459            }
3460            if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3461                target_hash = HASH_OF(*var);
3462                if (target_hash != NULL) {
3463                    while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3464                        zend_hash_move_forward(target_hash);
3465                        if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3466                            if (stack_level >= stack_max) {
3467                                stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3468                                ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3469                                stack = (zval ***)ptmp;
3470                            }
3471                            stack[stack_level] = var;
3472                            stack_level++;
3473                            var = hash_entry;
3474                            SEPARATE_ZVAL(hash_entry);
3475                            target_hash = HASH_OF(*var);
3476                            if (target_hash != NULL) {
3477                                zend_hash_internal_pointer_reset(target_hash);
3478                                continue;
3479                            }
3480                        } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3481                            string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3482                            string.len = Z_STRLEN_PP(hash_entry);
3483                            ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3484                            if (ret != NULL) {
3485                                if (Z_REFCOUNT_PP(hash_entry) > 1) {
3486                                    Z_DELREF_PP(hash_entry);
3487                                    MAKE_STD_ZVAL(*hash_entry);
3488                                } else {
3489                                    zval_dtor(*hash_entry);
3490                                }
3491                            ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
3492                        }
3493                    }
3494                }
3495            }
3496        } else if (Z_TYPE_PP(var) == IS_STRING) {
3497            string.val = (unsigned char *)Z_STRVAL_PP(var);
3498            string.len = Z_STRLEN_PP(var);
3499            ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3500            if (ret != NULL) {
3501                zval_dtor(*var);
3502                ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
3503                }
3504            }
3505        }
3506        efree(stack);
3507
3508        MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3509        mbfl_buffer_converter_delete(convd);
3510    }
3511
3512    efree(args);
3513
3514    name = (char *)mbfl_no_encoding2name(from_encoding);
3515    if (name != NULL) {
3516        RETURN_STRING(name, 1);
3517    } else {
3518        RETURN_FALSE;
3519    }
3520}
3521/* }}} */
3522
3523/* {{{ HTML numeric entity */
3524/* {{{ static void php_mb_numericentity_exec() */
3525static void
3526php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3527{
3528    char *str, *encoding;
3529    int str_len, encoding_len;
3530    zval *zconvmap, **hash_entry;
3531    HashTable *target_hash;
3532    size_t argc = ZEND_NUM_ARGS();
3533    int i, *convmap, *mapelm, mapsize=0;
3534    mbfl_string string, result, *ret;
3535    enum mbfl_no_encoding no_encoding;
3536
3537    if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) {
3538        return;
3539    }
3540
3541    mbfl_string_init(&string);
3542    string.no_language = MBSTRG(language);
3543    string.no_encoding = MBSTRG(current_internal_encoding);
3544    string.val = (unsigned char *)str;
3545    string.len = str_len;
3546
3547    /* encoding */
3548    if (argc == 3) {
3549        no_encoding = mbfl_name2no_encoding(encoding);
3550        if (no_encoding == mbfl_no_encoding_invalid) {
3551            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
3552            RETURN_FALSE;
3553        } else {
3554            string.no_encoding = no_encoding;
3555        }
3556    }
3557
3558    /* conversion map */
3559    convmap = NULL;
3560    if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3561        target_hash = Z_ARRVAL_P(zconvmap);
3562        zend_hash_internal_pointer_reset(target_hash);
3563        i = zend_hash_num_elements(target_hash);
3564        if (i > 0) {
3565            convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3566            mapelm = convmap;
3567            mapsize = 0;
3568            while (i > 0) {
3569                if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3570                    break;
3571                }
3572                convert_to_long_ex(hash_entry);
3573                *mapelm++ = Z_LVAL_PP(hash_entry);
3574                mapsize++;
3575                i--;
3576                zend_hash_move_forward(target_hash);
3577            }
3578        }
3579    }
3580    if (convmap == NULL) {
3581        RETURN_FALSE;
3582    }
3583    mapsize /= 4;
3584
3585    ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3586    if (ret != NULL) {
3587        RETVAL_STRINGL((char *)ret->val, ret->len, 0);
3588    } else {
3589        RETVAL_FALSE;
3590    }
3591    efree((void *)convmap);
3592}
3593/* }}} */
3594
3595/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
3596   Converts specified characters to HTML numeric entities */
3597PHP_FUNCTION(mb_encode_numericentity)
3598{
3599    php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
3600}
3601/* }}} */
3602
3603/* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
3604   Converts HTML numeric entities to character code */
3605PHP_FUNCTION(mb_decode_numericentity)
3606{
3607    php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
3608}
3609/* }}} */
3610/* }}} */
3611
3612/* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
3613 *  Sends an email message with MIME scheme
3614 */
3615
/*
 * If str[pos] begins a folded-header separator (CR LF followed by a space or
 * a tab), step pos over the CR LF and over any further spaces/tabs, leaving
 * pos on the last blank of the run, and then `continue` the enclosing loop.
 * Nothing happens when the pattern does not match.
 *
 * This expands to a bare `if` statement containing `continue`, so it may only
 * be used as a statement inside a loop. Both arguments are evaluated several
 * times.
 */
3616#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)                                     \
3617    if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) {    \
3618        pos += 2;                                           \
3619        while (str[pos + 1] == ' ' || str[pos + 1] == '\t') {                           \
3620            pos++;                                          \
3621        }                                               \
3622        continue;                                           \
3623    }
3624
/*
 * Overwrites, in place, every NUL byte found in the first len bytes of str
 * with a space.
 *
 * Uses (and clobbers) two variables named pp and ee that must already be
 * declared in the calling scope. The expansion is a sequence of plain
 * statements, not a single statement.
 */
3625#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)            \
3626    pp = str;                   \
3627    ee = pp + len;                  \
3628    while ((pp = memchr(pp, '\0', (ee - pp)))) {    \
3629        *pp = ' ';              \
3630    }                       \
3631
/*
 * Adds one character to the smart_str named `token` in the calling scope.
 *
 * When token.a > 0, ch is appended with smart_str_appendc(). When token.a is
 * 0, only token.len is incremented and ch itself is not stored anywhere; the
 * header parser below uses that mode while token.c points directly into the
 * text being scanned.
 */
3632#define APPEND_ONE_CHAR(ch) do { \
3633    if (token.a > 0) { \
3634        smart_str_appendc(&token, ch); \
3635    } else {\
3636        token.len++; \
3637    } \
3638} while (0)
3639
/*
 * Gives a smart_str whose `a` member is 0 a private buffer.
 *
 * `a` is set to 1 and doubled until it is at least `len`; a + 1 bytes are
 * then emalloc'ed, the first `len` bytes of the old `c` are copied in, and
 * `c` is pointed at the copy. The memory `c` referred to before is not freed
 * here. A smart_str whose `a` is already non-zero is left untouched.
 */
3640#define SEPARATE_SMART_STR(str) do {\
3641    if ((str)->a == 0) { \
3642        char *tmp_ptr; \
3643        (str)->a = 1; \
3644        while ((str)->a < (str)->len) { \
3645            (str)->a <<= 1; \
3646        } \
3647        tmp_ptr = emalloc((str)->a + 1); \
3648        memcpy(tmp_ptr, (str)->c, (str)->len); \
3649        (str)->c = tmp_ptr; \
3650    } \
3651} while (0)
3652
3653static void my_smart_str_dtor(smart_str *s)
3654{
3655    if (s->a > 0) {
3656        smart_str_free(s);
3657    }
3658}
3659
/*
 * Scans raw mail header text and stores each "Name: value" field in ht.
 *
 *   ht      - destination hash table. The key of an entry is a copy of the
 *             field name upper-cased with php_strtoupper() (key length is
 *             fld_name.len); the stored data is a by-value copy of the
 *             smart_str describing the field value.
 *   str     - header text to scan.
 *   str_len - number of bytes of str to scan; the loop reads one byte per
 *             iteration and stops after str_len bytes at the latest.
 *
 * Returns the value of `state` at the moment scanning stopped.
 *
 * Tokens are kept in smart_str variables. While token.a is 0 the token is
 * only a (pointer, length) window onto str and APPEND_ONE_CHAR just grows the
 * length; after SEPARATE_SMART_STR has copied it (a > 0) characters are
 * really appended. A value stored with a == 0 therefore still points into
 * str.
 *
 * state:      0 = nothing read yet, 1 = inside a field name, 2 = after the
 *             ':' and before the value, 3 = inside a value, 4 = a line
 *             beginning with a blank was met while inside a value (folded
 *             value), 5 = a line beginning with a blank was met in any other
 *             state.
 * crlf_state: -1 = at the start of a line (initially or after '\n'),
 *             0 = after an ordinary character, 1 = a '\r' was just read.
 */
3660static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
3661{
3662    const char *ps;
3663    size_t icnt;
3664    int state = 0;
3665    int crlf_state = -1;
3666
3667    smart_str token = { 0, 0, 0 };
3668    smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };
3669
3670    ps = str;
3671    icnt = str_len;
3672
3673    /*
3674     *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3675     *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
3676     *      state  0            1           2          3
3677     *
3678     *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3679     *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
3680     * crlf_state -1                       0                     1 -1
3681     *
3682     */
3683
3684    while (icnt > 0) {
3685        switch (*ps) {
3686            case ':':
                /* a '\r' not followed by '\n' is ordinary data: emit it now */
3687                if (crlf_state == 1) {
3688                    APPEND_ONE_CHAR('\r');
3689                }
3690
                /* in state 0 or 1 the colon ends the field name; in any other
                 * state it is appended to the current token like a normal
                 * character */
3691                if (state == 0 || state == 1) {
3692                    fld_name = token;
3693
3694                    state = 2;
3695                } else {
3696                    APPEND_ONE_CHAR(*ps);
3697                }
3698
3699                crlf_state = 0;
3700                break;
3701
3702            case '\n':
                /* a '\n' met while already at a line start (directly after
                 * another '\n', or as the very first byte) ends the scan */
3703                if (crlf_state == -1) {
3704                    goto out;
3705                }
3706                crlf_state = -1;
3707                break;
3708
3709            case '\r':
                /* the first '\r' is only remembered; a second consecutive one
                 * causes a literal '\r' to be appended to the token */
3710                if (crlf_state == 1) {
3711                    APPEND_ONE_CHAR('\r');
3712                } else {
3713                    crlf_state = 1;
3714                }
3715                break;
3716
3717            case ' ': case '\t':
3718                if (crlf_state == -1) {
3719                    if (state == 3) {
3720                        /* continuing from the previous line */
                        /* the token gets its own buffer here so that the
                         * following characters are really appended */
3721                        SEPARATE_SMART_STR(&token);
3722                        state = 4;
3723                    } else {
3724                        /* simply skipping this new line */
3725                        state = 5;
3726                    }
3727                } else {
3728                    if (crlf_state == 1) {
3729                        APPEND_ONE_CHAR('\r');
3730                    }
                    /* blanks are kept only inside a field name or a value */
3731                    if (state == 1 || state == 3) {
3732                        APPEND_ONE_CHAR(*ps);
3733                    }
3734                }
3735                crlf_state = 0;
3736                break;
3737
3738            default:
3739                switch (state) {
3740                    case 0:
                        /* first character of the first field name: start a
                         * window onto the input at this position */
3741                        token.c = (char *)ps;
3742                        token.len = 0;
3743                        token.a = 0;
3744                        state = 1;
3745                        break;
3746
3747                    case 2:
                        /* still on the line of the colon: the value starts
                         * here */
3748                        if (crlf_state != -1) {
3749                            token.c = (char *)ps;
3750                            token.len = 0;
3751                            token.a = 0;
3752
3753                            state = 3;
3754                            break;
3755                        }
3756                        /* break is missing intentionally */
3757
3758                    case 3:
                        /* a non-blank character at the start of a line ends
                         * the pending field: store it, then begin a new field
                         * name at this position */
3759                        if (crlf_state == -1) {
3760                            fld_val = token;
3761
3762                            if (fld_name.c != NULL && fld_val.c != NULL) {
3763                                char *dummy;
3764
3765                                /* FIXME: some locale free implementation is
3766                                 * really required here,,, */
3767                                SEPARATE_SMART_STR(&fld_name);
3768                                php_strtoupper(fld_name.c, fld_name.len);
3769
3770                                zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
3771
3772                                my_smart_str_dtor(&fld_name);
3773                            }
3774
3775                            memset(&fld_name, 0, sizeof(smart_str));
3776                            memset(&fld_val, 0, sizeof(smart_str));
3777
3778                            token.c = (char *)ps;
3779                            token.len = 0;
3780                            token.a = 0;
3781
3782                            state = 1;
3783                        }
3784                        break;
3785
3786                    case 4:
                        /* first non-blank character of a folded line: a
                         * single space is appended before it and the parser
                         * returns to the in-value state */
3787                        APPEND_ONE_CHAR(' ');
3788                        state = 3;
3789                        break;
3790                }
3791
3792                if (crlf_state == 1) {
3793                    APPEND_ONE_CHAR('\r');
3794                }
3795
3796                APPEND_ONE_CHAR(*ps);
3797
3798                crlf_state = 0;
3799                break;
3800        }
3801        ps++, icnt--;
3802    }
3803out:
    /* input ended right after a colon: treat the value as an empty string */
3804    if (state == 2) {
3805        token.c = "";
3806        token.len = 0;
3807        token.a = 0;
3808
3809        state = 3;
3810    }
    /* store the field that was still being read when the scan stopped */
3811    if (state == 3) {
3812        fld_val = token;
3813
3814        if (fld_name.c != NULL && fld_val.c != NULL) {
3815            void *dummy;
3816
3817            /* FIXME: some locale free implementation is
3818             * really required here,,, */
3819            SEPARATE_SMART_STR(&fld_name);
3820            php_strtoupper(fld_name.c, fld_name.len);
3821
3822            zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
3823
3824            my_smart_str_dtor(&fld_name);
3825        }
3826    }
3827    return state;
3828}
3829
/*
 * mb_send_mail(to, subject, message [, additional_headers
 *              [, additional_parameters]])
 *
 * Outline of what the body does:
 *   1. picks the charset and the header/body transfer encodings (UTF-8 and
 *      base64 unless the current language entry supplies its own);
 *   2. replaces NUL bytes in every string argument with spaces, in place;
 *   3. parses additional_headers; a Content-Type charset parameter and a
 *      Content-Transfer-Encoding header override the choices from step 1;
 *   4. cleans up the To: value, MIME-encodes the subject, converts the body;
 *   5. builds the final header block, adding MIME-Version, Content-Type and
 *      Content-Transfer-Encoding when the caller did not supply them;
 *   6. calls php_mail() and sets the return value to TRUE or FALSE.
 */
3830PHP_FUNCTION(mb_send_mail)
3831{
3832    int n;
3833    char *to = NULL;
3834    int to_len;
3835    char *message = NULL;
3836    int message_len;
3837    char *headers = NULL;
3838    int headers_len;
3839    char *subject = NULL;
3840    int subject_len;
3841    char *extra_cmd = NULL;
3842    int extra_cmd_len;
3843    int i;
3844    char *to_r = NULL;
3845    char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
    /* set when the caller's own headers already contain the respective
     * field, so that the default one is not added again further down */
3846    struct {
3847        int cnt_type:1;
3848        int cnt_trans_enc:1;
3849    } suppressed_hdrs = { 0, 0 };
3850
3851    char *message_buf = NULL, *subject_buf = NULL, *p;
3852    mbfl_string orig_str, conv_str;
3853    mbfl_string *pstr;  /* pointer to mbfl string for return value */
3854    enum mbfl_no_encoding
3855        tran_cs,    /* transfer text charset */
3856        head_enc,   /* header transfer encoding */
3857        body_enc;   /* body transfer encoding */
3858    mbfl_memory_device device;  /* growable buffer for the additional headers */
3859    const mbfl_language *lang;
3860    int err = 0;
3861    HashTable ht_headers;
3862    smart_str *s;
3863    extern void mbfl_memory_device_unput(mbfl_memory_device *device);
3864    char *pp, *ee;  /* scratch variables required by MAIL_ASCIIZ_CHECK_MBSTRING */
3865
3866    if (PG(safe_mode) && (ZEND_NUM_ARGS() == 5)) {
3867        php_error_docref(NULL TSRMLS_CC, E_WARNING, "SAFE MODE Restriction in effect.  The fifth parameter is disabled in SAFE MODE.");
3868        RETURN_FALSE;
3869    }
3870
3871    /* initialize */
3872    mbfl_memory_device_init(&device, 0, 0);
3873    mbfl_string_init(&orig_str);
3874    mbfl_string_init(&conv_str);
3875
3876    /* character-set, transfer-encoding */
3877    tran_cs = mbfl_no_encoding_utf8;
3878    head_enc = mbfl_no_encoding_base64;
3879    body_enc = mbfl_no_encoding_base64;
3880    lang = mbfl_no2language(MBSTRG(language));
3881    if (lang != NULL) {
3882        tran_cs = lang->mail_charset;
3883        head_enc = lang->mail_header_encoding;
3884        body_enc = lang->mail_body_encoding;
3885    }
3886
3887    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
3888        return;
3889    }
3890
3891    /* ASCIIZ check */
    /* embedded NUL bytes are overwritten with spaces directly in the
     * argument buffers */
3892    MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
3893    MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
3894    MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
3895    if (headers) {
3896        MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
3897    }
3898    if (extra_cmd) {
3899        MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
3900    }
3901
    /* field name (upper case) => smart_str value; entries are released
     * through my_smart_str_dtor when the table is destroyed */
3902    zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
3903
3904    if (headers != NULL) {
3905        _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
3906    }
3907
3908    if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
3909        char *tmp;
3910        char *param_name;
3911        char *charset = NULL;
3912
        /* work on a private, terminated copy of the value so that the C
         * string functions below can be applied to it */
3913        SEPARATE_SMART_STR(s);
3914        smart_str_0(s);
3915
3916        p = strchr(s->c, ';');
3917
3918        if (p != NULL) {
3919            /* skipping the padded spaces */
3920            do {
3921                ++p;
3922            } while (*p == ' ' || *p == '\t');
3923
3924            if (*p != '\0') {
                /* only the first parameter after the ';' is examined */
3925                if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
3926                    if (strcasecmp(param_name, "charset") == 0) {
3927                        enum mbfl_no_encoding _tran_cs = tran_cs;
3928
3929                        charset = php_strtok_r(NULL, "= \"", &tmp);
3930                        if (charset != NULL) {
3931                            _tran_cs = mbfl_name2no_encoding(charset);
3932                        }
3933
3934                        if (_tran_cs == mbfl_no_encoding_invalid) {
3935                            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
3936                            _tran_cs = mbfl_no_encoding_ascii;
3937                        }
3938                        tran_cs = _tran_cs;
3939                    }
3940                }
3941            }
3942        }
3943        suppressed_hdrs.cnt_type = 1;
3944    }
3945
3946    if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
3947        enum mbfl_no_encoding _body_enc;
3948        SEPARATE_SMART_STR(s);
3949        smart_str_0(s);
3950
        /* only base64, 7bit and 8bit are honoured; anything else falls back
         * to 8bit with a warning */
3951        _body_enc = mbfl_name2no_encoding(s->c);
3952        switch (_body_enc) {
3953            case mbfl_no_encoding_base64:
3954            case mbfl_no_encoding_7bit:
3955            case mbfl_no_encoding_8bit:
3956                body_enc = _body_enc;
3957                break;
3958
3959            default:
3960                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
3961                body_enc =  mbfl_no_encoding_8bit;
3962                break;
3963        }
3964        suppressed_hdrs.cnt_trans_enc = 1;
3965    }
3966
3967    /* To: */
3968    if (to != NULL) {
3969        if (to_len > 0) {
            /* work on a copy: strip trailing whitespace, then replace the
             * remaining control characters with spaces */
3970            to_r = estrndup(to, to_len);
3971            for (; to_len; to_len--) {
3972                if (!isspace((unsigned char) to_r[to_len - 1])) {
3973                    break;
3974                }
3975                to_r[to_len - 1] = '\0';
3976            }
3977            for (i = 0; to_r[i]; i++) {
3978            if (iscntrl((unsigned char) to_r[i])) {
3979                /* According to RFC 822, section 3.1.1 long headers may be separated into
3980                 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
3981                 * To prevent these separators from being replaced with a space, we use the
3982                 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
3983                 */
3984                SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
3985                to_r[i] = ' ';
3986            }
3987            }
3988        } else {
            /* empty address: no copy is made, which the cleanup code at the
             * end detects through to_r == to */
3989            to_r = to;
3990        }
3991    } else {
3992        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
3993        err = 1;
3994    }
3995
3996    /* Subject: */
3997    if (subject != NULL && subject_len >= 0) {
3998        orig_str.no_language = MBSTRG(language);
3999        orig_str.val = (unsigned char *)subject;
4000        orig_str.len = subject_len;
4001        orig_str.no_encoding = MBSTRG(current_internal_encoding);
        /* no usable internal encoding: detect one from the text itself */
4002        if (orig_str.no_encoding == mbfl_no_encoding_invalid
4003            || orig_str.no_encoding == mbfl_no_encoding_pass) {
4004            orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4005        }
        /* NOTE(review): the sizeof() argument is presumably the column
         * offset reserved for the first encoded line - confirm against
         * mbfl_mime_header_encode() */
4006        pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4007        if (pstr != NULL) {
            /* subject_buf remembers the converted buffer so it can be freed
             * at the end */
4008            subject_buf = subject = (char *)pstr->val;
4009        }
4010    } else {
4011        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
4012        err = 1;
4013    }
4014
4015    /* message body */
4016    if (message != NULL) {
4017        orig_str.no_language = MBSTRG(language);
4018        orig_str.val = (unsigned char *)message;
4019        orig_str.len = (unsigned int)message_len;
4020        orig_str.no_encoding = MBSTRG(current_internal_encoding);
4021
4022        if (orig_str.no_encoding == mbfl_no_encoding_invalid
4023            || orig_str.no_encoding == mbfl_no_encoding_pass) {
4024            orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4025        }
4026
4027        pstr = NULL;
4028        {
4029            mbfl_string tmpstr;
4030
            /* two passes: first into the mail charset, then that result,
             * relabelled as 8bit, into the body transfer encoding */
4031            if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4032                tmpstr.no_encoding=mbfl_no_encoding_8bit;
4033                pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4034                efree(tmpstr.val);
4035            }
4036        }
4037        if (pstr != NULL) {
4038            message_buf = message = (char *)pstr->val;
4039        }
4040    } else {
4041        /* this is not really an error, so it is allowed. */
4042        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
4043        message = NULL;
4044    }
4045
4046    /* other headers */
4047#define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4048#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4049#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4050#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
    /* start with the caller's headers, making sure they end in a newline */
4051    if (headers != NULL) {
4052        p = headers;
4053        n = headers_len;
4054        mbfl_memory_device_strncat(&device, p, n);
4055        if (n > 0 && p[n - 1] != '\n') {
4056            mbfl_memory_device_strncat(&device, "\n", 1);
4057        }
4058    }
4059
4060    if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4061        mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4062        mbfl_memory_device_strncat(&device, "\n", 1);
4063    }
4064
4065    if (!suppressed_hdrs.cnt_type) {
4066        mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4067
4068        p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4069        if (p != NULL) {
4070            mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4071            mbfl_memory_device_strcat(&device, p);
4072        }
4073        mbfl_memory_device_strncat(&device, "\n", 1);
4074    }
4075    if (!suppressed_hdrs.cnt_trans_enc) {
4076        mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4077        p = (char *)mbfl_no2preferred_mime_name(body_enc);
4078        if (p == NULL) {
4079            p = "7bit";
4080        }
4081        mbfl_memory_device_strcat(&device, p);
4082        mbfl_memory_device_strncat(&device, "\n", 1);
4083    }
4084
    /* NOTE(review): mbfl_memory_device_unput() presumably drops the last
     * byte written (the trailing newline) before the terminating NUL is
     * added - confirm against libmbfl */
4085    mbfl_memory_device_unput(&device);
4086    mbfl_memory_device_output('\0', &device);
4087    headers = (char *)device.buffer;
4088
    /* the mail.force_extra_parameters ini value, when set, replaces whatever
     * the caller passed; either way the string is shell-escaped */
4089    if (force_extra_parameters) {
4090        extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4091    } else if (extra_cmd) {
4092        extra_cmd = php_escape_shell_cmd(extra_cmd);
4093    }
4094
4095    if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
4096        RETVAL_TRUE;
4097    } else {
4098        RETVAL_FALSE;
4099    }
4100
    /* release everything allocated above */
4101    if (extra_cmd) {
4102        efree(extra_cmd);
4103    }
4104    if (to_r != to) {
4105        efree(to_r);
4106    }
4107    if (subject_buf) {
4108        efree((void *)subject_buf);
4109    }
4110    if (message_buf) {
4111        efree((void *)message_buf);
4112    }
4113    mbfl_memory_device_clear(&device);
4114    zend_hash_destroy(&ht_headers);
4115}
4116
4117#undef SKIP_LONG_HEADER_SEP_MBSTRING
4118#undef MAIL_ASCIIZ_CHECK_MBSTRING
4119#undef APPEND_ONE_CHAR
4120#undef SEPARATE_SMART_STR
4121#undef PHP_MBSTR_MAIL_MIME_HEADER1
4122#undef PHP_MBSTR_MAIL_MIME_HEADER2
4123#undef PHP_MBSTR_MAIL_MIME_HEADER3
4124#undef PHP_MBSTR_MAIL_MIME_HEADER4
4125/* }}} */
4126
4127/* {{{ proto mixed mb_get_info([string type])
4128   Returns the current settings of mbstring */
4129PHP_FUNCTION(mb_get_info)
4130{
4131    char *typ = NULL;
4132    int typ_len, n;
4133    char *name;
4134    const struct mb_overload_def *over_func;
4135    zval *row1, *row2;
4136    const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4137    enum mbfl_no_encoding *entry;
4138#ifdef ZEND_MULTIBYTE
4139    zval *row3;
4140#endif /* ZEND_MULTIBYTE */
4141
4142    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
4143        RETURN_FALSE;
4144    }
4145
4146    if (!typ || !strcasecmp("all", typ)) {
4147        array_init(return_value);
4148        if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
4149            add_assoc_string(return_value, "internal_encoding", name, 1);
4150        }
4151        if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
4152            add_assoc_string(return_value, "http_input", name, 1);
4153        }
4154        if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
4155            add_assoc_string(return_value, "http_output", name, 1);
4156        }
4157        if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4158            add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
4159        }
4160        add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4161        if (MBSTRG(func_overload)){
4162            over_func = &(mb_ovld[0]);
4163            MAKE_STD_ZVAL(row1);
4164            array_init(row1);
4165            while (over_func->type > 0) {
4166                if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4167                    add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
4168                }
4169                over_func++;
4170            }
4171            add_assoc_zval(return_value, "func_overload_list", row1);
4172        } else {
4173            add_assoc_string(return_value, "func_overload_list", "no overload", 1);
4174        }
4175        if (lang != NULL) {
4176            if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4177                add_assoc_string(return_value, "mail_charset", name, 1);
4178            }
4179            if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4180                add_assoc_string(return_value, "mail_header_encoding", name, 1);
4181            }
4182            if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4183                add_assoc_string(return_value, "mail_body_encoding", name, 1);
4184            }
4185        }
4186        add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4187        if (MBSTRG(encoding_translation)) {
4188            add_assoc_string(return_value, "encoding_translation", "On", 1);
4189        } else {
4190            add_assoc_string(return_value, "encoding_translation", "Off", 1);
4191        }
4192        if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4193            add_assoc_string(return_value, "language", name, 1);
4194        }
4195        n = MBSTRG(current_detect_order_list_size);
4196        entry = MBSTRG(current_detect_order_list);
4197        if(n > 0) {
4198            MAKE_STD_ZVAL(row2);
4199            array_init(row2);
4200            while (n > 0) {
4201                if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
4202                    add_next_index_string(row2, name, 1);
4203                }
4204                entry++;
4205                n--;
4206            }
4207            add_assoc_zval(return_value, "detect_order", row2);
4208        }
4209        if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4210            add_assoc_string(return_value, "substitute_character", "none", 1);
4211        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4212            add_assoc_string(return_value, "substitute_character", "long", 1);
4213        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4214            add_assoc_string(return_value, "substitute_character", "entity", 1);
4215        } else {
4216            add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4217        }
4218        if (MBSTRG(strict_detection)) {
4219            add_assoc_string(return_value, "strict_detection", "On", 1);
4220        } else {
4221            add_assoc_string(return_value, "strict_detection", "Off", 1);
4222        }
4223#ifdef ZEND_MULTIBYTE
4224        entry = MBSTRG(script_encoding_list);
4225        n = MBSTRG(script_encoding_list_size);
4226        if(n > 0) {
4227            MAKE_STD_ZVAL(row3);
4228            array_init(row3);
4229            while (n > 0) {
4230                if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
4231                    add_next_index_string(row3, name, 1);
4232                }
4233                entry++;
4234                n--;
4235            }
4236            add_assoc_zval(return_value, "script_encoding", row3);
4237        }
4238#endif /* ZEND_MULTIBYTE */
4239    } else if (!strcasecmp("internal_encoding", typ)) {
4240        if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
4241            RETVAL_STRING(name, 1);
4242        }
4243    } else if (!strcasecmp("http_input", typ)) {
4244        if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
4245            RETVAL_STRING(name, 1);
4246        }
4247    } else if (!strcasecmp("http_output", typ)) {
4248        if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
4249            RETVAL_STRING(name, 1);
4250        }
4251    } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4252        if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4253            RETVAL_STRING(name, 1);
4254        }
4255    } else if (!strcasecmp("func_overload", typ)) {
4256        RETVAL_LONG(MBSTRG(func_overload));
4257    } else if (!strcasecmp("func_overload_list", typ)) {
4258        if (MBSTRG(func_overload)){
4259                over_func = &(mb_ovld[0]);
4260                array_init(return_value);
4261                while (over_func->type > 0) {
4262                    if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4263                        add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
4264                    }
4265                    over_func++;
4266                }
4267        } else {
4268            RETVAL_STRING("no overload", 1);
4269        }
4270    } else if (!strcasecmp("mail_charset", typ)) {
4271        if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4272            RETVAL_STRING(name, 1);
4273        }
4274    } else if (!strcasecmp("mail_header_encoding", typ)) {
4275        if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4276            RETVAL_STRING(name, 1);
4277        }
4278    } else if (!strcasecmp("mail_body_encoding", typ)) {
4279        if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4280            RETVAL_STRING(name, 1);
4281        }
4282    } else if (!strcasecmp("illegal_chars", typ)) {
4283        RETVAL_LONG(MBSTRG(illegalchars));
4284    } else if (!strcasecmp("encoding_translation", typ)) {
4285        if (MBSTRG(encoding_translation)) {
4286            RETVAL_STRING("On", 1);
4287        } else {
4288            RETVAL_STRING("Off", 1);
4289        }
4290    } else if (!strcasecmp("language", typ)) {
4291        if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4292            RETVAL_STRING(name, 1);
4293        }
4294    } else if (!strcasecmp("detect_order", typ)) {
4295        n = MBSTRG(current_detect_order_list_size);
4296        entry = MBSTRG(current_detect_order_list);
4297        if(n > 0) {
4298            array_init(return_value);
4299            while (n > 0) {
4300                name = (char *)mbfl_no_encoding2name(*entry);
4301                if (name) {
4302                    add_next_index_string(return_value, name, 1);
4303                }
4304                entry++;
4305                n--;
4306            }
4307        }
4308    } else if (!strcasecmp("substitute_character", typ)) {
4309        if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4310            RETVAL_STRING("none", 1);
4311        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4312            RETVAL_STRING("long", 1);
4313        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4314            RETVAL_STRING("entity", 1);
4315        } else {
4316            RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4317        }
4318    } else if (!strcasecmp("strict_detection", typ)) {
4319        if (MBSTRG(strict_detection)) {
4320            RETVAL_STRING("On", 1);
4321        } else {
4322            RETVAL_STRING("Off", 1);
4323        }
4324    } else {
4325#ifdef ZEND_MULTIBYTE
4326    if (!strcasecmp("script_encoding", typ)) {
4327        entry = MBSTRG(script_encoding_list);
4328        n = MBSTRG(script_encoding_list_size);
4329        if(n > 0) {
4330            array_init(return_value);
4331            while (n > 0) {
4332                name = (char *)mbfl_no_encoding2name(*entry);
4333                if (name) {
4334                    add_next_index_string(return_value, name, 1);
4335                }
4336                entry++;
4337                n--;
4338            }
4339        }
4340        return;
4341    }
4342#endif /* ZEND_MULTIBYTE */
4343        RETURN_FALSE;
4344    }
4345}
4346/* }}} */
4347
4348/* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4349   Check if the string is valid for the specified encoding */
4350PHP_FUNCTION(mb_check_encoding)
4351{
4352    char *var = NULL;
4353    int var_len;
4354    char *enc = NULL;
4355    int enc_len;
4356    mbfl_buffer_converter *convd;
4357    enum mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding);
4358    mbfl_string string, result, *ret = NULL;
4359    long illegalchars = 0;
4360
4361    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4362        RETURN_FALSE;
4363    }
4364
4365    if (var == NULL) {
4366        RETURN_BOOL(MBSTRG(illegalchars) == 0);
4367    }
4368
4369    if (enc != NULL) {
4370        no_encoding = mbfl_name2no_encoding(enc);
4371        if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) {
4372            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
4373            RETURN_FALSE;
4374        }
4375    }
4376
4377    convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0);
4378    if (convd == NULL) {
4379        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
4380        RETURN_FALSE;
4381    }
4382    mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4383    mbfl_buffer_converter_illegal_substchar(convd, 0);
4384
4385    /* initialize string */
4386    mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding);
4387    mbfl_string_init(&result);
4388
4389    string.val = (unsigned char *)var;
4390    string.len = var_len;
4391    ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4392    illegalchars = mbfl_buffer_illegalchars(convd);
4393    mbfl_buffer_converter_delete(convd);
4394
4395    RETVAL_FALSE;
4396    if (ret != NULL) {
4397        if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4398            RETVAL_TRUE;
4399        }
4400        mbfl_string_clear(&result);
4401    }
4402}
4403/* }}} */
4404
4405/* {{{ MBSTRING_API int php_mb_encoding_translation() */
4406MBSTRING_API int php_mb_encoding_translation(TSRMLS_D)
4407{
4408    return MBSTRG(encoding_translation);
4409}
4410/* }}} */
4411
4412/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
4413MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4414{
4415    if (enc != NULL) {
4416        if (enc->flag & MBFL_ENCTYPE_MBCS) {
4417            if (enc->mblen_table != NULL) {
4418                if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4419            }
4420        } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4421            return 2;
4422        } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4423            return 4;
4424        }
4425    }
4426    return 1;
4427}
4428/* }}} */
4429
4430/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
4431MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
4432{
4433    return php_mb_mbchar_bytes_ex(s,
4434        mbfl_no2encoding(MBSTRG(internal_encoding)));
4435}
4436/* }}} */
4437
4438/* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
4439MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4440{
4441    register const char *p = s;
4442    char *last=NULL;
4443
4444    if (nbytes == (size_t)-1) {
4445        size_t nb = 0;
4446
4447        while (*p != '\0') {
4448            if (nb == 0) {
4449                if ((unsigned char)*p == (unsigned char)c) {
4450                    last = (char *)p;
4451                }
4452                nb = php_mb_mbchar_bytes_ex(p, enc);
4453                if (nb == 0) {
4454                    return NULL; /* something is going wrong! */
4455                }
4456            }
4457            --nb;
4458            ++p;
4459        }
4460    } else {
4461        register size_t bcnt = nbytes;
4462        register size_t nbytes_char;
4463        while (bcnt > 0) {
4464            if ((unsigned char)*p == (unsigned char)c) {
4465                last = (char *)p;
4466            }
4467            nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4468            if (bcnt < nbytes_char) {
4469                return NULL;
4470            }
4471            p += nbytes_char;
4472            bcnt -= nbytes_char;
4473        }
4474    }
4475    return last;
4476}
4477/* }}} */
4478
4479/* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
4480MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
4481{
4482    return php_mb_safe_strrchr_ex(s, c, nbytes,
4483        mbfl_no2encoding(MBSTRG(internal_encoding)));
4484}
4485/* }}} */
4486
4487/* {{{ MBSTRING_API char *php_mb_strrchr() */
4488MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC)
4489{
4490    return php_mb_safe_strrchr(s, c, -1 TSRMLS_CC);
4491}
4492/* }}} */
4493
4494/* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */
4495MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
4496{
4497
4498    if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){
4499        return php_mb_mbchar_bytes_ex(s,
4500            mbfl_no2encoding(MBSTRG(http_input_identify)));
4501    } else {
4502        return php_mb_mbchar_bytes_ex(s,
4503            mbfl_no2encoding(MBSTRG(internal_encoding)));
4504    }
4505}
4506/* }}} */
4507
4508/*  {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
4509MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC)
4510{
4511    int i;
4512    mbfl_string string, result, *ret = NULL;
4513    enum mbfl_no_encoding from_encoding, to_encoding;
4514    mbfl_buffer_converter *convd;
4515
4516    if (encoding_to) {
4517        /* new encoding */
4518        to_encoding = mbfl_name2no_encoding(encoding_to);
4519        if (to_encoding == mbfl_no_encoding_invalid) {
4520            return -1;
4521        }
4522    } else {
4523        to_encoding = MBSTRG(current_internal_encoding);
4524    }
4525    if (encoding_from) {
4526        /* old encoding */
4527        from_encoding = mbfl_name2no_encoding(encoding_from);
4528        if (from_encoding == mbfl_no_encoding_invalid) {
4529            return -1;
4530        }
4531    } else {
4532        from_encoding = MBSTRG(http_input_identify);
4533    }
4534
4535    if (from_encoding == mbfl_no_encoding_pass) {
4536        return 0;
4537    }
4538
4539    /* initialize string */
4540    mbfl_string_init(&string);
4541    mbfl_string_init(&result);
4542    string.no_encoding = from_encoding;
4543    string.no_language = MBSTRG(language);
4544
4545    for (i=0; i<num; i++){
4546        string.val = (unsigned char *)str[i];
4547        string.len = len[i];
4548
4549        /* initialize converter */
4550        convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
4551        if (convd == NULL) {
4552            return -1;
4553        }
4554        mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
4555        mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
4556
4557        /* do it */
4558        ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4559        if (ret != NULL) {
4560            efree(str[i]);
4561            str[i] = (char *)ret->val;
4562            len[i] = (int)ret->len;
4563        }
4564
4565        MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
4566        mbfl_buffer_converter_delete(convd);
4567    }
4568
4569    return ret ? 0 : -1;
4570}
4571/* }}} */
4572
4573/* {{{ MBSTRING_API int php_mb_gpc_encoding_detector()
4574 */
4575MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC)
4576{
4577    mbfl_string string;
4578    enum mbfl_no_encoding *elist;
4579    enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid;
4580    mbfl_encoding_detector *identd = NULL;
4581
4582    int size;
4583    enum mbfl_no_encoding *list;
4584
4585    if (MBSTRG(http_input_list_size) == 1 &&
4586        MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) {
4587        MBSTRG(http_input_identify) = mbfl_no_encoding_pass;
4588        return SUCCESS;
4589    }
4590
4591    if (MBSTRG(http_input_list_size) == 1 &&
4592        MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto &&
4593        mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) {
4594        MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0];
4595        return SUCCESS;
4596    }
4597
4598    if (arg_list && strlen(arg_list)>0) {
4599        /* make encoding list */
4600        list = NULL;
4601        size = 0;
4602        php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
4603
4604        if (size > 0 && list != NULL) {
4605            elist = list;
4606        } else {
4607            elist = MBSTRG(current_detect_order_list);
4608            size = MBSTRG(current_detect_order_list_size);
4609            if (size <= 0){
4610                elist = MBSTRG(default_detect_order_list);
4611                size = MBSTRG(default_detect_order_list_size);
4612            }
4613        }
4614    } else {
4615        elist = MBSTRG(current_detect_order_list);
4616        size = MBSTRG(current_detect_order_list_size);
4617        if (size <= 0){
4618            elist = MBSTRG(default_detect_order_list);
4619            size = MBSTRG(default_detect_order_list_size);
4620        }
4621    }
4622
4623    mbfl_string_init(&string);
4624    string.no_language = MBSTRG(language);
4625
4626    identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection));
4627
4628    if (identd) {
4629        int n = 0;
4630        while(n < num){
4631            string.val = (unsigned char *)arg_string[n];
4632            string.len = arg_length[n];
4633            if (mbfl_encoding_detector_feed(identd, &string)) {
4634                break;
4635            }
4636            n++;
4637        }
4638        encoding = mbfl_encoding_detector_judge(identd);
4639        mbfl_encoding_detector_delete(identd);
4640    }
4641
4642    if (encoding != mbfl_no_encoding_invalid) {
4643        MBSTRG(http_input_identify) = encoding;
4644        return SUCCESS;
4645    } else {
4646        return FAILURE;
4647    }
4648}
4649/* }}} */
4650
4651/* {{{ MBSTRING_API int php_mb_stripos()
4652 */
4653MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
4654{
4655    int n;
4656    mbfl_string haystack, needle;
4657    n = -1;
4658
4659    mbfl_string_init(&haystack);
4660    mbfl_string_init(&needle);
4661    haystack.no_language = MBSTRG(language);
4662    haystack.no_encoding = MBSTRG(current_internal_encoding);
4663    needle.no_language = MBSTRG(language);
4664    needle.no_encoding = MBSTRG(current_internal_encoding);
4665
4666    do {
4667        size_t len = 0;
4668        haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
4669        haystack.len = len;
4670
4671        if (!haystack.val) {
4672            break;
4673        }
4674
4675        if (haystack.len <= 0) {
4676            break;
4677        }
4678
4679        needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
4680        needle.len = len;
4681
4682        if (!needle.val) {
4683            break;
4684        }
4685
4686        if (needle.len <= 0) {
4687            break;
4688        }
4689
4690        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4691        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4692            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4693            break;
4694        }
4695
4696        {
4697            int haystack_char_len = mbfl_strlen(&haystack);
4698
4699            if (mode) {
4700                if ((offset > 0 && offset > haystack_char_len) ||
4701                    (offset < 0 && -offset > haystack_char_len)) {
4702                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
4703                    break;
4704                }
4705            } else {
4706                if (offset < 0 || offset > haystack_char_len) {
4707                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
4708                    break;
4709                }
4710            }
4711        }
4712
4713        n = mbfl_strpos(&haystack, &needle, offset, mode);
4714    } while(0);
4715
4716    if (haystack.val) {
4717        efree(haystack.val);
4718    }
4719
4720    if (needle.val) {
4721        efree(needle.val);
4722    }
4723
4724    return n;
4725}
4726/* }}} */
4727
4728#ifdef ZEND_MULTIBYTE
4729/* {{{ php_mb_set_zend_encoding() */
4730static int php_mb_set_zend_encoding(TSRMLS_D)
4731{
4732    /* 'd better use mbfl_memory_device? */
4733    char *name, *list = NULL;
4734    int n, *entry, list_size = 0;
4735    zend_encoding_detector encoding_detector;
4736    zend_encoding_converter encoding_converter;
4737    zend_encoding_oddlen encoding_oddlen;
4738
4739    /* notify script encoding to Zend Engine */
4740    entry = MBSTRG(script_encoding_list);
4741    n = MBSTRG(script_encoding_list_size);
4742    while (n > 0) {
4743        name = (char *)mbfl_no_encoding2name(*entry);
4744        if (name) {
4745            list_size += strlen(name) + 1;
4746            if (!list) {
4747                list = (char*)emalloc(list_size);
4748                *list = '\0';
4749            } else {
4750                list = (char*)erealloc(list, list_size);
4751                strcat(list, ",");
4752            }
4753            strcat(list, name);
4754        }
4755        entry++;
4756        n--;
4757    }
4758    zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC);
4759    if (list) {
4760        efree(list);
4761    }
4762    encoding_detector = php_mb_encoding_detector;
4763    encoding_converter = php_mb_encoding_converter;
4764    encoding_oddlen = php_mb_oddlen;
4765
4766    /* TODO: make independent from mbstring.encoding_translation? */
4767    if (MBSTRG(encoding_translation)) {
4768        /* notify internal encoding to Zend Engine */
4769        name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
4770        zend_multibyte_set_internal_encoding(name TSRMLS_CC);
4771    }
4772
4773    zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
4774
4775    return 0;
4776}
4777/* }}} */
4778
4779/* {{{ char *php_mb_encoding_detector()
4780 * Interface for Zend Engine
4781 */
4782static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC)
4783{
4784    mbfl_string string;
4785    const char *ret;
4786    enum mbfl_no_encoding *elist;
4787    int size, *list;
4788
4789    /* make encoding list */
4790    list = NULL;
4791    size = 0;
4792    php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
4793    if (size <= 0) {
4794        return NULL;
4795    }
4796    if (size > 0 && list != NULL) {
4797        elist = list;
4798    } else {
4799        elist = MBSTRG(current_detect_order_list);
4800        size = MBSTRG(current_detect_order_list_size);
4801    }
4802
4803    mbfl_string_init(&string);
4804    string.no_language = MBSTRG(language);
4805    string.val = (unsigned char *)arg_string;
4806    string.len = arg_length;
4807    ret = mbfl_identify_encoding_name(&string, elist, size, 0);
4808    if (list != NULL) {
4809        efree((void *)list);
4810    }
4811    if (ret != NULL) {
4812        return estrdup(ret);
4813    } else {
4814        return NULL;
4815    }
4816}
4817/* }}} */
4818
4819/*  {{{ int php_mb_encoding_converter() */
4820static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC)
4821{
4822    mbfl_string string, result, *ret;
4823    enum mbfl_no_encoding from_encoding, to_encoding;
4824    mbfl_buffer_converter *convd;
4825
4826    /* new encoding */
4827    to_encoding = mbfl_name2no_encoding(encoding_to);
4828    if (to_encoding == mbfl_no_encoding_invalid) {
4829        return -1;
4830    }
4831    /* old encoding */
4832    from_encoding = mbfl_name2no_encoding(encoding_from);
4833    if (from_encoding == mbfl_no_encoding_invalid) {
4834        return -1;
4835    }
4836    /* initialize string */
4837    mbfl_string_init(&string);
4838    mbfl_string_init(&result);
4839    string.no_encoding = from_encoding;
4840    string.no_language = MBSTRG(language);
4841    string.val = (unsigned char*)from;
4842    string.len = from_length;
4843
4844    /* initialize converter */
4845    convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
4846    if (convd == NULL) {
4847        return -1;
4848    }
4849    mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
4850    mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
4851
4852    /* do it */
4853    ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4854    if (ret != NULL) {
4855        *to = ret->val;
4856        *to_length = ret->len;
4857    }
4858
4859    MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
4860    mbfl_buffer_converter_delete(convd);
4861
4862    return ret ? 0 : -1;
4863}
4864/* }}} */
4865
4866/* {{{ int php_mb_oddlen()
4867 *  returns number of odd (e.g. appears only first byte of multibyte
4868 *  character) chars
4869 */
4870static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
4871{
4872    mbfl_string mb_string;
4873
4874    mbfl_string_init(&mb_string);
4875    mb_string.no_language = MBSTRG(language);
4876    mb_string.no_encoding = mbfl_name2no_encoding(encoding);
4877    mb_string.val = (unsigned char *)string;
4878    mb_string.len = length;
4879
4880    if (mb_string.no_encoding == mbfl_no_encoding_invalid) {
4881        return 0;
4882    }
4883    return mbfl_oddlen(&mb_string);
4884}
4885/* }}} */
4886#endif /* ZEND_MULTIBYTE */
4887
4888#endif  /* HAVE_MBSTRING */
4889
4890/*
4891 * Local variables:
4892 * tab-width: 4
4893 * c-basic-offset: 4
4894 * End:
4895 * vim600: fdm=marker
4896 * vim: noet sw=4 ts=4
4897 */
4898