1/*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2014 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
16   |         Rui Hirokawa <hirokawa@php.net>                              |
17   +----------------------------------------------------------------------+
18 */
19
20/* $Id$ */
21
22/*
23 * PHP 4 Multibyte String module "mbstring"
24 *
25 * History:
26 *   2000.5.19  Release php-4.0RC2_jstring-1.0
27 *   2001.4.1   Release php4_jstring-1.0.91
28 *   2001.4.30  Release php4_jstring-1.1 (contribute to The PHP Group)
29 *   2001.5.1   Renamed from jstring to mbstring (hirokawa@php.net)
30 */
31
32/*
33 * PHP3 Internationalization support program.
34 *
35 * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36 * All rights reserved.
37 *
38 * See README_PHP3-i18n-ja for more detail.
39 *
40 * Authors:
41 *    Hironori Sato <satoh@jpnnet.com>
42 *    Shigeru Kanemoto <sgk@happysize.co.jp>
43 *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44 *    Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45 */
46
47/* {{{ includes */
48#ifdef HAVE_CONFIG_H
49#include "config.h"
50#endif
51
52#include "php.h"
53#include "php_ini.h"
54#include "php_variables.h"
55#include "mbstring.h"
56#include "ext/standard/php_string.h"
57#include "ext/standard/php_mail.h"
58#include "ext/standard/exec.h"
59#include "ext/standard/php_smart_str.h"
60#include "ext/standard/url.h"
61#include "main/php_output.h"
62#include "ext/standard/info.h"
63
64#include "libmbfl/mbfl/mbfl_allocators.h"
65#include "libmbfl/mbfl/mbfilter_pass.h"
66
67#include "php_variables.h"
68#include "php_globals.h"
69#include "rfc1867.h"
70#include "php_content_types.h"
71#include "SAPI.h"
72#include "php_unicode.h"
73#include "TSRM.h"
74
75#include "mb_gpc.h"
76
77#if HAVE_MBREGEX
78#include "php_mbregex.h"
79#endif
80
81#include "zend_multibyte.h"
82
83#if HAVE_ONIG
84#include "php_onig_compat.h"
85#include <oniguruma.h>
86#undef UChar
87#elif HAVE_PCRE || HAVE_BUNDLED_PCRE
88#include "ext/pcre/php_pcre.h"
89#endif
90/* }}} */
91
92#if HAVE_MBSTRING
93
94/* {{{ prototypes */
95ZEND_DECLARE_MODULE_GLOBALS(mbstring)
96
97static PHP_GINIT_FUNCTION(mbstring);
98static PHP_GSHUTDOWN_FUNCTION(mbstring);
99
100static void php_mb_populate_current_detect_order_list(TSRMLS_D);
101
102static int php_mb_encoding_translation(TSRMLS_D);
103
104static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC);
105
106static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC);
107
108/* }}} */
109
110/* {{{ php_mb_default_identify_list */
111typedef struct _php_mb_nls_ident_list {
112    enum mbfl_no_language lang;
113    const enum mbfl_no_encoding *list;
114    size_t list_size;
115} php_mb_nls_ident_list;
116
117static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
118    mbfl_no_encoding_ascii,
119    mbfl_no_encoding_jis,
120    mbfl_no_encoding_utf8,
121    mbfl_no_encoding_euc_jp,
122    mbfl_no_encoding_sjis
123};
124
125static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
126    mbfl_no_encoding_ascii,
127    mbfl_no_encoding_utf8,
128    mbfl_no_encoding_euc_cn,
129    mbfl_no_encoding_cp936
130};
131
132static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
133    mbfl_no_encoding_ascii,
134    mbfl_no_encoding_utf8,
135    mbfl_no_encoding_euc_tw,
136    mbfl_no_encoding_big5
137};
138
139static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
140    mbfl_no_encoding_ascii,
141    mbfl_no_encoding_utf8,
142    mbfl_no_encoding_euc_kr,
143    mbfl_no_encoding_uhc
144};
145
146static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
147    mbfl_no_encoding_ascii,
148    mbfl_no_encoding_utf8,
149    mbfl_no_encoding_koi8r,
150    mbfl_no_encoding_cp1251,
151    mbfl_no_encoding_cp866
152};
153
154static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
155    mbfl_no_encoding_ascii,
156    mbfl_no_encoding_utf8,
157    mbfl_no_encoding_armscii8
158};
159
160static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
161    mbfl_no_encoding_ascii,
162    mbfl_no_encoding_utf8,
163    mbfl_no_encoding_cp1254,
164    mbfl_no_encoding_8859_9
165};
166
167static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
168    mbfl_no_encoding_ascii,
169    mbfl_no_encoding_utf8,
170    mbfl_no_encoding_koi8u
171};
172
173static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
174    mbfl_no_encoding_ascii,
175    mbfl_no_encoding_utf8
176};
177
178
179static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
180    { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
181    { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
182    { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
183    { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
184    { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
185    { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
186    { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
187    { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
188    { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
189};
190
191/* }}} */
192
193/* {{{ mb_overload_def mb_ovld[] */
194static const struct mb_overload_def mb_ovld[] = {
195    {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
196    {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
197    {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
198    {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
199    {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
200    {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
201    {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
202    {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
203    {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
204    {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
205    {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
206    {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
207    {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
208#if HAVE_MBREGEX
209    {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
210    {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
211    {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
212    {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
213    {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
214#endif
215    {0, NULL, NULL, NULL}
216};
217/* }}} */
218
219/* {{{ arginfo */
220ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
221    ZEND_ARG_INFO(0, language)
222ZEND_END_ARG_INFO()
223
224ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
225    ZEND_ARG_INFO(0, encoding)
226ZEND_END_ARG_INFO()
227
228ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
229    ZEND_ARG_INFO(0, type)
230ZEND_END_ARG_INFO()
231
232ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
233    ZEND_ARG_INFO(0, encoding)
234ZEND_END_ARG_INFO()
235
236ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
237    ZEND_ARG_INFO(0, encoding)
238ZEND_END_ARG_INFO()
239
240ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
241    ZEND_ARG_INFO(0, substchar)
242ZEND_END_ARG_INFO()
243
244ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
245    ZEND_ARG_INFO(0, encoding)
246ZEND_END_ARG_INFO()
247
248ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
249    ZEND_ARG_INFO(0, encoded_string)
250    ZEND_ARG_INFO(1, result)
251ZEND_END_ARG_INFO()
252
253ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
254    ZEND_ARG_INFO(0, contents)
255    ZEND_ARG_INFO(0, status)
256ZEND_END_ARG_INFO()
257
258ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
259    ZEND_ARG_INFO(0, str)
260    ZEND_ARG_INFO(0, encoding)
261ZEND_END_ARG_INFO()
262
263ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
264    ZEND_ARG_INFO(0, haystack)
265    ZEND_ARG_INFO(0, needle)
266    ZEND_ARG_INFO(0, offset)
267    ZEND_ARG_INFO(0, encoding)
268ZEND_END_ARG_INFO()
269
270ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
271    ZEND_ARG_INFO(0, haystack)
272    ZEND_ARG_INFO(0, needle)
273    ZEND_ARG_INFO(0, offset)
274    ZEND_ARG_INFO(0, encoding)
275ZEND_END_ARG_INFO()
276
277ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
278    ZEND_ARG_INFO(0, haystack)
279    ZEND_ARG_INFO(0, needle)
280    ZEND_ARG_INFO(0, offset)
281    ZEND_ARG_INFO(0, encoding)
282ZEND_END_ARG_INFO()
283
284ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
285    ZEND_ARG_INFO(0, haystack)
286    ZEND_ARG_INFO(0, needle)
287    ZEND_ARG_INFO(0, offset)
288    ZEND_ARG_INFO(0, encoding)
289ZEND_END_ARG_INFO()
290
291ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
292    ZEND_ARG_INFO(0, haystack)
293    ZEND_ARG_INFO(0, needle)
294    ZEND_ARG_INFO(0, part)
295    ZEND_ARG_INFO(0, encoding)
296ZEND_END_ARG_INFO()
297
298ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
299    ZEND_ARG_INFO(0, haystack)
300    ZEND_ARG_INFO(0, needle)
301    ZEND_ARG_INFO(0, part)
302    ZEND_ARG_INFO(0, encoding)
303ZEND_END_ARG_INFO()
304
305ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
306    ZEND_ARG_INFO(0, haystack)
307    ZEND_ARG_INFO(0, needle)
308    ZEND_ARG_INFO(0, part)
309    ZEND_ARG_INFO(0, encoding)
310ZEND_END_ARG_INFO()
311
312ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
313    ZEND_ARG_INFO(0, haystack)
314    ZEND_ARG_INFO(0, needle)
315    ZEND_ARG_INFO(0, part)
316    ZEND_ARG_INFO(0, encoding)
317ZEND_END_ARG_INFO()
318
319ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
320    ZEND_ARG_INFO(0, haystack)
321    ZEND_ARG_INFO(0, needle)
322    ZEND_ARG_INFO(0, encoding)
323ZEND_END_ARG_INFO()
324
325ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
326    ZEND_ARG_INFO(0, str)
327    ZEND_ARG_INFO(0, start)
328    ZEND_ARG_INFO(0, length)
329    ZEND_ARG_INFO(0, encoding)
330ZEND_END_ARG_INFO()
331
332ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
333    ZEND_ARG_INFO(0, str)
334    ZEND_ARG_INFO(0, start)
335    ZEND_ARG_INFO(0, length)
336    ZEND_ARG_INFO(0, encoding)
337ZEND_END_ARG_INFO()
338
339ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
340    ZEND_ARG_INFO(0, str)
341    ZEND_ARG_INFO(0, encoding)
342ZEND_END_ARG_INFO()
343
344ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
345    ZEND_ARG_INFO(0, str)
346    ZEND_ARG_INFO(0, start)
347    ZEND_ARG_INFO(0, width)
348    ZEND_ARG_INFO(0, trimmarker)
349    ZEND_ARG_INFO(0, encoding)
350ZEND_END_ARG_INFO()
351
352ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
353    ZEND_ARG_INFO(0, str)
354    ZEND_ARG_INFO(0, to)
355    ZEND_ARG_INFO(0, from)
356ZEND_END_ARG_INFO()
357
358ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
359    ZEND_ARG_INFO(0, sourcestring)
360    ZEND_ARG_INFO(0, mode)
361    ZEND_ARG_INFO(0, encoding)
362ZEND_END_ARG_INFO()
363
364ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
365    ZEND_ARG_INFO(0, sourcestring)
366    ZEND_ARG_INFO(0, encoding)
367ZEND_END_ARG_INFO()
368
369ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
370    ZEND_ARG_INFO(0, sourcestring)
371    ZEND_ARG_INFO(0, encoding)
372ZEND_END_ARG_INFO()
373
374ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
375    ZEND_ARG_INFO(0, str)
376    ZEND_ARG_INFO(0, encoding_list)
377    ZEND_ARG_INFO(0, strict)
378ZEND_END_ARG_INFO()
379
380ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
381ZEND_END_ARG_INFO()
382
383ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
384    ZEND_ARG_INFO(0, encoding)
385ZEND_END_ARG_INFO()
386
387ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
388    ZEND_ARG_INFO(0, str)
389    ZEND_ARG_INFO(0, charset)
390    ZEND_ARG_INFO(0, transfer)
391    ZEND_ARG_INFO(0, linefeed)
392    ZEND_ARG_INFO(0, indent)
393ZEND_END_ARG_INFO()
394
395ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
396    ZEND_ARG_INFO(0, string)
397ZEND_END_ARG_INFO()
398
399ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
400    ZEND_ARG_INFO(0, str)
401    ZEND_ARG_INFO(0, option)
402    ZEND_ARG_INFO(0, encoding)
403ZEND_END_ARG_INFO()
404
405ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 1, 0, 3)
406    ZEND_ARG_INFO(0, to)
407    ZEND_ARG_INFO(0, from)
408    ZEND_ARG_INFO(1, ...)
409ZEND_END_ARG_INFO()
410
411ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
412    ZEND_ARG_INFO(0, string)
413    ZEND_ARG_INFO(0, convmap)
414    ZEND_ARG_INFO(0, encoding)
415    ZEND_ARG_INFO(0, is_hex)
416ZEND_END_ARG_INFO()
417
418ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
419    ZEND_ARG_INFO(0, string)
420    ZEND_ARG_INFO(0, convmap)
421    ZEND_ARG_INFO(0, encoding)
422ZEND_END_ARG_INFO()
423
424ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
425    ZEND_ARG_INFO(0, to)
426    ZEND_ARG_INFO(0, subject)
427    ZEND_ARG_INFO(0, message)
428    ZEND_ARG_INFO(0, additional_headers)
429    ZEND_ARG_INFO(0, additional_parameters)
430ZEND_END_ARG_INFO()
431
432ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
433    ZEND_ARG_INFO(0, type)
434ZEND_END_ARG_INFO()
435
436ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
437    ZEND_ARG_INFO(0, var)
438    ZEND_ARG_INFO(0, encoding)
439ZEND_END_ARG_INFO()
440
441ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
442    ZEND_ARG_INFO(0, encoding)
443ZEND_END_ARG_INFO()
444
445ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
446    ZEND_ARG_INFO(0, pattern)
447    ZEND_ARG_INFO(0, string)
448    ZEND_ARG_INFO(1, registers)
449ZEND_END_ARG_INFO()
450
451ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
452    ZEND_ARG_INFO(0, pattern)
453    ZEND_ARG_INFO(0, string)
454    ZEND_ARG_INFO(1, registers)
455ZEND_END_ARG_INFO()
456
457ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
458    ZEND_ARG_INFO(0, pattern)
459    ZEND_ARG_INFO(0, replacement)
460    ZEND_ARG_INFO(0, string)
461    ZEND_ARG_INFO(0, option)
462ZEND_END_ARG_INFO()
463
464ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
465    ZEND_ARG_INFO(0, pattern)
466    ZEND_ARG_INFO(0, replacement)
467    ZEND_ARG_INFO(0, string)
468ZEND_END_ARG_INFO()
469
470ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
471    ZEND_ARG_INFO(0, pattern)
472    ZEND_ARG_INFO(0, callback)
473    ZEND_ARG_INFO(0, string)
474    ZEND_ARG_INFO(0, option)
475ZEND_END_ARG_INFO()
476
477ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
478    ZEND_ARG_INFO(0, pattern)
479    ZEND_ARG_INFO(0, string)
480    ZEND_ARG_INFO(0, limit)
481ZEND_END_ARG_INFO()
482
483ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
484    ZEND_ARG_INFO(0, pattern)
485    ZEND_ARG_INFO(0, string)
486    ZEND_ARG_INFO(0, option)
487ZEND_END_ARG_INFO()
488
489ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
490    ZEND_ARG_INFO(0, pattern)
491    ZEND_ARG_INFO(0, option)
492ZEND_END_ARG_INFO()
493
494ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
495    ZEND_ARG_INFO(0, pattern)
496    ZEND_ARG_INFO(0, option)
497ZEND_END_ARG_INFO()
498
499ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
500    ZEND_ARG_INFO(0, pattern)
501    ZEND_ARG_INFO(0, option)
502ZEND_END_ARG_INFO()
503
504ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
505    ZEND_ARG_INFO(0, string)
506    ZEND_ARG_INFO(0, pattern)
507    ZEND_ARG_INFO(0, option)
508ZEND_END_ARG_INFO()
509
510ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
511ZEND_END_ARG_INFO()
512
513ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
514ZEND_END_ARG_INFO()
515
516ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
517    ZEND_ARG_INFO(0, position)
518ZEND_END_ARG_INFO()
519
520ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
521    ZEND_ARG_INFO(0, options)
522ZEND_END_ARG_INFO()
523/* }}} */
524
525/* {{{ zend_function_entry mbstring_functions[] */
526const zend_function_entry mbstring_functions[] = {
527    PHP_FE(mb_convert_case,         arginfo_mb_convert_case)
528    PHP_FE(mb_strtoupper,           arginfo_mb_strtoupper)
529    PHP_FE(mb_strtolower,           arginfo_mb_strtolower)
530    PHP_FE(mb_language,             arginfo_mb_language)
531    PHP_FE(mb_internal_encoding,    arginfo_mb_internal_encoding)
532    PHP_FE(mb_http_input,           arginfo_mb_http_input)
533    PHP_FE(mb_http_output,          arginfo_mb_http_output)
534    PHP_FE(mb_detect_order,         arginfo_mb_detect_order)
535    PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
536    PHP_FE(mb_parse_str,            arginfo_mb_parse_str)
537    PHP_FE(mb_output_handler,       arginfo_mb_output_handler)
538    PHP_FE(mb_preferred_mime_name,  arginfo_mb_preferred_mime_name)
539    PHP_FE(mb_strlen,               arginfo_mb_strlen)
540    PHP_FE(mb_strpos,               arginfo_mb_strpos)
541    PHP_FE(mb_strrpos,              arginfo_mb_strrpos)
542    PHP_FE(mb_stripos,              arginfo_mb_stripos)
543    PHP_FE(mb_strripos,             arginfo_mb_strripos)
544    PHP_FE(mb_strstr,               arginfo_mb_strstr)
545    PHP_FE(mb_strrchr,              arginfo_mb_strrchr)
546    PHP_FE(mb_stristr,              arginfo_mb_stristr)
547    PHP_FE(mb_strrichr,             arginfo_mb_strrichr)
548    PHP_FE(mb_substr_count,         arginfo_mb_substr_count)
549    PHP_FE(mb_substr,               arginfo_mb_substr)
550    PHP_FE(mb_strcut,               arginfo_mb_strcut)
551    PHP_FE(mb_strwidth,             arginfo_mb_strwidth)
552    PHP_FE(mb_strimwidth,           arginfo_mb_strimwidth)
553    PHP_FE(mb_convert_encoding,     arginfo_mb_convert_encoding)
554    PHP_FE(mb_detect_encoding,      arginfo_mb_detect_encoding)
555    PHP_FE(mb_list_encodings,       arginfo_mb_list_encodings)
556    PHP_FE(mb_encoding_aliases,     arginfo_mb_encoding_aliases)
557    PHP_FE(mb_convert_kana,         arginfo_mb_convert_kana)
558    PHP_FE(mb_encode_mimeheader,    arginfo_mb_encode_mimeheader)
559    PHP_FE(mb_decode_mimeheader,    arginfo_mb_decode_mimeheader)
560    PHP_FE(mb_convert_variables,    arginfo_mb_convert_variables)
561    PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
562    PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
563    PHP_FE(mb_send_mail,            arginfo_mb_send_mail)
564    PHP_FE(mb_get_info,             arginfo_mb_get_info)
565    PHP_FE(mb_check_encoding,       arginfo_mb_check_encoding)
566#if HAVE_MBREGEX
567    PHP_MBREGEX_FUNCTION_ENTRIES
568#endif
569    PHP_FE_END
570};
571/* }}} */
572
573/* {{{ zend_module_entry mbstring_module_entry */
574zend_module_entry mbstring_module_entry = {
575    STANDARD_MODULE_HEADER,
576    "mbstring",
577    mbstring_functions,
578    PHP_MINIT(mbstring),
579    PHP_MSHUTDOWN(mbstring),
580    PHP_RINIT(mbstring),
581    PHP_RSHUTDOWN(mbstring),
582    PHP_MINFO(mbstring),
583    NO_VERSION_YET,
584    PHP_MODULE_GLOBALS(mbstring),
585    PHP_GINIT(mbstring),
586    PHP_GSHUTDOWN(mbstring),
587    NULL,
588    STANDARD_MODULE_PROPERTIES_EX
589};
590/* }}} */
591
592/* {{{ static sapi_post_entry php_post_entries[] */
593static sapi_post_entry php_post_entries[] = {
594    { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
595    { MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
596    { NULL, 0, NULL, NULL }
597};
598/* }}} */
599
/* Emit the get_module() entry point only for a dynamically loadable build. */
#if defined(COMPILE_DL_MBSTRING)
ZEND_GET_MODULE(mbstring)
#endif
603
604/* {{{ allocators */
/* libmbfl malloc hook: hands the request to emalloc(). */
static void *_php_mb_allocators_malloc(unsigned int sz)
{
    void *block = emalloc(sz);

    return block;
}
609
/* libmbfl realloc hook: hands the request to erealloc(). */
static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
{
    void *block = erealloc(ptr, sz);

    return block;
}
614
/* libmbfl calloc hook: hands the request to ecalloc(). */
static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
{
    void *block = ecalloc(nelems, szelem);

    return block;
}
619
/* libmbfl free hook: releases the block with efree(). */
static void _php_mb_allocators_free(void *ptr)
{
    efree(ptr);
    return;
}
624
/* libmbfl persistent malloc hook: pemalloc() with the persistent flag set. */
static void *_php_mb_allocators_pmalloc(unsigned int sz)
{
    void *block = pemalloc(sz, 1);

    return block;
}
629
/* libmbfl persistent realloc hook: perealloc() with the persistent flag set. */
static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
{
    void *block = perealloc(ptr, sz, 1);

    return block;
}
634
/* libmbfl persistent free hook: pefree() with the persistent flag set. */
static void _php_mb_allocators_pfree(void *ptr)
{
    pefree(ptr, 1);
    return;
}
639
640static mbfl_allocators _php_mb_allocators = {
641    _php_mb_allocators_malloc,
642    _php_mb_allocators_realloc,
643    _php_mb_allocators_calloc,
644    _php_mb_allocators_free,
645    _php_mb_allocators_pmalloc,
646    _php_mb_allocators_prealloc,
647    _php_mb_allocators_pfree
648};
649/* }}} */
650
651/* {{{ static sapi_post_entry mbstr_post_entries[] */
652static sapi_post_entry mbstr_post_entries[] = {
653    { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
654    { MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
655    { NULL, 0, NULL, NULL }
656};
657/* }}} */
658
659/* {{{ static int php_mb_parse_encoding_list()
660 *  Return 0 if input contains any illegal encoding, otherwise 1.
661 *  Even if any illegal encoding is detected the result may contain a list
662 *  of parsed encodings.
663 */
664static int
665php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
666{
667    int size, bauto, ret = SUCCESS;
668    size_t n;
669    char *p, *p1, *p2, *endp, *tmpstr;
670    const mbfl_encoding **entry, **list;
671
672    list = NULL;
673    if (value == NULL || value_length <= 0) {
674        if (return_list) {
675            *return_list = NULL;
676        }
677        if (return_size) {
678            *return_size = 0;
679        }
680        return FAILURE;
681    } else {
682        /* copy the value string for work */
683        if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
684            tmpstr = (char *)estrndup(value+1, value_length-2);
685            value_length -= 2;
686        }
687        else
688            tmpstr = (char *)estrndup(value, value_length);
689        if (tmpstr == NULL) {
690            return FAILURE;
691        }
692        /* count the number of listed encoding names */
693        endp = tmpstr + value_length;
694        n = 1;
695        p1 = tmpstr;
696        while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
697            p1 = p2 + 1;
698            n++;
699        }
700        size = n + MBSTRG(default_detect_order_list_size);
701        /* make list */
702        list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
703        if (list != NULL) {
704            entry = list;
705            n = 0;
706            bauto = 0;
707            p1 = tmpstr;
708            do {
709                p2 = p = php_memnstr(p1, ",", 1, endp);
710                if (p == NULL) {
711                    p = endp;
712                }
713                *p = '\0';
714                /* trim spaces */
715                while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
716                    p1++;
717                }
718                p--;
719                while (p > p1 && (*p == ' ' || *p == '\t')) {
720                    *p = '\0';
721                    p--;
722                }
723                /* convert to the encoding number and check encoding */
724                if (strcasecmp(p1, "auto") == 0) {
725                    if (!bauto) {
726                        const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
727                        const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
728                        size_t i;
729                        bauto = 1;
730                        for (i = 0; i < identify_list_size; i++) {
731                            *entry++ = mbfl_no2encoding(*src++);
732                            n++;
733                        }
734                    }
735                } else {
736                    const mbfl_encoding *encoding = mbfl_name2encoding(p1);
737                    if (encoding) {
738                        *entry++ = encoding;
739                        n++;
740                    } else {
741                        ret = 0;
742                    }
743                }
744                p1 = p2 + 1;
745            } while (n < size && p2 != NULL);
746            if (n > 0) {
747                if (return_list) {
748                    *return_list = list;
749                } else {
750                    pefree(list, persistent);
751                }
752            } else {
753                pefree(list, persistent);
754                if (return_list) {
755                    *return_list = NULL;
756                }
757                ret = 0;
758            }
759            if (return_size) {
760                *return_size = n;
761            }
762        } else {
763            if (return_list) {
764                *return_list = NULL;
765            }
766            if (return_size) {
767                *return_size = 0;
768            }
769            ret = 0;
770        }
771        efree(tmpstr);
772    }
773
774    return ret;
775}
776/* }}} */
777
778/* {{{ static int php_mb_parse_encoding_array()
779 *  Return 0 if input contains any illegal encoding, otherwise 1.
780 *  Even if any illegal encoding is detected the result may contain a list
781 *  of parsed encodings.
782 */
783static int
784php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
785{
786    zval **hash_entry;
787    HashTable *target_hash;
788    int i, n, size, bauto, ret = SUCCESS;
789    const mbfl_encoding **list, **entry;
790
791    list = NULL;
792    if (Z_TYPE_P(array) == IS_ARRAY) {
793        target_hash = Z_ARRVAL_P(array);
794        zend_hash_internal_pointer_reset(target_hash);
795        i = zend_hash_num_elements(target_hash);
796        size = i + MBSTRG(default_detect_order_list_size);
797        list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
798        if (list != NULL) {
799            entry = list;
800            bauto = 0;
801            n = 0;
802            while (i > 0) {
803                if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
804                    break;
805                }
806                convert_to_string_ex(hash_entry);
807                if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
808                    if (!bauto) {
809                        const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
810                        const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
811                        size_t j;
812
813                        bauto = 1;
814                        for (j = 0; j < identify_list_size; j++) {
815                            *entry++ = mbfl_no2encoding(*src++);
816                            n++;
817                        }
818                    }
819                } else {
820                    const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry));
821                    if (encoding) {
822                        *entry++ = encoding;
823                        n++;
824                    } else {
825                        ret = FAILURE;
826                    }
827                }
828                zend_hash_move_forward(target_hash);
829                i--;
830            }
831            if (n > 0) {
832                if (return_list) {
833                    *return_list = list;
834                } else {
835                    pefree(list, persistent);
836                }
837            } else {
838                pefree(list, persistent);
839                if (return_list) {
840                    *return_list = NULL;
841                }
842                ret = FAILURE;
843            }
844            if (return_size) {
845                *return_size = n;
846            }
847        } else {
848            if (return_list) {
849                *return_list = NULL;
850            }
851            if (return_size) {
852                *return_size = 0;
853            }
854            ret = FAILURE;
855        }
856    }
857
858    return ret;
859}
860/* }}} */
861
862/* {{{ zend_multibyte interface */
863static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC)
864{
865    return (const zend_encoding*)mbfl_name2encoding(encoding_name);
866}
867
868static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
869{
870    return ((const mbfl_encoding *)encoding)->name;
871}
872
873static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
874{
875    const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
876    if (encoding->flag & MBFL_ENCTYPE_SBCS) {
877        return 1;
878    }
879    if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
880        return 1;
881    }
882    return 0;
883}
884
885static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC)
886{
887    mbfl_string string;
888
889    if (!list) {
890        list = (const zend_encoding **)MBSTRG(current_detect_order_list);
891        list_size = MBSTRG(current_detect_order_list_size);
892    }
893
894    mbfl_string_init(&string);
895    string.no_language = MBSTRG(language);
896    string.val = (unsigned char *)arg_string;
897    string.len = arg_length;
898    return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0);
899}
900
901static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC)
902{
903    mbfl_string string, result;
904    mbfl_buffer_converter *convd;
905    int status, loc;
906
907    /* new encoding */
908    /* initialize string */
909    mbfl_string_init(&string);
910    mbfl_string_init(&result);
911    string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding;
912    string.no_language = MBSTRG(language);
913    string.val = (unsigned char*)from;
914    string.len = from_length;
915
916    /* initialize converter */
917    convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
918    if (convd == NULL) {
919        return -1;
920    }
921    mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
922    mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
923
924    /* do it */
925    status = mbfl_buffer_converter_feed2(convd, &string, &loc);
926    if (status) {
927        mbfl_buffer_converter_delete(convd);
928        return (size_t)-1;
929    }
930
931    mbfl_buffer_converter_flush(convd);
932    if (!mbfl_buffer_converter_result(convd, &result)) {
933        mbfl_buffer_converter_delete(convd);
934        return (size_t)-1;
935    }
936
937    *to = result.val;
938    *to_length = result.len;
939
940    mbfl_buffer_converter_delete(convd);
941
942    return loc;
943}
944
945static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
946{
947    return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent TSRMLS_CC);
948}
949
950static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D)
951{
952    return (const zend_encoding *)MBSTRG(internal_encoding);
953}
954
955static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC)
956{
957    MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
958    return SUCCESS;
959}
960
/* Table of the zend_multibyte callbacks implemented above, initialised
 * positionally. The field names of zend_multibyte_functions are declared
 * elsewhere; the per-entry notes below are derived from the callback names
 * only -- NOTE(review): confirm the order against the struct declaration. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
    "mbstring",                                       /* string literal identifying this provider */
    php_mb_zend_encoding_fetcher,                     /* name -> encoding lookup */
    php_mb_zend_encoding_name_getter,                 /* encoding -> name */
    php_mb_zend_encoding_lexer_compatibility_checker, /* lexer compatibility test */
    php_mb_zend_encoding_detector,                    /* encoding detection */
    php_mb_zend_encoding_converter,                   /* encoding conversion */
    php_mb_zend_encoding_list_parser,                 /* encoding list parsing */
    php_mb_zend_internal_encoding_getter,             /* read internal encoding */
    php_mb_zend_internal_encoding_setter              /* write internal encoding */
};
972/* }}} */
973
974static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
975static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
976static void _php_mb_free_regex(void *opaque);
977
978#if HAVE_ONIG
979/* {{{ _php_mb_compile_regex */
980static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
981{
982    php_mb_regex_t *retval;
983    OnigErrorInfo err_info;
984    int err_code;
985
986    if ((err_code = onig_new(&retval,
987            (const OnigUChar *)pattern,
988            (const OnigUChar *)pattern + strlen(pattern),
989            ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
990            ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
991        OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
992        onig_error_code_to_str(err_str, err_code, err_info);
993        php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
994        retval = NULL;
995    }
996    return retval;
997}
998/* }}} */
999
1000/* {{{ _php_mb_match_regex */
1001static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1002{
1003    return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1004            (const OnigUChar*)str + str_len, (const OnigUChar *)str,
1005            (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1006}
1007/* }}} */
1008
1009/* {{{ _php_mb_free_regex */
1010static void _php_mb_free_regex(void *opaque)
1011{
1012    onig_free((php_mb_regex_t *)opaque);
1013}
1014/* }}} */
1015#elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1016/* {{{ _php_mb_compile_regex */
1017static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
1018{
1019    pcre *retval;
1020    const char *err_str;
1021    int err_offset;
1022
1023    if (!(retval = pcre_compile(pattern,
1024            PCRE_CASELESS, &err_str, &err_offset, NULL))) {
1025        php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
1026    }
1027    return retval;
1028}
1029/* }}} */
1030
1031/* {{{ _php_mb_match_regex */
1032static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1033{
1034    return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1035            0, NULL, 0) >= 0;
1036}
1037/* }}} */
1038
1039/* {{{ _php_mb_free_regex */
/* Release a pattern obtained from the PCRE flavour of
 * _php_mb_compile_regex() by passing it to pcre_free. */
static void _php_mb_free_regex(void *opaque)
{
    void *compiled = opaque;

    pcre_free(compiled);
}
1044/* }}} */
1045#endif
1046
1047/* {{{ php_mb_nls_get_default_detect_order_list */
1048static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1049{
1050    size_t i;
1051
1052    *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1053    *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1054
1055    for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1056        if (php_mb_default_identify_list[i].lang == lang) {
1057            *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1058            *plist_size = php_mb_default_identify_list[i].list_size;
1059            return 1;
1060        }
1061    }
1062    return 0;
1063}
1064/* }}} */
1065
1066static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote TSRMLS_DC)
1067{
1068    char *result = emalloc(len + 2);
1069    char *resp = result;
1070    int i;
1071
1072    for (i = 0; i < len && start[i] != quote; ++i) {
1073        if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
1074            *resp++ = start[++i];
1075        } else {
1076            size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
1077
1078            while (j-- > 0 && i < len) {
1079                *resp++ = start[i++];
1080            }
1081            --i;
1082        }
1083    }
1084
1085    *resp = '\0';
1086    return result;
1087}
1088
1089static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC) /* {{{ */
1090{
1091    char *pos = *line, quote;
1092    char *res;
1093
1094    while (*pos && *pos != stop) {
1095        if ((quote = *pos) == '"' || quote == '\'') {
1096            ++pos;
1097            while (*pos && *pos != quote) {
1098                if (*pos == '\\' && pos[1] && pos[1] == quote) {
1099                    pos += 2;
1100                } else {
1101                    ++pos;
1102                }
1103            }
1104            if (*pos) {
1105                ++pos;
1106            }
1107        } else {
1108            pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1109
1110        }
1111    }
1112    if (*pos == '\0') {
1113        res = estrdup(*line);
1114        *line += strlen(*line);
1115        return res;
1116    }
1117
1118    res = estrndup(*line, pos - *line);
1119
1120    while (*pos == stop) {
1121        pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1122    }
1123
1124    *line = pos;
1125    return res;
1126}
1127/* }}} */
1128
1129static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC) /* {{{ */
1130{
1131    while (*str && isspace(*(unsigned char *)str)) {
1132        ++str;
1133    }
1134
1135    if (!*str) {
1136        return estrdup("");
1137    }
1138
1139    if (*str == '"' || *str == '\'') {
1140        char quote = *str;
1141
1142        str++;
1143        return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote TSRMLS_CC);
1144    } else {
1145        char *strend = str;
1146
1147        while (*strend && !isspace(*(unsigned char *)strend)) {
1148            ++strend;
1149        }
1150        return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0 TSRMLS_CC);
1151    }
1152}
1153/* }}} */
1154
1155static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename TSRMLS_DC) /* {{{ */
1156{
1157    char *s, *s2;
1158    const size_t filename_len = strlen(filename);
1159
1160    /* The \ check should technically be needed for win32 systems only where
1161     * it is a valid path separator. However, IE in all it's wisdom always sends
1162     * the full path of the file on the user's filesystem, which means that unless
1163     * the user does basename() they get a bogus file name. Until IE's user base drops
1164     * to nill or problem is fixed this code must remain enabled for all systems. */
1165    s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
1166    s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
1167
1168    if (s && s2) {
1169        if (s > s2) {
1170            return ++s;
1171        } else {
1172            return ++s2;
1173        }
1174    } else if (s) {
1175        return ++s;
1176    } else if (s2) {
1177        return ++s2;
1178    } else {
1179        return filename;
1180    }
1181}
1182/* }}} */
1183
1184/* {{{ php.ini directive handler */
1185/* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
1186static PHP_INI_MH(OnUpdate_mbstring_language)
1187{
1188    enum mbfl_no_language no_language;
1189
1190    no_language = mbfl_name2no_language(new_value);
1191    if (no_language == mbfl_no_language_invalid) {
1192        MBSTRG(language) = mbfl_no_language_neutral;
1193        return FAILURE;
1194    }
1195    MBSTRG(language) = no_language;
1196    php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1197    return SUCCESS;
1198}
1199/* }}} */
1200
1201/* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
1202static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1203{
1204    const mbfl_encoding **list;
1205    size_t size;
1206
1207    if (!new_value) {
1208        if (MBSTRG(detect_order_list)) {
1209            pefree(MBSTRG(detect_order_list), 1);
1210        }
1211        MBSTRG(detect_order_list) = NULL;
1212        MBSTRG(detect_order_list_size) = 0;
1213        return SUCCESS;
1214    }
1215
1216    if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1217        return FAILURE;
1218    }
1219
1220    if (MBSTRG(detect_order_list)) {
1221        pefree(MBSTRG(detect_order_list), 1);
1222    }
1223    MBSTRG(detect_order_list) = list;
1224    MBSTRG(detect_order_list_size) = size;
1225    return SUCCESS;
1226}
1227/* }}} */
1228
1229/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
1230static PHP_INI_MH(OnUpdate_mbstring_http_input)
1231{
1232    const mbfl_encoding **list;
1233    size_t size;
1234
1235    if (!new_value) {
1236        if (MBSTRG(http_input_list)) {
1237            pefree(MBSTRG(http_input_list), 1);
1238        }
1239        MBSTRG(http_input_list) = NULL;
1240        MBSTRG(http_input_list_size) = 0;
1241        return SUCCESS;
1242    }
1243
1244    if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1245        return FAILURE;
1246    }
1247
1248    if (MBSTRG(http_input_list)) {
1249        pefree(MBSTRG(http_input_list), 1);
1250    }
1251    MBSTRG(http_input_list) = list;
1252    MBSTRG(http_input_list_size) = size;
1253
1254    return SUCCESS;
1255}
1256/* }}} */
1257
1258/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
1259static PHP_INI_MH(OnUpdate_mbstring_http_output)
1260{
1261    const mbfl_encoding *encoding;
1262
1263    if (new_value == NULL || new_value_length == 0) {
1264        MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1265        MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1266        return SUCCESS;
1267    }
1268
1269    encoding = mbfl_name2encoding(new_value);
1270    if (!encoding) {
1271        MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1272        MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1273        return FAILURE;
1274    }
1275
1276    MBSTRG(http_output_encoding) = encoding;
1277    MBSTRG(current_http_output_encoding) = encoding;
1278    return SUCCESS;
1279}
1280/* }}} */
1281
1282/* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
1283int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
1284{
1285    const mbfl_encoding *encoding;
1286
1287    if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) {
1288        switch (MBSTRG(language)) {
1289            case mbfl_no_language_uni:
1290                encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1291                break;
1292            case mbfl_no_language_japanese:
1293                encoding = mbfl_no2encoding(mbfl_no_encoding_euc_jp);
1294                break;
1295            case mbfl_no_language_korean:
1296                encoding = mbfl_no2encoding(mbfl_no_encoding_euc_kr);
1297                break;
1298            case mbfl_no_language_simplified_chinese:
1299                encoding = mbfl_no2encoding(mbfl_no_encoding_euc_cn);
1300                break;
1301            case mbfl_no_language_traditional_chinese:
1302                encoding = mbfl_no2encoding(mbfl_no_encoding_euc_tw);
1303                break;
1304            case mbfl_no_language_russian:
1305                encoding = mbfl_no2encoding(mbfl_no_encoding_koi8r);
1306                break;
1307            case mbfl_no_language_german:
1308                encoding = mbfl_no2encoding(mbfl_no_encoding_8859_15);
1309                break;
1310            case mbfl_no_language_armenian:
1311                encoding = mbfl_no2encoding(mbfl_no_encoding_armscii8);
1312                break;
1313            case mbfl_no_language_turkish:
1314                encoding = mbfl_no2encoding(mbfl_no_encoding_8859_9);
1315                break;
1316            default:
1317                encoding = mbfl_no2encoding(mbfl_no_encoding_8859_1);
1318                break;
1319        }
1320    }
1321    MBSTRG(internal_encoding) = encoding;
1322    MBSTRG(current_internal_encoding) = encoding;
1323#if HAVE_MBREGEX
1324    {
1325        const char *enc_name = new_value;
1326        if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
1327            /* falls back to EUC-JP if an unknown encoding name is given */
1328            enc_name = "EUC-JP";
1329            php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
1330        }
1331        php_mb_regex_set_mbctype(new_value TSRMLS_CC);
1332    }
1333#endif
1334    return SUCCESS;
1335}
1336/* }}} */
1337
1338/* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
1339static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1340{
1341    if (OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC) == FAILURE) {
1342        return FAILURE;
1343    }
1344    if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
1345            || stage == PHP_INI_STAGE_RUNTIME) {
1346        return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1347    } else {
1348        /* the corresponding mbstring globals needs to be set according to the
1349         * ini value in the later stage because it never falls back to the
1350         * default value if 1. no value for mbstring.internal_encoding is given,
1351         * 2. mbstring.language directive is processed in per-dir or runtime
1352         * context and 3. call to the handler for mbstring.language is done
1353         * after mbstring.internal_encoding is handled. */
1354        return SUCCESS;
1355    }
1356}
1357/* }}} */
1358
1359/* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
1360static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1361{
1362    int c;
1363    char *endptr = NULL;
1364
1365    if (new_value != NULL) {
1366        if (strcasecmp("none", new_value) == 0) {
1367            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1368            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1369        } else if (strcasecmp("long", new_value) == 0) {
1370            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1371            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1372        } else if (strcasecmp("entity", new_value) == 0) {
1373            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1374            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1375        } else {
1376            MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1377            MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1378            if (new_value_length >0) {
1379                c = strtol(new_value, &endptr, 0);
1380                if (*endptr == '\0') {
1381                    MBSTRG(filter_illegal_substchar) = c;
1382                    MBSTRG(current_filter_illegal_substchar) = c;
1383                }
1384            }
1385        }
1386    } else {
1387        MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1388        MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1389        MBSTRG(filter_illegal_substchar) = 0x3f;    /* '?' */
1390        MBSTRG(current_filter_illegal_substchar) = 0x3f;    /* '?' */
1391    }
1392
1393    return SUCCESS;
1394}
1395/* }}} */
1396
1397/* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
1398static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1399{
1400    if (new_value == NULL) {
1401        return FAILURE;
1402    }
1403
1404    OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1405
1406    if (MBSTRG(encoding_translation)) {
1407        sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
1408        sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1409    } else {
1410        sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
1411        sapi_register_post_entries(php_post_entries TSRMLS_CC);
1412    }
1413
1414    return SUCCESS;
1415}
1416/* }}} */
1417
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
1419static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1420{
1421    zval tmp;
1422    void *re = NULL;
1423
1424    if (!new_value) {
1425        new_value = entry->orig_value;
1426        new_value_length = entry->orig_value_length;
1427    }
1428    php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
1429
1430    if (Z_STRLEN(tmp) > 0) {
1431        if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
1432            zval_dtor(&tmp);
1433            return FAILURE;
1434        }
1435    }
1436
1437    if (MBSTRG(http_output_conv_mimetypes)) {
1438        _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1439    }
1440
1441    MBSTRG(http_output_conv_mimetypes) = re;
1442
1443    zval_dtor(&tmp);
1444    return SUCCESS;
1445}
1446/* }}} */
1447/* }}} */
1448
1449/* {{{ php.ini directive registration */
/* Table of the mbstring.* ini directives: name, default value, the contexts
 * in which the directive may be changed, and the update handler. Entries
 * declared with the STD_ macros additionally name the zend_mbstring_globals
 * member they are bound to. */
PHP_INI_BEGIN()
    PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
    PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
    PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
    PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
    STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
    PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
    /* func_overload can only be set at PHP_INI_SYSTEM level */
    STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
    PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

    /* the handler also swaps the SAPI post entries */
    STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
        PHP_INI_SYSTEM | PHP_INI_PERDIR,
        OnUpdate_mbstring_encoding_translation,
        encoding_translation, zend_mbstring_globals, mbstring_globals)
    /* the default value is a regex; the handler compiles it */
    PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
        "^(text/|application/xhtml\\+xml)",
        PHP_INI_ALL,
        OnUpdate_mbstring_http_output_conv_mimetypes)

    /* declared as a boolean entry but stored through OnUpdateLong */
    STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
        PHP_INI_ALL,
        OnUpdateLong,
        strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
1474/* }}} */
1475
1476/* {{{ module global initialize handler */
1477static PHP_GINIT_FUNCTION(mbstring)
1478{
1479    mbstring_globals->language = mbfl_no_language_uni;
1480    mbstring_globals->internal_encoding = NULL;
1481    mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1482    mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1483    mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1484    mbstring_globals->http_input_identify = NULL;
1485    mbstring_globals->http_input_identify_get = NULL;
1486    mbstring_globals->http_input_identify_post = NULL;
1487    mbstring_globals->http_input_identify_cookie = NULL;
1488    mbstring_globals->http_input_identify_string = NULL;
1489    mbstring_globals->http_input_list = NULL;
1490    mbstring_globals->http_input_list_size = 0;
1491    mbstring_globals->detect_order_list = NULL;
1492    mbstring_globals->detect_order_list_size = 0;
1493    mbstring_globals->current_detect_order_list = NULL;
1494    mbstring_globals->current_detect_order_list_size = 0;
1495    mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1496    mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1497    mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1498    mbstring_globals->filter_illegal_substchar = 0x3f;  /* '?' */
1499    mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1500    mbstring_globals->current_filter_illegal_substchar = 0x3f;  /* '?' */
1501    mbstring_globals->illegalchars = 0;
1502    mbstring_globals->func_overload = 0;
1503    mbstring_globals->encoding_translation = 0;
1504    mbstring_globals->strict_detection = 0;
1505    mbstring_globals->outconv = NULL;
1506    mbstring_globals->http_output_conv_mimetypes = NULL;
1507#if HAVE_MBREGEX
1508    mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
1509#endif
1510}
1511/* }}} */
1512
1513/* {{{ PHP_GSHUTDOWN_FUNCTION */
1514static PHP_GSHUTDOWN_FUNCTION(mbstring)
1515{
1516    if (mbstring_globals->http_input_list) {
1517        free(mbstring_globals->http_input_list);
1518    }
1519    if (mbstring_globals->detect_order_list) {
1520        free(mbstring_globals->detect_order_list);
1521    }
1522    if (mbstring_globals->http_output_conv_mimetypes) {
1523        _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1524    }
1525#if HAVE_MBREGEX
1526    php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
1527#endif
1528}
1529/* }}} */
1530
1531/* {{{ PHP_MINIT_FUNCTION(mbstring) */
/* Module startup: installs the libmbfl allocator table, registers the ini
 * entries, SAPI hooks and MB_* constants, starts the mbregex sub-module when
 * compiled in, and hooks mbstring into the engine's multibyte support and
 * the RFC 1867 (file upload) parser.
 * Returns FAILURE only when zend_multibyte_set_functions() fails. */
PHP_MINIT_FUNCTION(mbstring)
{
    /* point libmbfl at the allocator table defined by this module */
    __mbfl_allocators = &_php_mb_allocators;

    REGISTER_INI_ENTRIES();

    /* This is a global handler. Should not be set in a per-request handler. */
    sapi_register_treat_data(mbstr_treat_data TSRMLS_CC);

    /* Post handlers are stored in the thread-local context. */
    if (MBSTRG(encoding_translation)) {
        sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
    }

    /* bit flags accepted by mbstring.func_overload */
    REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);

    /* case-conversion modes */
    REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
    REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);

#if HAVE_MBREGEX
    PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

    /* hand the callback table to the engine */
    if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) {
        return FAILURE;
    }

    /* let the RFC 1867 parser use the multibyte-aware helpers */
    php_rfc1867_set_multibyte_callbacks(
        php_mb_encoding_translation,
        php_mb_gpc_get_detect_order,
        php_mb_gpc_set_input_encoding,
        php_mb_rfc1867_getword,
        php_mb_rfc1867_getword_conf,
        php_mb_rfc1867_basename);

    return SUCCESS;
}
1572/* }}} */
1573
1574/* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
/* Module shutdown: unregisters the ini entries and, when mbregex is compiled
 * in, shuts that sub-module down as well. Always returns SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(mbstring)
{
    UNREGISTER_INI_ENTRIES();

#if HAVE_MBREGEX
    PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

    return SUCCESS;
}
1585/* }}} */
1586
1587/* {{{ PHP_RINIT_FUNCTION(mbstring) */
1588PHP_RINIT_FUNCTION(mbstring)
1589{
1590    zend_function *func, *orig;
1591    const struct mb_overload_def *p;
1592
1593    MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1594    MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1595    MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1596    MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1597
1598    MBSTRG(illegalchars) = 0;
1599
1600    php_mb_populate_current_detect_order_list(TSRMLS_C);
1601
1602    /* override original function. */
1603    if (MBSTRG(func_overload)){
1604        p = &(mb_ovld[0]);
1605
1606        while (p->type > 0) {
1607            if ((MBSTRG(func_overload) & p->type) == p->type &&
1608                zend_hash_find(EG(function_table), p->save_func,
1609                    strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
1610
1611                zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
1612
1613                if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
1614                    php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1615                    return FAILURE;
1616                } else {
1617                    zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
1618
1619                    if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
1620                        NULL) == FAILURE) {
1621                        php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1622                        return FAILURE;
1623                    }
1624                }
1625            }
1626            p++;
1627        }
1628    }
1629#if HAVE_MBREGEX
1630    PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1631#endif
1632    zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC);
1633
1634    return SUCCESS;
1635}
1636/* }}} */
1637
1638/* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
1639PHP_RSHUTDOWN_FUNCTION(mbstring)
1640{
1641    const struct mb_overload_def *p;
1642    zend_function *orig;
1643
1644    if (MBSTRG(current_detect_order_list) != NULL) {
1645        efree(MBSTRG(current_detect_order_list));
1646        MBSTRG(current_detect_order_list) = NULL;
1647        MBSTRG(current_detect_order_list_size) = 0;
1648    }
1649    if (MBSTRG(outconv) != NULL) {
1650        MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1651        mbfl_buffer_converter_delete(MBSTRG(outconv));
1652        MBSTRG(outconv) = NULL;
1653    }
1654
1655    /* clear http input identification. */
1656    MBSTRG(http_input_identify) = NULL;
1657    MBSTRG(http_input_identify_post) = NULL;
1658    MBSTRG(http_input_identify_get) = NULL;
1659    MBSTRG(http_input_identify_cookie) = NULL;
1660    MBSTRG(http_input_identify_string) = NULL;
1661
1662    /*  clear overloaded function. */
1663    if (MBSTRG(func_overload)){
1664        p = &(mb_ovld[0]);
1665        while (p->type > 0) {
1666            if ((MBSTRG(func_overload) & p->type) == p->type &&
1667                zend_hash_find(EG(function_table), p->save_func,
1668                               strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
1669
1670                zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
1671                zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
1672            }
1673            p++;
1674        }
1675    }
1676
1677#if HAVE_MBREGEX
1678    PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1679#endif
1680
1681    return SUCCESS;
1682}
1683/* }}} */
1684
1685/* {{{ PHP_MINFO_FUNCTION(mbstring) */
1686PHP_MINFO_FUNCTION(mbstring)
1687{
1688    php_info_print_table_start();
1689    php_info_print_table_row(2, "Multibyte Support", "enabled");
1690    php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1691    php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1692    {
1693        char tmp[256];
1694        snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1695        php_info_print_table_row(2, "libmbfl version", tmp);
1696    }
1697    php_info_print_table_end();
1698
1699    php_info_print_table_start();
1700    php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1701    php_info_print_table_end();
1702
1703#if HAVE_MBREGEX
1704    PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1705#endif
1706
1707    DISPLAY_INI_ENTRIES();
1708}
1709/* }}} */
1710
1711/* {{{ proto string mb_language([string language])
1712   Sets the current language or Returns the current language as a string */
1713PHP_FUNCTION(mb_language)
1714{
1715    char *name = NULL;
1716    int name_len = 0;
1717
1718    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1719        return;
1720    }
1721    if (name == NULL) {
1722        RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
1723    } else {
1724        if (FAILURE == zend_alter_ini_entry(
1725                "mbstring.language", sizeof("mbstring.language"),
1726                name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1727            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
1728            RETVAL_FALSE;
1729        } else {
1730            RETVAL_TRUE;
1731        }
1732    }
1733}
1734/* }}} */
1735
1736/* {{{ proto string mb_internal_encoding([string encoding])
1737   Sets the current internal encoding or Returns the current internal encoding as a string */
1738PHP_FUNCTION(mb_internal_encoding)
1739{
1740    const char *name = NULL;
1741    int name_len;
1742    const mbfl_encoding *encoding;
1743
1744    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1745        RETURN_FALSE;
1746    }
1747    if (name == NULL) {
1748        name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1749        if (name != NULL) {
1750            RETURN_STRING(name, 1);
1751        } else {
1752            RETURN_FALSE;
1753        }
1754    } else {
1755        encoding = mbfl_name2encoding(name);
1756        if (!encoding) {
1757            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1758            RETURN_FALSE;
1759        } else {
1760            MBSTRG(current_internal_encoding) = encoding;
1761            RETURN_TRUE;
1762        }
1763    }
1764}
1765/* }}} */
1766
1767/* {{{ proto mixed mb_http_input([string type])
1768   Returns the input encoding */
1769PHP_FUNCTION(mb_http_input)
1770{
1771    char *typ = NULL;
1772    int typ_len;
1773    int retname;
1774    char *list, *temp;
1775    const mbfl_encoding *result = NULL;
1776
1777    retname = 1;
1778    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
1779        RETURN_FALSE;
1780    }
1781    if (typ == NULL) {
1782        result = MBSTRG(http_input_identify);
1783    } else {
1784        switch (*typ) {
1785        case 'G':
1786        case 'g':
1787            result = MBSTRG(http_input_identify_get);
1788            break;
1789        case 'P':
1790        case 'p':
1791            result = MBSTRG(http_input_identify_post);
1792            break;
1793        case 'C':
1794        case 'c':
1795            result = MBSTRG(http_input_identify_cookie);
1796            break;
1797        case 'S':
1798        case 's':
1799            result = MBSTRG(http_input_identify_string);
1800            break;
1801        case 'I':
1802        case 'i':
1803            {
1804                const mbfl_encoding **entry = MBSTRG(http_input_list);
1805                const size_t n = MBSTRG(http_input_list_size);
1806                size_t i;
1807                array_init(return_value);
1808                for (i = 0; i < n; i++) {
1809                    add_next_index_string(return_value, (*entry)->name, 1);
1810                    entry++;
1811                }
1812                retname = 0;
1813            }
1814            break;
1815        case 'L':
1816        case 'l':
1817            {
1818                const mbfl_encoding **entry = MBSTRG(http_input_list);
1819                const size_t n = MBSTRG(http_input_list_size);
1820                size_t i;
1821                list = NULL;
1822                for (i = 0; i < n; i++) {
1823                    if (list) {
1824                        temp = list;
1825                        spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1826                        efree(temp);
1827                        if (!list) {
1828                            break;
1829                        }
1830                    } else {
1831                        list = estrdup((*entry)->name);
1832                    }
1833                    entry++;
1834                }
1835            }
1836            if (!list) {
1837                RETURN_FALSE;
1838            }
1839            RETVAL_STRING(list, 0);
1840            retname = 0;
1841            break;
1842        default:
1843            result = MBSTRG(http_input_identify);
1844            break;
1845        }
1846    }
1847
1848    if (retname) {
1849        if (result) {
1850            RETVAL_STRING(result->name, 1);
1851        } else {
1852            RETVAL_FALSE;
1853        }
1854    }
1855}
1856/* }}} */
1857
1858/* {{{ proto string mb_http_output([string encoding])
1859   Sets the current output_encoding or returns the current output_encoding as a string */
1860PHP_FUNCTION(mb_http_output)
1861{
1862    const char *name = NULL;
1863    int name_len;
1864    const mbfl_encoding *encoding;
1865
1866    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
1867        RETURN_FALSE;
1868    }
1869
1870    if (name == NULL) {
1871        name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1872        if (name != NULL) {
1873            RETURN_STRING(name, 1);
1874        } else {
1875            RETURN_FALSE;
1876        }
1877    } else {
1878        encoding = mbfl_name2encoding(name);
1879        if (!encoding) {
1880            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1881            RETURN_FALSE;
1882        } else {
1883            MBSTRG(current_http_output_encoding) = encoding;
1884            RETURN_TRUE;
1885        }
1886    }
1887}
1888/* }}} */
1889
1890/* {{{ proto bool|array mb_detect_order([mixed encoding-list])
1891   Sets the current detect_order or Return the current detect_order as a array */
1892PHP_FUNCTION(mb_detect_order)
1893{
1894    zval **arg1 = NULL;
1895
1896    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1897        return;
1898    }
1899
1900    if (!arg1) {
1901        size_t i;
1902        size_t n = MBSTRG(current_detect_order_list_size);
1903        const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1904        array_init(return_value);
1905        for (i = 0; i < n; i++) {
1906            add_next_index_string(return_value, (*entry)->name, 1);
1907            entry++;
1908        }
1909    } else {
1910        const mbfl_encoding **list = NULL;
1911        size_t size = 0;
1912        switch (Z_TYPE_PP(arg1)) {
1913        case IS_ARRAY:
1914            if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
1915                if (list) {
1916                    efree(list);
1917                }
1918                RETURN_FALSE;
1919            }
1920            break;
1921        default:
1922            convert_to_string_ex(arg1);
1923            if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
1924                if (list) {
1925                    efree(list);
1926                }
1927                RETURN_FALSE;
1928            }
1929            break;
1930        }
1931
1932        if (list == NULL) {
1933            RETURN_FALSE;
1934        }
1935
1936        if (MBSTRG(current_detect_order_list)) {
1937            efree(MBSTRG(current_detect_order_list));
1938        }
1939        MBSTRG(current_detect_order_list) = list;
1940        MBSTRG(current_detect_order_list_size) = size;
1941        RETURN_TRUE;
1942    }
1943}
1944/* }}} */
1945
1946/* {{{ proto mixed mb_substitute_character([mixed substchar])
1947   Sets the current substitute_character or returns the current substitute_character */
1948PHP_FUNCTION(mb_substitute_character)
1949{
1950    zval **arg1 = NULL;
1951
1952    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1953        return;
1954    }
1955
1956    if (!arg1) {
1957        if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1958            RETURN_STRING("none", 1);
1959        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1960            RETURN_STRING("long", 1);
1961        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1962            RETURN_STRING("entity", 1);
1963        } else {
1964            RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1965        }
1966    } else {
1967        RETVAL_TRUE;
1968
1969        switch (Z_TYPE_PP(arg1)) {
1970        case IS_STRING:
1971            if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1972                MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1973            } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1974                MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1975            } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1976                MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1977            } else {
1978                convert_to_long_ex(arg1);
1979
1980                if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1981                    MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1982                    MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1983                } else {
1984                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1985                    RETURN_FALSE;
1986                }
1987            }
1988            break;
1989        default:
1990            convert_to_long_ex(arg1);
1991            if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1992                MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1993                MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1994            } else {
1995                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1996                RETURN_FALSE;
1997            }
1998            break;
1999        }
2000    }
2001}
2002/* }}} */
2003
2004/* {{{ proto string mb_preferred_mime_name(string encoding)
2005   Return the preferred MIME name (charset) as a string */
2006PHP_FUNCTION(mb_preferred_mime_name)
2007{
2008    enum mbfl_no_encoding no_encoding;
2009    char *name = NULL;
2010    int name_len;
2011
2012    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
2013        return;
2014    } else {
2015        no_encoding = mbfl_name2no_encoding(name);
2016        if (no_encoding == mbfl_no_encoding_invalid) {
2017            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
2018            RETVAL_FALSE;
2019        } else {
2020            const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2021            if (preferred_name == NULL || *preferred_name == '\0') {
2022                php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2023                RETVAL_FALSE;
2024            } else {
2025                RETVAL_STRING((char *)preferred_name, 1);
2026            }
2027        }
2028    }
2029}
2030/* }}} */
2031
/* Byte-range predicates; each evaluates to 1 or 0. Judging by the names they
 * test the first and second byte of a Shift_JIS double-byte character
 * (NOTE(review): confirm against the users of these macros).
 * The argument is evaluated more than once, so it must be free of side effects. */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2034
2035/* {{{ proto bool mb_parse_str(string encoded_string [, array result])
2036   Parses GET/POST/COOKIE data and sets global variables */
2037PHP_FUNCTION(mb_parse_str)
2038{
2039    zval *track_vars_array = NULL;
2040    char *encstr = NULL;
2041    int encstr_len;
2042    php_mb_encoding_handler_info_t info;
2043    const mbfl_encoding *detected;
2044
2045    track_vars_array = NULL;
2046    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2047        return;
2048    }
2049
2050    if (track_vars_array != NULL) {
2051        /* Clear out the array */
2052        zval_dtor(track_vars_array);
2053        array_init(track_vars_array);
2054    }
2055
2056    encstr = estrndup(encstr, encstr_len);
2057
2058    info.data_type              = PARSE_STRING;
2059    info.separator              = PG(arg_separator).input;
2060    info.report_errors          = 1;
2061    info.to_encoding            = MBSTRG(current_internal_encoding);
2062    info.to_language            = MBSTRG(language);
2063    info.from_encodings         = MBSTRG(http_input_list);
2064    info.num_from_encodings     = MBSTRG(http_input_list_size);
2065    info.from_language          = MBSTRG(language);
2066
2067    if (track_vars_array != NULL) {
2068        detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
2069    } else {
2070        zval tmp;
2071        if (!EG(active_symbol_table)) {
2072            zend_rebuild_symbol_table(TSRMLS_C);
2073        }
2074        Z_ARRVAL(tmp) = EG(active_symbol_table);
2075        detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr TSRMLS_CC);
2076    }
2077
2078    MBSTRG(http_input_identify) = detected;
2079
2080    RETVAL_BOOL(detected);
2081
2082    if (encstr != NULL) efree(encstr);
2083}
2084/* }}} */
2085
2086/* {{{ proto string mb_output_handler(string contents, int status)
2087   Returns string in output buffer converted to the http_output encoding */
2088PHP_FUNCTION(mb_output_handler)
2089{
2090    char *arg_string;
2091    int arg_string_len;
2092    long arg_status;
2093    mbfl_string string, result;
2094    const char *charset;
2095    char *p;
2096    const mbfl_encoding *encoding;
2097    int last_feed, len;
2098    unsigned char send_text_mimetype = 0;
2099    char *s, *mimetype = NULL;
2100
2101    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2102        return;
2103    }
2104
2105    encoding = MBSTRG(current_http_output_encoding);
2106
2107    /* start phase only */
2108    if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2109        /* delete the converter just in case. */
2110        if (MBSTRG(outconv)) {
2111            MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2112            mbfl_buffer_converter_delete(MBSTRG(outconv));
2113            MBSTRG(outconv) = NULL;
2114        }
2115        if (encoding == &mbfl_encoding_pass) {
2116            RETURN_STRINGL(arg_string, arg_string_len, 1);
2117        }
2118
2119        /* analyze mime type */
2120        if (SG(sapi_headers).mimetype &&
2121            _php_mb_match_regex(
2122                MBSTRG(http_output_conv_mimetypes),
2123                SG(sapi_headers).mimetype,
2124                strlen(SG(sapi_headers).mimetype))) {
2125            if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2126                mimetype = estrdup(SG(sapi_headers).mimetype);
2127            } else {
2128                mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2129            }
2130            send_text_mimetype = 1;
2131        } else if (SG(sapi_headers).send_default_content_type) {
2132            mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2133        }
2134
2135        /* if content-type is not yet set, set it and activate the converter */
2136        if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2137            charset = encoding->mime_name;
2138            if (charset) {
2139                len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
2140                if (sapi_add_header(p, len, 0) != FAILURE) {
2141                    SG(sapi_headers).send_default_content_type = 0;
2142                }
2143            }
2144            /* activate the converter */
2145            MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
2146            if (send_text_mimetype){
2147                efree(mimetype);
2148            }
2149        }
2150    }
2151
2152    /* just return if the converter is not activated. */
2153    if (MBSTRG(outconv) == NULL) {
2154        RETURN_STRINGL(arg_string, arg_string_len, 1);
2155    }
2156
2157    /* flag */
2158    last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2159    /* mode */
2160    mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2161    mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2162
2163    /* feed the string */
2164    mbfl_string_init(&string);
2165    string.no_language = MBSTRG(language);
2166    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2167    string.val = (unsigned char *)arg_string;
2168    string.len = arg_string_len;
2169    mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2170    if (last_feed) {
2171        mbfl_buffer_converter_flush(MBSTRG(outconv));
2172    }
2173    /* get the converter output, and return it */
2174    mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2175    RETVAL_STRINGL((char *)result.val, result.len, 0);      /* the string is already strdup()'ed */
2176
2177    /* delete the converter if it is the last feed. */
2178    if (last_feed) {
2179        MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2180        mbfl_buffer_converter_delete(MBSTRG(outconv));
2181        MBSTRG(outconv) = NULL;
2182    }
2183}
2184/* }}} */
2185
2186/* {{{ proto int mb_strlen(string str [, string encoding])
2187   Get character numbers of a string */
2188PHP_FUNCTION(mb_strlen)
2189{
2190    int n;
2191    mbfl_string string;
2192    char *enc_name = NULL;
2193    int enc_name_len;
2194
2195    mbfl_string_init(&string);
2196
2197    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2198        RETURN_FALSE;
2199    }
2200
2201    string.no_language = MBSTRG(language);
2202    if (enc_name == NULL) {
2203        string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2204    } else {
2205        string.no_encoding = mbfl_name2no_encoding(enc_name);
2206        if (string.no_encoding == mbfl_no_encoding_invalid) {
2207            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2208            RETURN_FALSE;
2209        }
2210    }
2211
2212    n = mbfl_strlen(&string);
2213    if (n >= 0) {
2214        RETVAL_LONG(n);
2215    } else {
2216        RETVAL_FALSE;
2217    }
2218}
2219/* }}} */
2220
2221/* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2222   Find position of first occurrence of a string within another */
2223PHP_FUNCTION(mb_strpos)
2224{
2225    int n, reverse = 0;
2226    long offset;
2227    mbfl_string haystack, needle;
2228    char *enc_name = NULL;
2229    int enc_name_len;
2230
2231    mbfl_string_init(&haystack);
2232    mbfl_string_init(&needle);
2233    haystack.no_language = MBSTRG(language);
2234    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2235    needle.no_language = MBSTRG(language);
2236    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2237    offset = 0;
2238
2239    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2240        RETURN_FALSE;
2241    }
2242
2243    if (enc_name != NULL) {
2244        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2245        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2246            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2247            RETURN_FALSE;
2248        }
2249    }
2250
2251    if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2252        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
2253        RETURN_FALSE;
2254    }
2255    if (needle.len == 0) {
2256        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2257        RETURN_FALSE;
2258    }
2259
2260    n = mbfl_strpos(&haystack, &needle, offset, reverse);
2261    if (n >= 0) {
2262        RETVAL_LONG(n);
2263    } else {
2264        switch (-n) {
2265        case 1:
2266            break;
2267        case 2:
2268            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
2269            break;
2270        case 4:
2271            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
2272            break;
2273        case 8:
2274            php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
2275            break;
2276        default:
2277            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
2278            break;
2279        }
2280        RETVAL_FALSE;
2281    }
2282}
2283/* }}} */
2284
2285/* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2286   Find position of last occurrence of a string within another */
2287PHP_FUNCTION(mb_strrpos)
2288{
2289    int n;
2290    mbfl_string haystack, needle;
2291    char *enc_name = NULL;
2292    int enc_name_len;
2293    zval **zoffset = NULL;
2294    long offset = 0, str_flg;
2295    char *enc_name2 = NULL;
2296    int enc_name_len2;
2297
2298    mbfl_string_init(&haystack);
2299    mbfl_string_init(&needle);
2300    haystack.no_language = MBSTRG(language);
2301    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2302    needle.no_language = MBSTRG(language);
2303    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2304
2305    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2306        RETURN_FALSE;
2307    }
2308
2309    if (zoffset) {
2310        if (Z_TYPE_PP(zoffset) == IS_STRING) {
2311            enc_name2     = Z_STRVAL_PP(zoffset);
2312            enc_name_len2 = Z_STRLEN_PP(zoffset);
2313            str_flg       = 1;
2314
2315            if (enc_name2 != NULL) {
2316                switch (*enc_name2) {
2317                case '0':
2318                case '1':
2319                case '2':
2320                case '3':
2321                case '4':
2322                case '5':
2323                case '6':
2324                case '7':
2325                case '8':
2326                case '9':
2327                case ' ':
2328                case '-':
2329                case '.':
2330                    break;
2331                default :
2332                    str_flg = 0;
2333                    break;
2334                }
2335            }
2336
2337            if (str_flg) {
2338                convert_to_long_ex(zoffset);
2339                offset   = Z_LVAL_PP(zoffset);
2340            } else {
2341                enc_name     = enc_name2;
2342                enc_name_len = enc_name_len2;
2343            }
2344        } else {
2345            convert_to_long_ex(zoffset);
2346            offset = Z_LVAL_PP(zoffset);
2347        }
2348    }
2349
2350    if (enc_name != NULL) {
2351        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2352        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2353            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2354            RETURN_FALSE;
2355        }
2356    }
2357
2358    if (haystack.len <= 0) {
2359        RETURN_FALSE;
2360    }
2361    if (needle.len <= 0) {
2362        RETURN_FALSE;
2363    }
2364
2365    {
2366        int haystack_char_len = mbfl_strlen(&haystack);
2367        if ((offset > 0 && offset > haystack_char_len) ||
2368            (offset < 0 && -offset > haystack_char_len)) {
2369            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
2370            RETURN_FALSE;
2371        }
2372    }
2373
2374    n = mbfl_strpos(&haystack, &needle, offset, 1);
2375    if (n >= 0) {
2376        RETVAL_LONG(n);
2377    } else {
2378        RETVAL_FALSE;
2379    }
2380}
2381/* }}} */
2382
2383/* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2384   Finds position of first occurrence of a string within another, case insensitive */
2385PHP_FUNCTION(mb_stripos)
2386{
2387    int n;
2388    long offset;
2389    mbfl_string haystack, needle;
2390    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2391    int from_encoding_len;
2392    n = -1;
2393    offset = 0;
2394
2395    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2396        RETURN_FALSE;
2397    }
2398    if (needle.len == 0) {
2399        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2400        RETURN_FALSE;
2401    }
2402    n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2403
2404    if (n >= 0) {
2405        RETVAL_LONG(n);
2406    } else {
2407        RETVAL_FALSE;
2408    }
2409}
2410/* }}} */
2411
2412/* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2413   Finds position of last occurrence of a string within another, case insensitive */
2414PHP_FUNCTION(mb_strripos)
2415{
2416    int n;
2417    long offset;
2418    mbfl_string haystack, needle;
2419    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2420    int from_encoding_len;
2421    n = -1;
2422    offset = 0;
2423
2424    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2425        RETURN_FALSE;
2426    }
2427
2428    n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2429
2430    if (n >= 0) {
2431        RETVAL_LONG(n);
2432    } else {
2433        RETVAL_FALSE;
2434    }
2435}
2436/* }}} */
2437
2438/* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2439   Finds first occurrence of a string within another */
2440PHP_FUNCTION(mb_strstr)
2441{
2442    int n, len, mblen;
2443    mbfl_string haystack, needle, result, *ret = NULL;
2444    char *enc_name = NULL;
2445    int enc_name_len;
2446    zend_bool part = 0;
2447
2448    mbfl_string_init(&haystack);
2449    mbfl_string_init(&needle);
2450    haystack.no_language = MBSTRG(language);
2451    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2452    needle.no_language = MBSTRG(language);
2453    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2454
2455    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2456        RETURN_FALSE;
2457    }
2458
2459    if (enc_name != NULL) {
2460        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2461        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2462            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2463            RETURN_FALSE;
2464        }
2465    }
2466
2467    if (needle.len <= 0) {
2468        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2469        RETURN_FALSE;
2470    }
2471    n = mbfl_strpos(&haystack, &needle, 0, 0);
2472    if (n >= 0) {
2473        mblen = mbfl_strlen(&haystack);
2474        if (part) {
2475            ret = mbfl_substr(&haystack, &result, 0, n);
2476            if (ret != NULL) {
2477                RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2478            } else {
2479                RETVAL_FALSE;
2480            }
2481        } else {
2482            len = (mblen - n);
2483            ret = mbfl_substr(&haystack, &result, n, len);
2484            if (ret != NULL) {
2485                RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2486            } else {
2487                RETVAL_FALSE;
2488            }
2489        }
2490    } else {
2491        RETVAL_FALSE;
2492    }
2493}
2494/* }}} */
2495
2496/* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2497   Finds the last occurrence of a character in a string within another */
2498PHP_FUNCTION(mb_strrchr)
2499{
2500    int n, len, mblen;
2501    mbfl_string haystack, needle, result, *ret = NULL;
2502    char *enc_name = NULL;
2503    int enc_name_len;
2504    zend_bool part = 0;
2505
2506    mbfl_string_init(&haystack);
2507    mbfl_string_init(&needle);
2508    haystack.no_language = MBSTRG(language);
2509    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2510    needle.no_language = MBSTRG(language);
2511    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2512
2513    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2514        RETURN_FALSE;
2515    }
2516
2517    if (enc_name != NULL) {
2518        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2519        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2520            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2521            RETURN_FALSE;
2522        }
2523    }
2524
2525    if (haystack.len <= 0) {
2526        RETURN_FALSE;
2527    }
2528    if (needle.len <= 0) {
2529        RETURN_FALSE;
2530    }
2531    n = mbfl_strpos(&haystack, &needle, 0, 1);
2532    if (n >= 0) {
2533        mblen = mbfl_strlen(&haystack);
2534        if (part) {
2535            ret = mbfl_substr(&haystack, &result, 0, n);
2536            if (ret != NULL) {
2537                RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2538            } else {
2539                RETVAL_FALSE;
2540            }
2541        } else {
2542            len = (mblen - n);
2543            ret = mbfl_substr(&haystack, &result, n, len);
2544            if (ret != NULL) {
2545                RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2546            } else {
2547                RETVAL_FALSE;
2548            }
2549        }
2550    } else {
2551        RETVAL_FALSE;
2552    }
2553}
2554/* }}} */
2555
2556/* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2557   Finds first occurrence of a string within another, case insensitive */
2558PHP_FUNCTION(mb_stristr)
2559{
2560    zend_bool part = 0;
2561    unsigned int from_encoding_len, len, mblen;
2562    int n;
2563    mbfl_string haystack, needle, result, *ret = NULL;
2564    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2565    mbfl_string_init(&haystack);
2566    mbfl_string_init(&needle);
2567    haystack.no_language = MBSTRG(language);
2568    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2569    needle.no_language = MBSTRG(language);
2570    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2571
2572
2573    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2574        RETURN_FALSE;
2575    }
2576
2577    if (!needle.len) {
2578        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2579        RETURN_FALSE;
2580    }
2581
2582    haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2583    if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2584        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2585        RETURN_FALSE;
2586    }
2587
2588    n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2589
2590    if (n <0) {
2591        RETURN_FALSE;
2592    }
2593
2594    mblen = mbfl_strlen(&haystack);
2595
2596    if (part) {
2597        ret = mbfl_substr(&haystack, &result, 0, n);
2598        if (ret != NULL) {
2599            RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2600        } else {
2601            RETVAL_FALSE;
2602        }
2603    } else {
2604        len = (mblen - n);
2605        ret = mbfl_substr(&haystack, &result, n, len);
2606        if (ret != NULL) {
2607            RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2608        } else {
2609            RETVAL_FALSE;
2610        }
2611    }
2612}
2613/* }}} */
2614
2615/* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2616   Finds the last occurrence of a character in a string within another, case insensitive */
2617PHP_FUNCTION(mb_strrichr)
2618{
2619    zend_bool part = 0;
2620    int n, from_encoding_len, len, mblen;
2621    mbfl_string haystack, needle, result, *ret = NULL;
2622    const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2623    mbfl_string_init(&haystack);
2624    mbfl_string_init(&needle);
2625    haystack.no_language = MBSTRG(language);
2626    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2627    needle.no_language = MBSTRG(language);
2628    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2629
2630
2631    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2632        RETURN_FALSE;
2633    }
2634
2635    haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2636    if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2637        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2638        RETURN_FALSE;
2639    }
2640
2641    n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2642
2643    if (n <0) {
2644        RETURN_FALSE;
2645    }
2646
2647    mblen = mbfl_strlen(&haystack);
2648
2649    if (part) {
2650        ret = mbfl_substr(&haystack, &result, 0, n);
2651        if (ret != NULL) {
2652            RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2653        } else {
2654            RETVAL_FALSE;
2655        }
2656    } else {
2657        len = (mblen - n);
2658        ret = mbfl_substr(&haystack, &result, n, len);
2659        if (ret != NULL) {
2660            RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2661        } else {
2662            RETVAL_FALSE;
2663        }
2664    }
2665}
2666/* }}} */
2667
2668/* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2669   Count the number of substring occurrences */
2670PHP_FUNCTION(mb_substr_count)
2671{
2672    int n;
2673    mbfl_string haystack, needle;
2674    char *enc_name = NULL;
2675    int enc_name_len;
2676
2677    mbfl_string_init(&haystack);
2678    mbfl_string_init(&needle);
2679    haystack.no_language = MBSTRG(language);
2680    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2681    needle.no_language = MBSTRG(language);
2682    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2683
2684    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2685        return;
2686    }
2687
2688    if (enc_name != NULL) {
2689        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2690        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2691            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2692            RETURN_FALSE;
2693        }
2694    }
2695
2696    if (needle.len <= 0) {
2697        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
2698        RETURN_FALSE;
2699    }
2700
2701    n = mbfl_substr_count(&haystack, &needle);
2702    if (n >= 0) {
2703        RETVAL_LONG(n);
2704    } else {
2705        RETVAL_FALSE;
2706    }
2707}
2708/* }}} */
2709
2710/* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2711   Returns part of a string */
2712PHP_FUNCTION(mb_substr)
2713{
2714    size_t argc = ZEND_NUM_ARGS();
2715    char *str, *encoding;
2716    long from, len;
2717    int mblen, str_len, encoding_len;
2718    zval **z_len = NULL;
2719    mbfl_string string, result, *ret;
2720
2721    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", &str, &str_len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2722        return;
2723    }
2724
2725    mbfl_string_init(&string);
2726    string.no_language = MBSTRG(language);
2727    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2728
2729    if (argc == 4) {
2730        string.no_encoding = mbfl_name2no_encoding(encoding);
2731        if (string.no_encoding == mbfl_no_encoding_invalid) {
2732            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2733            RETURN_FALSE;
2734        }
2735    }
2736
2737    string.val = (unsigned char *)str;
2738    string.len = str_len;
2739
2740    if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2741        len = str_len;
2742    } else {
2743        convert_to_long_ex(z_len);
2744        len = Z_LVAL_PP(z_len);
2745    }
2746
2747    /* measures length */
2748    mblen = 0;
2749    if (from < 0 || len < 0) {
2750        mblen = mbfl_strlen(&string);
2751    }
2752
2753    /* if "from" position is negative, count start position from the end
2754     * of the string
2755     */
2756    if (from < 0) {
2757        from = mblen + from;
2758        if (from < 0) {
2759            from = 0;
2760        }
2761    }
2762
2763    /* if "length" position is negative, set it to the length
2764     * needed to stop that many chars from the end of the string
2765     */
2766    if (len < 0) {
2767        len = (mblen - from) + len;
2768        if (len < 0) {
2769            len = 0;
2770        }
2771    }
2772
2773    if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2774        && (from >= mbfl_strlen(&string))) {
2775        RETURN_FALSE;
2776    }
2777
2778    ret = mbfl_substr(&string, &result, from, len);
2779    if (NULL == ret) {
2780        RETURN_FALSE;
2781    }
2782
2783    RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2784}
2785/* }}} */
2786
2787/* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2788   Returns part of a string */
2789PHP_FUNCTION(mb_strcut)
2790{
2791    size_t argc = ZEND_NUM_ARGS();
2792    char *encoding;
2793    long from, len;
2794    int encoding_len;
2795    zval **z_len = NULL;
2796    mbfl_string string, result, *ret;
2797
2798    mbfl_string_init(&string);
2799    string.no_language = MBSTRG(language);
2800    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2801
2802    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", (char **)&string.val, (int **)&string.len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2803        return;
2804    }
2805
2806    if (argc == 4) {
2807        string.no_encoding = mbfl_name2no_encoding(encoding);
2808        if (string.no_encoding == mbfl_no_encoding_invalid) {
2809            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2810            RETURN_FALSE;
2811        }
2812    }
2813
2814    if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2815        len = string.len;
2816    } else {
2817        convert_to_long_ex(z_len);
2818        len = Z_LVAL_PP(z_len);
2819    }
2820
2821    /* if "from" position is negative, count start position from the end
2822     * of the string
2823     */
2824    if (from < 0) {
2825        from = string.len + from;
2826        if (from < 0) {
2827            from = 0;
2828        }
2829    }
2830
2831    /* if "length" position is negative, set it to the length
2832     * needed to stop that many chars from the end of the string
2833     */
2834    if (len < 0) {
2835        len = (string.len - from) + len;
2836        if (len < 0) {
2837            len = 0;
2838        }
2839    }
2840
2841    if ((unsigned int)from > string.len) {
2842        RETURN_FALSE;
2843    }
2844
2845    ret = mbfl_strcut(&string, &result, from, len);
2846    if (ret == NULL) {
2847        RETURN_FALSE;
2848    }
2849
2850    RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2851}
2852/* }}} */
2853
2854/* {{{ proto int mb_strwidth(string str [, string encoding])
2855   Gets terminal width of a string */
2856PHP_FUNCTION(mb_strwidth)
2857{
2858    int n;
2859    mbfl_string string;
2860    char *enc_name = NULL;
2861    int enc_name_len;
2862
2863    mbfl_string_init(&string);
2864
2865    string.no_language = MBSTRG(language);
2866    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2867
2868    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2869        return;
2870    }
2871
2872    if (enc_name != NULL) {
2873        string.no_encoding = mbfl_name2no_encoding(enc_name);
2874        if (string.no_encoding == mbfl_no_encoding_invalid) {
2875            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2876            RETURN_FALSE;
2877        }
2878    }
2879
2880    n = mbfl_strwidth(&string);
2881    if (n >= 0) {
2882        RETVAL_LONG(n);
2883    } else {
2884        RETVAL_FALSE;
2885    }
2886}
2887/* }}} */
2888
2889/* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2890   Trim the string in terminal width */
2891PHP_FUNCTION(mb_strimwidth)
2892{
2893    char *str, *trimmarker, *encoding;
2894    long from, width;
2895    int str_len, trimmarker_len, encoding_len;
2896    mbfl_string string, result, marker, *ret;
2897
2898    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2899        return;
2900    }
2901
2902    mbfl_string_init(&string);
2903    mbfl_string_init(&marker);
2904    string.no_language = MBSTRG(language);
2905    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2906    marker.no_language = MBSTRG(language);
2907    marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2908    marker.val = NULL;
2909    marker.len = 0;
2910
2911    if (ZEND_NUM_ARGS() == 5) {
2912        string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2913        if (string.no_encoding == mbfl_no_encoding_invalid) {
2914            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2915            RETURN_FALSE;
2916        }
2917    }
2918
2919    string.val = (unsigned char *)str;
2920    string.len = str_len;
2921
2922    if (from < 0 || from > str_len) {
2923        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
2924        RETURN_FALSE;
2925    }
2926
2927    if (width < 0) {
2928        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
2929        RETURN_FALSE;
2930    }
2931
2932    if (ZEND_NUM_ARGS() >= 4) {
2933        marker.val = (unsigned char *)trimmarker;
2934        marker.len = trimmarker_len;
2935    }
2936
2937    ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2938
2939    if (ret == NULL) {
2940        RETURN_FALSE;
2941    }
2942
2943    RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2944}
2945/* }}} */
2946
2947/* {{{ MBSTRING_API char *php_mb_convert_encoding() */
2948MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
2949{
2950    mbfl_string string, result, *ret;
2951    const mbfl_encoding *from_encoding, *to_encoding;
2952    mbfl_buffer_converter *convd;
2953    size_t size;
2954    const mbfl_encoding **list;
2955    char *output=NULL;
2956
2957    if (output_len) {
2958        *output_len = 0;
2959    }
2960    if (!input) {
2961        return NULL;
2962    }
2963    /* new encoding */
2964    if (_to_encoding && strlen(_to_encoding)) {
2965        to_encoding = mbfl_name2encoding(_to_encoding);
2966        if (!to_encoding) {
2967            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
2968            return NULL;
2969        }
2970    } else {
2971        to_encoding = MBSTRG(current_internal_encoding);
2972    }
2973
2974    /* initialize string */
2975    mbfl_string_init(&string);
2976    mbfl_string_init(&result);
2977    from_encoding = MBSTRG(current_internal_encoding);
2978    string.no_encoding = from_encoding->no_encoding;
2979    string.no_language = MBSTRG(language);
2980    string.val = (unsigned char *)input;
2981    string.len = length;
2982
2983    /* pre-conversion encoding */
2984    if (_from_encodings) {
2985        list = NULL;
2986        size = 0;
2987        php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
2988        if (size == 1) {
2989            from_encoding = *list;
2990            string.no_encoding = from_encoding->no_encoding;
2991        } else if (size > 1) {
2992            /* auto detect */
2993            from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
2994            if (from_encoding) {
2995                string.no_encoding = from_encoding->no_encoding;
2996            } else {
2997                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
2998                from_encoding = &mbfl_encoding_pass;
2999                to_encoding = from_encoding;
3000                string.no_encoding = from_encoding->no_encoding;
3001            }
3002        } else {
3003            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
3004        }
3005        if (list != NULL) {
3006            efree((void *)list);
3007        }
3008    }
3009
3010    /* initialize converter */
3011    convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
3012    if (convd == NULL) {
3013        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
3014        return NULL;
3015    }
3016    mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3017    mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3018
3019    /* do it */
3020    ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3021    if (ret) {
3022        if (output_len) {
3023            *output_len = ret->len;
3024        }
3025        output = (char *)ret->val;
3026    }
3027
3028    MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3029    mbfl_buffer_converter_delete(convd);
3030    return output;
3031}
3032/* }}} */
3033
3034/* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
3035   Returns converted string in desired encoding */
3036PHP_FUNCTION(mb_convert_encoding)
3037{
3038    char *arg_str, *arg_new;
3039    int str_len, new_len;
3040    zval *arg_old;
3041    int i;
3042    size_t size, l, n;
3043    char *_from_encodings = NULL, *ret, *s_free = NULL;
3044
3045    zval **hash_entry;
3046    HashTable *target_hash;
3047
3048    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
3049        return;
3050    }
3051
3052    if (ZEND_NUM_ARGS() == 3) {
3053        switch (Z_TYPE_P(arg_old)) {
3054        case IS_ARRAY:
3055            target_hash = Z_ARRVAL_P(arg_old);
3056            zend_hash_internal_pointer_reset(target_hash);
3057            i = zend_hash_num_elements(target_hash);
3058            _from_encodings = NULL;
3059
3060            while (i > 0) {
3061                if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3062                    break;
3063                }
3064
3065                convert_to_string_ex(hash_entry);
3066
3067                if ( _from_encodings) {
3068                    l = strlen(_from_encodings);
3069                    n = strlen(Z_STRVAL_PP(hash_entry));
3070                    _from_encodings = erealloc(_from_encodings, l+n+2);
3071                    strcpy(_from_encodings+l, ",");
3072                    strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
3073                } else {
3074                    _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
3075                }
3076
3077                zend_hash_move_forward(target_hash);
3078                i--;
3079            }
3080
3081            if (_from_encodings != NULL && !strlen(_from_encodings)) {
3082                efree(_from_encodings);
3083                _from_encodings = NULL;
3084            }
3085            s_free = _from_encodings;
3086            break;
3087        default:
3088            convert_to_string(arg_old);
3089            _from_encodings = Z_STRVAL_P(arg_old);
3090            break;
3091        }
3092    }
3093
3094    /* new encoding */
3095    ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
3096    if (ret != NULL) {
3097        RETVAL_STRINGL(ret, size, 0);       /* the string is already strdup()'ed */
3098    } else {
3099        RETVAL_FALSE;
3100    }
3101
3102    if ( s_free) {
3103        efree(s_free);
3104    }
3105}
3106/* }}} */
3107
3108/* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
3109   Returns a case-folded version of sourcestring */
3110PHP_FUNCTION(mb_convert_case)
3111{
3112    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3113    char *str;
3114    int str_len, from_encoding_len;
3115    long case_mode = 0;
3116    char *newstr;
3117    size_t ret_len;
3118
3119    RETVAL_FALSE;
3120    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
3121                &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
3122        RETURN_FALSE;
3123
3124    newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3125
3126    if (newstr) {
3127        RETVAL_STRINGL(newstr, ret_len, 0);
3128    }
3129}
3130/* }}} */
3131
3132/* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
3133 *  Returns a uppercased version of sourcestring
3134 */
3135PHP_FUNCTION(mb_strtoupper)
3136{
3137    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3138    char *str;
3139    int str_len, from_encoding_len;
3140    char *newstr;
3141    size_t ret_len;
3142
3143    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3144                &from_encoding, &from_encoding_len) == FAILURE) {
3145        return;
3146    }
3147    newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3148
3149    if (newstr) {
3150        RETURN_STRINGL(newstr, ret_len, 0);
3151    }
3152    RETURN_FALSE;
3153}
3154/* }}} */
3155
3156/* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
3157 *  Returns a lowercased version of sourcestring
3158 */
3159PHP_FUNCTION(mb_strtolower)
3160{
3161    const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3162    char *str;
3163    int str_len, from_encoding_len;
3164    char *newstr;
3165    size_t ret_len;
3166
3167    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3168                &from_encoding, &from_encoding_len) == FAILURE) {
3169        return;
3170    }
3171    newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3172
3173    if (newstr) {
3174        RETURN_STRINGL(newstr, ret_len, 0);
3175    }
3176    RETURN_FALSE;
3177}
3178/* }}} */
3179
3180/* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3181   Encodings of the given string is returned (as a string) */
3182PHP_FUNCTION(mb_detect_encoding)
3183{
3184    char *str;
3185    int str_len;
3186    zend_bool strict=0;
3187    zval *encoding_list;
3188
3189    mbfl_string string;
3190    const mbfl_encoding *ret;
3191    const mbfl_encoding **elist, **list;
3192    size_t size;
3193
3194    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3195        return;
3196    }
3197
3198    /* make encoding list */
3199    list = NULL;
3200    size = 0;
3201    if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) {
3202        switch (Z_TYPE_P(encoding_list)) {
3203        case IS_ARRAY:
3204            if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
3205                if (list) {
3206                    efree(list);
3207                    list = NULL;
3208                    size = 0;
3209                }
3210            }
3211            break;
3212        default:
3213            convert_to_string(encoding_list);
3214            if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
3215                if (list) {
3216                    efree(list);
3217                    list = NULL;
3218                    size = 0;
3219                }
3220            }
3221            break;
3222        }
3223        if (size <= 0) {
3224            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
3225        }
3226    }
3227
3228    if (ZEND_NUM_ARGS() < 3) {
3229        strict = (zend_bool)MBSTRG(strict_detection);
3230    }
3231
3232    if (size > 0 && list != NULL) {
3233        elist = list;
3234    } else {
3235        elist = MBSTRG(current_detect_order_list);
3236        size = MBSTRG(current_detect_order_list_size);
3237    }
3238
3239    mbfl_string_init(&string);
3240    string.no_language = MBSTRG(language);
3241    string.val = (unsigned char *)str;
3242    string.len = str_len;
3243    ret = mbfl_identify_encoding2(&string, elist, size, strict);
3244
3245    if (list != NULL) {
3246        efree((void *)list);
3247    }
3248
3249    if (ret == NULL) {
3250        RETURN_FALSE;
3251    }
3252
3253    RETVAL_STRING((char *)ret->name, 1);
3254}
3255/* }}} */
3256
3257/* {{{ proto mixed mb_list_encodings()
3258   Returns an array of all supported entity encodings */
3259PHP_FUNCTION(mb_list_encodings)
3260{
3261    const mbfl_encoding **encodings;
3262    const mbfl_encoding *encoding;
3263    int i;
3264
3265    array_init(return_value);
3266    i = 0;
3267    encodings = mbfl_get_supported_encodings();
3268    while ((encoding = encodings[i++]) != NULL) {
3269        add_next_index_string(return_value, (char *) encoding->name, 1);
3270    }
3271}
3272/* }}} */
3273
3274/* {{{ proto array mb_encoding_aliases(string encoding)
3275   Returns an array of the aliases of a given encoding name */
3276PHP_FUNCTION(mb_encoding_aliases)
3277{
3278    const mbfl_encoding *encoding;
3279    char *name = NULL;
3280    int name_len;
3281
3282    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
3283        RETURN_FALSE;
3284    }
3285
3286    encoding = mbfl_name2encoding(name);
3287    if (!encoding) {
3288        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
3289        RETURN_FALSE;
3290    }
3291
3292    array_init(return_value);
3293    if (encoding->aliases != NULL) {
3294        const char **alias;
3295        for (alias = *encoding->aliases; *alias; ++alias) {
3296            add_next_index_string(return_value, (char *)*alias, 1);
3297        }
3298    }
3299}
3300/* }}} */
3301
3302/* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3303   Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
3304PHP_FUNCTION(mb_encode_mimeheader)
3305{
3306    enum mbfl_no_encoding charset, transenc;
3307    mbfl_string  string, result, *ret;
3308    char *charset_name = NULL;
3309    int charset_name_len;
3310    char *trans_enc_name = NULL;
3311    int trans_enc_name_len;
3312    char *linefeed = "\r\n";
3313    int linefeed_len;
3314    long indent = 0;
3315
3316    mbfl_string_init(&string);
3317    string.no_language = MBSTRG(language);
3318    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3319
3320    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3321        return;
3322    }
3323
3324    charset = mbfl_no_encoding_pass;
3325    transenc = mbfl_no_encoding_base64;
3326
3327    if (charset_name != NULL) {
3328        charset = mbfl_name2no_encoding(charset_name);
3329        if (charset == mbfl_no_encoding_invalid) {
3330            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3331            RETURN_FALSE;
3332        }
3333    } else {
3334        const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3335        if (lang != NULL) {
3336            charset = lang->mail_charset;
3337            transenc = lang->mail_header_encoding;
3338        }
3339    }
3340
3341    if (trans_enc_name != NULL) {
3342        if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3343            transenc = mbfl_no_encoding_base64;
3344        } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3345            transenc = mbfl_no_encoding_qprint;
3346        }
3347    }
3348
3349    mbfl_string_init(&result);
3350    ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3351    if (ret != NULL) {
3352        RETVAL_STRINGL((char *)ret->val, ret->len, 0);  /* the string is already strdup()'ed */
3353    } else {
3354        RETVAL_FALSE;
3355    }
3356}
3357/* }}} */
3358
3359/* {{{ proto string mb_decode_mimeheader(string string)
3360   Decodes the MIME "encoded-word" in the string */
3361PHP_FUNCTION(mb_decode_mimeheader)
3362{
3363    mbfl_string string, result, *ret;
3364
3365    mbfl_string_init(&string);
3366    string.no_language = MBSTRG(language);
3367    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3368
3369    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
3370        return;
3371    }
3372
3373    mbfl_string_init(&result);
3374    ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
3375    if (ret != NULL) {
3376        RETVAL_STRINGL((char *)ret->val, ret->len, 0);  /* the string is already strdup()'ed */
3377    } else {
3378        RETVAL_FALSE;
3379    }
3380}
3381/* }}} */
3382
3383/* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3384   Conversion between full-width character and half-width character (Japanese) */
3385PHP_FUNCTION(mb_convert_kana)
3386{
3387    int opt, i;
3388    mbfl_string string, result, *ret;
3389    char *optstr = NULL;
3390    int optstr_len;
3391    char *encname = NULL;
3392    int encname_len;
3393
3394    mbfl_string_init(&string);
3395    string.no_language = MBSTRG(language);
3396    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3397
3398    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3399        return;
3400    }
3401
3402    /* option */
3403    if (optstr != NULL) {
3404        char *p = optstr;
3405        int n = optstr_len;
3406        i = 0;
3407        opt = 0;
3408        while (i < n) {
3409            i++;
3410            switch (*p++) {
3411            case 'A':
3412                opt |= 0x1;
3413                break;
3414            case 'a':
3415                opt |= 0x10;
3416                break;
3417            case 'R':
3418                opt |= 0x2;
3419                break;
3420            case 'r':
3421                opt |= 0x20;
3422                break;
3423            case 'N':
3424                opt |= 0x4;
3425                break;
3426            case 'n':
3427                opt |= 0x40;
3428                break;
3429            case 'S':
3430                opt |= 0x8;
3431                break;
3432            case 's':
3433                opt |= 0x80;
3434                break;
3435            case 'K':
3436                opt |= 0x100;
3437                break;
3438            case 'k':
3439                opt |= 0x1000;
3440                break;
3441            case 'H':
3442                opt |= 0x200;
3443                break;
3444            case 'h':
3445                opt |= 0x2000;
3446                break;
3447            case 'V':
3448                opt |= 0x800;
3449                break;
3450            case 'C':
3451                opt |= 0x10000;
3452                break;
3453            case 'c':
3454                opt |= 0x20000;
3455                break;
3456            case 'M':
3457                opt |= 0x100000;
3458                break;
3459            case 'm':
3460                opt |= 0x200000;
3461                break;
3462            }
3463        }
3464    } else {
3465        opt = 0x900;
3466    }
3467
3468    /* encoding */
3469    if (encname != NULL) {
3470        string.no_encoding = mbfl_name2no_encoding(encname);
3471        if (string.no_encoding == mbfl_no_encoding_invalid) {
3472            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
3473            RETURN_FALSE;
3474        }
3475    }
3476
3477    ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3478    if (ret != NULL) {
3479        RETVAL_STRINGL((char *)ret->val, ret->len, 0);      /* the string is already strdup()'ed */
3480    } else {
3481        RETVAL_FALSE;
3482    }
3483}
3484/* }}} */
3485
/* Number of zval** slots initially allocated for the traversal stack that
 * mb_convert_variables() uses during encoding auto detection. */
#define PHP_MBSTR_STACK_BLOCK_SIZE 32
3487
3488/* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3489   Converts the string resource in variables to desired encoding */
3490PHP_FUNCTION(mb_convert_variables)
3491{
3492    zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
3493    HashTable *target_hash;
3494    mbfl_string string, result, *ret;
3495    const mbfl_encoding *from_encoding, *to_encoding;
3496    mbfl_encoding_detector *identd;
3497    mbfl_buffer_converter *convd;
3498    int n, to_enc_len, argc, stack_level, stack_max;
3499    size_t elistsz;
3500    const mbfl_encoding **elist;
3501    char *to_enc;
3502    void *ptmp;
3503
3504    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3505        return;
3506    }
3507
3508    /* new encoding */
3509    to_encoding = mbfl_name2encoding(to_enc);
3510    if (!to_encoding) {
3511        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3512        efree(args);
3513        RETURN_FALSE;
3514    }
3515
3516    /* initialize string */
3517    mbfl_string_init(&string);
3518    mbfl_string_init(&result);
3519    from_encoding = MBSTRG(current_internal_encoding);
3520    string.no_encoding = from_encoding->no_encoding;
3521    string.no_language = MBSTRG(language);
3522
3523    /* pre-conversion encoding */
3524    elist = NULL;
3525    elistsz = 0;
3526    switch (Z_TYPE_PP(zfrom_enc)) {
3527    case IS_ARRAY:
3528        php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
3529        break;
3530    default:
3531        convert_to_string_ex(zfrom_enc);
3532        php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
3533        break;
3534    }
3535    if (elistsz <= 0) {
3536        from_encoding = &mbfl_encoding_pass;
3537    } else if (elistsz == 1) {
3538        from_encoding = *elist;
3539    } else {
3540        /* auto detect */
3541        from_encoding = NULL;
3542        stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3543        stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3544        stack_level = 0;
3545        identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
3546        if (identd != NULL) {
3547            n = 0;
3548            while (n < argc || stack_level > 0) {
3549                if (stack_level <= 0) {
3550                    var = args[n++];
3551                    if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3552                        target_hash = HASH_OF(*var);
3553                        if (target_hash != NULL) {
3554                            zend_hash_internal_pointer_reset(target_hash);
3555                        }
3556                    }
3557                } else {
3558                    stack_level--;
3559                    var = stack[stack_level];
3560                }
3561                if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3562                    target_hash = HASH_OF(*var);
3563                    if (target_hash != NULL) {
3564                        while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3565                            zend_hash_move_forward(target_hash);
3566                            if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3567                                if (stack_level >= stack_max) {
3568                                    stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3569                                    ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3570                                    stack = (zval ***)ptmp;
3571                                }
3572                                stack[stack_level] = var;
3573                                stack_level++;
3574                                var = hash_entry;
3575                                target_hash = HASH_OF(*var);
3576                                if (target_hash != NULL) {
3577                                    zend_hash_internal_pointer_reset(target_hash);
3578                                    continue;
3579                                }
3580                            } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3581                                string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3582                                string.len = Z_STRLEN_PP(hash_entry);
3583                                if (mbfl_encoding_detector_feed(identd, &string)) {
3584                                    goto detect_end;        /* complete detecting */
3585                                }
3586                            }
3587                        }
3588                    }
3589                } else if (Z_TYPE_PP(var) == IS_STRING) {
3590                    string.val = (unsigned char *)Z_STRVAL_PP(var);
3591                    string.len = Z_STRLEN_PP(var);
3592                    if (mbfl_encoding_detector_feed(identd, &string)) {
3593                        goto detect_end;        /* complete detecting */
3594                    }
3595                }
3596            }
3597detect_end:
3598            from_encoding = mbfl_encoding_detector_judge2(identd);
3599            mbfl_encoding_detector_delete(identd);
3600        }
3601        efree(stack);
3602
3603        if (!from_encoding) {
3604            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
3605            from_encoding = &mbfl_encoding_pass;
3606        }
3607    }
3608    if (elist != NULL) {
3609        efree((void *)elist);
3610    }
3611    /* create converter */
3612    convd = NULL;
3613    if (from_encoding != &mbfl_encoding_pass) {
3614        convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
3615        if (convd == NULL) {
3616            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
3617            RETURN_FALSE;
3618        }
3619        mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3620        mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3621    }
3622
3623    /* convert */
3624    if (convd != NULL) {
3625        stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3626        stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3627        stack_level = 0;
3628        n = 0;
3629        while (n < argc || stack_level > 0) {
3630            if (stack_level <= 0) {
3631                var = args[n++];
3632                if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3633                    target_hash = HASH_OF(*var);
3634                    if (target_hash != NULL) {
3635                        zend_hash_internal_pointer_reset(target_hash);
3636                    }
3637                }
3638            } else {
3639                stack_level--;
3640                var = stack[stack_level];
3641            }
3642            if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3643                target_hash = HASH_OF(*var);
3644                if (target_hash != NULL) {
3645                    while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3646                        zend_hash_move_forward(target_hash);
3647                        if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3648                            if (stack_level >= stack_max) {
3649                                stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3650                                ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3651                                stack = (zval ***)ptmp;
3652                            }
3653                            stack[stack_level] = var;
3654                            stack_level++;
3655                            var = hash_entry;
3656                            SEPARATE_ZVAL(hash_entry);
3657                            target_hash = HASH_OF(*var);
3658                            if (target_hash != NULL) {
3659                                zend_hash_internal_pointer_reset(target_hash);
3660                                continue;
3661                            }
3662                        } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3663                            string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3664                            string.len = Z_STRLEN_PP(hash_entry);
3665                            ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3666                            if (ret != NULL) {
3667                                if (Z_REFCOUNT_PP(hash_entry) > 1) {
3668                                    Z_DELREF_PP(hash_entry);
3669                                    MAKE_STD_ZVAL(*hash_entry);
3670                                } else {
3671                                    zval_dtor(*hash_entry);
3672                                }
3673                            ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
3674                        }
3675                    }
3676                }
3677            }
3678        } else if (Z_TYPE_PP(var) == IS_STRING) {
3679            string.val = (unsigned char *)Z_STRVAL_PP(var);
3680            string.len = Z_STRLEN_PP(var);
3681            ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3682            if (ret != NULL) {
3683                zval_dtor(*var);
3684                ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
3685                }
3686            }
3687        }
3688        efree(stack);
3689
3690        MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3691        mbfl_buffer_converter_delete(convd);
3692    }
3693
3694    efree(args);
3695
3696    if (from_encoding) {
3697        RETURN_STRING(from_encoding->name, 1);
3698    } else {
3699        RETURN_FALSE;
3700    }
3701}
3702/* }}} */
3703
3704/* {{{ HTML numeric entity */
3705/* {{{ static void php_mb_numericentity_exec() */
3706static void
3707php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3708{
3709    char *str, *encoding;
3710    int str_len, encoding_len;
3711    zval *zconvmap, **hash_entry;
3712    HashTable *target_hash;
3713    size_t argc = ZEND_NUM_ARGS();
3714    int i, *convmap, *mapelm, mapsize=0;
3715    zend_bool is_hex = 0;
3716    mbfl_string string, result, *ret;
3717    enum mbfl_no_encoding no_encoding;
3718
3719    if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
3720        return;
3721    }
3722
3723    mbfl_string_init(&string);
3724    string.no_language = MBSTRG(language);
3725    string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3726    string.val = (unsigned char *)str;
3727    string.len = str_len;
3728
3729    /* encoding */
3730    if ((argc == 3 || argc == 4) && encoding_len > 0) {
3731        no_encoding = mbfl_name2no_encoding(encoding);
3732        if (no_encoding == mbfl_no_encoding_invalid) {
3733            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
3734            RETURN_FALSE;
3735        } else {
3736            string.no_encoding = no_encoding;
3737        }
3738    }
3739
3740    if (argc == 4) {
3741        if (type == 0 && is_hex) {
3742            type = 2; /* output in hex format */
3743        }
3744    }
3745
3746    /* conversion map */
3747    convmap = NULL;
3748    if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3749        target_hash = Z_ARRVAL_P(zconvmap);
3750        zend_hash_internal_pointer_reset(target_hash);
3751        i = zend_hash_num_elements(target_hash);
3752        if (i > 0) {
3753            convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3754            mapelm = convmap;
3755            mapsize = 0;
3756            while (i > 0) {
3757                if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3758                    break;
3759                }
3760                convert_to_long_ex(hash_entry);
3761                *mapelm++ = Z_LVAL_PP(hash_entry);
3762                mapsize++;
3763                i--;
3764                zend_hash_move_forward(target_hash);
3765            }
3766        }
3767    }
3768    if (convmap == NULL) {
3769        RETURN_FALSE;
3770    }
3771    mapsize /= 4;
3772
3773    ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3774    if (ret != NULL) {
3775        RETVAL_STRINGL((char *)ret->val, ret->len, 0);
3776    } else {
3777        RETVAL_FALSE;
3778    }
3779    efree((void *)convmap);
3780}
3781/* }}} */
3782
/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
   Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)
{
    /* type 0 selects encoding; the shared worker switches it to 2 (hex output)
     * when is_hex is passed and true */
    php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
/* }}} */
3790
/* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
   Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)
{
    /* type 1 selects decoding in the shared worker */
    php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */
3798/* }}} */
3799
3800/* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
3801 *  Sends an email message with MIME scheme
3802 */
3803
/* For use inside a loop over str indexed by pos: when str[pos] begins a folded
 * header separator (CRLF followed by a space or tab), move pos onto the last
 * whitespace character of that run and `continue` the enclosing loop, so the
 * separator is left untouched by whatever follows the macro. */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)                                     \
    if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) {    \
        pos += 2;                                           \
        while (str[pos + 1] == ' ' || str[pos + 1] == '\t') {                           \
            pos++;                                          \
        }                                               \
        continue;                                           \
    }

/* Overwrites, in place, every NUL byte within the first len bytes of str with a
 * space. Expands to bare statements and relies on `char *pp, *ee` being
 * declared in the expanding scope. */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)            \
    pp = str;                   \
    ee = pp + len;                  \
    while ((pp = memchr(pp, '\0', (ee - pp)))) {    \
        *pp = ' ';              \
    }                       \

/* Appends ch to the smart_str named `token` in the expanding scope. While token
 * owns no buffer (a == 0) only its length is increased and ch itself is not
 * written; once it owns a buffer, ch is appended for real. */
#define APPEND_ONE_CHAR(ch) do { \
    if (token.a > 0) { \
        smart_str_appendc(&token, ch); \
    } else {\
        token.len++; \
    } \
} while (0)

/* If str owns no buffer (a == 0), gives it a private emalloc'ed copy of its
 * current len bytes. Capacity becomes the smallest power of two >= len, and one
 * extra byte is allocated beyond that. Does nothing when a is already non-zero. */
#define SEPARATE_SMART_STR(str) do {\
    if ((str)->a == 0) { \
        char *tmp_ptr; \
        (str)->a = 1; \
        while ((str)->a < (str)->len) { \
            (str)->a <<= 1; \
        } \
        tmp_ptr = emalloc((str)->a + 1); \
        memcpy(tmp_ptr, (str)->c, (str)->len); \
        (str)->c = tmp_ptr; \
    } \
} while (0)
3840
3841static void my_smart_str_dtor(smart_str *s)
3842{
3843    if (s->a > 0) {
3844        smart_str_free(s);
3845    }
3846}
3847
3848static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
3849{
3850    const char *ps;
3851    size_t icnt;
3852    int state = 0;
3853    int crlf_state = -1;
3854
3855    smart_str token = { 0, 0, 0 };
3856    smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };
3857
3858    ps = str;
3859    icnt = str_len;
3860
3861    /*
3862     *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3863     *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
3864     *      state  0            1           2          3
3865     *
3866     *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3867     *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
3868     * crlf_state -1                       0                     1 -1
3869     *
3870     */
3871
3872    while (icnt > 0) {
3873        switch (*ps) {
3874            case ':':
3875                if (crlf_state == 1) {
3876                    APPEND_ONE_CHAR('\r');
3877                }
3878
3879                if (state == 0 || state == 1) {
3880                    fld_name = token;
3881
3882                    state = 2;
3883                } else {
3884                    APPEND_ONE_CHAR(*ps);
3885                }
3886
3887                crlf_state = 0;
3888                break;
3889
3890            case '\n':
3891                if (crlf_state == -1) {
3892                    goto out;
3893                }
3894                crlf_state = -1;
3895                break;
3896
3897            case '\r':
3898                if (crlf_state == 1) {
3899                    APPEND_ONE_CHAR('\r');
3900                } else {
3901                    crlf_state = 1;
3902                }
3903                break;
3904
3905            case ' ': case '\t':
3906                if (crlf_state == -1) {
3907                    if (state == 3) {
3908                        /* continuing from the previous line */
3909                        SEPARATE_SMART_STR(&token);
3910                        state = 4;
3911                    } else {
3912                        /* simply skipping this new line */
3913                        state = 5;
3914                    }
3915                } else {
3916                    if (crlf_state == 1) {
3917                        APPEND_ONE_CHAR('\r');
3918                    }
3919                    if (state == 1 || state == 3) {
3920                        APPEND_ONE_CHAR(*ps);
3921                    }
3922                }
3923                crlf_state = 0;
3924                break;
3925
3926            default:
3927                switch (state) {
3928                    case 0:
3929                        token.c = (char *)ps;
3930                        token.len = 0;
3931                        token.a = 0;
3932                        state = 1;
3933                        break;
3934
3935                    case 2:
3936                        if (crlf_state != -1) {
3937                            token.c = (char *)ps;
3938                            token.len = 0;
3939                            token.a = 0;
3940
3941                            state = 3;
3942                            break;
3943                        }
3944                        /* break is missing intentionally */
3945
3946                    case 3:
3947                        if (crlf_state == -1) {
3948                            fld_val = token;
3949
3950                            if (fld_name.c != NULL && fld_val.c != NULL) {
3951                                char *dummy;
3952
3953                                /* FIXME: some locale free implementation is
3954                                 * really required here,,, */
3955                                SEPARATE_SMART_STR(&fld_name);
3956                                php_strtoupper(fld_name.c, fld_name.len);
3957
3958                                zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
3959
3960                                my_smart_str_dtor(&fld_name);
3961                            }
3962
3963                            memset(&fld_name, 0, sizeof(smart_str));
3964                            memset(&fld_val, 0, sizeof(smart_str));
3965
3966                            token.c = (char *)ps;
3967                            token.len = 0;
3968                            token.a = 0;
3969
3970                            state = 1;
3971                        }
3972                        break;
3973
3974                    case 4:
3975                        APPEND_ONE_CHAR(' ');
3976                        state = 3;
3977                        break;
3978                }
3979
3980                if (crlf_state == 1) {
3981                    APPEND_ONE_CHAR('\r');
3982                }
3983
3984                APPEND_ONE_CHAR(*ps);
3985
3986                crlf_state = 0;
3987                break;
3988        }
3989        ps++, icnt--;
3990    }
3991out:
3992    if (state == 2) {
3993        token.c = "";
3994        token.len = 0;
3995        token.a = 0;
3996
3997        state = 3;
3998    }
3999    if (state == 3) {
4000        fld_val = token;
4001
4002        if (fld_name.c != NULL && fld_val.c != NULL) {
4003            void *dummy;
4004
4005            /* FIXME: some locale free implementation is
4006             * really required here,,, */
4007            SEPARATE_SMART_STR(&fld_name);
4008            php_strtoupper(fld_name.c, fld_name.len);
4009
4010            zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);
4011
4012            my_smart_str_dtor(&fld_name);
4013        }
4014    }
4015    return state;
4016}
4017
4018PHP_FUNCTION(mb_send_mail)
4019{
4020    int n;
4021    char *to = NULL;
4022    int to_len;
4023    char *message = NULL;
4024    int message_len;
4025    char *headers = NULL;
4026    int headers_len;
4027    char *subject = NULL;
4028    int subject_len;
4029    char *extra_cmd = NULL;
4030    int extra_cmd_len;
4031    int i;
4032    char *to_r = NULL;
4033    char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
4034    struct {
4035        int cnt_type:1;
4036        int cnt_trans_enc:1;
4037    } suppressed_hdrs = { 0, 0 };
4038
4039    char *message_buf = NULL, *subject_buf = NULL, *p;
4040    mbfl_string orig_str, conv_str;
4041    mbfl_string *pstr;  /* pointer to mbfl string for return value */
4042    enum mbfl_no_encoding
4043        tran_cs,    /* transfar text charset */
4044        head_enc,   /* header transfar encoding */
4045        body_enc;   /* body transfar encoding */
4046    mbfl_memory_device device;  /* automatic allocateable buffer for additional header */
4047    const mbfl_language *lang;
4048    int err = 0;
4049    HashTable ht_headers;
4050    smart_str *s;
4051    extern void mbfl_memory_device_unput(mbfl_memory_device *device);
4052    char *pp, *ee;
4053
4054    /* initialize */
4055    mbfl_memory_device_init(&device, 0, 0);
4056    mbfl_string_init(&orig_str);
4057    mbfl_string_init(&conv_str);
4058
4059    /* character-set, transfer-encoding */
4060    tran_cs = mbfl_no_encoding_utf8;
4061    head_enc = mbfl_no_encoding_base64;
4062    body_enc = mbfl_no_encoding_base64;
4063    lang = mbfl_no2language(MBSTRG(language));
4064    if (lang != NULL) {
4065        tran_cs = lang->mail_charset;
4066        head_enc = lang->mail_header_encoding;
4067        body_enc = lang->mail_body_encoding;
4068    }
4069
4070    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
4071        return;
4072    }
4073
4074    /* ASCIIZ check */
4075    MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
4076    MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
4077    MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
4078    if (headers) {
4079        MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
4080    }
4081    if (extra_cmd) {
4082        MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
4083    }
4084
4085    zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
4086
4087    if (headers != NULL) {
4088        _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
4089    }
4090
4091    if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
4092        char *tmp;
4093        char *param_name;
4094        char *charset = NULL;
4095
4096        SEPARATE_SMART_STR(s);
4097        smart_str_0(s);
4098
4099        p = strchr(s->c, ';');
4100
4101        if (p != NULL) {
4102            /* skipping the padded spaces */
4103            do {
4104                ++p;
4105            } while (*p == ' ' || *p == '\t');
4106
4107            if (*p != '\0') {
4108                if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
4109                    if (strcasecmp(param_name, "charset") == 0) {
4110                        enum mbfl_no_encoding _tran_cs = tran_cs;
4111
4112                        charset = php_strtok_r(NULL, "= \"", &tmp);
4113                        if (charset != NULL) {
4114                            _tran_cs = mbfl_name2no_encoding(charset);
4115                        }
4116
4117                        if (_tran_cs == mbfl_no_encoding_invalid) {
4118                            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
4119                            _tran_cs = mbfl_no_encoding_ascii;
4120                        }
4121                        tran_cs = _tran_cs;
4122                    }
4123                }
4124            }
4125        }
4126        suppressed_hdrs.cnt_type = 1;
4127    }
4128
4129    if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
4130        enum mbfl_no_encoding _body_enc;
4131        SEPARATE_SMART_STR(s);
4132        smart_str_0(s);
4133
4134        _body_enc = mbfl_name2no_encoding(s->c);
4135        switch (_body_enc) {
4136            case mbfl_no_encoding_base64:
4137            case mbfl_no_encoding_7bit:
4138            case mbfl_no_encoding_8bit:
4139                body_enc = _body_enc;
4140                break;
4141
4142            default:
4143                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
4144                body_enc =  mbfl_no_encoding_8bit;
4145                break;
4146        }
4147        suppressed_hdrs.cnt_trans_enc = 1;
4148    }
4149
4150    /* To: */
4151    if (to != NULL) {
4152        if (to_len > 0) {
4153            to_r = estrndup(to, to_len);
4154            for (; to_len; to_len--) {
4155                if (!isspace((unsigned char) to_r[to_len - 1])) {
4156                    break;
4157                }
4158                to_r[to_len - 1] = '\0';
4159            }
4160            for (i = 0; to_r[i]; i++) {
4161            if (iscntrl((unsigned char) to_r[i])) {
4162                /* According to RFC 822, section 3.1.1 long headers may be separated into
4163                 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
4164                 * To prevent these separators from being replaced with a space, we use the
4165                 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
4166                 */
4167                SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
4168                to_r[i] = ' ';
4169            }
4170            }
4171        } else {
4172            to_r = to;
4173        }
4174    } else {
4175        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
4176        err = 1;
4177    }
4178
4179    /* Subject: */
4180    if (subject != NULL && subject_len >= 0) {
4181        orig_str.no_language = MBSTRG(language);
4182        orig_str.val = (unsigned char *)subject;
4183        orig_str.len = subject_len;
4184        orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4185        if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4186            const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4187            orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4188        }
4189        pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4190        if (pstr != NULL) {
4191            subject_buf = subject = (char *)pstr->val;
4192        }
4193    } else {
4194        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
4195        err = 1;
4196    }
4197
4198    /* message body */
4199    if (message != NULL) {
4200        orig_str.no_language = MBSTRG(language);
4201        orig_str.val = (unsigned char *)message;
4202        orig_str.len = (unsigned int)message_len;
4203        orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4204
4205        if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
4206            const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4207            orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
4208        }
4209
4210        pstr = NULL;
4211        {
4212            mbfl_string tmpstr;
4213
4214            if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4215                tmpstr.no_encoding=mbfl_no_encoding_8bit;
4216                pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4217                efree(tmpstr.val);
4218            }
4219        }
4220        if (pstr != NULL) {
4221            message_buf = message = (char *)pstr->val;
4222        }
4223    } else {
4224        /* this is not really an error, so it is allowed. */
4225        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
4226        message = NULL;
4227    }
4228
4229    /* other headers */
4230#define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4231#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4232#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4233#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4234    if (headers != NULL) {
4235        p = headers;
4236        n = headers_len;
4237        mbfl_memory_device_strncat(&device, p, n);
4238        if (n > 0 && p[n - 1] != '\n') {
4239            mbfl_memory_device_strncat(&device, "\n", 1);
4240        }
4241    }
4242
4243    if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4244        mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4245        mbfl_memory_device_strncat(&device, "\n", 1);
4246    }
4247
4248    if (!suppressed_hdrs.cnt_type) {
4249        mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4250
4251        p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4252        if (p != NULL) {
4253            mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4254            mbfl_memory_device_strcat(&device, p);
4255        }
4256        mbfl_memory_device_strncat(&device, "\n", 1);
4257    }
4258    if (!suppressed_hdrs.cnt_trans_enc) {
4259        mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4260        p = (char *)mbfl_no2preferred_mime_name(body_enc);
4261        if (p == NULL) {
4262            p = "7bit";
4263        }
4264        mbfl_memory_device_strcat(&device, p);
4265        mbfl_memory_device_strncat(&device, "\n", 1);
4266    }
4267
4268    mbfl_memory_device_unput(&device);
4269    mbfl_memory_device_output('\0', &device);
4270    headers = (char *)device.buffer;
4271
4272    if (force_extra_parameters) {
4273        extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4274    } else if (extra_cmd) {
4275        extra_cmd = php_escape_shell_cmd(extra_cmd);
4276    }
4277
4278    if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
4279        RETVAL_TRUE;
4280    } else {
4281        RETVAL_FALSE;
4282    }
4283
4284    if (extra_cmd) {
4285        efree(extra_cmd);
4286    }
4287    if (to_r != to) {
4288        efree(to_r);
4289    }
4290    if (subject_buf) {
4291        efree((void *)subject_buf);
4292    }
4293    if (message_buf) {
4294        efree((void *)message_buf);
4295    }
4296    mbfl_memory_device_clear(&device);
4297    zend_hash_destroy(&ht_headers);
4298}
4299
4300#undef SKIP_LONG_HEADER_SEP_MBSTRING
4301#undef MAIL_ASCIIZ_CHECK_MBSTRING
4302#undef APPEND_ONE_CHAR
4303#undef SEPARATE_SMART_STR
4304#undef PHP_MBSTR_MAIL_MIME_HEADER1
4305#undef PHP_MBSTR_MAIL_MIME_HEADER2
4306#undef PHP_MBSTR_MAIL_MIME_HEADER3
4307#undef PHP_MBSTR_MAIL_MIME_HEADER4
4308/* }}} */
4309
4310/* {{{ proto mixed mb_get_info([string type])
4311   Returns the current settings of mbstring */
4312PHP_FUNCTION(mb_get_info)
4313{
4314    char *typ = NULL;
4315    int typ_len;
4316    size_t n;
4317    char *name;
4318    const struct mb_overload_def *over_func;
4319    zval *row1, *row2;
4320    const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4321    const mbfl_encoding **entry;
4322
4323    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
4324        RETURN_FALSE;
4325    }
4326
4327    if (!typ || !strcasecmp("all", typ)) {
4328        array_init(return_value);
4329        if (MBSTRG(current_internal_encoding)) {
4330            add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name, 1);
4331        }
4332        if (MBSTRG(http_input_identify)) {
4333            add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1);
4334        }
4335        if (MBSTRG(current_http_output_encoding)) {
4336            add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1);
4337        }
4338        if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4339            add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
4340        }
4341        add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4342        if (MBSTRG(func_overload)){
4343            over_func = &(mb_ovld[0]);
4344            MAKE_STD_ZVAL(row1);
4345            array_init(row1);
4346            while (over_func->type > 0) {
4347                if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4348                    add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
4349                }
4350                over_func++;
4351            }
4352            add_assoc_zval(return_value, "func_overload_list", row1);
4353        } else {
4354            add_assoc_string(return_value, "func_overload_list", "no overload", 1);
4355        }
4356        if (lang != NULL) {
4357            if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4358                add_assoc_string(return_value, "mail_charset", name, 1);
4359            }
4360            if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4361                add_assoc_string(return_value, "mail_header_encoding", name, 1);
4362            }
4363            if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4364                add_assoc_string(return_value, "mail_body_encoding", name, 1);
4365            }
4366        }
4367        add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4368        if (MBSTRG(encoding_translation)) {
4369            add_assoc_string(return_value, "encoding_translation", "On", 1);
4370        } else {
4371            add_assoc_string(return_value, "encoding_translation", "Off", 1);
4372        }
4373        if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4374            add_assoc_string(return_value, "language", name, 1);
4375        }
4376        n = MBSTRG(current_detect_order_list_size);
4377        entry = MBSTRG(current_detect_order_list);
4378        if (n > 0) {
4379            size_t i;
4380            MAKE_STD_ZVAL(row2);
4381            array_init(row2);
4382            for (i = 0; i < n; i++) {
4383                add_next_index_string(row2, (*entry)->name, 1);
4384                entry++;
4385            }
4386            add_assoc_zval(return_value, "detect_order", row2);
4387        }
4388        if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4389            add_assoc_string(return_value, "substitute_character", "none", 1);
4390        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4391            add_assoc_string(return_value, "substitute_character", "long", 1);
4392        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4393            add_assoc_string(return_value, "substitute_character", "entity", 1);
4394        } else {
4395            add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4396        }
4397        if (MBSTRG(strict_detection)) {
4398            add_assoc_string(return_value, "strict_detection", "On", 1);
4399        } else {
4400            add_assoc_string(return_value, "strict_detection", "Off", 1);
4401        }
4402    } else if (!strcasecmp("internal_encoding", typ)) {
4403        if (MBSTRG(current_internal_encoding)) {
4404            RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1);
4405        }
4406    } else if (!strcasecmp("http_input", typ)) {
4407        if (MBSTRG(http_input_identify)) {
4408            RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1);
4409        }
4410    } else if (!strcasecmp("http_output", typ)) {
4411        if (MBSTRG(current_http_output_encoding)) {
4412            RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1);
4413        }
4414    } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4415        if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4416            RETVAL_STRING(name, 1);
4417        }
4418    } else if (!strcasecmp("func_overload", typ)) {
4419        RETVAL_LONG(MBSTRG(func_overload));
4420    } else if (!strcasecmp("func_overload_list", typ)) {
4421        if (MBSTRG(func_overload)){
4422                over_func = &(mb_ovld[0]);
4423                array_init(return_value);
4424                while (over_func->type > 0) {
4425                    if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4426                        add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
4427                    }
4428                    over_func++;
4429                }
4430        } else {
4431            RETVAL_STRING("no overload", 1);
4432        }
4433    } else if (!strcasecmp("mail_charset", typ)) {
4434        if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4435            RETVAL_STRING(name, 1);
4436        }
4437    } else if (!strcasecmp("mail_header_encoding", typ)) {
4438        if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4439            RETVAL_STRING(name, 1);
4440        }
4441    } else if (!strcasecmp("mail_body_encoding", typ)) {
4442        if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4443            RETVAL_STRING(name, 1);
4444        }
4445    } else if (!strcasecmp("illegal_chars", typ)) {
4446        RETVAL_LONG(MBSTRG(illegalchars));
4447    } else if (!strcasecmp("encoding_translation", typ)) {
4448        if (MBSTRG(encoding_translation)) {
4449            RETVAL_STRING("On", 1);
4450        } else {
4451            RETVAL_STRING("Off", 1);
4452        }
4453    } else if (!strcasecmp("language", typ)) {
4454        if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4455            RETVAL_STRING(name, 1);
4456        }
4457    } else if (!strcasecmp("detect_order", typ)) {
4458        n = MBSTRG(current_detect_order_list_size);
4459        entry = MBSTRG(current_detect_order_list);
4460        if (n > 0) {
4461            size_t i;
4462            array_init(return_value);
4463            for (i = 0; i < n; i++) {
4464                add_next_index_string(return_value, (*entry)->name, 1);
4465                entry++;
4466            }
4467        }
4468    } else if (!strcasecmp("substitute_character", typ)) {
4469        if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4470            RETVAL_STRING("none", 1);
4471        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4472            RETVAL_STRING("long", 1);
4473        } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4474            RETVAL_STRING("entity", 1);
4475        } else {
4476            RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4477        }
4478    } else if (!strcasecmp("strict_detection", typ)) {
4479        if (MBSTRG(strict_detection)) {
4480            RETVAL_STRING("On", 1);
4481        } else {
4482            RETVAL_STRING("Off", 1);
4483        }
4484    } else {
4485        RETURN_FALSE;
4486    }
4487}
4488/* }}} */
4489
4490/* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4491   Check if the string is valid for the specified encoding */
4492PHP_FUNCTION(mb_check_encoding)
4493{
4494    char *var = NULL;
4495    int var_len;
4496    char *enc = NULL;
4497    int enc_len;
4498    mbfl_buffer_converter *convd;
4499    const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4500    mbfl_string string, result, *ret = NULL;
4501    long illegalchars = 0;
4502
4503    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4504        RETURN_FALSE;
4505    }
4506
4507    if (var == NULL) {
4508        RETURN_BOOL(MBSTRG(illegalchars) == 0);
4509    }
4510
4511    if (enc != NULL) {
4512        encoding = mbfl_name2encoding(enc);
4513        if (!encoding || encoding == &mbfl_encoding_pass) {
4514            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
4515            RETURN_FALSE;
4516        }
4517    }
4518
4519    convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
4520    if (convd == NULL) {
4521        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
4522        RETURN_FALSE;
4523    }
4524    mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4525    mbfl_buffer_converter_illegal_substchar(convd, 0);
4526
4527    /* initialize string */
4528    mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
4529    mbfl_string_init(&result);
4530
4531    string.val = (unsigned char *)var;
4532    string.len = var_len;
4533    ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4534    illegalchars = mbfl_buffer_illegalchars(convd);
4535    mbfl_buffer_converter_delete(convd);
4536
4537    RETVAL_FALSE;
4538    if (ret != NULL) {
4539        if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4540            RETVAL_TRUE;
4541        }
4542        mbfl_string_clear(&result);
4543    }
4544}
4545/* }}} */
4546
4547
4548/* {{{ php_mb_populate_current_detect_order_list */
4549static void php_mb_populate_current_detect_order_list(TSRMLS_D)
4550{
4551    const mbfl_encoding **entry = 0;
4552    size_t nentries;
4553
4554    if (MBSTRG(current_detect_order_list)) {
4555        return;
4556    }
4557
4558    if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4559        nentries = MBSTRG(detect_order_list_size);
4560        entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4561        memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4562    } else {
4563        const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4564        size_t i;
4565        nentries = MBSTRG(default_detect_order_list_size);
4566        entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4567        for (i = 0; i < nentries; i++) {
4568            entry[i] = mbfl_no2encoding(src[i]);
4569        }
4570    }
4571    MBSTRG(current_detect_order_list) = entry;
4572    MBSTRG(current_detect_order_list_size) = nentries;
4573}
4574
4575/* {{{ static int php_mb_encoding_translation() */
4576static int php_mb_encoding_translation(TSRMLS_D)
4577{
4578    return MBSTRG(encoding_translation);
4579}
4580/* }}} */
4581
4582/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
4583MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4584{
4585    if (enc != NULL) {
4586        if (enc->flag & MBFL_ENCTYPE_MBCS) {
4587            if (enc->mblen_table != NULL) {
4588                if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4589            }
4590        } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4591            return 2;
4592        } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4593            return 4;
4594        }
4595    }
4596    return 1;
4597}
4598/* }}} */
4599
4600/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
4601MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
4602{
4603    return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
4604}
4605/* }}} */
4606
4607/* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
4608MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4609{
4610    register const char *p = s;
4611    char *last=NULL;
4612
4613    if (nbytes == (size_t)-1) {
4614        size_t nb = 0;
4615
4616        while (*p != '\0') {
4617            if (nb == 0) {
4618                if ((unsigned char)*p == (unsigned char)c) {
4619                    last = (char *)p;
4620                }
4621                nb = php_mb_mbchar_bytes_ex(p, enc);
4622                if (nb == 0) {
4623                    return NULL; /* something is going wrong! */
4624                }
4625            }
4626            --nb;
4627            ++p;
4628        }
4629    } else {
4630        register size_t bcnt = nbytes;
4631        register size_t nbytes_char;
4632        while (bcnt > 0) {
4633            if ((unsigned char)*p == (unsigned char)c) {
4634                last = (char *)p;
4635            }
4636            nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4637            if (bcnt < nbytes_char) {
4638                return NULL;
4639            }
4640            p += nbytes_char;
4641            bcnt -= nbytes_char;
4642        }
4643    }
4644    return last;
4645}
4646/* }}} */
4647
4648/* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
4649MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
4650{
4651    return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4652}
4653/* }}} */
4654
4655/* {{{ MBSTRING_API int php_mb_stripos()
4656 */
4657MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
4658{
4659    int n;
4660    mbfl_string haystack, needle;
4661    n = -1;
4662
4663    mbfl_string_init(&haystack);
4664    mbfl_string_init(&needle);
4665    haystack.no_language = MBSTRG(language);
4666    haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4667    needle.no_language = MBSTRG(language);
4668    needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4669
4670    do {
4671        size_t len = 0;
4672        haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
4673        haystack.len = len;
4674
4675        if (!haystack.val) {
4676            break;
4677        }
4678
4679        if (haystack.len <= 0) {
4680            break;
4681        }
4682
4683        needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
4684        needle.len = len;
4685
4686        if (!needle.val) {
4687            break;
4688        }
4689
4690        if (needle.len <= 0) {
4691            break;
4692        }
4693
4694        haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4695        if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4696            php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4697            break;
4698        }
4699
4700        {
4701            int haystack_char_len = mbfl_strlen(&haystack);
4702
4703            if (mode) {
4704                if ((offset > 0 && offset > haystack_char_len) ||
4705                    (offset < 0 && -offset > haystack_char_len)) {
4706                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
4707                    break;
4708                }
4709            } else {
4710                if (offset < 0 || offset > haystack_char_len) {
4711                    php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
4712                    break;
4713                }
4714            }
4715        }
4716
4717        n = mbfl_strpos(&haystack, &needle, offset, mode);
4718    } while(0);
4719
4720    if (haystack.val) {
4721        efree(haystack.val);
4722    }
4723
4724    if (needle.val) {
4725        efree(needle.val);
4726    }
4727
4728    return n;
4729}
4730/* }}} */
4731
4732static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC) /* {{{ */
4733{
4734    *list = (const zend_encoding **)MBSTRG(http_input_list);
4735    *list_size = MBSTRG(http_input_list_size);
4736}
4737/* }}} */
4738
4739static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC) /* {{{ */
4740{
4741    MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
4742}
4743/* }}} */
4744
4745#endif  /* HAVE_MBSTRING */
4746
4747/*
4748 * Local variables:
4749 * tab-width: 4
4750 * c-basic-offset: 4
4751 * End:
4752 * vim600: fdm=marker
4753 * vim: noet sw=4 ts=4
4754 */
4755