1/* 2 +----------------------------------------------------------------------+ 3 | PHP Version 5 | 4 +----------------------------------------------------------------------+ 5 | Copyright (c) 1997-2013 The PHP Group | 6 +----------------------------------------------------------------------+ 7 | This source file is subject to version 3.01 of the PHP license, | 8 | that is bundled with this package in the file LICENSE, and is | 9 | available through the world-wide-web at the following url: | 10 | http://www.php.net/license/3_01.txt | 11 | If you did not receive a copy of the PHP license and are unable to | 12 | obtain it through the world-wide-web, please send a note to | 13 | license@php.net so we can mail you a copy immediately. | 14 +----------------------------------------------------------------------+ 15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> | 16 | Rui Hirokawa <hirokawa@php.net> | 17 +----------------------------------------------------------------------+ 18 */ 19 20/* $Id$ */ 21 22/* 23 * PHP 4 Multibyte String module "mbstring" 24 * 25 * History: 26 * 2000.5.19 Release php-4.0RC2_jstring-1.0 27 * 2001.4.1 Release php4_jstring-1.0.91 28 * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group) 29 * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net) 30 */ 31 32/* 33 * PHP3 Internationalization support program. 34 * 35 * Copyright (c) 1999,2000 by the PHP3 internationalization team. 36 * All rights reserved. 37 * 38 * See README_PHP3-i18n-ja for more detail. 
*
 * Authors:
 *    Hironori Sato <satoh@jpnnet.com>
 *    Shigeru Kanemoto <sgk@happysize.co.jp>
 *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
 *    Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
 */

/* {{{ includes */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "php.h"
#include "php_ini.h"
#include "php_variables.h"
#include "mbstring.h"
#include "ext/standard/php_string.h"
#include "ext/standard/php_mail.h"
#include "ext/standard/exec.h"
#include "ext/standard/php_smart_str.h"
#include "ext/standard/url.h"
#include "main/php_output.h"
#include "ext/standard/info.h"

#include "libmbfl/mbfl/mbfl_allocators.h"

#include "php_variables.h"
#include "php_globals.h"
#include "rfc1867.h"
#include "php_content_types.h"
#include "SAPI.h"
#include "php_unicode.h"
#include "TSRM.h"

#include "mb_gpc.h"

#if HAVE_MBREGEX
#include "php_mbregex.h"
#endif

#ifdef ZEND_MULTIBYTE
#include "zend_multibyte.h"
#endif /* ZEND_MULTIBYTE */

/* Regex backend used by the _php_mb_*_regex helpers: Oniguruma when
 * available, otherwise PCRE. */
#if HAVE_ONIG
#include "php_onig_compat.h"
#include <oniguruma.h>
#undef UChar
#elif HAVE_PCRE || HAVE_BUNDLED_PCRE
#include "ext/pcre/php_pcre.h"
#endif
/* }}} */

#if HAVE_MBSTRING

/* {{{ prototypes */
ZEND_DECLARE_MODULE_GLOBALS(mbstring)

static PHP_GINIT_FUNCTION(mbstring);
static PHP_GSHUTDOWN_FUNCTION(mbstring);

#ifdef ZEND_MULTIBYTE
static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC);
static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC);
static int php_mb_set_zend_encoding(TSRMLS_D);
#endif
/* }}} */

/* {{{ php_mb_default_identify_list */
/* One row of the per-language default identify table: `list` points at an
 * array of `list_size` encoding numbers associated with `lang`. */
typedef struct _php_mb_nls_ident_list {
	enum mbfl_no_language lang;
	const enum mbfl_no_encoding* list;
	int list_size;
} php_mb_nls_ident_list;

/* Japanese */
static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_jis,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_jp,
	mbfl_no_encoding_sjis
};

/* Simplified Chinese */
static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_cn,
	mbfl_no_encoding_cp936
};

/* Traditional Chinese */
static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_tw,
	mbfl_no_encoding_big5
};

/* Korean */
static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_kr,
	mbfl_no_encoding_uhc
};

/* Russian */
static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8r,
	mbfl_no_encoding_cp1251,
	mbfl_no_encoding_cp866
};

/* Armenian */
static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_armscii8
};

/* Turkish */
static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_cp1254,
	mbfl_no_encoding_8859_9
};

/* Ukrainian */
static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8u
};

/* Neutral; also the fallback that
 * php_mb_nls_get_default_detect_order_list() hands out when the requested
 * language has no row in the table below. */
static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8
};


/* Language -> default identify list. Each list_size is computed from the
 * array itself so it cannot drift from the initializer. */
static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
	{ mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
	{ mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
	{ mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
	{ mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
	{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
	{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
	{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
	{ mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
	{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
};

/* }}} */

/* {{{ mb_overload_def mb_ovld[] */
/* Function-overload table, terminated by the {0, NULL, NULL, NULL} row.
 * NOTE(review): struct mb_overload_def is declared elsewhere; the columns
 * look like (overload category flag, original function name, mb_*
 * replacement name, name under which the original is saved) - confirm
 * against the struct declaration. */
static const struct mb_overload_def mb_ovld[] = {
	{MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
	{MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
	{MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
	{MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
	{MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
	{MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
	{MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
	{MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
	{MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
	{MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
	{MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
	{MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
	{MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
#if HAVE_MBREGEX
	{MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
	{MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
	{MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
	{MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
	{MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
#endif
	{0, NULL, NULL, NULL}
};
/* }}} */

/* {{{ arginfo */
/* Argument metadata for the functions registered in mbstring_functions[]. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
	ZEND_ARG_INFO(0, language)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
	ZEND_ARG_INFO(0, type)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
	ZEND_ARG_INFO(0, substchar)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* `result` is flagged pass-by-reference (first macro argument is 1). */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
	ZEND_ARG_INFO(0, encoded_string)
	ZEND_ARG_INFO(1, result)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
	ZEND_ARG_INFO(0, contents)
	ZEND_ARG_INFO(0, status)
ZEND_END_ARG_INFO()
/* String search / measurement functions. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, offset)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, part)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
	ZEND_ARG_INFO(0, haystack)
	ZEND_ARG_INFO(0, needle)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* Substring / width functions. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, length)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, length)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, start)
	ZEND_ARG_INFO(0, width)
	ZEND_ARG_INFO(0, trimmarker)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

/* Conversion / detection functions. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, to)
	ZEND_ARG_INFO(0, from)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, mode)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
	ZEND_ARG_INFO(0, sourcestring)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, encoding_list)
	ZEND_ARG_INFO(0, strict)
ZEND_END_ARG_INFO()

/* Declares no arguments. */
ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
	ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
	ZEND_ARG_INFO(0, str)
	ZEND_ARG_INFO(0, charset)
	ZEND_ARG_INFO(0, transfer)
	ZEND_ARG_INFO(0, linefeed)
	ZEND_ARG_INFO(0, indent)
ZEND_END_ARG_INFO()
394ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1) 395 ZEND_ARG_INFO(0, string) 396ZEND_END_ARG_INFO() 397 398ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1) 399 ZEND_ARG_INFO(0, str) 400 ZEND_ARG_INFO(0, option) 401 ZEND_ARG_INFO(0, encoding) 402ZEND_END_ARG_INFO() 403 404ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 1, 0, 3) 405 ZEND_ARG_INFO(0, to) 406 ZEND_ARG_INFO(0, from) 407 ZEND_ARG_INFO(1, ...) 408ZEND_END_ARG_INFO() 409 410ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2) 411 ZEND_ARG_INFO(0, string) 412 ZEND_ARG_INFO(0, convmap) 413 ZEND_ARG_INFO(0, encoding) 414ZEND_END_ARG_INFO() 415 416ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2) 417 ZEND_ARG_INFO(0, string) 418 ZEND_ARG_INFO(0, convmap) 419 ZEND_ARG_INFO(0, encoding) 420ZEND_END_ARG_INFO() 421 422ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3) 423 ZEND_ARG_INFO(0, to) 424 ZEND_ARG_INFO(0, subject) 425 ZEND_ARG_INFO(0, message) 426 ZEND_ARG_INFO(0, additional_headers) 427 ZEND_ARG_INFO(0, additional_parameters) 428ZEND_END_ARG_INFO() 429 430ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0) 431 ZEND_ARG_INFO(0, type) 432ZEND_END_ARG_INFO() 433 434ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0) 435 ZEND_ARG_INFO(0, var) 436 ZEND_ARG_INFO(0, encoding) 437ZEND_END_ARG_INFO() 438 439ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0) 440 ZEND_ARG_INFO(0, encoding) 441ZEND_END_ARG_INFO() 442 443ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2) 444 ZEND_ARG_INFO(0, pattern) 445 ZEND_ARG_INFO(0, string) 446 ZEND_ARG_INFO(1, registers) 447ZEND_END_ARG_INFO() 448 449ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2) 450 ZEND_ARG_INFO(0, pattern) 451 ZEND_ARG_INFO(0, string) 452 ZEND_ARG_INFO(1, registers) 453ZEND_END_ARG_INFO() 454 455ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3) 456 ZEND_ARG_INFO(0, pattern) 457 ZEND_ARG_INFO(0, replacement) 458 ZEND_ARG_INFO(0, string) 459 ZEND_ARG_INFO(0, option) 
460ZEND_END_ARG_INFO() 461 462ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3) 463 ZEND_ARG_INFO(0, pattern) 464 ZEND_ARG_INFO(0, replacement) 465 ZEND_ARG_INFO(0, string) 466ZEND_END_ARG_INFO() 467 468ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2) 469 ZEND_ARG_INFO(0, pattern) 470 ZEND_ARG_INFO(0, string) 471 ZEND_ARG_INFO(0, limit) 472ZEND_END_ARG_INFO() 473 474ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2) 475 ZEND_ARG_INFO(0, pattern) 476 ZEND_ARG_INFO(0, string) 477 ZEND_ARG_INFO(0, option) 478ZEND_END_ARG_INFO() 479 480ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0) 481 ZEND_ARG_INFO(0, pattern) 482 ZEND_ARG_INFO(0, option) 483ZEND_END_ARG_INFO() 484 485ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0) 486 ZEND_ARG_INFO(0, pattern) 487 ZEND_ARG_INFO(0, option) 488ZEND_END_ARG_INFO() 489 490ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0) 491 ZEND_ARG_INFO(0, pattern) 492 ZEND_ARG_INFO(0, option) 493ZEND_END_ARG_INFO() 494 495ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1) 496 ZEND_ARG_INFO(0, string) 497 ZEND_ARG_INFO(0, pattern) 498 ZEND_ARG_INFO(0, option) 499ZEND_END_ARG_INFO() 500 501ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0) 502ZEND_END_ARG_INFO() 503 504ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0) 505ZEND_END_ARG_INFO() 506 507ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1) 508 ZEND_ARG_INFO(0, position) 509ZEND_END_ARG_INFO() 510 511ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0) 512 ZEND_ARG_INFO(0, options) 513ZEND_END_ARG_INFO() 514/* }}} */ 515 516/* {{{ zend_function_entry mbstring_functions[] */ 517const zend_function_entry mbstring_functions[] = { 518 PHP_FE(mb_convert_case, arginfo_mb_convert_case) 519 PHP_FE(mb_strtoupper, arginfo_mb_strtoupper) 520 PHP_FE(mb_strtolower, arginfo_mb_strtolower) 521 PHP_FE(mb_language, arginfo_mb_language) 522 PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding) 523 PHP_FE(mb_http_input, 
arginfo_mb_http_input) 524 PHP_FE(mb_http_output, arginfo_mb_http_output) 525 PHP_FE(mb_detect_order, arginfo_mb_detect_order) 526 PHP_FE(mb_substitute_character, arginfo_mb_substitute_character) 527 PHP_FE(mb_parse_str, arginfo_mb_parse_str) 528 PHP_FE(mb_output_handler, arginfo_mb_output_handler) 529 PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name) 530 PHP_FE(mb_strlen, arginfo_mb_strlen) 531 PHP_FE(mb_strpos, arginfo_mb_strpos) 532 PHP_FE(mb_strrpos, arginfo_mb_strrpos) 533 PHP_FE(mb_stripos, arginfo_mb_stripos) 534 PHP_FE(mb_strripos, arginfo_mb_strripos) 535 PHP_FE(mb_strstr, arginfo_mb_strstr) 536 PHP_FE(mb_strrchr, arginfo_mb_strrchr) 537 PHP_FE(mb_stristr, arginfo_mb_stristr) 538 PHP_FE(mb_strrichr, arginfo_mb_strrichr) 539 PHP_FE(mb_substr_count, arginfo_mb_substr_count) 540 PHP_FE(mb_substr, arginfo_mb_substr) 541 PHP_FE(mb_strcut, arginfo_mb_strcut) 542 PHP_FE(mb_strwidth, arginfo_mb_strwidth) 543 PHP_FE(mb_strimwidth, arginfo_mb_strimwidth) 544 PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding) 545 PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding) 546 PHP_FE(mb_list_encodings, arginfo_mb_list_encodings) 547 PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases) 548 PHP_FE(mb_convert_kana, arginfo_mb_convert_kana) 549 PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader) 550 PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader) 551 PHP_FE(mb_convert_variables, arginfo_mb_convert_variables) 552 PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity) 553 PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity) 554 PHP_FE(mb_send_mail, arginfo_mb_send_mail) 555 PHP_FE(mb_get_info, arginfo_mb_get_info) 556 PHP_FE(mb_check_encoding, arginfo_mb_check_encoding) 557#if HAVE_MBREGEX 558 PHP_MBREGEX_FUNCTION_ENTRIES 559#endif 560 PHP_FE_END 561}; 562/* }}} */ 563 564/* {{{ zend_module_entry mbstring_module_entry */ 565zend_module_entry mbstring_module_entry = { 566 STANDARD_MODULE_HEADER, 567 "mbstring", 568 
mbstring_functions, 569 PHP_MINIT(mbstring), 570 PHP_MSHUTDOWN(mbstring), 571 PHP_RINIT(mbstring), 572 PHP_RSHUTDOWN(mbstring), 573 PHP_MINFO(mbstring), 574 NO_VERSION_YET, 575 PHP_MODULE_GLOBALS(mbstring), 576 PHP_GINIT(mbstring), 577 PHP_GSHUTDOWN(mbstring), 578 NULL, 579 STANDARD_MODULE_PROPERTIES_EX 580}; 581/* }}} */ 582 583/* {{{ static sapi_post_entry php_post_entries[] */ 584static sapi_post_entry php_post_entries[] = { 585 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler }, 586 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler }, 587 { NULL, 0, NULL, NULL } 588}; 589/* }}} */ 590 591#ifdef COMPILE_DL_MBSTRING 592ZEND_GET_MODULE(mbstring) 593#endif 594 595/* {{{ allocators */ 596static void *_php_mb_allocators_malloc(unsigned int sz) 597{ 598 return emalloc(sz); 599} 600 601static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz) 602{ 603 return erealloc(ptr, sz); 604} 605 606static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem) 607{ 608 return ecalloc(nelems, szelem); 609} 610 611static void _php_mb_allocators_free(void *ptr) 612{ 613 efree(ptr); 614} 615 616static void *_php_mb_allocators_pmalloc(unsigned int sz) 617{ 618 return pemalloc(sz, 1); 619} 620 621static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz) 622{ 623 return perealloc(ptr, sz, 1); 624} 625 626static void _php_mb_allocators_pfree(void *ptr) 627{ 628 pefree(ptr, 1); 629} 630 631static mbfl_allocators _php_mb_allocators = { 632 _php_mb_allocators_malloc, 633 _php_mb_allocators_realloc, 634 _php_mb_allocators_calloc, 635 _php_mb_allocators_free, 636 _php_mb_allocators_pmalloc, 637 _php_mb_allocators_prealloc, 638 _php_mb_allocators_pfree 639}; 640/* }}} */ 641 642/* {{{ static sapi_post_entry mbstr_post_entries[] */ 643static sapi_post_entry mbstr_post_entries[] = { 644 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, 
sapi_read_standard_form_data, php_mb_post_handler }, 645 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler }, 646 { NULL, 0, NULL, NULL } 647}; 648/* }}} */ 649 650/* {{{ static int php_mb_parse_encoding_list() 651 * Return 0 if input contains any illegal encoding, otherwise 1. 652 * Even if any illegal encoding is detected the result may contain a list 653 * of parsed encodings. 654 */ 655static int 656php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC) 657{ 658 int n, l, size, bauto, ret = 1; 659 char *p, *p1, *p2, *endp, *tmpstr; 660 enum mbfl_no_encoding no_encoding; 661 enum mbfl_no_encoding *src, *entry, *list; 662 663 list = NULL; 664 if (value == NULL || value_length <= 0) { 665 if (return_list) { 666 *return_list = NULL; 667 } 668 if (return_size) { 669 *return_size = 0; 670 } 671 return 0; 672 } else { 673 enum mbfl_no_encoding *identify_list; 674 int identify_list_size; 675 676 identify_list = MBSTRG(default_detect_order_list); 677 identify_list_size = MBSTRG(default_detect_order_list_size); 678 679 /* copy the value string for work */ 680 if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) { 681 tmpstr = (char *)estrndup(value+1, value_length-2); 682 value_length -= 2; 683 } 684 else 685 tmpstr = (char *)estrndup(value, value_length); 686 if (tmpstr == NULL) { 687 return 0; 688 } 689 /* count the number of listed encoding names */ 690 endp = tmpstr + value_length; 691 n = 1; 692 p1 = tmpstr; 693 while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) { 694 p1 = p2 + 1; 695 n++; 696 } 697 size = n + identify_list_size; 698 /* make list */ 699 list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent); 700 if (list != NULL) { 701 entry = list; 702 n = 0; 703 bauto = 0; 704 p1 = tmpstr; 705 do { 706 p2 = p = php_memnstr(p1, ",", 1, endp); 707 if (p == NULL) { 708 p = endp; 709 } 710 *p = '\0'; 711 /* 
trim spaces */ 712 while (p1 < p && (*p1 == ' ' || *p1 == '\t')) { 713 p1++; 714 } 715 p--; 716 while (p > p1 && (*p == ' ' || *p == '\t')) { 717 *p = '\0'; 718 p--; 719 } 720 /* convert to the encoding number and check encoding */ 721 if (strcasecmp(p1, "auto") == 0) { 722 if (!bauto) { 723 bauto = 1; 724 l = identify_list_size; 725 src = identify_list; 726 while (l > 0) { 727 *entry++ = *src++; 728 l--; 729 n++; 730 } 731 } 732 } else { 733 no_encoding = mbfl_name2no_encoding(p1); 734 if (no_encoding != mbfl_no_encoding_invalid) { 735 *entry++ = no_encoding; 736 n++; 737 } else { 738 ret = 0; 739 } 740 } 741 p1 = p2 + 1; 742 } while (n < size && p2 != NULL); 743 if (n > 0) { 744 if (return_list) { 745 *return_list = list; 746 } else { 747 pefree(list, persistent); 748 } 749 } else { 750 pefree(list, persistent); 751 if (return_list) { 752 *return_list = NULL; 753 } 754 ret = 0; 755 } 756 if (return_size) { 757 *return_size = n; 758 } 759 } else { 760 if (return_list) { 761 *return_list = NULL; 762 } 763 if (return_size) { 764 *return_size = 0; 765 } 766 ret = 0; 767 } 768 efree(tmpstr); 769 } 770 771 return ret; 772} 773/* }}} */ 774 775/* {{{ MBSTRING_API php_mb_check_encoding_list */ 776MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) { 777 return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC); 778} 779/* }}} */ 780 781/* {{{ static int php_mb_parse_encoding_array() 782 * Return 0 if input contains any illegal encoding, otherwise 1. 783 * Even if any illegal encoding is detected the result may contain a list 784 * of parsed encodings. 
785 */ 786static int 787php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC) 788{ 789 zval **hash_entry; 790 HashTable *target_hash; 791 int i, n, l, size, bauto,ret = 1; 792 enum mbfl_no_encoding no_encoding; 793 enum mbfl_no_encoding *src, *list, *entry; 794 795 list = NULL; 796 if (Z_TYPE_P(array) == IS_ARRAY) { 797 enum mbfl_no_encoding *identify_list; 798 int identify_list_size; 799 800 identify_list = MBSTRG(default_detect_order_list); 801 identify_list_size = MBSTRG(default_detect_order_list_size); 802 803 target_hash = Z_ARRVAL_P(array); 804 zend_hash_internal_pointer_reset(target_hash); 805 i = zend_hash_num_elements(target_hash); 806 size = i + identify_list_size; 807 list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent); 808 if (list != NULL) { 809 entry = list; 810 bauto = 0; 811 n = 0; 812 while (i > 0) { 813 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) { 814 break; 815 } 816 convert_to_string_ex(hash_entry); 817 if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) { 818 if (!bauto) { 819 bauto = 1; 820 l = identify_list_size; 821 src = identify_list; 822 while (l > 0) { 823 *entry++ = *src++; 824 l--; 825 n++; 826 } 827 } 828 } else { 829 no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry)); 830 if (no_encoding != mbfl_no_encoding_invalid) { 831 *entry++ = no_encoding; 832 n++; 833 } else { 834 ret = 0; 835 } 836 } 837 zend_hash_move_forward(target_hash); 838 i--; 839 } 840 if (n > 0) { 841 if (return_list) { 842 *return_list = list; 843 } else { 844 pefree(list, persistent); 845 } 846 } else { 847 pefree(list, persistent); 848 if (return_list) { 849 *return_list = NULL; 850 } 851 ret = 0; 852 } 853 if (return_size) { 854 *return_size = n; 855 } 856 } else { 857 if (return_list) { 858 *return_list = NULL; 859 } 860 if (return_size) { 861 *return_size = 0; 862 } 863 ret = 0; 864 } 865 } 866 867 return ret; 868} 869/* }}} */ 
870 871static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC); 872static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len); 873static void _php_mb_free_regex(void *opaque); 874 875#if HAVE_ONIG 876/* {{{ _php_mb_compile_regex */ 877static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC) 878{ 879 php_mb_regex_t *retval; 880 OnigErrorInfo err_info; 881 int err_code; 882 883 if ((err_code = onig_new(&retval, 884 (const OnigUChar *)pattern, 885 (const OnigUChar *)pattern + strlen(pattern), 886 ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP, 887 ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) { 888 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; 889 onig_error_code_to_str(err_str, err_code, err_info); 890 php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str); 891 retval = NULL; 892 } 893 return retval; 894} 895/* }}} */ 896 897/* {{{ _php_mb_match_regex */ 898static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len) 899{ 900 return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str, 901 (const OnigUChar*)str + str_len, (const OnigUChar *)str, 902 (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0; 903} 904/* }}} */ 905 906/* {{{ _php_mb_free_regex */ 907static void _php_mb_free_regex(void *opaque) 908{ 909 onig_free((php_mb_regex_t *)opaque); 910} 911/* }}} */ 912#elif HAVE_PCRE || HAVE_BUNDLED_PCRE 913/* {{{ _php_mb_compile_regex */ 914static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC) 915{ 916 pcre *retval; 917 const char *err_str; 918 int err_offset; 919 920 if (!(retval = pcre_compile(pattern, 921 PCRE_CASELESS, &err_str, &err_offset, NULL))) { 922 php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str); 923 } 924 return retval; 925} 926/* }}} */ 927 928/* {{{ _php_mb_match_regex */ 929static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len) 930{ 931 return pcre_exec((pcre 
*)opaque, NULL, str, (int)str_len, 0, 932 0, NULL, 0) >= 0; 933} 934/* }}} */ 935 936/* {{{ _php_mb_free_regex */ 937static void _php_mb_free_regex(void *opaque) 938{ 939 pcre_free(opaque); 940} 941/* }}} */ 942#endif 943 944/* {{{ php_mb_nls_get_default_detect_order_list */ 945static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size) 946{ 947 size_t i; 948 949 *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut; 950 *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]); 951 952 for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) { 953 if (php_mb_default_identify_list[i].lang == lang) { 954 *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list; 955 *plist_size = php_mb_default_identify_list[i].list_size; 956 return 1; 957 } 958 } 959 return 0; 960} 961/* }}} */ 962 963/* {{{ php.ini directive handler */ 964/* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */ 965static PHP_INI_MH(OnUpdate_mbstring_language) 966{ 967 enum mbfl_no_language no_language; 968 969 no_language = mbfl_name2no_language(new_value); 970 if (no_language == mbfl_no_language_invalid) { 971 MBSTRG(language) = mbfl_no_language_neutral; 972 return FAILURE; 973 } 974 MBSTRG(language) = no_language; 975 php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size)); 976 return SUCCESS; 977} 978/* }}} */ 979 980/* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */ 981static PHP_INI_MH(OnUpdate_mbstring_detect_order) 982{ 983 enum mbfl_no_encoding *list; 984 int size; 985 986 if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { 987 if (MBSTRG(detect_order_list)) { 988 free(MBSTRG(detect_order_list)); 989 } 990 MBSTRG(detect_order_list) = list; 991 MBSTRG(detect_order_list_size) = size; 992 } 
else { 993 if (MBSTRG(detect_order_list)) { 994 free(MBSTRG(detect_order_list)); 995 MBSTRG(detect_order_list) = NULL; 996 } 997 return FAILURE; 998 } 999 1000 return SUCCESS; 1001} 1002/* }}} */ 1003 1004/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */ 1005static PHP_INI_MH(OnUpdate_mbstring_http_input) 1006{ 1007 enum mbfl_no_encoding *list; 1008 int size; 1009 1010 if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { 1011 if (MBSTRG(http_input_list)) { 1012 free(MBSTRG(http_input_list)); 1013 } 1014 MBSTRG(http_input_list) = list; 1015 MBSTRG(http_input_list_size) = size; 1016 } else { 1017 if (MBSTRG(http_input_list)) { 1018 free(MBSTRG(http_input_list)); 1019 MBSTRG(http_input_list) = NULL; 1020 } 1021 MBSTRG(http_input_list_size) = 0; 1022 return FAILURE; 1023 } 1024 1025 return SUCCESS; 1026} 1027/* }}} */ 1028 1029/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */ 1030static PHP_INI_MH(OnUpdate_mbstring_http_output) 1031{ 1032 enum mbfl_no_encoding no_encoding; 1033 1034 no_encoding = mbfl_name2no_encoding(new_value); 1035 if (no_encoding != mbfl_no_encoding_invalid) { 1036 MBSTRG(http_output_encoding) = no_encoding; 1037 MBSTRG(current_http_output_encoding) = no_encoding; 1038 } else { 1039 MBSTRG(http_output_encoding) = mbfl_no_encoding_pass; 1040 MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass; 1041 if (new_value != NULL && new_value_length > 0) { 1042 return FAILURE; 1043 } 1044 } 1045 1046 return SUCCESS; 1047} 1048/* }}} */ 1049 1050/* {{{ static _php_mb_ini_mbstring_internal_encoding_set */ 1051int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC) 1052{ 1053 enum mbfl_no_encoding no_encoding; 1054 const char *enc_name = NULL; 1055 uint enc_name_len = 0; 1056 1057 no_encoding = new_value ? 
mbfl_name2no_encoding(new_value): 1058 mbfl_no_encoding_invalid; 1059 if (no_encoding != mbfl_no_encoding_invalid) { 1060 enc_name = new_value; 1061 enc_name_len = new_value_length; 1062 } else { 1063 switch (MBSTRG(language)) { 1064 case mbfl_no_language_uni: 1065 enc_name = "UTF-8"; 1066 enc_name_len = sizeof("UTF-8") - 1; 1067 break; 1068 case mbfl_no_language_japanese: 1069 enc_name = "EUC-JP"; 1070 enc_name_len = sizeof("EUC-JP") - 1; 1071 break; 1072 case mbfl_no_language_korean: 1073 enc_name = "EUC-KR"; 1074 enc_name_len = sizeof("EUC-KR") - 1; 1075 break; 1076 case mbfl_no_language_simplified_chinese: 1077 enc_name = "EUC-CN"; 1078 enc_name_len = sizeof("EUC-CN") - 1; 1079 break; 1080 case mbfl_no_language_traditional_chinese: 1081 enc_name = "EUC-TW"; 1082 enc_name_len = sizeof("EUC-TW") - 1; 1083 break; 1084 case mbfl_no_language_russian: 1085 enc_name = "KOI8-R"; 1086 enc_name_len = sizeof("KOI8-R") - 1; 1087 break; 1088 case mbfl_no_language_german: 1089 enc_name = "ISO-8859-15"; 1090 enc_name_len = sizeof("ISO-8859-15") - 1; 1091 break; 1092 case mbfl_no_language_armenian: 1093 enc_name = "ArmSCII-8"; 1094 enc_name_len = sizeof("ArmSCII-8") - 1; 1095 break; 1096 case mbfl_no_language_turkish: 1097 enc_name = "ISO-8859-9"; 1098 enc_name_len = sizeof("ISO-8859-9") - 1; 1099 break; 1100 default: 1101 enc_name = "ISO-8859-1"; 1102 enc_name_len = sizeof("ISO-8859-1") - 1; 1103 break; 1104 } 1105 no_encoding = mbfl_name2no_encoding(enc_name); 1106 } 1107 MBSTRG(internal_encoding) = no_encoding; 1108 MBSTRG(current_internal_encoding) = no_encoding; 1109#if HAVE_MBREGEX 1110 { 1111 const char *enc_name = new_value; 1112 if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) { 1113 /* falls back to EUC-JP if an unknown encoding name is given */ 1114 enc_name = "EUC-JP"; 1115 php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC); 1116 } 1117 php_mb_regex_set_mbctype(new_value TSRMLS_CC); 1118 } 1119#endif 1120 return SUCCESS; 1121} 1122/* }}} */ 
1123 1124/* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */ 1125static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) 1126{ 1127 if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN 1128 || stage == PHP_INI_STAGE_RUNTIME) { 1129 return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC); 1130 } else { 1131 /* the corresponding mbstring globals needs to be set according to the 1132 * ini value in the later stage because it never falls back to the 1133 * default value if 1. no value for mbstring.internal_encoding is given, 1134 * 2. mbstring.language directive is processed in per-dir or runtime 1135 * context and 3. call to the handler for mbstring.language is done 1136 * after mbstring.internal_encoding is handled. */ 1137 return SUCCESS; 1138 } 1139} 1140/* }}} */ 1141 1142#ifdef ZEND_MULTIBYTE 1143/* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */ 1144static PHP_INI_MH(OnUpdate_mbstring_script_encoding) 1145{ 1146 int *list, size; 1147 1148 if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { 1149 if (MBSTRG(script_encoding_list) != NULL) { 1150 free(MBSTRG(script_encoding_list)); 1151 } 1152 MBSTRG(script_encoding_list) = list; 1153 MBSTRG(script_encoding_list_size) = size; 1154 } else { 1155 if (MBSTRG(script_encoding_list) != NULL) { 1156 free(MBSTRG(script_encoding_list)); 1157 } 1158 MBSTRG(script_encoding_list) = NULL; 1159 MBSTRG(script_encoding_list_size) = 0; 1160 return FAILURE; 1161 } 1162 1163 return SUCCESS; 1164} 1165/* }}} */ 1166#endif /* ZEND_MULTIBYTE */ 1167 1168/* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */ 1169static PHP_INI_MH(OnUpdate_mbstring_substitute_character) 1170{ 1171 int c; 1172 char *endptr = NULL; 1173 1174 if (new_value != NULL) { 1175 if (strcasecmp("none", new_value) == 0) { 1176 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; 1177 MBSTRG(current_filter_illegal_mode) = 
MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; 1178 } else if (strcasecmp("long", new_value) == 0) { 1179 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG; 1180 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG; 1181 } else if (strcasecmp("entity", new_value) == 0) { 1182 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY; 1183 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY; 1184 } else { 1185 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1186 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1187 if (new_value_length >0) { 1188 c = strtol(new_value, &endptr, 0); 1189 if (*endptr == '\0') { 1190 MBSTRG(filter_illegal_substchar) = c; 1191 MBSTRG(current_filter_illegal_substchar) = c; 1192 } 1193 } 1194 } 1195 } else { 1196 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1197 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1198 MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */ 1199 MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' 
*/ 1200 } 1201 1202 return SUCCESS; 1203} 1204/* }}} */ 1205 1206/* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */ 1207static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) 1208{ 1209 if (new_value == NULL) { 1210 return FAILURE; 1211 } 1212 1213 OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC); 1214 1215 if (MBSTRG(encoding_translation)) { 1216 sapi_unregister_post_entry(php_post_entries TSRMLS_CC); 1217 sapi_register_post_entries(mbstr_post_entries TSRMLS_CC); 1218 } else { 1219 sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC); 1220 sapi_register_post_entries(php_post_entries TSRMLS_CC); 1221 } 1222 1223 return SUCCESS; 1224} 1225/* }}} */ 1226 1227/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */ 1228static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) 1229{ 1230 zval tmp; 1231 void *re = NULL; 1232 1233 if (!new_value) { 1234 new_value = entry->orig_value; 1235 new_value_length = entry->orig_value_length; 1236 } 1237 php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC); 1238 1239 if (Z_STRLEN(tmp) > 0) { 1240 if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) { 1241 zval_dtor(&tmp); 1242 return FAILURE; 1243 } 1244 } 1245 1246 if (MBSTRG(http_output_conv_mimetypes)) { 1247 _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes)); 1248 } 1249 1250 MBSTRG(http_output_conv_mimetypes) = re; 1251 1252 zval_dtor(&tmp); 1253 return SUCCESS; 1254} 1255/* }}} */ 1256/* }}} */ 1257 1258/* {{{ php.ini directive registration */ 1259PHP_INI_BEGIN() 1260 PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language) 1261 PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order) 1262 PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input) 1263 PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output) 1264 
PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding) 1265#ifdef ZEND_MULTIBYTE 1266 PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding) 1267#endif /* ZEND_MULTIBYTE */ 1268 PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character) 1269 STD_PHP_INI_ENTRY("mbstring.func_overload", "0", 1270 PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals) 1271 1272 STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0", 1273 PHP_INI_SYSTEM | PHP_INI_PERDIR, 1274 OnUpdate_mbstring_encoding_translation, 1275 encoding_translation, zend_mbstring_globals, mbstring_globals) 1276 PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes", 1277 "^(text/|application/xhtml\\+xml)", 1278 PHP_INI_ALL, 1279 OnUpdate_mbstring_http_output_conv_mimetypes) 1280 1281 STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0", 1282 PHP_INI_ALL, 1283 OnUpdateLong, 1284 strict_detection, zend_mbstring_globals, mbstring_globals) 1285PHP_INI_END() 1286/* }}} */ 1287 1288/* {{{ module global initialize handler */ 1289static PHP_GINIT_FUNCTION(mbstring) 1290{ 1291 mbstring_globals->language = mbfl_no_language_uni; 1292 mbstring_globals->internal_encoding = mbfl_no_encoding_invalid; 1293 mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding; 1294#ifdef ZEND_MULTIBYTE 1295 mbstring_globals->script_encoding_list = NULL; 1296 mbstring_globals->script_encoding_list_size = 0; 1297#endif /* ZEND_MULTIBYTE */ 1298 mbstring_globals->http_output_encoding = mbfl_no_encoding_pass; 1299 mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass; 1300 mbstring_globals->http_input_identify = mbfl_no_encoding_invalid; 1301 mbstring_globals->http_input_identify_get = mbfl_no_encoding_invalid; 1302 mbstring_globals->http_input_identify_post = mbfl_no_encoding_invalid; 1303 mbstring_globals->http_input_identify_cookie = 
mbfl_no_encoding_invalid; 1304 mbstring_globals->http_input_identify_string = mbfl_no_encoding_invalid; 1305 mbstring_globals->http_input_list = NULL; 1306 mbstring_globals->http_input_list_size = 0; 1307 mbstring_globals->detect_order_list = NULL; 1308 mbstring_globals->detect_order_list_size = 0; 1309 mbstring_globals->current_detect_order_list = NULL; 1310 mbstring_globals->current_detect_order_list_size = 0; 1311 mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut; 1312 mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]); 1313 mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1314 mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */ 1315 mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1316 mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */ 1317 mbstring_globals->illegalchars = 0; 1318 mbstring_globals->func_overload = 0; 1319 mbstring_globals->encoding_translation = 0; 1320 mbstring_globals->strict_detection = 0; 1321 mbstring_globals->outconv = NULL; 1322 mbstring_globals->http_output_conv_mimetypes = NULL; 1323#if HAVE_MBREGEX 1324 mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C); 1325#endif 1326} 1327/* }}} */ 1328 1329/* {{{ PHP_GSHUTDOWN_FUNCTION */ 1330static PHP_GSHUTDOWN_FUNCTION(mbstring) 1331{ 1332 if (mbstring_globals->http_input_list) { 1333 free(mbstring_globals->http_input_list); 1334 } 1335#ifdef ZEND_MULTIBYTE 1336 if (mbstring_globals->script_encoding_list) { 1337 free(mbstring_globals->script_encoding_list); 1338 } 1339#endif /* ZEND_MULTIBYTE */ 1340 if (mbstring_globals->detect_order_list) { 1341 free(mbstring_globals->detect_order_list); 1342 } 1343 if (mbstring_globals->http_output_conv_mimetypes) { 1344 _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes); 1345 } 1346#if 
HAVE_MBREGEX 1347 php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC); 1348#endif 1349} 1350/* }}} */ 1351 1352/* {{{ PHP_MINIT_FUNCTION(mbstring) */ 1353PHP_MINIT_FUNCTION(mbstring) 1354{ 1355 __mbfl_allocators = &_php_mb_allocators; 1356 1357 REGISTER_INI_ENTRIES(); 1358 1359 /* This is a global handler. Should not be set in a per-request handler. */ 1360 sapi_register_treat_data(mbstr_treat_data); 1361 1362 /* Post handlers are stored in the thread-local context. */ 1363 if (MBSTRG(encoding_translation)) { 1364 sapi_register_post_entries(mbstr_post_entries TSRMLS_CC); 1365 } 1366 1367 REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT); 1368 REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT); 1369 REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT); 1370 1371 REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT); 1372 REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT); 1373 REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT); 1374 1375#if HAVE_MBREGEX 1376 PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); 1377#endif 1378 return SUCCESS; 1379} 1380/* }}} */ 1381 1382/* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */ 1383PHP_MSHUTDOWN_FUNCTION(mbstring) 1384{ 1385 UNREGISTER_INI_ENTRIES(); 1386 1387#if HAVE_MBREGEX 1388 PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); 1389#endif 1390 1391 return SUCCESS; 1392} 1393/* }}} */ 1394 1395/* {{{ PHP_RINIT_FUNCTION(mbstring) */ 1396PHP_RINIT_FUNCTION(mbstring) 1397{ 1398 int n; 1399 enum mbfl_no_encoding *list=NULL, *entry; 1400 zend_function *func, *orig; 1401 const struct mb_overload_def *p; 1402 1403 MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding); 1404 MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding); 1405 
MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode); 1406 MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar); 1407 1408 MBSTRG(illegalchars) = 0; 1409 1410 n = 0; 1411 if (MBSTRG(detect_order_list)) { 1412 list = MBSTRG(detect_order_list); 1413 n = MBSTRG(detect_order_list_size); 1414 } 1415 if (n <= 0) { 1416 list = MBSTRG(default_detect_order_list); 1417 n = MBSTRG(default_detect_order_list_size); 1418 } 1419 entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0); 1420 MBSTRG(current_detect_order_list) = entry; 1421 MBSTRG(current_detect_order_list_size) = n; 1422 while (n > 0) { 1423 *entry++ = *list++; 1424 n--; 1425 } 1426 1427 /* override original function. */ 1428 if (MBSTRG(func_overload)){ 1429 p = &(mb_ovld[0]); 1430 1431 while (p->type > 0) { 1432 if ((MBSTRG(func_overload) & p->type) == p->type && 1433 zend_hash_find(EG(function_table), p->save_func, 1434 strlen(p->save_func)+1, (void **)&orig) != SUCCESS) { 1435 1436 zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func); 1437 1438 if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) { 1439 php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func); 1440 return FAILURE; 1441 } else { 1442 zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL); 1443 1444 if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function), 1445 NULL) == FAILURE) { 1446 php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func); 1447 return FAILURE; 1448 } 1449 } 1450 } 1451 p++; 1452 } 1453 } 1454#if HAVE_MBREGEX 1455 PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); 1456#endif 1457#ifdef ZEND_MULTIBYTE 1458 zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC); 
1459 php_mb_set_zend_encoding(TSRMLS_C); 1460#endif /* ZEND_MULTIBYTE */ 1461 1462 return SUCCESS; 1463} 1464/* }}} */ 1465 1466/* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */ 1467PHP_RSHUTDOWN_FUNCTION(mbstring) 1468{ 1469 const struct mb_overload_def *p; 1470 zend_function *orig; 1471 1472 if (MBSTRG(current_detect_order_list) != NULL) { 1473 efree(MBSTRG(current_detect_order_list)); 1474 MBSTRG(current_detect_order_list) = NULL; 1475 MBSTRG(current_detect_order_list_size) = 0; 1476 } 1477 if (MBSTRG(outconv) != NULL) { 1478 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv)); 1479 mbfl_buffer_converter_delete(MBSTRG(outconv)); 1480 MBSTRG(outconv) = NULL; 1481 } 1482 1483 /* clear http input identification. */ 1484 MBSTRG(http_input_identify) = mbfl_no_encoding_invalid; 1485 MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid; 1486 MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid; 1487 MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid; 1488 MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid; 1489 1490 /* clear overloaded function. 
*/ 1491 if (MBSTRG(func_overload)){ 1492 p = &(mb_ovld[0]); 1493 while (p->type > 0) { 1494 if ((MBSTRG(func_overload) & p->type) == p->type && 1495 zend_hash_find(EG(function_table), p->save_func, 1496 strlen(p->save_func)+1, (void **)&orig) == SUCCESS) { 1497 1498 zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL); 1499 zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1); 1500 } 1501 p++; 1502 } 1503 } 1504 1505#if HAVE_MBREGEX 1506 PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); 1507#endif 1508 1509 return SUCCESS; 1510} 1511/* }}} */ 1512 1513/* {{{ PHP_MINFO_FUNCTION(mbstring) */ 1514PHP_MINFO_FUNCTION(mbstring) 1515{ 1516 php_info_print_table_start(); 1517 php_info_print_table_row(2, "Multibyte Support", "enabled"); 1518 php_info_print_table_row(2, "Multibyte string engine", "libmbfl"); 1519 php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled"); 1520 php_info_print_table_end(); 1521 1522 php_info_print_table_start(); 1523 php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1."); 1524 php_info_print_table_end(); 1525 1526#if HAVE_MBREGEX 1527 PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU); 1528#endif 1529 1530 DISPLAY_INI_ENTRIES(); 1531} 1532/* }}} */ 1533 1534/* {{{ proto string mb_language([string language]) 1535 Sets the current language or Returns the current language as a string */ 1536PHP_FUNCTION(mb_language) 1537{ 1538 char *name = NULL; 1539 int name_len = 0; 1540 1541 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) { 1542 return; 1543 } 1544 if (name == NULL) { 1545 RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1); 1546 } else { 1547 if (FAILURE == zend_alter_ini_entry( 1548 "mbstring.language", 
sizeof("mbstring.language"), 1549 name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) { 1550 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name); 1551 RETVAL_FALSE; 1552 } else { 1553 RETVAL_TRUE; 1554 } 1555 } 1556} 1557/* }}} */ 1558 1559/* {{{ proto string mb_internal_encoding([string encoding]) 1560 Sets the current internal encoding or Returns the current internal encoding as a string */ 1561PHP_FUNCTION(mb_internal_encoding) 1562{ 1563 char *name = NULL; 1564 int name_len; 1565 enum mbfl_no_encoding no_encoding; 1566 1567 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) { 1568 RETURN_FALSE; 1569 } 1570 if (name == NULL) { 1571 name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding)); 1572 if (name != NULL) { 1573 RETURN_STRING(name, 1); 1574 } else { 1575 RETURN_FALSE; 1576 } 1577 } else { 1578 no_encoding = mbfl_name2no_encoding(name); 1579 if (no_encoding == mbfl_no_encoding_invalid) { 1580 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); 1581 RETURN_FALSE; 1582 } else { 1583 MBSTRG(current_internal_encoding) = no_encoding; 1584#ifdef ZEND_MULTIBYTE 1585 /* TODO: make independent from mbstring.encoding_translation? 
*/ 1586 if (MBSTRG(encoding_translation)) { 1587 zend_multibyte_set_internal_encoding(name TSRMLS_CC); 1588 } 1589#endif /* ZEND_MULTIBYTE */ 1590 RETURN_TRUE; 1591 } 1592 } 1593} 1594/* }}} */ 1595 1596/* {{{ proto mixed mb_http_input([string type]) 1597 Returns the input encoding */ 1598PHP_FUNCTION(mb_http_input) 1599{ 1600 char *typ = NULL; 1601 int typ_len; 1602 int retname, n; 1603 char *name, *list, *temp; 1604 enum mbfl_no_encoding *entry; 1605 enum mbfl_no_encoding result = mbfl_no_encoding_invalid; 1606 1607 retname = 1; 1608 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) { 1609 RETURN_FALSE; 1610 } 1611 if (typ == NULL) { 1612 result = MBSTRG(http_input_identify); 1613 } else { 1614 switch (*typ) { 1615 case 'G': 1616 case 'g': 1617 result = MBSTRG(http_input_identify_get); 1618 break; 1619 case 'P': 1620 case 'p': 1621 result = MBSTRG(http_input_identify_post); 1622 break; 1623 case 'C': 1624 case 'c': 1625 result = MBSTRG(http_input_identify_cookie); 1626 break; 1627 case 'S': 1628 case 's': 1629 result = MBSTRG(http_input_identify_string); 1630 break; 1631 case 'I': 1632 case 'i': 1633 array_init(return_value); 1634 entry = MBSTRG(http_input_list); 1635 n = MBSTRG(http_input_list_size); 1636 while (n > 0) { 1637 name = (char *)mbfl_no_encoding2name(*entry); 1638 if (name) { 1639 add_next_index_string(return_value, name, 1); 1640 } 1641 entry++; 1642 n--; 1643 } 1644 retname = 0; 1645 break; 1646 case 'L': 1647 case 'l': 1648 entry = MBSTRG(http_input_list); 1649 n = MBSTRG(http_input_list_size); 1650 list = NULL; 1651 while (n > 0) { 1652 name = (char *)mbfl_no_encoding2name(*entry); 1653 if (name) { 1654 if (list) { 1655 temp = list; 1656 spprintf(&list, 0, "%s,%s", temp, name); 1657 efree(temp); 1658 if (!list) { 1659 break; 1660 } 1661 } else { 1662 list = estrdup(name); 1663 } 1664 } 1665 entry++; 1666 n--; 1667 } 1668 if (!list) { 1669 RETURN_FALSE; 1670 } 1671 RETVAL_STRING(list, 0); 1672 retname = 0; 1673 
break; 1674 default: 1675 result = MBSTRG(http_input_identify); 1676 break; 1677 } 1678 } 1679 1680 if (retname) { 1681 if (result != mbfl_no_encoding_invalid && 1682 (name = (char *)mbfl_no_encoding2name(result)) != NULL) { 1683 RETVAL_STRING(name, 1); 1684 } else { 1685 RETVAL_FALSE; 1686 } 1687 } 1688} 1689/* }}} */ 1690 1691/* {{{ proto string mb_http_output([string encoding]) 1692 Sets the current output_encoding or returns the current output_encoding as a string */ 1693PHP_FUNCTION(mb_http_output) 1694{ 1695 char *name = NULL; 1696 int name_len; 1697 enum mbfl_no_encoding no_encoding; 1698 1699 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) { 1700 RETURN_FALSE; 1701 } 1702 1703 if (name == NULL) { 1704 name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding)); 1705 if (name != NULL) { 1706 RETURN_STRING(name, 1); 1707 } else { 1708 RETURN_FALSE; 1709 } 1710 } else { 1711 no_encoding = mbfl_name2no_encoding(name); 1712 if (no_encoding == mbfl_no_encoding_invalid) { 1713 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); 1714 RETURN_FALSE; 1715 } else { 1716 MBSTRG(current_http_output_encoding) = no_encoding; 1717 RETURN_TRUE; 1718 } 1719 } 1720} 1721/* }}} */ 1722 1723/* {{{ proto bool|array mb_detect_order([mixed encoding-list]) 1724 Sets the current detect_order or Return the current detect_order as a array */ 1725PHP_FUNCTION(mb_detect_order) 1726{ 1727 zval **arg1 = NULL; 1728 int n, size; 1729 enum mbfl_no_encoding *list, *entry; 1730 char *name; 1731 1732 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) { 1733 return; 1734 } 1735 1736 if (!arg1) { 1737 array_init(return_value); 1738 entry = MBSTRG(current_detect_order_list); 1739 n = MBSTRG(current_detect_order_list_size); 1740 while (n > 0) { 1741 name = (char *)mbfl_no_encoding2name(*entry); 1742 if (name) { 1743 add_next_index_string(return_value, name, 1); 1744 } 1745 entry++; 
1746 n--; 1747 } 1748 } else { 1749 list = NULL; 1750 size = 0; 1751 switch (Z_TYPE_PP(arg1)) { 1752 case IS_ARRAY: 1753 if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) { 1754 if (list) { 1755 efree(list); 1756 } 1757 RETURN_FALSE; 1758 } 1759 break; 1760 default: 1761 convert_to_string_ex(arg1); 1762 if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) { 1763 if (list) { 1764 efree(list); 1765 } 1766 RETURN_FALSE; 1767 } 1768 break; 1769 } 1770 1771 if (list == NULL) { 1772 RETURN_FALSE; 1773 } 1774 1775 if (MBSTRG(current_detect_order_list)) { 1776 efree(MBSTRG(current_detect_order_list)); 1777 } 1778 MBSTRG(current_detect_order_list) = list; 1779 MBSTRG(current_detect_order_list_size) = size; 1780 RETURN_TRUE; 1781 } 1782} 1783/* }}} */ 1784 1785/* {{{ proto mixed mb_substitute_character([mixed substchar]) 1786 Sets the current substitute_character or returns the current substitute_character */ 1787PHP_FUNCTION(mb_substitute_character) 1788{ 1789 zval **arg1 = NULL; 1790 1791 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) { 1792 return; 1793 } 1794 1795 if (!arg1) { 1796 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { 1797 RETURN_STRING("none", 1); 1798 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) { 1799 RETURN_STRING("long", 1); 1800 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) { 1801 RETURN_STRING("entity", 1); 1802 } else { 1803 RETURN_LONG(MBSTRG(current_filter_illegal_substchar)); 1804 } 1805 } else { 1806 RETVAL_TRUE; 1807 1808 switch (Z_TYPE_PP(arg1)) { 1809 case IS_STRING: 1810 if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) { 1811 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; 1812 } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) { 1813 
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG; 1814 } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) { 1815 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY; 1816 } else { 1817 convert_to_long_ex(arg1); 1818 1819 if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) { 1820 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1821 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1); 1822 } else { 1823 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character."); 1824 RETURN_FALSE; 1825 } 1826 } 1827 break; 1828 default: 1829 convert_to_long_ex(arg1); 1830 if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) { 1831 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1832 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1); 1833 } else { 1834 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character."); 1835 RETURN_FALSE; 1836 } 1837 break; 1838 } 1839 } 1840} 1841/* }}} */ 1842 1843/* {{{ proto string mb_preferred_mime_name(string encoding) 1844 Return the preferred MIME name (charset) as a string */ 1845PHP_FUNCTION(mb_preferred_mime_name) 1846{ 1847 enum mbfl_no_encoding no_encoding; 1848 char *name = NULL; 1849 int name_len; 1850 1851 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) { 1852 return; 1853 } else { 1854 no_encoding = mbfl_name2no_encoding(name); 1855 if (no_encoding == mbfl_no_encoding_invalid) { 1856 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); 1857 RETVAL_FALSE; 1858 } else { 1859 const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding); 1860 if (preferred_name == NULL || *preferred_name == '\0') { 1861 php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name); 1862 RETVAL_FALSE; 1863 } else { 1864 RETVAL_STRING((char *)preferred_name, 1); 1865 } 1866 } 1867 } 1868} 1869/* 
}}} */ 1870 1871#define IS_SJIS1(c) ((((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xf5)) ? 1 : 0) 1872#define IS_SJIS2(c) ((((c)>=0x40 && (c)<=0x7e) || ((c)>=0x80 && (c)<=0xfc)) ? 1 : 0) 1873 1874/* {{{ proto bool mb_parse_str(string encoded_string [, array result]) 1875 Parses GET/POST/COOKIE data and sets global variables */ 1876PHP_FUNCTION(mb_parse_str) 1877{ 1878 zval *track_vars_array = NULL; 1879 char *encstr = NULL; 1880 int encstr_len; 1881 php_mb_encoding_handler_info_t info; 1882 enum mbfl_no_encoding detected; 1883 1884 track_vars_array = NULL; 1885 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) { 1886 return; 1887 } 1888 1889 /* Clear out the array */ 1890 if (track_vars_array != NULL) { 1891 zval_dtor(track_vars_array); 1892 array_init(track_vars_array); 1893 } 1894 1895 encstr = estrndup(encstr, encstr_len); 1896 1897 info.data_type = PARSE_STRING; 1898 info.separator = PG(arg_separator).input; 1899 info.force_register_globals = (track_vars_array == NULL); 1900 info.report_errors = 1; 1901 info.to_encoding = MBSTRG(current_internal_encoding); 1902 info.to_language = MBSTRG(language); 1903 info.from_encodings = MBSTRG(http_input_list); 1904 info.num_from_encodings = MBSTRG(http_input_list_size); 1905 info.from_language = MBSTRG(language); 1906 1907 detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC); 1908 1909 MBSTRG(http_input_identify) = detected; 1910 1911 RETVAL_BOOL(detected != mbfl_no_encoding_invalid); 1912 1913 if (encstr != NULL) efree(encstr); 1914} 1915/* }}} */ 1916 1917/* {{{ proto string mb_output_handler(string contents, int status) 1918 Returns string in output buffer converted to the http_output encoding */ 1919PHP_FUNCTION(mb_output_handler) 1920{ 1921 char *arg_string; 1922 int arg_string_len; 1923 long arg_status; 1924 mbfl_string string, result; 1925 const char *charset; 1926 char *p; 1927 enum mbfl_no_encoding encoding; 1928 int 
last_feed, len; 1929 unsigned char send_text_mimetype = 0; 1930 char *s, *mimetype = NULL; 1931 1932 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) { 1933 return; 1934 } 1935 1936 encoding = MBSTRG(current_http_output_encoding); 1937 1938 /* start phase only */ 1939 if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) { 1940 /* delete the converter just in case. */ 1941 if (MBSTRG(outconv)) { 1942 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv)); 1943 mbfl_buffer_converter_delete(MBSTRG(outconv)); 1944 MBSTRG(outconv) = NULL; 1945 } 1946 if (encoding == mbfl_no_encoding_pass) { 1947 RETURN_STRINGL(arg_string, arg_string_len, 1); 1948 } 1949 1950 /* analyze mime type */ 1951 if (SG(sapi_headers).mimetype && 1952 _php_mb_match_regex( 1953 MBSTRG(http_output_conv_mimetypes), 1954 SG(sapi_headers).mimetype, 1955 strlen(SG(sapi_headers).mimetype))) { 1956 if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){ 1957 mimetype = estrdup(SG(sapi_headers).mimetype); 1958 } else { 1959 mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype); 1960 } 1961 send_text_mimetype = 1; 1962 } else if (SG(sapi_headers).send_default_content_type) { 1963 mimetype = SG(default_mimetype) ? 
SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE; 1964 } 1965 1966 /* if content-type is not yet set, set it and activate the converter */ 1967 if (SG(sapi_headers).send_default_content_type || send_text_mimetype) { 1968 charset = mbfl_no2preferred_mime_name(encoding); 1969 if (charset) { 1970 len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset ); 1971 if (sapi_add_header(p, len, 0) != FAILURE) { 1972 SG(sapi_headers).send_default_content_type = 0; 1973 } 1974 } 1975 /* activate the converter */ 1976 MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0); 1977 if (send_text_mimetype){ 1978 efree(mimetype); 1979 } 1980 } 1981 } 1982 1983 /* just return if the converter is not activated. */ 1984 if (MBSTRG(outconv) == NULL) { 1985 RETURN_STRINGL(arg_string, arg_string_len, 1); 1986 } 1987 1988 /* flag */ 1989 last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0); 1990 /* mode */ 1991 mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode)); 1992 mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar)); 1993 1994 /* feed the string */ 1995 mbfl_string_init(&string); 1996 string.no_language = MBSTRG(language); 1997 string.no_encoding = MBSTRG(current_internal_encoding); 1998 string.val = (unsigned char *)arg_string; 1999 string.len = arg_string_len; 2000 mbfl_buffer_converter_feed(MBSTRG(outconv), &string); 2001 if (last_feed) { 2002 mbfl_buffer_converter_flush(MBSTRG(outconv)); 2003 } 2004 /* get the converter output, and return it */ 2005 mbfl_buffer_converter_result(MBSTRG(outconv), &result); 2006 RETVAL_STRINGL((char *)result.val, result.len, 0); /* the string is already strdup()'ed */ 2007 2008 /* delete the converter if it is the last feed. 
*/ 2009 if (last_feed) { 2010 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv)); 2011 mbfl_buffer_converter_delete(MBSTRG(outconv)); 2012 MBSTRG(outconv) = NULL; 2013 } 2014} 2015/* }}} */ 2016 2017/* {{{ proto int mb_strlen(string str [, string encoding]) 2018 Get character numbers of a string */ 2019PHP_FUNCTION(mb_strlen) 2020{ 2021 int n; 2022 mbfl_string string; 2023 char *enc_name = NULL; 2024 int enc_name_len; 2025 2026 mbfl_string_init(&string); 2027 2028 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) { 2029 RETURN_FALSE; 2030 } 2031 2032 string.no_language = MBSTRG(language); 2033 if (enc_name == NULL) { 2034 string.no_encoding = MBSTRG(current_internal_encoding); 2035 } else { 2036 string.no_encoding = mbfl_name2no_encoding(enc_name); 2037 if (string.no_encoding == mbfl_no_encoding_invalid) { 2038 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2039 RETURN_FALSE; 2040 } 2041 } 2042 2043 n = mbfl_strlen(&string); 2044 if (n >= 0) { 2045 RETVAL_LONG(n); 2046 } else { 2047 RETVAL_FALSE; 2048 } 2049} 2050/* }}} */ 2051 2052/* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]]) 2053 Find position of first occurrence of a string within another */ 2054PHP_FUNCTION(mb_strpos) 2055{ 2056 int n, reverse = 0; 2057 long offset; 2058 mbfl_string haystack, needle; 2059 char *enc_name = NULL; 2060 int enc_name_len; 2061 2062 mbfl_string_init(&haystack); 2063 mbfl_string_init(&needle); 2064 haystack.no_language = MBSTRG(language); 2065 haystack.no_encoding = MBSTRG(current_internal_encoding); 2066 needle.no_language = MBSTRG(language); 2067 needle.no_encoding = MBSTRG(current_internal_encoding); 2068 offset = 0; 2069 2070 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == 
FAILURE) { 2071 RETURN_FALSE; 2072 } 2073 2074 if (enc_name != NULL) { 2075 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); 2076 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2077 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2078 RETURN_FALSE; 2079 } 2080 } 2081 2082 if (offset < 0 || offset > mbfl_strlen(&haystack)) { 2083 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string"); 2084 RETURN_FALSE; 2085 } 2086 if (needle.len == 0) { 2087 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); 2088 RETURN_FALSE; 2089 } 2090 2091 n = mbfl_strpos(&haystack, &needle, offset, reverse); 2092 if (n >= 0) { 2093 RETVAL_LONG(n); 2094 } else { 2095 switch (-n) { 2096 case 1: 2097 break; 2098 case 2: 2099 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length"); 2100 break; 2101 case 4: 2102 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error"); 2103 break; 2104 case 8: 2105 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty"); 2106 break; 2107 default: 2108 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos"); 2109 break; 2110 } 2111 RETVAL_FALSE; 2112 } 2113} 2114/* }}} */ 2115 2116/* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]]) 2117 Find position of last occurrence of a string within another */ 2118PHP_FUNCTION(mb_strrpos) 2119{ 2120 int n; 2121 mbfl_string haystack, needle; 2122 char *enc_name = NULL; 2123 int enc_name_len; 2124 zval **zoffset = NULL; 2125 long offset = 0, str_flg; 2126 char *enc_name2 = NULL; 2127 int enc_name_len2; 2128 2129 mbfl_string_init(&haystack); 2130 mbfl_string_init(&needle); 2131 haystack.no_language = MBSTRG(language); 2132 haystack.no_encoding = MBSTRG(current_internal_encoding); 2133 needle.no_language = MBSTRG(language); 2134 needle.no_encoding = MBSTRG(current_internal_encoding); 2135 2136 if 
(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) { 2137 RETURN_FALSE; 2138 } 2139 2140 if (zoffset) { 2141 if (Z_TYPE_PP(zoffset) == IS_STRING) { 2142 enc_name2 = Z_STRVAL_PP(zoffset); 2143 enc_name_len2 = Z_STRLEN_PP(zoffset); 2144 str_flg = 1; 2145 2146 if (enc_name2 != NULL) { 2147 switch (*enc_name2) { 2148 case '0': 2149 case '1': 2150 case '2': 2151 case '3': 2152 case '4': 2153 case '5': 2154 case '6': 2155 case '7': 2156 case '8': 2157 case '9': 2158 case ' ': 2159 case '-': 2160 case '.': 2161 break; 2162 default : 2163 str_flg = 0; 2164 break; 2165 } 2166 } 2167 2168 if (str_flg) { 2169 convert_to_long_ex(zoffset); 2170 offset = Z_LVAL_PP(zoffset); 2171 } else { 2172 enc_name = enc_name2; 2173 enc_name_len = enc_name_len2; 2174 } 2175 } else { 2176 convert_to_long_ex(zoffset); 2177 offset = Z_LVAL_PP(zoffset); 2178 } 2179 } 2180 2181 if (enc_name != NULL) { 2182 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); 2183 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2184 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2185 RETURN_FALSE; 2186 } 2187 } 2188 2189 if (haystack.len <= 0) { 2190 RETURN_FALSE; 2191 } 2192 if (needle.len <= 0) { 2193 RETURN_FALSE; 2194 } 2195 2196 { 2197 int haystack_char_len = mbfl_strlen(&haystack); 2198 if ((offset > 0 && offset > haystack_char_len) || 2199 (offset < 0 && -offset > haystack_char_len)) { 2200 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string"); 2201 RETURN_FALSE; 2202 } 2203 } 2204 2205 n = mbfl_strpos(&haystack, &needle, offset, 1); 2206 if (n >= 0) { 2207 RETVAL_LONG(n); 2208 } else { 2209 RETVAL_FALSE; 2210 } 2211} 2212/* }}} */ 2213 2214/* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]]) 2215 Finds position of first 
occurrence of a string within another, case insensitive */ 2216PHP_FUNCTION(mb_stripos) 2217{ 2218 int n; 2219 long offset; 2220 mbfl_string haystack, needle; 2221 char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); 2222 int from_encoding_len; 2223 n = -1; 2224 offset = 0; 2225 2226 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) { 2227 RETURN_FALSE; 2228 } 2229 if (needle.len == 0) { 2230 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); 2231 RETURN_FALSE; 2232 } 2233 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC); 2234 2235 if (n >= 0) { 2236 RETVAL_LONG(n); 2237 } else { 2238 RETVAL_FALSE; 2239 } 2240} 2241/* }}} */ 2242 2243/* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]]) 2244 Finds position of last occurrence of a string within another, case insensitive */ 2245PHP_FUNCTION(mb_strripos) 2246{ 2247 int n; 2248 long offset; 2249 mbfl_string haystack, needle; 2250 const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); 2251 int from_encoding_len; 2252 n = -1; 2253 offset = 0; 2254 2255 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) { 2256 RETURN_FALSE; 2257 } 2258 2259 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC); 2260 2261 if (n >= 0) { 2262 RETVAL_LONG(n); 2263 } else { 2264 RETVAL_FALSE; 2265 } 2266} 2267/* }}} */ 2268 2269/* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]]) 2270 Finds first occurrence of a string within 
another */ 2271PHP_FUNCTION(mb_strstr) 2272{ 2273 int n, len, mblen; 2274 mbfl_string haystack, needle, result, *ret = NULL; 2275 char *enc_name = NULL; 2276 int enc_name_len; 2277 zend_bool part = 0; 2278 2279 mbfl_string_init(&haystack); 2280 mbfl_string_init(&needle); 2281 haystack.no_language = MBSTRG(language); 2282 haystack.no_encoding = MBSTRG(current_internal_encoding); 2283 needle.no_language = MBSTRG(language); 2284 needle.no_encoding = MBSTRG(current_internal_encoding); 2285 2286 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) { 2287 RETURN_FALSE; 2288 } 2289 2290 if (enc_name != NULL) { 2291 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); 2292 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2293 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2294 RETURN_FALSE; 2295 } 2296 } 2297 2298 if (needle.len <= 0) { 2299 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); 2300 RETURN_FALSE; 2301 } 2302 n = mbfl_strpos(&haystack, &needle, 0, 0); 2303 if (n >= 0) { 2304 mblen = mbfl_strlen(&haystack); 2305 if (part) { 2306 ret = mbfl_substr(&haystack, &result, 0, n); 2307 if (ret != NULL) { 2308 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2309 } else { 2310 RETVAL_FALSE; 2311 } 2312 } else { 2313 len = (mblen - n); 2314 ret = mbfl_substr(&haystack, &result, n, len); 2315 if (ret != NULL) { 2316 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2317 } else { 2318 RETVAL_FALSE; 2319 } 2320 } 2321 } else { 2322 RETVAL_FALSE; 2323 } 2324} 2325/* }}} */ 2326 2327/* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]]) 2328 Finds the last occurrence of a character in a string within another */ 2329PHP_FUNCTION(mb_strrchr) 2330{ 2331 int n, len, mblen; 2332 mbfl_string haystack, needle, result, *ret = NULL; 
2333 char *enc_name = NULL; 2334 int enc_name_len; 2335 zend_bool part = 0; 2336 2337 mbfl_string_init(&haystack); 2338 mbfl_string_init(&needle); 2339 haystack.no_language = MBSTRG(language); 2340 haystack.no_encoding = MBSTRG(current_internal_encoding); 2341 needle.no_language = MBSTRG(language); 2342 needle.no_encoding = MBSTRG(current_internal_encoding); 2343 2344 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) { 2345 RETURN_FALSE; 2346 } 2347 2348 if (enc_name != NULL) { 2349 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); 2350 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2351 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2352 RETURN_FALSE; 2353 } 2354 } 2355 2356 if (haystack.len <= 0) { 2357 RETURN_FALSE; 2358 } 2359 if (needle.len <= 0) { 2360 RETURN_FALSE; 2361 } 2362 n = mbfl_strpos(&haystack, &needle, 0, 1); 2363 if (n >= 0) { 2364 mblen = mbfl_strlen(&haystack); 2365 if (part) { 2366 ret = mbfl_substr(&haystack, &result, 0, n); 2367 if (ret != NULL) { 2368 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2369 } else { 2370 RETVAL_FALSE; 2371 } 2372 } else { 2373 len = (mblen - n); 2374 ret = mbfl_substr(&haystack, &result, n, len); 2375 if (ret != NULL) { 2376 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2377 } else { 2378 RETVAL_FALSE; 2379 } 2380 } 2381 } else { 2382 RETVAL_FALSE; 2383 } 2384} 2385/* }}} */ 2386 2387/* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]]) 2388 Finds first occurrence of a string within another, case insensitive */ 2389PHP_FUNCTION(mb_stristr) 2390{ 2391 zend_bool part = 0; 2392 unsigned int from_encoding_len, len, mblen; 2393 int n; 2394 mbfl_string haystack, needle, result, *ret = NULL; 2395 const char *from_encoding = 
mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); 2396 mbfl_string_init(&haystack); 2397 mbfl_string_init(&needle); 2398 haystack.no_language = MBSTRG(language); 2399 haystack.no_encoding = MBSTRG(current_internal_encoding); 2400 needle.no_language = MBSTRG(language); 2401 needle.no_encoding = MBSTRG(current_internal_encoding); 2402 2403 2404 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) { 2405 RETURN_FALSE; 2406 } 2407 2408 if (!needle.len) { 2409 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); 2410 RETURN_FALSE; 2411 } 2412 2413 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding); 2414 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2415 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding); 2416 RETURN_FALSE; 2417 } 2418 2419 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC); 2420 2421 if (n <0) { 2422 RETURN_FALSE; 2423 } 2424 2425 mblen = mbfl_strlen(&haystack); 2426 2427 if (part) { 2428 ret = mbfl_substr(&haystack, &result, 0, n); 2429 if (ret != NULL) { 2430 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2431 } else { 2432 RETVAL_FALSE; 2433 } 2434 } else { 2435 len = (mblen - n); 2436 ret = mbfl_substr(&haystack, &result, n, len); 2437 if (ret != NULL) { 2438 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2439 } else { 2440 RETVAL_FALSE; 2441 } 2442 } 2443} 2444/* }}} */ 2445 2446/* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]]) 2447 Finds the last occurrence of a character in a string within another, case insensitive */ 2448PHP_FUNCTION(mb_strrichr) 2449{ 2450 zend_bool part = 0; 2451 int n, from_encoding_len, len, mblen; 2452 mbfl_string haystack, needle, result, *ret = NULL; 2453 char *from_encoding = 
(char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); 2454 mbfl_string_init(&haystack); 2455 mbfl_string_init(&needle); 2456 haystack.no_language = MBSTRG(language); 2457 haystack.no_encoding = MBSTRG(current_internal_encoding); 2458 needle.no_language = MBSTRG(language); 2459 needle.no_encoding = MBSTRG(current_internal_encoding); 2460 2461 2462 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) { 2463 RETURN_FALSE; 2464 } 2465 2466 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding); 2467 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2468 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding); 2469 RETURN_FALSE; 2470 } 2471 2472 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC); 2473 2474 if (n <0) { 2475 RETURN_FALSE; 2476 } 2477 2478 mblen = mbfl_strlen(&haystack); 2479 2480 if (part) { 2481 ret = mbfl_substr(&haystack, &result, 0, n); 2482 if (ret != NULL) { 2483 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2484 } else { 2485 RETVAL_FALSE; 2486 } 2487 } else { 2488 len = (mblen - n); 2489 ret = mbfl_substr(&haystack, &result, n, len); 2490 if (ret != NULL) { 2491 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2492 } else { 2493 RETVAL_FALSE; 2494 } 2495 } 2496} 2497/* }}} */ 2498 2499/* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding]) 2500 Count the number of substring occurrences */ 2501PHP_FUNCTION(mb_substr_count) 2502{ 2503 int n; 2504 mbfl_string haystack, needle; 2505 char *enc_name = NULL; 2506 int enc_name_len; 2507 2508 mbfl_string_init(&haystack); 2509 mbfl_string_init(&needle); 2510 haystack.no_language = MBSTRG(language); 2511 haystack.no_encoding = MBSTRG(current_internal_encoding); 2512 needle.no_language = 
MBSTRG(language); 2513 needle.no_encoding = MBSTRG(current_internal_encoding); 2514 2515 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) { 2516 return; 2517 } 2518 2519 if (enc_name != NULL) { 2520 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); 2521 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2522 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2523 RETURN_FALSE; 2524 } 2525 } 2526 2527 if (needle.len <= 0) { 2528 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring"); 2529 RETURN_FALSE; 2530 } 2531 2532 n = mbfl_substr_count(&haystack, &needle); 2533 if (n >= 0) { 2534 RETVAL_LONG(n); 2535 } else { 2536 RETVAL_FALSE; 2537 } 2538} 2539/* }}} */ 2540 2541/* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]]) 2542 Returns part of a string */ 2543PHP_FUNCTION(mb_substr) 2544{ 2545 size_t argc = ZEND_NUM_ARGS(); 2546 char *str, *encoding; 2547 long from, len; 2548 int mblen, str_len, encoding_len; 2549 mbfl_string string, result, *ret; 2550 2551 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", &str, &str_len, &from, &len, &encoding, &encoding_len) == FAILURE) { 2552 return; 2553 } 2554 2555 mbfl_string_init(&string); 2556 string.no_language = MBSTRG(language); 2557 string.no_encoding = MBSTRG(current_internal_encoding); 2558 2559 if (argc == 4) { 2560 string.no_encoding = mbfl_name2no_encoding(encoding); 2561 if (string.no_encoding == mbfl_no_encoding_invalid) { 2562 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding); 2563 RETURN_FALSE; 2564 } 2565 } 2566 2567 string.val = (unsigned char *)str; 2568 string.len = str_len; 2569 2570 if (argc < 3) { 2571 len = str_len; 2572 } 2573 2574 /* measures length */ 2575 mblen = 0; 2576 if (from < 0 || len < 0) { 2577 mblen = mbfl_strlen(&string); 
2578 } 2579 2580 /* if "from" position is negative, count start position from the end 2581 * of the string 2582 */ 2583 if (from < 0) { 2584 from = mblen + from; 2585 if (from < 0) { 2586 from = 0; 2587 } 2588 } 2589 2590 /* if "length" position is negative, set it to the length 2591 * needed to stop that many chars from the end of the string 2592 */ 2593 if (len < 0) { 2594 len = (mblen - from) + len; 2595 if (len < 0) { 2596 len = 0; 2597 } 2598 } 2599 2600 if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING) 2601 && (from >= mbfl_strlen(&string))) { 2602 RETURN_FALSE; 2603 } 2604 2605 ret = mbfl_substr(&string, &result, from, len); 2606 if (NULL == ret) { 2607 RETURN_FALSE; 2608 } 2609 2610 RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ 2611} 2612/* }}} */ 2613 2614/* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]]) 2615 Returns part of a string */ 2616PHP_FUNCTION(mb_strcut) 2617{ 2618 size_t argc = ZEND_NUM_ARGS(); 2619 char *encoding; 2620 long from, len; 2621 int encoding_len; 2622 mbfl_string string, result, *ret; 2623 2624 mbfl_string_init(&string); 2625 string.no_language = MBSTRG(language); 2626 string.no_encoding = MBSTRG(current_internal_encoding); 2627 2628 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) { 2629 return; 2630 } 2631 2632 if (argc == 4) { 2633 string.no_encoding = mbfl_name2no_encoding(encoding); 2634 if (string.no_encoding == mbfl_no_encoding_invalid) { 2635 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding); 2636 RETURN_FALSE; 2637 } 2638 } 2639 2640 if (argc < 3) { 2641 len = string.len; 2642 } 2643 2644 /* if "from" position is negative, count start position from the end 2645 * of the string 2646 */ 2647 if (from < 0) { 2648 from = string.len + from; 2649 if (from < 0) { 2650 from = 0; 2651 } 2652 } 2653 
2654 /* if "length" position is negative, set it to the length 2655 * needed to stop that many chars from the end of the string 2656 */ 2657 if (len < 0) { 2658 len = (string.len - from) + len; 2659 if (len < 0) { 2660 len = 0; 2661 } 2662 } 2663 2664 if ((unsigned int)from > string.len) { 2665 RETURN_FALSE; 2666 } 2667 2668 ret = mbfl_strcut(&string, &result, from, len); 2669 if (ret == NULL) { 2670 RETURN_FALSE; 2671 } 2672 2673 RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ 2674} 2675/* }}} */ 2676 2677/* {{{ proto int mb_strwidth(string str [, string encoding]) 2678 Gets terminal width of a string */ 2679PHP_FUNCTION(mb_strwidth) 2680{ 2681 int n; 2682 mbfl_string string; 2683 char *enc_name = NULL; 2684 int enc_name_len; 2685 2686 mbfl_string_init(&string); 2687 2688 string.no_language = MBSTRG(language); 2689 string.no_encoding = MBSTRG(current_internal_encoding); 2690 2691 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) { 2692 return; 2693 } 2694 2695 if (enc_name != NULL) { 2696 string.no_encoding = mbfl_name2no_encoding(enc_name); 2697 if (string.no_encoding == mbfl_no_encoding_invalid) { 2698 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2699 RETURN_FALSE; 2700 } 2701 } 2702 2703 n = mbfl_strwidth(&string); 2704 if (n >= 0) { 2705 RETVAL_LONG(n); 2706 } else { 2707 RETVAL_FALSE; 2708 } 2709} 2710/* }}} */ 2711 2712/* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]]) 2713 Trim the string in terminal width */ 2714PHP_FUNCTION(mb_strimwidth) 2715{ 2716 char *str, *trimmarker, *encoding; 2717 long from, width; 2718 int str_len, trimmarker_len, encoding_len; 2719 mbfl_string string, result, marker, *ret; 2720 2721 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, 
&encoding_len) == FAILURE) { 2722 return; 2723 } 2724 2725 mbfl_string_init(&string); 2726 mbfl_string_init(&marker); 2727 string.no_language = MBSTRG(language); 2728 string.no_encoding = MBSTRG(current_internal_encoding); 2729 marker.no_language = MBSTRG(language); 2730 marker.no_encoding = MBSTRG(current_internal_encoding); 2731 marker.val = NULL; 2732 marker.len = 0; 2733 2734 if (ZEND_NUM_ARGS() == 5) { 2735 string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding); 2736 if (string.no_encoding == mbfl_no_encoding_invalid) { 2737 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding); 2738 RETURN_FALSE; 2739 } 2740 } 2741 2742 string.val = (unsigned char *)str; 2743 string.len = str_len; 2744 2745 if (from < 0 || from > str_len) { 2746 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range"); 2747 RETURN_FALSE; 2748 } 2749 2750 if (width < 0) { 2751 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value"); 2752 RETURN_FALSE; 2753 } 2754 2755 if (ZEND_NUM_ARGS() >= 4) { 2756 marker.val = (unsigned char *)trimmarker; 2757 marker.len = trimmarker_len; 2758 } 2759 2760 ret = mbfl_strimwidth(&string, &marker, &result, from, width); 2761 2762 if (ret == NULL) { 2763 RETURN_FALSE; 2764 } 2765 2766 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ 2767} 2768/* }}} */ 2769 2770/* {{{ MBSTRING_API char *php_mb_convert_encoding() */ 2771MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC) 2772{ 2773 mbfl_string string, result, *ret; 2774 enum mbfl_no_encoding from_encoding, to_encoding; 2775 mbfl_buffer_converter *convd; 2776 int size, *list; 2777 char *output=NULL; 2778 2779 if (output_len) { 2780 *output_len = 0; 2781 } 2782 if (!input) { 2783 return NULL; 2784 } 2785 /* new encoding */ 2786 if (_to_encoding && strlen(_to_encoding)) { 2787 to_encoding 
= mbfl_name2no_encoding(_to_encoding); 2788 if (to_encoding == mbfl_no_encoding_invalid) { 2789 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding); 2790 return NULL; 2791 } 2792 } else { 2793 to_encoding = MBSTRG(current_internal_encoding); 2794 } 2795 2796 /* initialize string */ 2797 mbfl_string_init(&string); 2798 mbfl_string_init(&result); 2799 from_encoding = MBSTRG(current_internal_encoding); 2800 string.no_encoding = from_encoding; 2801 string.no_language = MBSTRG(language); 2802 string.val = (unsigned char *)input; 2803 string.len = length; 2804 2805 /* pre-conversion encoding */ 2806 if (_from_encodings) { 2807 list = NULL; 2808 size = 0; 2809 php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC); 2810 if (size == 1) { 2811 from_encoding = *list; 2812 string.no_encoding = from_encoding; 2813 } else if (size > 1) { 2814 /* auto detect */ 2815 from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection)); 2816 if (from_encoding != mbfl_no_encoding_invalid) { 2817 string.no_encoding = from_encoding; 2818 } else { 2819 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding"); 2820 from_encoding = mbfl_no_encoding_pass; 2821 to_encoding = from_encoding; 2822 string.no_encoding = from_encoding; 2823 } 2824 } else { 2825 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified"); 2826 } 2827 if (list != NULL) { 2828 efree((void *)list); 2829 } 2830 } 2831 2832 /* initialize converter */ 2833 convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len); 2834 if (convd == NULL) { 2835 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter"); 2836 return NULL; 2837 } 2838 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); 2839 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); 2840 2841 /* do it */ 
2842 ret = mbfl_buffer_converter_feed_result(convd, &string, &result); 2843 if (ret) { 2844 if (output_len) { 2845 *output_len = ret->len; 2846 } 2847 output = (char *)ret->val; 2848 } 2849 2850 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); 2851 mbfl_buffer_converter_delete(convd); 2852 return output; 2853} 2854/* }}} */ 2855 2856/* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding]) 2857 Returns converted string in desired encoding */ 2858PHP_FUNCTION(mb_convert_encoding) 2859{ 2860 char *arg_str, *arg_new; 2861 int str_len, new_len; 2862 zval *arg_old; 2863 int i; 2864 size_t size, l, n; 2865 char *_from_encodings = NULL, *ret, *s_free = NULL; 2866 2867 zval **hash_entry; 2868 HashTable *target_hash; 2869 2870 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) { 2871 return; 2872 } 2873 2874 if (ZEND_NUM_ARGS() == 3) { 2875 switch (Z_TYPE_P(arg_old)) { 2876 case IS_ARRAY: 2877 target_hash = Z_ARRVAL_P(arg_old); 2878 zend_hash_internal_pointer_reset(target_hash); 2879 i = zend_hash_num_elements(target_hash); 2880 _from_encodings = NULL; 2881 2882 while (i > 0) { 2883 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) { 2884 break; 2885 } 2886 2887 convert_to_string_ex(hash_entry); 2888 2889 if ( _from_encodings) { 2890 l = strlen(_from_encodings); 2891 n = strlen(Z_STRVAL_PP(hash_entry)); 2892 _from_encodings = erealloc(_from_encodings, l+n+2); 2893 strcpy(_from_encodings+l, ","); 2894 strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry)); 2895 } else { 2896 _from_encodings = estrdup(Z_STRVAL_PP(hash_entry)); 2897 } 2898 2899 zend_hash_move_forward(target_hash); 2900 i--; 2901 } 2902 2903 if (_from_encodings != NULL && !strlen(_from_encodings)) { 2904 efree(_from_encodings); 2905 _from_encodings = NULL; 2906 } 2907 s_free = _from_encodings; 2908 break; 2909 default: 2910 convert_to_string(arg_old); 2911 
_from_encodings = Z_STRVAL_P(arg_old); 2912 break; 2913 } 2914 } 2915 2916 /* new encoding */ 2917 ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC); 2918 if (ret != NULL) { 2919 RETVAL_STRINGL(ret, size, 0); /* the string is already strdup()'ed */ 2920 } else { 2921 RETVAL_FALSE; 2922 } 2923 2924 if ( s_free) { 2925 efree(s_free); 2926 } 2927} 2928/* }}} */ 2929 2930/* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding]) 2931 Returns a case-folded version of sourcestring */ 2932PHP_FUNCTION(mb_convert_case) 2933{ 2934 char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); 2935 int str_len, from_encoding_len; 2936 long case_mode = 0; 2937 char *newstr; 2938 size_t ret_len; 2939 2940 RETVAL_FALSE; 2941 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len, 2942 &case_mode, &from_encoding, &from_encoding_len) == FAILURE) 2943 RETURN_FALSE; 2944 2945 newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC); 2946 2947 if (newstr) { 2948 RETVAL_STRINGL(newstr, ret_len, 0); 2949 } 2950} 2951/* }}} */ 2952 2953/* {{{ proto string mb_strtoupper(string sourcestring [, string encoding]) 2954 * Returns a uppercased version of sourcestring 2955 */ 2956PHP_FUNCTION(mb_strtoupper) 2957{ 2958 char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); 2959 int str_len, from_encoding_len; 2960 char *newstr; 2961 size_t ret_len; 2962 2963 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len, 2964 &from_encoding, &from_encoding_len) == FAILURE) { 2965 return; 2966 } 2967 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC); 2968 2969 if (newstr) { 2970 RETURN_STRINGL(newstr, ret_len, 0); 2971 } 2972 RETURN_FALSE; 2973} 2974/* }}} */ 2975 2976/* {{{ proto string 
mb_strtolower(string sourcestring [, string encoding]) 2977 * Returns a lowercased version of sourcestring 2978 */ 2979PHP_FUNCTION(mb_strtolower) 2980{ 2981 char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding)); 2982 int str_len, from_encoding_len; 2983 char *newstr; 2984 size_t ret_len; 2985 2986 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len, 2987 &from_encoding, &from_encoding_len) == FAILURE) { 2988 return; 2989 } 2990 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC); 2991 2992 if (newstr) { 2993 RETURN_STRINGL(newstr, ret_len, 0); 2994 } 2995 RETURN_FALSE; 2996} 2997/* }}} */ 2998 2999/* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]]) 3000 Encodings of the given string is returned (as a string) */ 3001PHP_FUNCTION(mb_detect_encoding) 3002{ 3003 char *str; 3004 int str_len; 3005 zend_bool strict=0; 3006 zval *encoding_list; 3007 3008 mbfl_string string; 3009 const char *ret; 3010 enum mbfl_no_encoding *elist; 3011 int size, *list; 3012 3013 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) { 3014 return; 3015 } 3016 3017 /* make encoding list */ 3018 list = NULL; 3019 size = 0; 3020 if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) { 3021 switch (Z_TYPE_P(encoding_list)) { 3022 case IS_ARRAY: 3023 if (!php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) { 3024 if (list) { 3025 efree(list); 3026 list = NULL; 3027 size = 0; 3028 } 3029 } 3030 break; 3031 default: 3032 convert_to_string(encoding_list); 3033 if (!php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) { 3034 if (list) { 3035 efree(list); 3036 list = NULL; 3037 size = 0; 3038 } 3039 } 3040 break; 3041 } 3042 if (size <= 0) { 3043 php_error_docref(NULL TSRMLS_CC, E_WARNING, 
"Illegal argument"); 3044 } 3045 } 3046 3047 if (ZEND_NUM_ARGS() < 3) { 3048 strict = (zend_bool)MBSTRG(strict_detection); 3049 } 3050 3051 if (size > 0 && list != NULL) { 3052 elist = list; 3053 } else { 3054 elist = MBSTRG(current_detect_order_list); 3055 size = MBSTRG(current_detect_order_list_size); 3056 } 3057 3058 mbfl_string_init(&string); 3059 string.no_language = MBSTRG(language); 3060 string.val = (unsigned char *)str; 3061 string.len = str_len; 3062 ret = mbfl_identify_encoding_name(&string, elist, size, strict); 3063 3064 if (list != NULL) { 3065 efree((void *)list); 3066 } 3067 3068 if (ret == NULL) { 3069 RETURN_FALSE; 3070 } 3071 3072 RETVAL_STRING((char *)ret, 1); 3073} 3074/* }}} */ 3075 3076/* {{{ proto mixed mb_list_encodings() 3077 Returns an array of all supported entity encodings */ 3078PHP_FUNCTION(mb_list_encodings) 3079{ 3080 const mbfl_encoding **encodings; 3081 const mbfl_encoding *encoding; 3082 int i; 3083 3084 array_init(return_value); 3085 i = 0; 3086 encodings = mbfl_get_supported_encodings(); 3087 while ((encoding = encodings[i++]) != NULL) { 3088 add_next_index_string(return_value, (char *) encoding->name, 1); 3089 } 3090} 3091/* }}} */ 3092 3093/* {{{ proto array mb_encoding_aliases(string encoding) 3094 Returns an array of the aliases of a given encoding name */ 3095PHP_FUNCTION(mb_encoding_aliases) 3096{ 3097 const mbfl_encoding *encoding; 3098 char *name = NULL; 3099 int name_len; 3100 3101 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) { 3102 RETURN_FALSE; 3103 } 3104 3105 encoding = mbfl_name2encoding(name); 3106 if (!encoding) { 3107 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); 3108 RETURN_FALSE; 3109 } 3110 3111 array_init(return_value); 3112 if (encoding->aliases != NULL) { 3113 const char **alias; 3114 for (alias = *encoding->aliases; *alias; ++alias) { 3115 add_next_index_string(return_value, (char *)*alias, 1); 3116 } 3117 } 3118} 3119/* }}} */ 
3120 3121/* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]]) 3122 Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */ 3123PHP_FUNCTION(mb_encode_mimeheader) 3124{ 3125 enum mbfl_no_encoding charset, transenc; 3126 mbfl_string string, result, *ret; 3127 char *charset_name = NULL; 3128 int charset_name_len; 3129 char *trans_enc_name = NULL; 3130 int trans_enc_name_len; 3131 char *linefeed = "\r\n"; 3132 int linefeed_len; 3133 long indent = 0; 3134 3135 mbfl_string_init(&string); 3136 string.no_language = MBSTRG(language); 3137 string.no_encoding = MBSTRG(current_internal_encoding); 3138 3139 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) { 3140 return; 3141 } 3142 3143 charset = mbfl_no_encoding_pass; 3144 transenc = mbfl_no_encoding_base64; 3145 3146 if (charset_name != NULL) { 3147 charset = mbfl_name2no_encoding(charset_name); 3148 if (charset == mbfl_no_encoding_invalid) { 3149 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name); 3150 RETURN_FALSE; 3151 } 3152 } else { 3153 const mbfl_language *lang = mbfl_no2language(MBSTRG(language)); 3154 if (lang != NULL) { 3155 charset = lang->mail_charset; 3156 transenc = lang->mail_header_encoding; 3157 } 3158 } 3159 3160 if (trans_enc_name != NULL) { 3161 if (*trans_enc_name == 'B' || *trans_enc_name == 'b') { 3162 transenc = mbfl_no_encoding_base64; 3163 } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') { 3164 transenc = mbfl_no_encoding_qprint; 3165 } 3166 } 3167 3168 mbfl_string_init(&result); 3169 ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent); 3170 if (ret != NULL) { 3171 RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already 
strdup()'ed */ 3172 } else { 3173 RETVAL_FALSE; 3174 } 3175} 3176/* }}} */ 3177 3178/* {{{ proto string mb_decode_mimeheader(string string) 3179 Decodes the MIME "encoded-word" in the string */ 3180PHP_FUNCTION(mb_decode_mimeheader) 3181{ 3182 mbfl_string string, result, *ret; 3183 3184 mbfl_string_init(&string); 3185 string.no_language = MBSTRG(language); 3186 string.no_encoding = MBSTRG(current_internal_encoding); 3187 3188 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) { 3189 return; 3190 } 3191 3192 mbfl_string_init(&result); 3193 ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)); 3194 if (ret != NULL) { 3195 RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */ 3196 } else { 3197 RETVAL_FALSE; 3198 } 3199} 3200/* }}} */ 3201 3202/* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding]) 3203 Conversion between full-width character and half-width character (Japanese) */ 3204PHP_FUNCTION(mb_convert_kana) 3205{ 3206 int opt, i; 3207 mbfl_string string, result, *ret; 3208 char *optstr = NULL; 3209 int optstr_len; 3210 char *encname = NULL; 3211 int encname_len; 3212 3213 mbfl_string_init(&string); 3214 string.no_language = MBSTRG(language); 3215 string.no_encoding = MBSTRG(current_internal_encoding); 3216 3217 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) { 3218 return; 3219 } 3220 3221 /* option */ 3222 if (optstr != NULL) { 3223 char *p = optstr; 3224 int n = optstr_len; 3225 i = 0; 3226 opt = 0; 3227 while (i < n) { 3228 i++; 3229 switch (*p++) { 3230 case 'A': 3231 opt |= 0x1; 3232 break; 3233 case 'a': 3234 opt |= 0x10; 3235 break; 3236 case 'R': 3237 opt |= 0x2; 3238 break; 3239 case 'r': 3240 opt |= 0x20; 3241 break; 3242 case 'N': 3243 opt |= 0x4; 3244 break; 3245 case 'n': 3246 opt |= 0x40; 3247 break; 
3248 case 'S': 3249 opt |= 0x8; 3250 break; 3251 case 's': 3252 opt |= 0x80; 3253 break; 3254 case 'K': 3255 opt |= 0x100; 3256 break; 3257 case 'k': 3258 opt |= 0x1000; 3259 break; 3260 case 'H': 3261 opt |= 0x200; 3262 break; 3263 case 'h': 3264 opt |= 0x2000; 3265 break; 3266 case 'V': 3267 opt |= 0x800; 3268 break; 3269 case 'C': 3270 opt |= 0x10000; 3271 break; 3272 case 'c': 3273 opt |= 0x20000; 3274 break; 3275 case 'M': 3276 opt |= 0x100000; 3277 break; 3278 case 'm': 3279 opt |= 0x200000; 3280 break; 3281 } 3282 } 3283 } else { 3284 opt = 0x900; 3285 } 3286 3287 /* encoding */ 3288 if (encname != NULL) { 3289 string.no_encoding = mbfl_name2no_encoding(encname); 3290 if (string.no_encoding == mbfl_no_encoding_invalid) { 3291 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname); 3292 RETURN_FALSE; 3293 } 3294 } 3295 3296 ret = mbfl_ja_jp_hantozen(&string, &result, opt); 3297 if (ret != NULL) { 3298 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ 3299 } else { 3300 RETVAL_FALSE; 3301 } 3302} 3303/* }}} */ 3304 3305#define PHP_MBSTR_STACK_BLOCK_SIZE 32 3306 3307/* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...]) 3308 Converts the string resource in variables to desired encoding */ 3309PHP_FUNCTION(mb_convert_variables) 3310{ 3311 zval ***args, ***stack, **var, **hash_entry, **zfrom_enc; 3312 HashTable *target_hash; 3313 mbfl_string string, result, *ret; 3314 enum mbfl_no_encoding from_encoding, to_encoding; 3315 mbfl_encoding_detector *identd; 3316 mbfl_buffer_converter *convd; 3317 int n, to_enc_len, argc, stack_level, stack_max, elistsz; 3318 enum mbfl_no_encoding *elist; 3319 char *name, *to_enc; 3320 void *ptmp; 3321 3322 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) { 3323 return; 3324 } 3325 3326 /* new encoding */ 3327 to_encoding = mbfl_name2no_encoding(to_enc); 
3328 if (to_encoding == mbfl_no_encoding_invalid) { 3329 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc); 3330 efree(args); 3331 RETURN_FALSE; 3332 } 3333 3334 /* initialize string */ 3335 mbfl_string_init(&string); 3336 mbfl_string_init(&result); 3337 from_encoding = MBSTRG(current_internal_encoding); 3338 string.no_encoding = from_encoding; 3339 string.no_language = MBSTRG(language); 3340 3341 /* pre-conversion encoding */ 3342 elist = NULL; 3343 elistsz = 0; 3344 switch (Z_TYPE_PP(zfrom_enc)) { 3345 case IS_ARRAY: 3346 php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC); 3347 break; 3348 default: 3349 convert_to_string_ex(zfrom_enc); 3350 php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC); 3351 break; 3352 } 3353 if (elistsz <= 0) { 3354 from_encoding = mbfl_no_encoding_pass; 3355 } else if (elistsz == 1) { 3356 from_encoding = *elist; 3357 } else { 3358 /* auto detect */ 3359 from_encoding = mbfl_no_encoding_invalid; 3360 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE; 3361 stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0); 3362 stack_level = 0; 3363 identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection)); 3364 if (identd != NULL) { 3365 n = 0; 3366 while (n < argc || stack_level > 0) { 3367 if (stack_level <= 0) { 3368 var = args[n++]; 3369 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) { 3370 target_hash = HASH_OF(*var); 3371 if (target_hash != NULL) { 3372 zend_hash_internal_pointer_reset(target_hash); 3373 } 3374 } 3375 } else { 3376 stack_level--; 3377 var = stack[stack_level]; 3378 } 3379 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) { 3380 target_hash = HASH_OF(*var); 3381 if (target_hash != NULL) { 3382 while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) { 3383 zend_hash_move_forward(target_hash); 3384 if (Z_TYPE_PP(hash_entry) == IS_ARRAY || 
Z_TYPE_PP(hash_entry) == IS_OBJECT) { 3385 if (stack_level >= stack_max) { 3386 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE; 3387 ptmp = erealloc(stack, sizeof(zval **)*stack_max); 3388 stack = (zval ***)ptmp; 3389 } 3390 stack[stack_level] = var; 3391 stack_level++; 3392 var = hash_entry; 3393 target_hash = HASH_OF(*var); 3394 if (target_hash != NULL) { 3395 zend_hash_internal_pointer_reset(target_hash); 3396 continue; 3397 } 3398 } else if (Z_TYPE_PP(hash_entry) == IS_STRING) { 3399 string.val = (unsigned char *)Z_STRVAL_PP(hash_entry); 3400 string.len = Z_STRLEN_PP(hash_entry); 3401 if (mbfl_encoding_detector_feed(identd, &string)) { 3402 goto detect_end; /* complete detecting */ 3403 } 3404 } 3405 } 3406 } 3407 } else if (Z_TYPE_PP(var) == IS_STRING) { 3408 string.val = (unsigned char *)Z_STRVAL_PP(var); 3409 string.len = Z_STRLEN_PP(var); 3410 if (mbfl_encoding_detector_feed(identd, &string)) { 3411 goto detect_end; /* complete detecting */ 3412 } 3413 } 3414 } 3415detect_end: 3416 from_encoding = mbfl_encoding_detector_judge(identd); 3417 mbfl_encoding_detector_delete(identd); 3418 } 3419 efree(stack); 3420 3421 if (from_encoding == mbfl_no_encoding_invalid) { 3422 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding"); 3423 from_encoding = mbfl_no_encoding_pass; 3424 } 3425 } 3426 if (elist != NULL) { 3427 efree((void *)elist); 3428 } 3429 /* create converter */ 3430 convd = NULL; 3431 if (from_encoding != mbfl_no_encoding_pass) { 3432 convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0); 3433 if (convd == NULL) { 3434 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter"); 3435 RETURN_FALSE; 3436 } 3437 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); 3438 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); 3439 } 3440 3441 /* convert */ 3442 if (convd != NULL) { 3443 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE; 3444 stack = (zval 
***)safe_emalloc(stack_max, sizeof(zval **), 0); 3445 stack_level = 0; 3446 n = 0; 3447 while (n < argc || stack_level > 0) { 3448 if (stack_level <= 0) { 3449 var = args[n++]; 3450 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) { 3451 target_hash = HASH_OF(*var); 3452 if (target_hash != NULL) { 3453 zend_hash_internal_pointer_reset(target_hash); 3454 } 3455 } 3456 } else { 3457 stack_level--; 3458 var = stack[stack_level]; 3459 } 3460 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) { 3461 target_hash = HASH_OF(*var); 3462 if (target_hash != NULL) { 3463 while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) { 3464 zend_hash_move_forward(target_hash); 3465 if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) { 3466 if (stack_level >= stack_max) { 3467 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE; 3468 ptmp = erealloc(stack, sizeof(zval **)*stack_max); 3469 stack = (zval ***)ptmp; 3470 } 3471 stack[stack_level] = var; 3472 stack_level++; 3473 var = hash_entry; 3474 SEPARATE_ZVAL(hash_entry); 3475 target_hash = HASH_OF(*var); 3476 if (target_hash != NULL) { 3477 zend_hash_internal_pointer_reset(target_hash); 3478 continue; 3479 } 3480 } else if (Z_TYPE_PP(hash_entry) == IS_STRING) { 3481 string.val = (unsigned char *)Z_STRVAL_PP(hash_entry); 3482 string.len = Z_STRLEN_PP(hash_entry); 3483 ret = mbfl_buffer_converter_feed_result(convd, &string, &result); 3484 if (ret != NULL) { 3485 if (Z_REFCOUNT_PP(hash_entry) > 1) { 3486 Z_DELREF_PP(hash_entry); 3487 MAKE_STD_ZVAL(*hash_entry); 3488 } else { 3489 zval_dtor(*hash_entry); 3490 } 3491 ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0); 3492 } 3493 } 3494 } 3495 } 3496 } else if (Z_TYPE_PP(var) == IS_STRING) { 3497 string.val = (unsigned char *)Z_STRVAL_PP(var); 3498 string.len = Z_STRLEN_PP(var); 3499 ret = mbfl_buffer_converter_feed_result(convd, &string, &result); 3500 if (ret != NULL) { 3501 zval_dtor(*var); 3502 ZVAL_STRINGL(*var, 
(char *)ret->val, ret->len, 0); 3503 } 3504 } 3505 } 3506 efree(stack); 3507 3508 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); 3509 mbfl_buffer_converter_delete(convd); 3510 } 3511 3512 efree(args); 3513 3514 name = (char *)mbfl_no_encoding2name(from_encoding); 3515 if (name != NULL) { 3516 RETURN_STRING(name, 1); 3517 } else { 3518 RETURN_FALSE; 3519 } 3520} 3521/* }}} */ 3522 3523/* {{{ HTML numeric entity */ 3524/* {{{ static void php_mb_numericentity_exec() */ 3525static void 3526php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type) 3527{ 3528 char *str, *encoding; 3529 int str_len, encoding_len; 3530 zval *zconvmap, **hash_entry; 3531 HashTable *target_hash; 3532 size_t argc = ZEND_NUM_ARGS(); 3533 int i, *convmap, *mapelm, mapsize=0; 3534 mbfl_string string, result, *ret; 3535 enum mbfl_no_encoding no_encoding; 3536 3537 if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) { 3538 return; 3539 } 3540 3541 mbfl_string_init(&string); 3542 string.no_language = MBSTRG(language); 3543 string.no_encoding = MBSTRG(current_internal_encoding); 3544 string.val = (unsigned char *)str; 3545 string.len = str_len; 3546 3547 /* encoding */ 3548 if (argc == 3) { 3549 no_encoding = mbfl_name2no_encoding(encoding); 3550 if (no_encoding == mbfl_no_encoding_invalid) { 3551 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding); 3552 RETURN_FALSE; 3553 } else { 3554 string.no_encoding = no_encoding; 3555 } 3556 } 3557 3558 /* conversion map */ 3559 convmap = NULL; 3560 if (Z_TYPE_P(zconvmap) == IS_ARRAY) { 3561 target_hash = Z_ARRVAL_P(zconvmap); 3562 zend_hash_internal_pointer_reset(target_hash); 3563 i = zend_hash_num_elements(target_hash); 3564 if (i > 0) { 3565 convmap = (int *)safe_emalloc(i, sizeof(int), 0); 3566 mapelm = convmap; 3567 mapsize = 0; 3568 while (i > 0) { 3569 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) { 3570 break; 
3571 } 3572 convert_to_long_ex(hash_entry); 3573 *mapelm++ = Z_LVAL_PP(hash_entry); 3574 mapsize++; 3575 i--; 3576 zend_hash_move_forward(target_hash); 3577 } 3578 } 3579 } 3580 if (convmap == NULL) { 3581 RETURN_FALSE; 3582 } 3583 mapsize /= 4; 3584 3585 ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type); 3586 if (ret != NULL) { 3587 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 3588 } else { 3589 RETVAL_FALSE; 3590 } 3591 efree((void *)convmap); 3592} 3593/* }}} */ 3594 3595/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding]) 3596 Converts specified characters to HTML numeric entities */ 3597PHP_FUNCTION(mb_encode_numericentity) 3598{ 3599 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); 3600} 3601/* }}} */ 3602 3603/* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding]) 3604 Converts HTML numeric entities to character code */ 3605PHP_FUNCTION(mb_decode_numericentity) 3606{ 3607 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); 3608} 3609/* }}} */ 3610/* }}} */ 3611 3612/* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]]) 3613 * Sends an email message with MIME scheme 3614 */ 3615 3616#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \ 3617 if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) { \ 3618 pos += 2; \ 3619 while (str[pos + 1] == ' ' || str[pos + 1] == '\t') { \ 3620 pos++; \ 3621 } \ 3622 continue; \ 3623 } 3624 3625#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) \ 3626 pp = str; \ 3627 ee = pp + len; \ 3628 while ((pp = memchr(pp, '\0', (ee - pp)))) { \ 3629 *pp = ' '; \ 3630 } \ 3631 3632#define APPEND_ONE_CHAR(ch) do { \ 3633 if (token.a > 0) { \ 3634 smart_str_appendc(&token, ch); \ 3635 } else {\ 3636 token.len++; \ 3637 } \ 3638} while (0) 3639 3640#define SEPARATE_SMART_STR(str) do {\ 3641 if 
((str)->a == 0) { \ 3642 char *tmp_ptr; \ 3643 (str)->a = 1; \ 3644 while ((str)->a < (str)->len) { \ 3645 (str)->a <<= 1; \ 3646 } \ 3647 tmp_ptr = emalloc((str)->a + 1); \ 3648 memcpy(tmp_ptr, (str)->c, (str)->len); \ 3649 (str)->c = tmp_ptr; \ 3650 } \ 3651} while (0) 3652 3653static void my_smart_str_dtor(smart_str *s) 3654{ 3655 if (s->a > 0) { 3656 smart_str_free(s); 3657 } 3658} 3659 3660static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len) 3661{ 3662 const char *ps; 3663 size_t icnt; 3664 int state = 0; 3665 int crlf_state = -1; 3666 3667 smart_str token = { 0, 0, 0 }; 3668 smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 }; 3669 3670 ps = str; 3671 icnt = str_len; 3672 3673 /* 3674 * C o n t e n t - T y p e : t e x t / h t m l \r\n 3675 * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^ 3676 * state 0 1 2 3 3677 * 3678 * C o n t e n t - T y p e : t e x t / h t m l \r\n 3679 * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^ 3680 * crlf_state -1 0 1 -1 3681 * 3682 */ 3683 3684 while (icnt > 0) { 3685 switch (*ps) { 3686 case ':': 3687 if (crlf_state == 1) { 3688 APPEND_ONE_CHAR('\r'); 3689 } 3690 3691 if (state == 0 || state == 1) { 3692 fld_name = token; 3693 3694 state = 2; 3695 } else { 3696 APPEND_ONE_CHAR(*ps); 3697 } 3698 3699 crlf_state = 0; 3700 break; 3701 3702 case '\n': 3703 if (crlf_state == -1) { 3704 goto out; 3705 } 3706 crlf_state = -1; 3707 break; 3708 3709 case '\r': 3710 if (crlf_state == 1) { 3711 APPEND_ONE_CHAR('\r'); 3712 } else { 3713 crlf_state = 1; 3714 } 3715 break; 3716 3717 case ' ': case '\t': 3718 if (crlf_state == -1) { 3719 if (state == 3) { 3720 /* continuing from the previous line */ 3721 SEPARATE_SMART_STR(&token); 3722 state = 4; 3723 } else { 3724 /* simply skipping this new line */ 3725 state = 5; 3726 } 3727 } else { 3728 if (crlf_state == 1) { 3729 APPEND_ONE_CHAR('\r'); 3730 } 3731 if (state == 1 || state == 3) { 3732 APPEND_ONE_CHAR(*ps); 3733 } 3734 } 3735 crlf_state = 
0; 3736 break; 3737 3738 default: 3739 switch (state) { 3740 case 0: 3741 token.c = (char *)ps; 3742 token.len = 0; 3743 token.a = 0; 3744 state = 1; 3745 break; 3746 3747 case 2: 3748 if (crlf_state != -1) { 3749 token.c = (char *)ps; 3750 token.len = 0; 3751 token.a = 0; 3752 3753 state = 3; 3754 break; 3755 } 3756 /* break is missing intentionally */ 3757 3758 case 3: 3759 if (crlf_state == -1) { 3760 fld_val = token; 3761 3762 if (fld_name.c != NULL && fld_val.c != NULL) { 3763 char *dummy; 3764 3765 /* FIXME: some locale free implementation is 3766 * really required here,,, */ 3767 SEPARATE_SMART_STR(&fld_name); 3768 php_strtoupper(fld_name.c, fld_name.len); 3769 3770 zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy); 3771 3772 my_smart_str_dtor(&fld_name); 3773 } 3774 3775 memset(&fld_name, 0, sizeof(smart_str)); 3776 memset(&fld_val, 0, sizeof(smart_str)); 3777 3778 token.c = (char *)ps; 3779 token.len = 0; 3780 token.a = 0; 3781 3782 state = 1; 3783 } 3784 break; 3785 3786 case 4: 3787 APPEND_ONE_CHAR(' '); 3788 state = 3; 3789 break; 3790 } 3791 3792 if (crlf_state == 1) { 3793 APPEND_ONE_CHAR('\r'); 3794 } 3795 3796 APPEND_ONE_CHAR(*ps); 3797 3798 crlf_state = 0; 3799 break; 3800 } 3801 ps++, icnt--; 3802 } 3803out: 3804 if (state == 2) { 3805 token.c = ""; 3806 token.len = 0; 3807 token.a = 0; 3808 3809 state = 3; 3810 } 3811 if (state == 3) { 3812 fld_val = token; 3813 3814 if (fld_name.c != NULL && fld_val.c != NULL) { 3815 void *dummy; 3816 3817 /* FIXME: some locale free implementation is 3818 * really required here,,, */ 3819 SEPARATE_SMART_STR(&fld_name); 3820 php_strtoupper(fld_name.c, fld_name.len); 3821 3822 zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy); 3823 3824 my_smart_str_dtor(&fld_name); 3825 } 3826 } 3827 return state; 3828} 3829 3830PHP_FUNCTION(mb_send_mail) 3831{ 3832 int n; 3833 char *to = NULL; 3834 int to_len; 3835 char *message 
= NULL; 3836 int message_len; 3837 char *headers = NULL; 3838 int headers_len; 3839 char *subject = NULL; 3840 int subject_len; 3841 char *extra_cmd = NULL; 3842 int extra_cmd_len; 3843 int i; 3844 char *to_r = NULL; 3845 char *force_extra_parameters = INI_STR("mail.force_extra_parameters"); 3846 struct { 3847 int cnt_type:1; 3848 int cnt_trans_enc:1; 3849 } suppressed_hdrs = { 0, 0 }; 3850 3851 char *message_buf = NULL, *subject_buf = NULL, *p; 3852 mbfl_string orig_str, conv_str; 3853 mbfl_string *pstr; /* pointer to mbfl string for return value */ 3854 enum mbfl_no_encoding 3855 tran_cs, /* transfar text charset */ 3856 head_enc, /* header transfar encoding */ 3857 body_enc; /* body transfar encoding */ 3858 mbfl_memory_device device; /* automatic allocateable buffer for additional header */ 3859 const mbfl_language *lang; 3860 int err = 0; 3861 HashTable ht_headers; 3862 smart_str *s; 3863 extern void mbfl_memory_device_unput(mbfl_memory_device *device); 3864 char *pp, *ee; 3865 3866 if (PG(safe_mode) && (ZEND_NUM_ARGS() == 5)) { 3867 php_error_docref(NULL TSRMLS_CC, E_WARNING, "SAFE MODE Restriction in effect. 
The fifth parameter is disabled in SAFE MODE."); 3868 RETURN_FALSE; 3869 } 3870 3871 /* initialize */ 3872 mbfl_memory_device_init(&device, 0, 0); 3873 mbfl_string_init(&orig_str); 3874 mbfl_string_init(&conv_str); 3875 3876 /* character-set, transfer-encoding */ 3877 tran_cs = mbfl_no_encoding_utf8; 3878 head_enc = mbfl_no_encoding_base64; 3879 body_enc = mbfl_no_encoding_base64; 3880 lang = mbfl_no2language(MBSTRG(language)); 3881 if (lang != NULL) { 3882 tran_cs = lang->mail_charset; 3883 head_enc = lang->mail_header_encoding; 3884 body_enc = lang->mail_body_encoding; 3885 } 3886 3887 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) { 3888 return; 3889 } 3890 3891 /* ASCIIZ check */ 3892 MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len); 3893 MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len); 3894 MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len); 3895 if (headers) { 3896 MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len); 3897 } 3898 if (extra_cmd) { 3899 MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len); 3900 } 3901 3902 zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0); 3903 3904 if (headers != NULL) { 3905 _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len); 3906 } 3907 3908 if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) { 3909 char *tmp; 3910 char *param_name; 3911 char *charset = NULL; 3912 3913 SEPARATE_SMART_STR(s); 3914 smart_str_0(s); 3915 3916 p = strchr(s->c, ';'); 3917 3918 if (p != NULL) { 3919 /* skipping the padded spaces */ 3920 do { 3921 ++p; 3922 } while (*p == ' ' || *p == '\t'); 3923 3924 if (*p != '\0') { 3925 if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) { 3926 if (strcasecmp(param_name, "charset") == 0) { 3927 enum mbfl_no_encoding _tran_cs = tran_cs; 3928 3929 charset = php_strtok_r(NULL, "= \"", &tmp); 3930 if 
(charset != NULL) { 3931 _tran_cs = mbfl_name2no_encoding(charset); 3932 } 3933 3934 if (_tran_cs == mbfl_no_encoding_invalid) { 3935 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset); 3936 _tran_cs = mbfl_no_encoding_ascii; 3937 } 3938 tran_cs = _tran_cs; 3939 } 3940 } 3941 } 3942 } 3943 suppressed_hdrs.cnt_type = 1; 3944 } 3945 3946 if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) { 3947 enum mbfl_no_encoding _body_enc; 3948 SEPARATE_SMART_STR(s); 3949 smart_str_0(s); 3950 3951 _body_enc = mbfl_name2no_encoding(s->c); 3952 switch (_body_enc) { 3953 case mbfl_no_encoding_base64: 3954 case mbfl_no_encoding_7bit: 3955 case mbfl_no_encoding_8bit: 3956 body_enc = _body_enc; 3957 break; 3958 3959 default: 3960 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c); 3961 body_enc = mbfl_no_encoding_8bit; 3962 break; 3963 } 3964 suppressed_hdrs.cnt_trans_enc = 1; 3965 } 3966 3967 /* To: */ 3968 if (to != NULL) { 3969 if (to_len > 0) { 3970 to_r = estrndup(to, to_len); 3971 for (; to_len; to_len--) { 3972 if (!isspace((unsigned char) to_r[to_len - 1])) { 3973 break; 3974 } 3975 to_r[to_len - 1] = '\0'; 3976 } 3977 for (i = 0; to_r[i]; i++) { 3978 if (iscntrl((unsigned char) to_r[i])) { 3979 /* According to RFC 822, section 3.1.1 long headers may be separated into 3980 * parts using CRLF followed at least one linear-white-space character ('\t' or ' '). 3981 * To prevent these separators from being replaced with a space, we use the 3982 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them. 
3983 */ 3984 SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i); 3985 to_r[i] = ' '; 3986 } 3987 } 3988 } else { 3989 to_r = to; 3990 } 3991 } else { 3992 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field"); 3993 err = 1; 3994 } 3995 3996 /* Subject: */ 3997 if (subject != NULL && subject_len >= 0) { 3998 orig_str.no_language = MBSTRG(language); 3999 orig_str.val = (unsigned char *)subject; 4000 orig_str.len = subject_len; 4001 orig_str.no_encoding = MBSTRG(current_internal_encoding); 4002 if (orig_str.no_encoding == mbfl_no_encoding_invalid 4003 || orig_str.no_encoding == mbfl_no_encoding_pass) { 4004 orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); 4005 } 4006 pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]")); 4007 if (pstr != NULL) { 4008 subject_buf = subject = (char *)pstr->val; 4009 } 4010 } else { 4011 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field"); 4012 err = 1; 4013 } 4014 4015 /* message body */ 4016 if (message != NULL) { 4017 orig_str.no_language = MBSTRG(language); 4018 orig_str.val = (unsigned char *)message; 4019 orig_str.len = (unsigned int)message_len; 4020 orig_str.no_encoding = MBSTRG(current_internal_encoding); 4021 4022 if (orig_str.no_encoding == mbfl_no_encoding_invalid 4023 || orig_str.no_encoding == mbfl_no_encoding_pass) { 4024 orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); 4025 } 4026 4027 pstr = NULL; 4028 { 4029 mbfl_string tmpstr; 4030 4031 if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) { 4032 tmpstr.no_encoding=mbfl_no_encoding_8bit; 4033 pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc); 4034 efree(tmpstr.val); 4035 } 4036 } 4037 if (pstr != NULL) { 4038 message_buf = message = (char *)pstr->val; 
4039 } 4040 } else { 4041 /* this is not really an error, so it is allowed. */ 4042 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body"); 4043 message = NULL; 4044 } 4045 4046 /* other headers */ 4047#define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0" 4048#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain" 4049#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset=" 4050#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: " 4051 if (headers != NULL) { 4052 p = headers; 4053 n = headers_len; 4054 mbfl_memory_device_strncat(&device, p, n); 4055 if (n > 0 && p[n - 1] != '\n') { 4056 mbfl_memory_device_strncat(&device, "\n", 1); 4057 } 4058 } 4059 4060 if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) { 4061 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1); 4062 mbfl_memory_device_strncat(&device, "\n", 1); 4063 } 4064 4065 if (!suppressed_hdrs.cnt_type) { 4066 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1); 4067 4068 p = (char *)mbfl_no2preferred_mime_name(tran_cs); 4069 if (p != NULL) { 4070 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1); 4071 mbfl_memory_device_strcat(&device, p); 4072 } 4073 mbfl_memory_device_strncat(&device, "\n", 1); 4074 } 4075 if (!suppressed_hdrs.cnt_trans_enc) { 4076 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1); 4077 p = (char *)mbfl_no2preferred_mime_name(body_enc); 4078 if (p == NULL) { 4079 p = "7bit"; 4080 } 4081 mbfl_memory_device_strcat(&device, p); 4082 mbfl_memory_device_strncat(&device, "\n", 1); 4083 } 4084 4085 mbfl_memory_device_unput(&device); 4086 mbfl_memory_device_output('\0', &device); 4087 headers = (char *)device.buffer; 4088 4089 if (force_extra_parameters) { 4090 extra_cmd = php_escape_shell_cmd(force_extra_parameters); 4091 } 
else if (extra_cmd) { 4092 extra_cmd = php_escape_shell_cmd(extra_cmd); 4093 } 4094 4095 if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) { 4096 RETVAL_TRUE; 4097 } else { 4098 RETVAL_FALSE; 4099 } 4100 4101 if (extra_cmd) { 4102 efree(extra_cmd); 4103 } 4104 if (to_r != to) { 4105 efree(to_r); 4106 } 4107 if (subject_buf) { 4108 efree((void *)subject_buf); 4109 } 4110 if (message_buf) { 4111 efree((void *)message_buf); 4112 } 4113 mbfl_memory_device_clear(&device); 4114 zend_hash_destroy(&ht_headers); 4115} 4116 4117#undef SKIP_LONG_HEADER_SEP_MBSTRING 4118#undef MAIL_ASCIIZ_CHECK_MBSTRING 4119#undef APPEND_ONE_CHAR 4120#undef SEPARATE_SMART_STR 4121#undef PHP_MBSTR_MAIL_MIME_HEADER1 4122#undef PHP_MBSTR_MAIL_MIME_HEADER2 4123#undef PHP_MBSTR_MAIL_MIME_HEADER3 4124#undef PHP_MBSTR_MAIL_MIME_HEADER4 4125/* }}} */ 4126 4127/* {{{ proto mixed mb_get_info([string type]) 4128 Returns the current settings of mbstring */ 4129PHP_FUNCTION(mb_get_info) 4130{ 4131 char *typ = NULL; 4132 int typ_len, n; 4133 char *name; 4134 const struct mb_overload_def *over_func; 4135 zval *row1, *row2; 4136 const mbfl_language *lang = mbfl_no2language(MBSTRG(language)); 4137 enum mbfl_no_encoding *entry; 4138#ifdef ZEND_MULTIBYTE 4139 zval *row3; 4140#endif /* ZEND_MULTIBYTE */ 4141 4142 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) { 4143 RETURN_FALSE; 4144 } 4145 4146 if (!typ || !strcasecmp("all", typ)) { 4147 array_init(return_value); 4148 if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) { 4149 add_assoc_string(return_value, "internal_encoding", name, 1); 4150 } 4151 if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) { 4152 add_assoc_string(return_value, "http_input", name, 1); 4153 } 4154 if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) { 4155 add_assoc_string(return_value, "http_output", name, 1); 4156 } 4157 
if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) { 4158 add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1); 4159 } 4160 add_assoc_long(return_value, "func_overload", MBSTRG(func_overload)); 4161 if (MBSTRG(func_overload)){ 4162 over_func = &(mb_ovld[0]); 4163 MAKE_STD_ZVAL(row1); 4164 array_init(row1); 4165 while (over_func->type > 0) { 4166 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) { 4167 add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1); 4168 } 4169 over_func++; 4170 } 4171 add_assoc_zval(return_value, "func_overload_list", row1); 4172 } else { 4173 add_assoc_string(return_value, "func_overload_list", "no overload", 1); 4174 } 4175 if (lang != NULL) { 4176 if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) { 4177 add_assoc_string(return_value, "mail_charset", name, 1); 4178 } 4179 if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) { 4180 add_assoc_string(return_value, "mail_header_encoding", name, 1); 4181 } 4182 if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) { 4183 add_assoc_string(return_value, "mail_body_encoding", name, 1); 4184 } 4185 } 4186 add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars)); 4187 if (MBSTRG(encoding_translation)) { 4188 add_assoc_string(return_value, "encoding_translation", "On", 1); 4189 } else { 4190 add_assoc_string(return_value, "encoding_translation", "Off", 1); 4191 } 4192 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) { 4193 add_assoc_string(return_value, "language", name, 1); 4194 } 4195 n = MBSTRG(current_detect_order_list_size); 4196 entry = MBSTRG(current_detect_order_list); 4197 if(n > 0) { 4198 MAKE_STD_ZVAL(row2); 4199 array_init(row2); 4200 while (n > 0) { 4201 if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) { 4202 add_next_index_string(row2, name, 
1); 4203 } 4204 entry++; 4205 n--; 4206 } 4207 add_assoc_zval(return_value, "detect_order", row2); 4208 } 4209 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { 4210 add_assoc_string(return_value, "substitute_character", "none", 1); 4211 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) { 4212 add_assoc_string(return_value, "substitute_character", "long", 1); 4213 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) { 4214 add_assoc_string(return_value, "substitute_character", "entity", 1); 4215 } else { 4216 add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar)); 4217 } 4218 if (MBSTRG(strict_detection)) { 4219 add_assoc_string(return_value, "strict_detection", "On", 1); 4220 } else { 4221 add_assoc_string(return_value, "strict_detection", "Off", 1); 4222 } 4223#ifdef ZEND_MULTIBYTE 4224 entry = MBSTRG(script_encoding_list); 4225 n = MBSTRG(script_encoding_list_size); 4226 if(n > 0) { 4227 MAKE_STD_ZVAL(row3); 4228 array_init(row3); 4229 while (n > 0) { 4230 if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) { 4231 add_next_index_string(row3, name, 1); 4232 } 4233 entry++; 4234 n--; 4235 } 4236 add_assoc_zval(return_value, "script_encoding", row3); 4237 } 4238#endif /* ZEND_MULTIBYTE */ 4239 } else if (!strcasecmp("internal_encoding", typ)) { 4240 if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) { 4241 RETVAL_STRING(name, 1); 4242 } 4243 } else if (!strcasecmp("http_input", typ)) { 4244 if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) { 4245 RETVAL_STRING(name, 1); 4246 } 4247 } else if (!strcasecmp("http_output", typ)) { 4248 if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) { 4249 RETVAL_STRING(name, 1); 4250 } 4251 } else if (!strcasecmp("http_output_conv_mimetypes", typ)) { 4252 if ((name = (char 
*)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) { 4253 RETVAL_STRING(name, 1); 4254 } 4255 } else if (!strcasecmp("func_overload", typ)) { 4256 RETVAL_LONG(MBSTRG(func_overload)); 4257 } else if (!strcasecmp("func_overload_list", typ)) { 4258 if (MBSTRG(func_overload)){ 4259 over_func = &(mb_ovld[0]); 4260 array_init(return_value); 4261 while (over_func->type > 0) { 4262 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) { 4263 add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1); 4264 } 4265 over_func++; 4266 } 4267 } else { 4268 RETVAL_STRING("no overload", 1); 4269 } 4270 } else if (!strcasecmp("mail_charset", typ)) { 4271 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) { 4272 RETVAL_STRING(name, 1); 4273 } 4274 } else if (!strcasecmp("mail_header_encoding", typ)) { 4275 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) { 4276 RETVAL_STRING(name, 1); 4277 } 4278 } else if (!strcasecmp("mail_body_encoding", typ)) { 4279 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) { 4280 RETVAL_STRING(name, 1); 4281 } 4282 } else if (!strcasecmp("illegal_chars", typ)) { 4283 RETVAL_LONG(MBSTRG(illegalchars)); 4284 } else if (!strcasecmp("encoding_translation", typ)) { 4285 if (MBSTRG(encoding_translation)) { 4286 RETVAL_STRING("On", 1); 4287 } else { 4288 RETVAL_STRING("Off", 1); 4289 } 4290 } else if (!strcasecmp("language", typ)) { 4291 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) { 4292 RETVAL_STRING(name, 1); 4293 } 4294 } else if (!strcasecmp("detect_order", typ)) { 4295 n = MBSTRG(current_detect_order_list_size); 4296 entry = MBSTRG(current_detect_order_list); 4297 if(n > 0) { 4298 array_init(return_value); 4299 while (n > 0) { 4300 name = (char *)mbfl_no_encoding2name(*entry); 4301 if (name) { 4302 
add_next_index_string(return_value, name, 1); 4303 } 4304 entry++; 4305 n--; 4306 } 4307 } 4308 } else if (!strcasecmp("substitute_character", typ)) { 4309 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { 4310 RETVAL_STRING("none", 1); 4311 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) { 4312 RETVAL_STRING("long", 1); 4313 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) { 4314 RETVAL_STRING("entity", 1); 4315 } else { 4316 RETVAL_LONG(MBSTRG(current_filter_illegal_substchar)); 4317 } 4318 } else if (!strcasecmp("strict_detection", typ)) { 4319 if (MBSTRG(strict_detection)) { 4320 RETVAL_STRING("On", 1); 4321 } else { 4322 RETVAL_STRING("Off", 1); 4323 } 4324 } else { 4325#ifdef ZEND_MULTIBYTE 4326 if (!strcasecmp("script_encoding", typ)) { 4327 entry = MBSTRG(script_encoding_list); 4328 n = MBSTRG(script_encoding_list_size); 4329 if(n > 0) { 4330 array_init(return_value); 4331 while (n > 0) { 4332 name = (char *)mbfl_no_encoding2name(*entry); 4333 if (name) { 4334 add_next_index_string(return_value, name, 1); 4335 } 4336 entry++; 4337 n--; 4338 } 4339 } 4340 return; 4341 } 4342#endif /* ZEND_MULTIBYTE */ 4343 RETURN_FALSE; 4344 } 4345} 4346/* }}} */ 4347 4348/* {{{ proto bool mb_check_encoding([string var[, string encoding]]) 4349 Check if the string is valid for the specified encoding */ 4350PHP_FUNCTION(mb_check_encoding) 4351{ 4352 char *var = NULL; 4353 int var_len; 4354 char *enc = NULL; 4355 int enc_len; 4356 mbfl_buffer_converter *convd; 4357 enum mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding); 4358 mbfl_string string, result, *ret = NULL; 4359 long illegalchars = 0; 4360 4361 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) { 4362 RETURN_FALSE; 4363 } 4364 4365 if (var == NULL) { 4366 RETURN_BOOL(MBSTRG(illegalchars) == 0); 4367 } 4368 4369 if (enc != NULL) { 4370 
no_encoding = mbfl_name2no_encoding(enc); 4371 if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) { 4372 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc); 4373 RETURN_FALSE; 4374 } 4375 } 4376 4377 convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0); 4378 if (convd == NULL) { 4379 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter"); 4380 RETURN_FALSE; 4381 } 4382 mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); 4383 mbfl_buffer_converter_illegal_substchar(convd, 0); 4384 4385 /* initialize string */ 4386 mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding); 4387 mbfl_string_init(&result); 4388 4389 string.val = (unsigned char *)var; 4390 string.len = var_len; 4391 ret = mbfl_buffer_converter_feed_result(convd, &string, &result); 4392 illegalchars = mbfl_buffer_illegalchars(convd); 4393 mbfl_buffer_converter_delete(convd); 4394 4395 RETVAL_FALSE; 4396 if (ret != NULL) { 4397 if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { 4398 RETVAL_TRUE; 4399 } 4400 mbfl_string_clear(&result); 4401 } 4402} 4403/* }}} */ 4404 4405/* {{{ MBSTRING_API int php_mb_encoding_translation() */ 4406MBSTRING_API int php_mb_encoding_translation(TSRMLS_D) 4407{ 4408 return MBSTRG(encoding_translation); 4409} 4410/* }}} */ 4411 4412/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */ 4413MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc) 4414{ 4415 if (enc != NULL) { 4416 if (enc->flag & MBFL_ENCTYPE_MBCS) { 4417 if (enc->mblen_table != NULL) { 4418 if (s != NULL) return enc->mblen_table[*(unsigned char *)s]; 4419 } 4420 } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { 4421 return 2; 4422 } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { 4423 return 4; 4424 } 4425 } 4426 return 1; 4427} 4428/* }}} */ 4429 4430/* {{{ 
MBSTRING_API size_t php_mb_mbchar_bytes() */ 4431MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC) 4432{ 4433 return php_mb_mbchar_bytes_ex(s, 4434 mbfl_no2encoding(MBSTRG(internal_encoding))); 4435} 4436/* }}} */ 4437 4438/* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */ 4439MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc) 4440{ 4441 register const char *p = s; 4442 char *last=NULL; 4443 4444 if (nbytes == (size_t)-1) { 4445 size_t nb = 0; 4446 4447 while (*p != '\0') { 4448 if (nb == 0) { 4449 if ((unsigned char)*p == (unsigned char)c) { 4450 last = (char *)p; 4451 } 4452 nb = php_mb_mbchar_bytes_ex(p, enc); 4453 if (nb == 0) { 4454 return NULL; /* something is going wrong! */ 4455 } 4456 } 4457 --nb; 4458 ++p; 4459 } 4460 } else { 4461 register size_t bcnt = nbytes; 4462 register size_t nbytes_char; 4463 while (bcnt > 0) { 4464 if ((unsigned char)*p == (unsigned char)c) { 4465 last = (char *)p; 4466 } 4467 nbytes_char = php_mb_mbchar_bytes_ex(p, enc); 4468 if (bcnt < nbytes_char) { 4469 return NULL; 4470 } 4471 p += nbytes_char; 4472 bcnt -= nbytes_char; 4473 } 4474 } 4475 return last; 4476} 4477/* }}} */ 4478 4479/* {{{ MBSTRING_API char *php_mb_safe_strrchr() */ 4480MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC) 4481{ 4482 return php_mb_safe_strrchr_ex(s, c, nbytes, 4483 mbfl_no2encoding(MBSTRG(internal_encoding))); 4484} 4485/* }}} */ 4486 4487/* {{{ MBSTRING_API char *php_mb_strrchr() */ 4488MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC) 4489{ 4490 return php_mb_safe_strrchr(s, c, -1 TSRMLS_CC); 4491} 4492/* }}} */ 4493 4494/* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */ 4495MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC) 4496{ 4497 4498 if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){ 4499 return php_mb_mbchar_bytes_ex(s, 4500 
mbfl_no2encoding(MBSTRG(http_input_identify))); 4501 } else { 4502 return php_mb_mbchar_bytes_ex(s, 4503 mbfl_no2encoding(MBSTRG(internal_encoding))); 4504 } 4505} 4506/* }}} */ 4507 4508/* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */ 4509MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC) 4510{ 4511 int i; 4512 mbfl_string string, result, *ret = NULL; 4513 enum mbfl_no_encoding from_encoding, to_encoding; 4514 mbfl_buffer_converter *convd; 4515 4516 if (encoding_to) { 4517 /* new encoding */ 4518 to_encoding = mbfl_name2no_encoding(encoding_to); 4519 if (to_encoding == mbfl_no_encoding_invalid) { 4520 return -1; 4521 } 4522 } else { 4523 to_encoding = MBSTRG(current_internal_encoding); 4524 } 4525 if (encoding_from) { 4526 /* old encoding */ 4527 from_encoding = mbfl_name2no_encoding(encoding_from); 4528 if (from_encoding == mbfl_no_encoding_invalid) { 4529 return -1; 4530 } 4531 } else { 4532 from_encoding = MBSTRG(http_input_identify); 4533 } 4534 4535 if (from_encoding == mbfl_no_encoding_pass) { 4536 return 0; 4537 } 4538 4539 /* initialize string */ 4540 mbfl_string_init(&string); 4541 mbfl_string_init(&result); 4542 string.no_encoding = from_encoding; 4543 string.no_language = MBSTRG(language); 4544 4545 for (i=0; i<num; i++){ 4546 string.val = (unsigned char *)str[i]; 4547 string.len = len[i]; 4548 4549 /* initialize converter */ 4550 convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len); 4551 if (convd == NULL) { 4552 return -1; 4553 } 4554 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); 4555 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); 4556 4557 /* do it */ 4558 ret = mbfl_buffer_converter_feed_result(convd, &string, &result); 4559 if (ret != NULL) { 4560 efree(str[i]); 4561 str[i] = (char *)ret->val; 4562 len[i] = (int)ret->len; 4563 } 4564 4565 
MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); 4566 mbfl_buffer_converter_delete(convd); 4567 } 4568 4569 return ret ? 0 : -1; 4570} 4571/* }}} */ 4572 4573/* {{{ MBSTRING_API int php_mb_gpc_encoding_detector() 4574 */ 4575MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC) 4576{ 4577 mbfl_string string; 4578 enum mbfl_no_encoding *elist; 4579 enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid; 4580 mbfl_encoding_detector *identd = NULL; 4581 4582 int size; 4583 enum mbfl_no_encoding *list; 4584 4585 if (MBSTRG(http_input_list_size) == 1 && 4586 MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) { 4587 MBSTRG(http_input_identify) = mbfl_no_encoding_pass; 4588 return SUCCESS; 4589 } 4590 4591 if (MBSTRG(http_input_list_size) == 1 && 4592 MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto && 4593 mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) { 4594 MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0]; 4595 return SUCCESS; 4596 } 4597 4598 if (arg_list && strlen(arg_list)>0) { 4599 /* make encoding list */ 4600 list = NULL; 4601 size = 0; 4602 php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC); 4603 4604 if (size > 0 && list != NULL) { 4605 elist = list; 4606 } else { 4607 elist = MBSTRG(current_detect_order_list); 4608 size = MBSTRG(current_detect_order_list_size); 4609 if (size <= 0){ 4610 elist = MBSTRG(default_detect_order_list); 4611 size = MBSTRG(default_detect_order_list_size); 4612 } 4613 } 4614 } else { 4615 elist = MBSTRG(current_detect_order_list); 4616 size = MBSTRG(current_detect_order_list_size); 4617 if (size <= 0){ 4618 elist = MBSTRG(default_detect_order_list); 4619 size = MBSTRG(default_detect_order_list_size); 4620 } 4621 } 4622 4623 mbfl_string_init(&string); 4624 string.no_language = MBSTRG(language); 4625 4626 identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection)); 4627 4628 if (identd) { 4629 
int n = 0; 4630 while(n < num){ 4631 string.val = (unsigned char *)arg_string[n]; 4632 string.len = arg_length[n]; 4633 if (mbfl_encoding_detector_feed(identd, &string)) { 4634 break; 4635 } 4636 n++; 4637 } 4638 encoding = mbfl_encoding_detector_judge(identd); 4639 mbfl_encoding_detector_delete(identd); 4640 } 4641 4642 if (encoding != mbfl_no_encoding_invalid) { 4643 MBSTRG(http_input_identify) = encoding; 4644 return SUCCESS; 4645 } else { 4646 return FAILURE; 4647 } 4648} 4649/* }}} */ 4650 4651/* {{{ MBSTRING_API int php_mb_stripos() 4652 */ 4653MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC) 4654{ 4655 int n; 4656 mbfl_string haystack, needle; 4657 n = -1; 4658 4659 mbfl_string_init(&haystack); 4660 mbfl_string_init(&needle); 4661 haystack.no_language = MBSTRG(language); 4662 haystack.no_encoding = MBSTRG(current_internal_encoding); 4663 needle.no_language = MBSTRG(language); 4664 needle.no_encoding = MBSTRG(current_internal_encoding); 4665 4666 do { 4667 size_t len = 0; 4668 haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC); 4669 haystack.len = len; 4670 4671 if (!haystack.val) { 4672 break; 4673 } 4674 4675 if (haystack.len <= 0) { 4676 break; 4677 } 4678 4679 needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC); 4680 needle.len = len; 4681 4682 if (!needle.val) { 4683 break; 4684 } 4685 4686 if (needle.len <= 0) { 4687 break; 4688 } 4689 4690 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding); 4691 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 4692 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding); 4693 break; 4694 } 4695 4696 { 4697 int 
haystack_char_len = mbfl_strlen(&haystack); 4698 4699 if (mode) { 4700 if ((offset > 0 && offset > haystack_char_len) || 4701 (offset < 0 && -offset > haystack_char_len)) { 4702 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string"); 4703 break; 4704 } 4705 } else { 4706 if (offset < 0 || offset > haystack_char_len) { 4707 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string"); 4708 break; 4709 } 4710 } 4711 } 4712 4713 n = mbfl_strpos(&haystack, &needle, offset, mode); 4714 } while(0); 4715 4716 if (haystack.val) { 4717 efree(haystack.val); 4718 } 4719 4720 if (needle.val) { 4721 efree(needle.val); 4722 } 4723 4724 return n; 4725} 4726/* }}} */ 4727 4728#ifdef ZEND_MULTIBYTE 4729/* {{{ php_mb_set_zend_encoding() */ 4730static int php_mb_set_zend_encoding(TSRMLS_D) 4731{ 4732 /* 'd better use mbfl_memory_device? */ 4733 char *name, *list = NULL; 4734 int n, *entry, list_size = 0; 4735 zend_encoding_detector encoding_detector; 4736 zend_encoding_converter encoding_converter; 4737 zend_encoding_oddlen encoding_oddlen; 4738 4739 /* notify script encoding to Zend Engine */ 4740 entry = MBSTRG(script_encoding_list); 4741 n = MBSTRG(script_encoding_list_size); 4742 while (n > 0) { 4743 name = (char *)mbfl_no_encoding2name(*entry); 4744 if (name) { 4745 list_size += strlen(name) + 1; 4746 if (!list) { 4747 list = (char*)emalloc(list_size); 4748 *list = '\0'; 4749 } else { 4750 list = (char*)erealloc(list, list_size); 4751 strcat(list, ","); 4752 } 4753 strcat(list, name); 4754 } 4755 entry++; 4756 n--; 4757 } 4758 zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC); 4759 if (list) { 4760 efree(list); 4761 } 4762 encoding_detector = php_mb_encoding_detector; 4763 encoding_converter = php_mb_encoding_converter; 4764 encoding_oddlen = php_mb_oddlen; 4765 4766 /* TODO: make independent from mbstring.encoding_translation? 
*/ 4767 if (MBSTRG(encoding_translation)) { 4768 /* notify internal encoding to Zend Engine */ 4769 name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding)); 4770 zend_multibyte_set_internal_encoding(name TSRMLS_CC); 4771 } 4772 4773 zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC); 4774 4775 return 0; 4776} 4777/* }}} */ 4778 4779/* {{{ char *php_mb_encoding_detector() 4780 * Interface for Zend Engine 4781 */ 4782static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC) 4783{ 4784 mbfl_string string; 4785 const char *ret; 4786 enum mbfl_no_encoding *elist; 4787 int size, *list; 4788 4789 /* make encoding list */ 4790 list = NULL; 4791 size = 0; 4792 php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC); 4793 if (size <= 0) { 4794 return NULL; 4795 } 4796 if (size > 0 && list != NULL) { 4797 elist = list; 4798 } else { 4799 elist = MBSTRG(current_detect_order_list); 4800 size = MBSTRG(current_detect_order_list_size); 4801 } 4802 4803 mbfl_string_init(&string); 4804 string.no_language = MBSTRG(language); 4805 string.val = (unsigned char *)arg_string; 4806 string.len = arg_length; 4807 ret = mbfl_identify_encoding_name(&string, elist, size, 0); 4808 if (list != NULL) { 4809 efree((void *)list); 4810 } 4811 if (ret != NULL) { 4812 return estrdup(ret); 4813 } else { 4814 return NULL; 4815 } 4816} 4817/* }}} */ 4818 4819/* {{{ int php_mb_encoding_converter() */ 4820static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC) 4821{ 4822 mbfl_string string, result, *ret; 4823 enum mbfl_no_encoding from_encoding, to_encoding; 4824 mbfl_buffer_converter *convd; 4825 4826 /* new encoding */ 4827 to_encoding = mbfl_name2no_encoding(encoding_to); 4828 if (to_encoding == mbfl_no_encoding_invalid) { 4829 return 
-1; 4830 } 4831 /* old encoding */ 4832 from_encoding = mbfl_name2no_encoding(encoding_from); 4833 if (from_encoding == mbfl_no_encoding_invalid) { 4834 return -1; 4835 } 4836 /* initialize string */ 4837 mbfl_string_init(&string); 4838 mbfl_string_init(&result); 4839 string.no_encoding = from_encoding; 4840 string.no_language = MBSTRG(language); 4841 string.val = (unsigned char*)from; 4842 string.len = from_length; 4843 4844 /* initialize converter */ 4845 convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len); 4846 if (convd == NULL) { 4847 return -1; 4848 } 4849 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); 4850 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); 4851 4852 /* do it */ 4853 ret = mbfl_buffer_converter_feed_result(convd, &string, &result); 4854 if (ret != NULL) { 4855 *to = ret->val; 4856 *to_length = ret->len; 4857 } 4858 4859 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); 4860 mbfl_buffer_converter_delete(convd); 4861 4862 return ret ? 0 : -1; 4863} 4864/* }}} */ 4865 4866/* {{{ int php_mb_oddlen() 4867 * returns number of odd (e.g. appears only first byte of multibyte 4868 * character) chars 4869 */ 4870static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC) 4871{ 4872 mbfl_string mb_string; 4873 4874 mbfl_string_init(&mb_string); 4875 mb_string.no_language = MBSTRG(language); 4876 mb_string.no_encoding = mbfl_name2no_encoding(encoding); 4877 mb_string.val = (unsigned char *)string; 4878 mb_string.len = length; 4879 4880 if (mb_string.no_encoding == mbfl_no_encoding_invalid) { 4881 return 0; 4882 } 4883 return mbfl_oddlen(&mb_string); 4884} 4885/* }}} */ 4886#endif /* ZEND_MULTIBYTE */ 4887 4888#endif /* HAVE_MBSTRING */ 4889 4890/* 4891 * Local variables: 4892 * tab-width: 4 4893 * c-basic-offset: 4 4894 * End: 4895 * vim600: fdm=marker 4896 * vim: noet sw=4 ts=4 4897 */ 4898