1/* 2 +----------------------------------------------------------------------+ 3 | PHP Version 5 | 4 +----------------------------------------------------------------------+ 5 | Copyright (c) 1997-2013 The PHP Group | 6 +----------------------------------------------------------------------+ 7 | This source file is subject to version 3.01 of the PHP license, | 8 | that is bundled with this package in the file LICENSE, and is | 9 | available through the world-wide-web at the following url: | 10 | http://www.php.net/license/3_01.txt | 11 | If you did not receive a copy of the PHP license and are unable to | 12 | obtain it through the world-wide-web, please send a note to | 13 | license@php.net so we can mail you a copy immediately. | 14 +----------------------------------------------------------------------+ 15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> | 16 | Rui Hirokawa <hirokawa@php.net> | 17 +----------------------------------------------------------------------+ 18 */ 19 20/* $Id$ */ 21 22/* 23 * PHP 4 Multibyte String module "mbstring" 24 * 25 * History: 26 * 2000.5.19 Release php-4.0RC2_jstring-1.0 27 * 2001.4.1 Release php4_jstring-1.0.91 28 * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group) 29 * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net) 30 */ 31 32/* 33 * PHP3 Internationalization support program. 34 * 35 * Copyright (c) 1999,2000 by the PHP3 internationalization team. 36 * All rights reserved. 37 * 38 * See README_PHP3-i18n-ja for more detail. 
39 * 40 * Authors: 41 * Hironori Sato <satoh@jpnnet.com> 42 * Shigeru Kanemoto <sgk@happysize.co.jp> 43 * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> 44 * Rui Hirokawa <rui_hirokawa@ybb.ne.jp> 45 */ 46 47/* {{{ includes */ 48#ifdef HAVE_CONFIG_H 49#include "config.h" 50#endif 51 52#include "php.h" 53#include "php_ini.h" 54#include "php_variables.h" 55#include "mbstring.h" 56#include "ext/standard/php_string.h" 57#include "ext/standard/php_mail.h" 58#include "ext/standard/exec.h" 59#include "ext/standard/php_smart_str.h" 60#include "ext/standard/url.h" 61#include "main/php_output.h" 62#include "ext/standard/info.h" 63 64#include "libmbfl/mbfl/mbfl_allocators.h" 65#include "libmbfl/mbfl/mbfilter_pass.h" 66 67#include "php_variables.h" 68#include "php_globals.h" 69#include "rfc1867.h" 70#include "php_content_types.h" 71#include "SAPI.h" 72#include "php_unicode.h" 73#include "TSRM.h" 74 75#include "mb_gpc.h" 76 77#if HAVE_MBREGEX 78#include "php_mbregex.h" 79#endif 80 81#include "zend_multibyte.h" 82 83#if HAVE_ONIG 84#include "php_onig_compat.h" 85#include <oniguruma.h> 86#undef UChar 87#elif HAVE_PCRE || HAVE_BUNDLED_PCRE 88#include "ext/pcre/php_pcre.h" 89#endif 90/* }}} */ 91 92#if HAVE_MBSTRING 93 94/* {{{ prototypes */ 95ZEND_DECLARE_MODULE_GLOBALS(mbstring) 96 97static PHP_GINIT_FUNCTION(mbstring); 98static PHP_GSHUTDOWN_FUNCTION(mbstring); 99 100static void php_mb_populate_current_detect_order_list(TSRMLS_D); 101 102static int php_mb_encoding_translation(TSRMLS_D); 103 104static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC); 105 106static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC); 107 108/* }}} */ 109 110/* {{{ php_mb_default_identify_list */ 111typedef struct _php_mb_nls_ident_list { 112 enum mbfl_no_language lang; 113 const enum mbfl_no_encoding *list; 114 size_t list_size; 115} php_mb_nls_ident_list; 116 117static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = 
{ 118 mbfl_no_encoding_ascii, 119 mbfl_no_encoding_jis, 120 mbfl_no_encoding_utf8, 121 mbfl_no_encoding_euc_jp, 122 mbfl_no_encoding_sjis 123}; 124 125static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = { 126 mbfl_no_encoding_ascii, 127 mbfl_no_encoding_utf8, 128 mbfl_no_encoding_euc_cn, 129 mbfl_no_encoding_cp936 130}; 131 132static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = { 133 mbfl_no_encoding_ascii, 134 mbfl_no_encoding_utf8, 135 mbfl_no_encoding_euc_tw, 136 mbfl_no_encoding_big5 137}; 138 139static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = { 140 mbfl_no_encoding_ascii, 141 mbfl_no_encoding_utf8, 142 mbfl_no_encoding_euc_kr, 143 mbfl_no_encoding_uhc 144}; 145 146static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = { 147 mbfl_no_encoding_ascii, 148 mbfl_no_encoding_utf8, 149 mbfl_no_encoding_koi8r, 150 mbfl_no_encoding_cp1251, 151 mbfl_no_encoding_cp866 152}; 153 154static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = { 155 mbfl_no_encoding_ascii, 156 mbfl_no_encoding_utf8, 157 mbfl_no_encoding_armscii8 158}; 159 160static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = { 161 mbfl_no_encoding_ascii, 162 mbfl_no_encoding_utf8, 163 mbfl_no_encoding_cp1254, 164 mbfl_no_encoding_8859_9 165}; 166 167static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = { 168 mbfl_no_encoding_ascii, 169 mbfl_no_encoding_utf8, 170 mbfl_no_encoding_koi8u 171}; 172 173static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = { 174 mbfl_no_encoding_ascii, 175 mbfl_no_encoding_utf8 176}; 177 178 179static const php_mb_nls_ident_list php_mb_default_identify_list[] = { 180 { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) }, 181 { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / 
sizeof(php_mb_default_identify_list_kr[0]) }, 182 { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) }, 183 { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) }, 184 { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) }, 185 { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) }, 186 { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) }, 187 { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) }, 188 { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) } 189}; 190 191/* }}} */ 192 193/* {{{ mb_overload_def mb_ovld[] */ 194static const struct mb_overload_def mb_ovld[] = { 195 {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"}, 196 {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"}, 197 {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"}, 198 {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"}, 199 {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"}, 200 {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"}, 201 {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"}, 202 {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"}, 203 {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"}, 204 {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"}, 205 {MB_OVERLOAD_STRING, "strtolower", 
"mb_strtolower", "mb_orig_strtolower"}, 206 {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"}, 207 {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"}, 208#if HAVE_MBREGEX 209 {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"}, 210 {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"}, 211 {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"}, 212 {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"}, 213 {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"}, 214#endif 215 {0, NULL, NULL, NULL} 216}; 217/* }}} */ 218 219/* {{{ arginfo */ 220ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0) 221 ZEND_ARG_INFO(0, language) 222ZEND_END_ARG_INFO() 223 224ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0) 225 ZEND_ARG_INFO(0, encoding) 226ZEND_END_ARG_INFO() 227 228ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0) 229 ZEND_ARG_INFO(0, type) 230ZEND_END_ARG_INFO() 231 232ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0) 233 ZEND_ARG_INFO(0, encoding) 234ZEND_END_ARG_INFO() 235 236ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0) 237 ZEND_ARG_INFO(0, encoding) 238ZEND_END_ARG_INFO() 239 240ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0) 241 ZEND_ARG_INFO(0, substchar) 242ZEND_END_ARG_INFO() 243 244ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1) 245 ZEND_ARG_INFO(0, encoding) 246ZEND_END_ARG_INFO() 247 248ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1) 249 ZEND_ARG_INFO(0, encoded_string) 250 ZEND_ARG_INFO(1, result) 251ZEND_END_ARG_INFO() 252 253ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2) 254 ZEND_ARG_INFO(0, contents) 255 ZEND_ARG_INFO(0, status) 256ZEND_END_ARG_INFO() 257 258ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1) 259 ZEND_ARG_INFO(0, str) 260 ZEND_ARG_INFO(0, encoding) 261ZEND_END_ARG_INFO() 262 263ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2) 264 
ZEND_ARG_INFO(0, haystack) 265 ZEND_ARG_INFO(0, needle) 266 ZEND_ARG_INFO(0, offset) 267 ZEND_ARG_INFO(0, encoding) 268ZEND_END_ARG_INFO() 269 270ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2) 271 ZEND_ARG_INFO(0, haystack) 272 ZEND_ARG_INFO(0, needle) 273 ZEND_ARG_INFO(0, offset) 274 ZEND_ARG_INFO(0, encoding) 275ZEND_END_ARG_INFO() 276 277ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2) 278 ZEND_ARG_INFO(0, haystack) 279 ZEND_ARG_INFO(0, needle) 280 ZEND_ARG_INFO(0, offset) 281 ZEND_ARG_INFO(0, encoding) 282ZEND_END_ARG_INFO() 283 284ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2) 285 ZEND_ARG_INFO(0, haystack) 286 ZEND_ARG_INFO(0, needle) 287 ZEND_ARG_INFO(0, offset) 288 ZEND_ARG_INFO(0, encoding) 289ZEND_END_ARG_INFO() 290 291ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2) 292 ZEND_ARG_INFO(0, haystack) 293 ZEND_ARG_INFO(0, needle) 294 ZEND_ARG_INFO(0, part) 295 ZEND_ARG_INFO(0, encoding) 296ZEND_END_ARG_INFO() 297 298ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2) 299 ZEND_ARG_INFO(0, haystack) 300 ZEND_ARG_INFO(0, needle) 301 ZEND_ARG_INFO(0, part) 302 ZEND_ARG_INFO(0, encoding) 303ZEND_END_ARG_INFO() 304 305ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2) 306 ZEND_ARG_INFO(0, haystack) 307 ZEND_ARG_INFO(0, needle) 308 ZEND_ARG_INFO(0, part) 309 ZEND_ARG_INFO(0, encoding) 310ZEND_END_ARG_INFO() 311 312ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2) 313 ZEND_ARG_INFO(0, haystack) 314 ZEND_ARG_INFO(0, needle) 315 ZEND_ARG_INFO(0, part) 316 ZEND_ARG_INFO(0, encoding) 317ZEND_END_ARG_INFO() 318 319ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2) 320 ZEND_ARG_INFO(0, haystack) 321 ZEND_ARG_INFO(0, needle) 322 ZEND_ARG_INFO(0, encoding) 323ZEND_END_ARG_INFO() 324 325ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2) 326 ZEND_ARG_INFO(0, str) 327 ZEND_ARG_INFO(0, start) 328 ZEND_ARG_INFO(0, length) 329 ZEND_ARG_INFO(0, encoding) 330ZEND_END_ARG_INFO() 331 332ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2) 333 ZEND_ARG_INFO(0, 
str) 334 ZEND_ARG_INFO(0, start) 335 ZEND_ARG_INFO(0, length) 336 ZEND_ARG_INFO(0, encoding) 337ZEND_END_ARG_INFO() 338 339ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1) 340 ZEND_ARG_INFO(0, str) 341 ZEND_ARG_INFO(0, encoding) 342ZEND_END_ARG_INFO() 343 344ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3) 345 ZEND_ARG_INFO(0, str) 346 ZEND_ARG_INFO(0, start) 347 ZEND_ARG_INFO(0, width) 348 ZEND_ARG_INFO(0, trimmarker) 349 ZEND_ARG_INFO(0, encoding) 350ZEND_END_ARG_INFO() 351 352ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2) 353 ZEND_ARG_INFO(0, str) 354 ZEND_ARG_INFO(0, to) 355 ZEND_ARG_INFO(0, from) 356ZEND_END_ARG_INFO() 357 358ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2) 359 ZEND_ARG_INFO(0, sourcestring) 360 ZEND_ARG_INFO(0, mode) 361 ZEND_ARG_INFO(0, encoding) 362ZEND_END_ARG_INFO() 363 364ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1) 365 ZEND_ARG_INFO(0, sourcestring) 366 ZEND_ARG_INFO(0, encoding) 367ZEND_END_ARG_INFO() 368 369ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1) 370 ZEND_ARG_INFO(0, sourcestring) 371 ZEND_ARG_INFO(0, encoding) 372ZEND_END_ARG_INFO() 373 374ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1) 375 ZEND_ARG_INFO(0, str) 376 ZEND_ARG_INFO(0, encoding_list) 377 ZEND_ARG_INFO(0, strict) 378ZEND_END_ARG_INFO() 379 380ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0) 381ZEND_END_ARG_INFO() 382 383ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1) 384 ZEND_ARG_INFO(0, encoding) 385ZEND_END_ARG_INFO() 386 387ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1) 388 ZEND_ARG_INFO(0, str) 389 ZEND_ARG_INFO(0, charset) 390 ZEND_ARG_INFO(0, transfer) 391 ZEND_ARG_INFO(0, linefeed) 392 ZEND_ARG_INFO(0, indent) 393ZEND_END_ARG_INFO() 394 395ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1) 396 ZEND_ARG_INFO(0, string) 397ZEND_END_ARG_INFO() 398 399ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1) 400 ZEND_ARG_INFO(0, str) 401 ZEND_ARG_INFO(0, option) 
402 ZEND_ARG_INFO(0, encoding) 403ZEND_END_ARG_INFO() 404 405ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 1, 0, 3) 406 ZEND_ARG_INFO(0, to) 407 ZEND_ARG_INFO(0, from) 408 ZEND_ARG_INFO(1, ...) 409ZEND_END_ARG_INFO() 410 411ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2) 412 ZEND_ARG_INFO(0, string) 413 ZEND_ARG_INFO(0, convmap) 414 ZEND_ARG_INFO(0, encoding) 415 ZEND_ARG_INFO(0, is_hex) 416ZEND_END_ARG_INFO() 417 418ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2) 419 ZEND_ARG_INFO(0, string) 420 ZEND_ARG_INFO(0, convmap) 421 ZEND_ARG_INFO(0, encoding) 422ZEND_END_ARG_INFO() 423 424ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3) 425 ZEND_ARG_INFO(0, to) 426 ZEND_ARG_INFO(0, subject) 427 ZEND_ARG_INFO(0, message) 428 ZEND_ARG_INFO(0, additional_headers) 429 ZEND_ARG_INFO(0, additional_parameters) 430ZEND_END_ARG_INFO() 431 432ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0) 433 ZEND_ARG_INFO(0, type) 434ZEND_END_ARG_INFO() 435 436ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0) 437 ZEND_ARG_INFO(0, var) 438 ZEND_ARG_INFO(0, encoding) 439ZEND_END_ARG_INFO() 440 441ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0) 442 ZEND_ARG_INFO(0, encoding) 443ZEND_END_ARG_INFO() 444 445ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2) 446 ZEND_ARG_INFO(0, pattern) 447 ZEND_ARG_INFO(0, string) 448 ZEND_ARG_INFO(1, registers) 449ZEND_END_ARG_INFO() 450 451ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2) 452 ZEND_ARG_INFO(0, pattern) 453 ZEND_ARG_INFO(0, string) 454 ZEND_ARG_INFO(1, registers) 455ZEND_END_ARG_INFO() 456 457ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3) 458 ZEND_ARG_INFO(0, pattern) 459 ZEND_ARG_INFO(0, replacement) 460 ZEND_ARG_INFO(0, string) 461 ZEND_ARG_INFO(0, option) 462ZEND_END_ARG_INFO() 463 464ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3) 465 ZEND_ARG_INFO(0, pattern) 466 ZEND_ARG_INFO(0, replacement) 467 ZEND_ARG_INFO(0, string) 468ZEND_END_ARG_INFO() 469 
470ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3) 471 ZEND_ARG_INFO(0, pattern) 472 ZEND_ARG_INFO(0, callback) 473 ZEND_ARG_INFO(0, string) 474 ZEND_ARG_INFO(0, option) 475ZEND_END_ARG_INFO() 476 477ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2) 478 ZEND_ARG_INFO(0, pattern) 479 ZEND_ARG_INFO(0, string) 480 ZEND_ARG_INFO(0, limit) 481ZEND_END_ARG_INFO() 482 483ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2) 484 ZEND_ARG_INFO(0, pattern) 485 ZEND_ARG_INFO(0, string) 486 ZEND_ARG_INFO(0, option) 487ZEND_END_ARG_INFO() 488 489ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0) 490 ZEND_ARG_INFO(0, pattern) 491 ZEND_ARG_INFO(0, option) 492ZEND_END_ARG_INFO() 493 494ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0) 495 ZEND_ARG_INFO(0, pattern) 496 ZEND_ARG_INFO(0, option) 497ZEND_END_ARG_INFO() 498 499ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0) 500 ZEND_ARG_INFO(0, pattern) 501 ZEND_ARG_INFO(0, option) 502ZEND_END_ARG_INFO() 503 504ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1) 505 ZEND_ARG_INFO(0, string) 506 ZEND_ARG_INFO(0, pattern) 507 ZEND_ARG_INFO(0, option) 508ZEND_END_ARG_INFO() 509 510ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0) 511ZEND_END_ARG_INFO() 512 513ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0) 514ZEND_END_ARG_INFO() 515 516ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1) 517 ZEND_ARG_INFO(0, position) 518ZEND_END_ARG_INFO() 519 520ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0) 521 ZEND_ARG_INFO(0, options) 522ZEND_END_ARG_INFO() 523/* }}} */ 524 525/* {{{ zend_function_entry mbstring_functions[] */ 526const zend_function_entry mbstring_functions[] = { 527 PHP_FE(mb_convert_case, arginfo_mb_convert_case) 528 PHP_FE(mb_strtoupper, arginfo_mb_strtoupper) 529 PHP_FE(mb_strtolower, arginfo_mb_strtolower) 530 PHP_FE(mb_language, arginfo_mb_language) 531 PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding) 532 PHP_FE(mb_http_input, 
arginfo_mb_http_input) 533 PHP_FE(mb_http_output, arginfo_mb_http_output) 534 PHP_FE(mb_detect_order, arginfo_mb_detect_order) 535 PHP_FE(mb_substitute_character, arginfo_mb_substitute_character) 536 PHP_FE(mb_parse_str, arginfo_mb_parse_str) 537 PHP_FE(mb_output_handler, arginfo_mb_output_handler) 538 PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name) 539 PHP_FE(mb_strlen, arginfo_mb_strlen) 540 PHP_FE(mb_strpos, arginfo_mb_strpos) 541 PHP_FE(mb_strrpos, arginfo_mb_strrpos) 542 PHP_FE(mb_stripos, arginfo_mb_stripos) 543 PHP_FE(mb_strripos, arginfo_mb_strripos) 544 PHP_FE(mb_strstr, arginfo_mb_strstr) 545 PHP_FE(mb_strrchr, arginfo_mb_strrchr) 546 PHP_FE(mb_stristr, arginfo_mb_stristr) 547 PHP_FE(mb_strrichr, arginfo_mb_strrichr) 548 PHP_FE(mb_substr_count, arginfo_mb_substr_count) 549 PHP_FE(mb_substr, arginfo_mb_substr) 550 PHP_FE(mb_strcut, arginfo_mb_strcut) 551 PHP_FE(mb_strwidth, arginfo_mb_strwidth) 552 PHP_FE(mb_strimwidth, arginfo_mb_strimwidth) 553 PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding) 554 PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding) 555 PHP_FE(mb_list_encodings, arginfo_mb_list_encodings) 556 PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases) 557 PHP_FE(mb_convert_kana, arginfo_mb_convert_kana) 558 PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader) 559 PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader) 560 PHP_FE(mb_convert_variables, arginfo_mb_convert_variables) 561 PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity) 562 PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity) 563 PHP_FE(mb_send_mail, arginfo_mb_send_mail) 564 PHP_FE(mb_get_info, arginfo_mb_get_info) 565 PHP_FE(mb_check_encoding, arginfo_mb_check_encoding) 566#if HAVE_MBREGEX 567 PHP_MBREGEX_FUNCTION_ENTRIES 568#endif 569 PHP_FE_END 570}; 571/* }}} */ 572 573/* {{{ zend_module_entry mbstring_module_entry */ 574zend_module_entry mbstring_module_entry = { 575 STANDARD_MODULE_HEADER, 576 "mbstring", 577 
mbstring_functions, 578 PHP_MINIT(mbstring), 579 PHP_MSHUTDOWN(mbstring), 580 PHP_RINIT(mbstring), 581 PHP_RSHUTDOWN(mbstring), 582 PHP_MINFO(mbstring), 583 NO_VERSION_YET, 584 PHP_MODULE_GLOBALS(mbstring), 585 PHP_GINIT(mbstring), 586 PHP_GSHUTDOWN(mbstring), 587 NULL, 588 STANDARD_MODULE_PROPERTIES_EX 589}; 590/* }}} */ 591 592/* {{{ static sapi_post_entry php_post_entries[] */ 593static sapi_post_entry php_post_entries[] = { 594 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler }, 595 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler }, 596 { NULL, 0, NULL, NULL } 597}; 598/* }}} */ 599 600#ifdef COMPILE_DL_MBSTRING 601ZEND_GET_MODULE(mbstring) 602#endif 603 604/* {{{ allocators */ 605static void *_php_mb_allocators_malloc(unsigned int sz) 606{ 607 return emalloc(sz); 608} 609 610static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz) 611{ 612 return erealloc(ptr, sz); 613} 614 615static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem) 616{ 617 return ecalloc(nelems, szelem); 618} 619 620static void _php_mb_allocators_free(void *ptr) 621{ 622 efree(ptr); 623} 624 625static void *_php_mb_allocators_pmalloc(unsigned int sz) 626{ 627 return pemalloc(sz, 1); 628} 629 630static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz) 631{ 632 return perealloc(ptr, sz, 1); 633} 634 635static void _php_mb_allocators_pfree(void *ptr) 636{ 637 pefree(ptr, 1); 638} 639 640static mbfl_allocators _php_mb_allocators = { 641 _php_mb_allocators_malloc, 642 _php_mb_allocators_realloc, 643 _php_mb_allocators_calloc, 644 _php_mb_allocators_free, 645 _php_mb_allocators_pmalloc, 646 _php_mb_allocators_prealloc, 647 _php_mb_allocators_pfree 648}; 649/* }}} */ 650 651/* {{{ static sapi_post_entry mbstr_post_entries[] */ 652static sapi_post_entry mbstr_post_entries[] = { 653 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, 
sapi_read_standard_form_data, php_mb_post_handler }, 654 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler }, 655 { NULL, 0, NULL, NULL } 656}; 657/* }}} */ 658 659/* {{{ static int php_mb_parse_encoding_list() 660 * Return 0 if input contains any illegal encoding, otherwise 1. 661 * Even if any illegal encoding is detected the result may contain a list 662 * of parsed encodings. 663 */ 664static int 665php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) 666{ 667 int size, bauto, ret = SUCCESS; 668 size_t n; 669 char *p, *p1, *p2, *endp, *tmpstr; 670 const mbfl_encoding **entry, **list; 671 672 list = NULL; 673 if (value == NULL || value_length <= 0) { 674 if (return_list) { 675 *return_list = NULL; 676 } 677 if (return_size) { 678 *return_size = 0; 679 } 680 return FAILURE; 681 } else { 682 /* copy the value string for work */ 683 if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) { 684 tmpstr = (char *)estrndup(value+1, value_length-2); 685 value_length -= 2; 686 } 687 else 688 tmpstr = (char *)estrndup(value, value_length); 689 if (tmpstr == NULL) { 690 return FAILURE; 691 } 692 /* count the number of listed encoding names */ 693 endp = tmpstr + value_length; 694 n = 1; 695 p1 = tmpstr; 696 while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) { 697 p1 = p2 + 1; 698 n++; 699 } 700 size = n + MBSTRG(default_detect_order_list_size); 701 /* make list */ 702 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent); 703 if (list != NULL) { 704 entry = list; 705 n = 0; 706 bauto = 0; 707 p1 = tmpstr; 708 do { 709 p2 = p = php_memnstr(p1, ",", 1, endp); 710 if (p == NULL) { 711 p = endp; 712 } 713 *p = '\0'; 714 /* trim spaces */ 715 while (p1 < p && (*p1 == ' ' || *p1 == '\t')) { 716 p1++; 717 } 718 p--; 719 while (p > p1 && (*p == ' ' || *p == '\t')) { 720 *p = '\0'; 721 p--; 722 } 723 /* 
convert to the encoding number and check encoding */ 724 if (strcasecmp(p1, "auto") == 0) { 725 if (!bauto) { 726 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); 727 const size_t identify_list_size = MBSTRG(default_detect_order_list_size); 728 size_t i; 729 bauto = 1; 730 for (i = 0; i < identify_list_size; i++) { 731 *entry++ = mbfl_no2encoding(*src++); 732 n++; 733 } 734 } 735 } else { 736 const mbfl_encoding *encoding = mbfl_name2encoding(p1); 737 if (encoding) { 738 *entry++ = encoding; 739 n++; 740 } else { 741 ret = 0; 742 } 743 } 744 p1 = p2 + 1; 745 } while (n < size && p2 != NULL); 746 if (n > 0) { 747 if (return_list) { 748 *return_list = list; 749 } else { 750 pefree(list, persistent); 751 } 752 } else { 753 pefree(list, persistent); 754 if (return_list) { 755 *return_list = NULL; 756 } 757 ret = 0; 758 } 759 if (return_size) { 760 *return_size = n; 761 } 762 } else { 763 if (return_list) { 764 *return_list = NULL; 765 } 766 if (return_size) { 767 *return_size = 0; 768 } 769 ret = 0; 770 } 771 efree(tmpstr); 772 } 773 774 return ret; 775} 776/* }}} */ 777 778/* {{{ static int php_mb_parse_encoding_array() 779 * Return 0 if input contains any illegal encoding, otherwise 1. 780 * Even if any illegal encoding is detected the result may contain a list 781 * of parsed encodings. 
782 */ 783static int 784php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) 785{ 786 zval **hash_entry; 787 HashTable *target_hash; 788 int i, n, size, bauto, ret = SUCCESS; 789 const mbfl_encoding **list, **entry; 790 791 list = NULL; 792 if (Z_TYPE_P(array) == IS_ARRAY) { 793 target_hash = Z_ARRVAL_P(array); 794 zend_hash_internal_pointer_reset(target_hash); 795 i = zend_hash_num_elements(target_hash); 796 size = i + MBSTRG(default_detect_order_list_size); 797 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent); 798 if (list != NULL) { 799 entry = list; 800 bauto = 0; 801 n = 0; 802 while (i > 0) { 803 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) { 804 break; 805 } 806 convert_to_string_ex(hash_entry); 807 if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) { 808 if (!bauto) { 809 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); 810 const size_t identify_list_size = MBSTRG(default_detect_order_list_size); 811 size_t j; 812 813 bauto = 1; 814 for (j = 0; j < identify_list_size; j++) { 815 *entry++ = mbfl_no2encoding(*src++); 816 n++; 817 } 818 } 819 } else { 820 const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry)); 821 if (encoding) { 822 *entry++ = encoding; 823 n++; 824 } else { 825 ret = FAILURE; 826 } 827 } 828 zend_hash_move_forward(target_hash); 829 i--; 830 } 831 if (n > 0) { 832 if (return_list) { 833 *return_list = list; 834 } else { 835 pefree(list, persistent); 836 } 837 } else { 838 pefree(list, persistent); 839 if (return_list) { 840 *return_list = NULL; 841 } 842 ret = FAILURE; 843 } 844 if (return_size) { 845 *return_size = n; 846 } 847 } else { 848 if (return_list) { 849 *return_list = NULL; 850 } 851 if (return_size) { 852 *return_size = 0; 853 } 854 ret = FAILURE; 855 } 856 } 857 858 return ret; 859} 860/* }}} */ 861 862/* {{{ zend_multibyte interface */ 863static 
const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC) 864{ 865 return (const zend_encoding*)mbfl_name2encoding(encoding_name); 866} 867 868static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding) 869{ 870 return ((const mbfl_encoding *)encoding)->name; 871} 872 873static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding) 874{ 875 const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding; 876 if (encoding->flag & MBFL_ENCTYPE_SBCS) { 877 return 1; 878 } 879 if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) { 880 return 1; 881 } 882 return 0; 883} 884 885static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC) 886{ 887 mbfl_string string; 888 889 if (!list) { 890 list = (const zend_encoding **)MBSTRG(current_detect_order_list); 891 list_size = MBSTRG(current_detect_order_list_size); 892 } 893 894 mbfl_string_init(&string); 895 string.no_language = MBSTRG(language); 896 string.val = (unsigned char *)arg_string; 897 string.len = arg_length; 898 return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0); 899} 900 901static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC) 902{ 903 mbfl_string string, result; 904 mbfl_buffer_converter *convd; 905 int status, loc; 906 907 /* new encoding */ 908 /* initialize string */ 909 mbfl_string_init(&string); 910 mbfl_string_init(&result); 911 string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding; 912 string.no_language = MBSTRG(language); 913 string.val = (unsigned char*)from; 914 string.len = from_length; 915 916 /* initialize converter */ 917 convd = 
mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len); 918 if (convd == NULL) { 919 return -1; 920 } 921 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); 922 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); 923 924 /* do it */ 925 status = mbfl_buffer_converter_feed2(convd, &string, &loc); 926 if (status) { 927 mbfl_buffer_converter_delete(convd); 928 return (size_t)-1; 929 } 930 931 mbfl_buffer_converter_flush(convd); 932 if (!mbfl_buffer_converter_result(convd, &result)) { 933 mbfl_buffer_converter_delete(convd); 934 return (size_t)-1; 935 } 936 937 *to = result.val; 938 *to_length = result.len; 939 940 mbfl_buffer_converter_delete(convd); 941 942 return loc; 943} 944 945static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC) 946{ 947 return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent TSRMLS_CC); 948} 949 950static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D) 951{ 952 return (const zend_encoding *)MBSTRG(internal_encoding); 953} 954 955static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC) 956{ 957 MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding; 958 return SUCCESS; 959} 960 961static zend_multibyte_functions php_mb_zend_multibyte_functions = { 962 "mbstring", 963 php_mb_zend_encoding_fetcher, 964 php_mb_zend_encoding_name_getter, 965 php_mb_zend_encoding_lexer_compatibility_checker, 966 php_mb_zend_encoding_detector, 967 php_mb_zend_encoding_converter, 968 php_mb_zend_encoding_list_parser, 969 php_mb_zend_internal_encoding_getter, 970 php_mb_zend_internal_encoding_setter 971}; 972/* }}} */ 973 974static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC); 
975static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len); 976static void _php_mb_free_regex(void *opaque); 977 978#if HAVE_ONIG 979/* {{{ _php_mb_compile_regex */ 980static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC) 981{ 982 php_mb_regex_t *retval; 983 OnigErrorInfo err_info; 984 int err_code; 985 986 if ((err_code = onig_new(&retval, 987 (const OnigUChar *)pattern, 988 (const OnigUChar *)pattern + strlen(pattern), 989 ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP, 990 ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) { 991 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; 992 onig_error_code_to_str(err_str, err_code, err_info); 993 php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str); 994 retval = NULL; 995 } 996 return retval; 997} 998/* }}} */ 999 1000/* {{{ _php_mb_match_regex */ 1001static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len) 1002{ 1003 return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str, 1004 (const OnigUChar*)str + str_len, (const OnigUChar *)str, 1005 (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0; 1006} 1007/* }}} */ 1008 1009/* {{{ _php_mb_free_regex */ 1010static void _php_mb_free_regex(void *opaque) 1011{ 1012 onig_free((php_mb_regex_t *)opaque); 1013} 1014/* }}} */ 1015#elif HAVE_PCRE || HAVE_BUNDLED_PCRE 1016/* {{{ _php_mb_compile_regex */ 1017static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC) 1018{ 1019 pcre *retval; 1020 const char *err_str; 1021 int err_offset; 1022 1023 if (!(retval = pcre_compile(pattern, 1024 PCRE_CASELESS, &err_str, &err_offset, NULL))) { 1025 php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str); 1026 } 1027 return retval; 1028} 1029/* }}} */ 1030 1031/* {{{ _php_mb_match_regex */ 1032static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len) 1033{ 1034 return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0, 1035 0, 
NULL, 0) >= 0; 1036} 1037/* }}} */ 1038 1039/* {{{ _php_mb_free_regex */ 1040static void _php_mb_free_regex(void *opaque) 1041{ 1042 pcre_free(opaque); 1043} 1044/* }}} */ 1045#endif 1046 1047/* {{{ php_mb_nls_get_default_detect_order_list */ 1048static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size) 1049{ 1050 size_t i; 1051 1052 *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut; 1053 *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]); 1054 1055 for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) { 1056 if (php_mb_default_identify_list[i].lang == lang) { 1057 *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list; 1058 *plist_size = php_mb_default_identify_list[i].list_size; 1059 return 1; 1060 } 1061 } 1062 return 0; 1063} 1064/* }}} */ 1065 1066static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote TSRMLS_DC) 1067{ 1068 char *result = emalloc(len + 2); 1069 char *resp = result; 1070 int i; 1071 1072 for (i = 0; i < len && start[i] != quote; ++i) { 1073 if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) { 1074 *resp++ = start[++i]; 1075 } else { 1076 size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding); 1077 1078 while (j-- > 0 && i < len) { 1079 *resp++ = start[i++]; 1080 } 1081 --i; 1082 } 1083 } 1084 1085 *resp = '\0'; 1086 return result; 1087} 1088 1089static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC) /* {{{ */ 1090{ 1091 char *pos = *line, quote; 1092 char *res; 1093 1094 while (*pos && *pos != stop) { 1095 if ((quote = *pos) == '"' || quote == '\'') { 1096 ++pos; 1097 while (*pos && *pos != quote) { 1098 if (*pos == '\\' && pos[1] && pos[1] == quote) { 1099 pos += 2; 1100 } else { 1101 ++pos; 
1102 } 1103 } 1104 if (*pos) { 1105 ++pos; 1106 } 1107 } else { 1108 pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding); 1109 1110 } 1111 } 1112 if (*pos == '\0') { 1113 res = estrdup(*line); 1114 *line += strlen(*line); 1115 return res; 1116 } 1117 1118 res = estrndup(*line, pos - *line); 1119 1120 while (*pos == stop) { 1121 pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding); 1122 } 1123 1124 *line = pos; 1125 return res; 1126} 1127/* }}} */ 1128 1129static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC) /* {{{ */ 1130{ 1131 while (*str && isspace(*(unsigned char *)str)) { 1132 ++str; 1133 } 1134 1135 if (!*str) { 1136 return estrdup(""); 1137 } 1138 1139 if (*str == '"' || *str == '\'') { 1140 char quote = *str; 1141 1142 str++; 1143 return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote TSRMLS_CC); 1144 } else { 1145 char *strend = str; 1146 1147 while (*strend && !isspace(*(unsigned char *)strend)) { 1148 ++strend; 1149 } 1150 return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0 TSRMLS_CC); 1151 } 1152} 1153/* }}} */ 1154 1155static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename TSRMLS_DC) /* {{{ */ 1156{ 1157 char *s, *s2; 1158 const size_t filename_len = strlen(filename); 1159 1160 /* The \ check should technically be needed for win32 systems only where 1161 * it is a valid path separator. However, IE in all it's wisdom always sends 1162 * the full path of the file on the user's filesystem, which means that unless 1163 * the user does basename() they get a bogus file name. Until IE's user base drops 1164 * to nill or problem is fixed this code must remain enabled for all systems. 
*/ 1165 s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding); 1166 s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding); 1167 1168 if (s && s2) { 1169 if (s > s2) { 1170 return ++s; 1171 } else { 1172 return ++s2; 1173 } 1174 } else if (s) { 1175 return ++s; 1176 } else if (s2) { 1177 return ++s2; 1178 } else { 1179 return filename; 1180 } 1181} 1182/* }}} */ 1183 1184/* {{{ php.ini directive handler */ 1185/* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */ 1186static PHP_INI_MH(OnUpdate_mbstring_language) 1187{ 1188 enum mbfl_no_language no_language; 1189 1190 no_language = mbfl_name2no_language(new_value); 1191 if (no_language == mbfl_no_language_invalid) { 1192 MBSTRG(language) = mbfl_no_language_neutral; 1193 return FAILURE; 1194 } 1195 MBSTRG(language) = no_language; 1196 php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size)); 1197 return SUCCESS; 1198} 1199/* }}} */ 1200 1201/* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */ 1202static PHP_INI_MH(OnUpdate_mbstring_detect_order) 1203{ 1204 const mbfl_encoding **list; 1205 size_t size; 1206 1207 if (!new_value) { 1208 if (MBSTRG(detect_order_list)) { 1209 pefree(MBSTRG(detect_order_list), 1); 1210 } 1211 MBSTRG(detect_order_list) = NULL; 1212 MBSTRG(detect_order_list_size) = 0; 1213 return SUCCESS; 1214 } 1215 1216 if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { 1217 return FAILURE; 1218 } 1219 1220 if (MBSTRG(detect_order_list)) { 1221 pefree(MBSTRG(detect_order_list), 1); 1222 } 1223 MBSTRG(detect_order_list) = list; 1224 MBSTRG(detect_order_list_size) = size; 1225 return SUCCESS; 1226} 1227/* }}} */ 1228 1229/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */ 1230static PHP_INI_MH(OnUpdate_mbstring_http_input) 1231{ 1232 const mbfl_encoding **list; 1233 size_t size; 1234 1235 if 
(!new_value) { 1236 if (MBSTRG(http_input_list)) { 1237 pefree(MBSTRG(http_input_list), 1); 1238 } 1239 MBSTRG(http_input_list) = NULL; 1240 MBSTRG(http_input_list_size) = 0; 1241 return SUCCESS; 1242 } 1243 1244 if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) { 1245 return FAILURE; 1246 } 1247 1248 if (MBSTRG(http_input_list)) { 1249 pefree(MBSTRG(http_input_list), 1); 1250 } 1251 MBSTRG(http_input_list) = list; 1252 MBSTRG(http_input_list_size) = size; 1253 1254 return SUCCESS; 1255} 1256/* }}} */ 1257 1258/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */ 1259static PHP_INI_MH(OnUpdate_mbstring_http_output) 1260{ 1261 const mbfl_encoding *encoding; 1262 1263 if (new_value == NULL || new_value_length == 0) { 1264 MBSTRG(http_output_encoding) = &mbfl_encoding_pass; 1265 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; 1266 return SUCCESS; 1267 } 1268 1269 encoding = mbfl_name2encoding(new_value); 1270 if (!encoding) { 1271 MBSTRG(http_output_encoding) = &mbfl_encoding_pass; 1272 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; 1273 return FAILURE; 1274 } 1275 1276 MBSTRG(http_output_encoding) = encoding; 1277 MBSTRG(current_http_output_encoding) = encoding; 1278 return SUCCESS; 1279} 1280/* }}} */ 1281 1282/* {{{ static _php_mb_ini_mbstring_internal_encoding_set */ 1283int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC) 1284{ 1285 const mbfl_encoding *encoding; 1286 1287 if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) { 1288 switch (MBSTRG(language)) { 1289 case mbfl_no_language_uni: 1290 encoding = mbfl_no2encoding(mbfl_no_encoding_utf8); 1291 break; 1292 case mbfl_no_language_japanese: 1293 encoding = mbfl_no2encoding(mbfl_no_encoding_euc_jp); 1294 break; 1295 case mbfl_no_language_korean: 1296 encoding = mbfl_no2encoding(mbfl_no_encoding_euc_kr); 1297 break; 1298 case 
mbfl_no_language_simplified_chinese: 1299 encoding = mbfl_no2encoding(mbfl_no_encoding_euc_cn); 1300 break; 1301 case mbfl_no_language_traditional_chinese: 1302 encoding = mbfl_no2encoding(mbfl_no_encoding_euc_tw); 1303 break; 1304 case mbfl_no_language_russian: 1305 encoding = mbfl_no2encoding(mbfl_no_encoding_koi8r); 1306 break; 1307 case mbfl_no_language_german: 1308 encoding = mbfl_no2encoding(mbfl_no_encoding_8859_15); 1309 break; 1310 case mbfl_no_language_armenian: 1311 encoding = mbfl_no2encoding(mbfl_no_encoding_armscii8); 1312 break; 1313 case mbfl_no_language_turkish: 1314 encoding = mbfl_no2encoding(mbfl_no_encoding_8859_9); 1315 break; 1316 default: 1317 encoding = mbfl_no2encoding(mbfl_no_encoding_8859_1); 1318 break; 1319 } 1320 } 1321 MBSTRG(internal_encoding) = encoding; 1322 MBSTRG(current_internal_encoding) = encoding; 1323#if HAVE_MBREGEX 1324 { 1325 const char *enc_name = new_value; 1326 if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) { 1327 /* falls back to EUC-JP if an unknown encoding name is given */ 1328 enc_name = "EUC-JP"; 1329 php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC); 1330 } 1331 php_mb_regex_set_mbctype(new_value TSRMLS_CC); 1332 } 1333#endif 1334 return SUCCESS; 1335} 1336/* }}} */ 1337 1338/* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */ 1339static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) 1340{ 1341 if (OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC) == FAILURE) { 1342 return FAILURE; 1343 } 1344 if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN 1345 || stage == PHP_INI_STAGE_RUNTIME) { 1346 return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC); 1347 } else { 1348 /* the corresponding mbstring globals needs to be set according to the 1349 * ini value in the later stage because it never falls back to the 1350 * default value if 1. 
no value for mbstring.internal_encoding is given, 1351 * 2. mbstring.language directive is processed in per-dir or runtime 1352 * context and 3. call to the handler for mbstring.language is done 1353 * after mbstring.internal_encoding is handled. */ 1354 return SUCCESS; 1355 } 1356} 1357/* }}} */ 1358 1359/* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */ 1360static PHP_INI_MH(OnUpdate_mbstring_substitute_character) 1361{ 1362 int c; 1363 char *endptr = NULL; 1364 1365 if (new_value != NULL) { 1366 if (strcasecmp("none", new_value) == 0) { 1367 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; 1368 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; 1369 } else if (strcasecmp("long", new_value) == 0) { 1370 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG; 1371 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG; 1372 } else if (strcasecmp("entity", new_value) == 0) { 1373 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY; 1374 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY; 1375 } else { 1376 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1377 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1378 if (new_value_length >0) { 1379 c = strtol(new_value, &endptr, 0); 1380 if (*endptr == '\0') { 1381 MBSTRG(filter_illegal_substchar) = c; 1382 MBSTRG(current_filter_illegal_substchar) = c; 1383 } 1384 } 1385 } 1386 } else { 1387 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1388 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1389 MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */ 1390 MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' 
*/ 1391 } 1392 1393 return SUCCESS; 1394} 1395/* }}} */ 1396 1397/* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */ 1398static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) 1399{ 1400 if (new_value == NULL) { 1401 return FAILURE; 1402 } 1403 1404 OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC); 1405 1406 if (MBSTRG(encoding_translation)) { 1407 sapi_unregister_post_entry(php_post_entries TSRMLS_CC); 1408 sapi_register_post_entries(mbstr_post_entries TSRMLS_CC); 1409 } else { 1410 sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC); 1411 sapi_register_post_entries(php_post_entries TSRMLS_CC); 1412 } 1413 1414 return SUCCESS; 1415} 1416/* }}} */ 1417 1418/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */ 1419static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) 1420{ 1421 zval tmp; 1422 void *re = NULL; 1423 1424 if (!new_value) { 1425 new_value = entry->orig_value; 1426 new_value_length = entry->orig_value_length; 1427 } 1428 php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC); 1429 1430 if (Z_STRLEN(tmp) > 0) { 1431 if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) { 1432 zval_dtor(&tmp); 1433 return FAILURE; 1434 } 1435 } 1436 1437 if (MBSTRG(http_output_conv_mimetypes)) { 1438 _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes)); 1439 } 1440 1441 MBSTRG(http_output_conv_mimetypes) = re; 1442 1443 zval_dtor(&tmp); 1444 return SUCCESS; 1445} 1446/* }}} */ 1447/* }}} */ 1448 1449/* {{{ php.ini directive registration */ 1450PHP_INI_BEGIN() 1451 PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language) 1452 PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order) 1453 PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input) 1454 PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output) 1455 
STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals) 1456 PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character) 1457 STD_PHP_INI_ENTRY("mbstring.func_overload", "0", 1458 PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals) 1459 1460 STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0", 1461 PHP_INI_SYSTEM | PHP_INI_PERDIR, 1462 OnUpdate_mbstring_encoding_translation, 1463 encoding_translation, zend_mbstring_globals, mbstring_globals) 1464 PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes", 1465 "^(text/|application/xhtml\\+xml)", 1466 PHP_INI_ALL, 1467 OnUpdate_mbstring_http_output_conv_mimetypes) 1468 1469 STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0", 1470 PHP_INI_ALL, 1471 OnUpdateLong, 1472 strict_detection, zend_mbstring_globals, mbstring_globals) 1473PHP_INI_END() 1474/* }}} */ 1475 1476/* {{{ module global initialize handler */ 1477static PHP_GINIT_FUNCTION(mbstring) 1478{ 1479 mbstring_globals->language = mbfl_no_language_uni; 1480 mbstring_globals->internal_encoding = NULL; 1481 mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding; 1482 mbstring_globals->http_output_encoding = &mbfl_encoding_pass; 1483 mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass; 1484 mbstring_globals->http_input_identify = NULL; 1485 mbstring_globals->http_input_identify_get = NULL; 1486 mbstring_globals->http_input_identify_post = NULL; 1487 mbstring_globals->http_input_identify_cookie = NULL; 1488 mbstring_globals->http_input_identify_string = NULL; 1489 mbstring_globals->http_input_list = NULL; 1490 mbstring_globals->http_input_list_size = 0; 1491 mbstring_globals->detect_order_list = NULL; 1492 mbstring_globals->detect_order_list_size = 0; 1493 mbstring_globals->current_detect_order_list = NULL; 1494 
mbstring_globals->current_detect_order_list_size = 0; 1495 mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut; 1496 mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]); 1497 mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1498 mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */ 1499 mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1500 mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */ 1501 mbstring_globals->illegalchars = 0; 1502 mbstring_globals->func_overload = 0; 1503 mbstring_globals->encoding_translation = 0; 1504 mbstring_globals->strict_detection = 0; 1505 mbstring_globals->outconv = NULL; 1506 mbstring_globals->http_output_conv_mimetypes = NULL; 1507#if HAVE_MBREGEX 1508 mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C); 1509#endif 1510} 1511/* }}} */ 1512 1513/* {{{ PHP_GSHUTDOWN_FUNCTION */ 1514static PHP_GSHUTDOWN_FUNCTION(mbstring) 1515{ 1516 if (mbstring_globals->http_input_list) { 1517 free(mbstring_globals->http_input_list); 1518 } 1519 if (mbstring_globals->detect_order_list) { 1520 free(mbstring_globals->detect_order_list); 1521 } 1522 if (mbstring_globals->http_output_conv_mimetypes) { 1523 _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes); 1524 } 1525#if HAVE_MBREGEX 1526 php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC); 1527#endif 1528} 1529/* }}} */ 1530 1531/* {{{ PHP_MINIT_FUNCTION(mbstring) */ 1532PHP_MINIT_FUNCTION(mbstring) 1533{ 1534 __mbfl_allocators = &_php_mb_allocators; 1535 1536 REGISTER_INI_ENTRIES(); 1537 1538 /* This is a global handler. Should not be set in a per-request handler. */ 1539 sapi_register_treat_data(mbstr_treat_data TSRMLS_CC); 1540 1541 /* Post handlers are stored in the thread-local context. 
*/ 1542 if (MBSTRG(encoding_translation)) { 1543 sapi_register_post_entries(mbstr_post_entries TSRMLS_CC); 1544 } 1545 1546 REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT); 1547 REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT); 1548 REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT); 1549 1550 REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT); 1551 REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT); 1552 REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT); 1553 1554#if HAVE_MBREGEX 1555 PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); 1556#endif 1557 1558 if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) { 1559 return FAILURE; 1560 } 1561 1562 php_rfc1867_set_multibyte_callbacks( 1563 php_mb_encoding_translation, 1564 php_mb_gpc_get_detect_order, 1565 php_mb_gpc_set_input_encoding, 1566 php_mb_rfc1867_getword, 1567 php_mb_rfc1867_getword_conf, 1568 php_mb_rfc1867_basename); 1569 1570 return SUCCESS; 1571} 1572/* }}} */ 1573 1574/* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */ 1575PHP_MSHUTDOWN_FUNCTION(mbstring) 1576{ 1577 UNREGISTER_INI_ENTRIES(); 1578 1579#if HAVE_MBREGEX 1580 PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); 1581#endif 1582 1583 return SUCCESS; 1584} 1585/* }}} */ 1586 1587/* {{{ PHP_RINIT_FUNCTION(mbstring) */ 1588PHP_RINIT_FUNCTION(mbstring) 1589{ 1590 zend_function *func, *orig; 1591 const struct mb_overload_def *p; 1592 1593 MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding); 1594 MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding); 1595 MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode); 1596 MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar); 1597 1598 MBSTRG(illegalchars) = 0; 1599 1600 
php_mb_populate_current_detect_order_list(TSRMLS_C); 1601 1602 /* override original function. */ 1603 if (MBSTRG(func_overload)){ 1604 p = &(mb_ovld[0]); 1605 1606 while (p->type > 0) { 1607 if ((MBSTRG(func_overload) & p->type) == p->type && 1608 zend_hash_find(EG(function_table), p->save_func, 1609 strlen(p->save_func)+1, (void **)&orig) != SUCCESS) { 1610 1611 zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func); 1612 1613 if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) { 1614 php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func); 1615 return FAILURE; 1616 } else { 1617 zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL); 1618 1619 if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function), 1620 NULL) == FAILURE) { 1621 php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func); 1622 return FAILURE; 1623 } 1624 } 1625 } 1626 p++; 1627 } 1628 } 1629#if HAVE_MBREGEX 1630 PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); 1631#endif 1632 zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC); 1633 1634 return SUCCESS; 1635} 1636/* }}} */ 1637 1638/* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */ 1639PHP_RSHUTDOWN_FUNCTION(mbstring) 1640{ 1641 const struct mb_overload_def *p; 1642 zend_function *orig; 1643 1644 if (MBSTRG(current_detect_order_list) != NULL) { 1645 efree(MBSTRG(current_detect_order_list)); 1646 MBSTRG(current_detect_order_list) = NULL; 1647 MBSTRG(current_detect_order_list_size) = 0; 1648 } 1649 if (MBSTRG(outconv) != NULL) { 1650 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv)); 1651 mbfl_buffer_converter_delete(MBSTRG(outconv)); 1652 MBSTRG(outconv) = NULL; 1653 } 1654 1655 /* clear http input 
identification. */ 1656 MBSTRG(http_input_identify) = NULL; 1657 MBSTRG(http_input_identify_post) = NULL; 1658 MBSTRG(http_input_identify_get) = NULL; 1659 MBSTRG(http_input_identify_cookie) = NULL; 1660 MBSTRG(http_input_identify_string) = NULL; 1661 1662 /* clear overloaded function. */ 1663 if (MBSTRG(func_overload)){ 1664 p = &(mb_ovld[0]); 1665 while (p->type > 0) { 1666 if ((MBSTRG(func_overload) & p->type) == p->type && 1667 zend_hash_find(EG(function_table), p->save_func, 1668 strlen(p->save_func)+1, (void **)&orig) == SUCCESS) { 1669 1670 zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL); 1671 zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1); 1672 } 1673 p++; 1674 } 1675 } 1676 1677#if HAVE_MBREGEX 1678 PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); 1679#endif 1680 1681 return SUCCESS; 1682} 1683/* }}} */ 1684 1685/* {{{ PHP_MINFO_FUNCTION(mbstring) */ 1686PHP_MINFO_FUNCTION(mbstring) 1687{ 1688 php_info_print_table_start(); 1689 php_info_print_table_row(2, "Multibyte Support", "enabled"); 1690 php_info_print_table_row(2, "Multibyte string engine", "libmbfl"); 1691 php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? 
"enabled": "disabled"); 1692 { 1693 char tmp[256]; 1694 snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY); 1695 php_info_print_table_row(2, "libmbfl version", tmp); 1696 } 1697 php_info_print_table_end(); 1698 1699 php_info_print_table_start(); 1700 php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1."); 1701 php_info_print_table_end(); 1702 1703#if HAVE_MBREGEX 1704 PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU); 1705#endif 1706 1707 DISPLAY_INI_ENTRIES(); 1708} 1709/* }}} */ 1710 1711/* {{{ proto string mb_language([string language]) 1712 Sets the current language or Returns the current language as a string */ 1713PHP_FUNCTION(mb_language) 1714{ 1715 char *name = NULL; 1716 int name_len = 0; 1717 1718 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) { 1719 return; 1720 } 1721 if (name == NULL) { 1722 RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1); 1723 } else { 1724 if (FAILURE == zend_alter_ini_entry( 1725 "mbstring.language", sizeof("mbstring.language"), 1726 name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) { 1727 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name); 1728 RETVAL_FALSE; 1729 } else { 1730 RETVAL_TRUE; 1731 } 1732 } 1733} 1734/* }}} */ 1735 1736/* {{{ proto string mb_internal_encoding([string encoding]) 1737 Sets the current internal encoding or Returns the current internal encoding as a string */ 1738PHP_FUNCTION(mb_internal_encoding) 1739{ 1740 const char *name = NULL; 1741 int name_len; 1742 const mbfl_encoding *encoding; 1743 1744 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) { 1745 RETURN_FALSE; 1746 } 1747 if (name == NULL) { 1748 name = MBSTRG(current_internal_encoding) ? 
MBSTRG(current_internal_encoding)->name: NULL; 1749 if (name != NULL) { 1750 RETURN_STRING(name, 1); 1751 } else { 1752 RETURN_FALSE; 1753 } 1754 } else { 1755 encoding = mbfl_name2encoding(name); 1756 if (!encoding) { 1757 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); 1758 RETURN_FALSE; 1759 } else { 1760 MBSTRG(current_internal_encoding) = encoding; 1761 RETURN_TRUE; 1762 } 1763 } 1764} 1765/* }}} */ 1766 1767/* {{{ proto mixed mb_http_input([string type]) 1768 Returns the input encoding */ 1769PHP_FUNCTION(mb_http_input) 1770{ 1771 char *typ = NULL; 1772 int typ_len; 1773 int retname; 1774 char *list, *temp; 1775 const mbfl_encoding *result = NULL; 1776 1777 retname = 1; 1778 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) { 1779 RETURN_FALSE; 1780 } 1781 if (typ == NULL) { 1782 result = MBSTRG(http_input_identify); 1783 } else { 1784 switch (*typ) { 1785 case 'G': 1786 case 'g': 1787 result = MBSTRG(http_input_identify_get); 1788 break; 1789 case 'P': 1790 case 'p': 1791 result = MBSTRG(http_input_identify_post); 1792 break; 1793 case 'C': 1794 case 'c': 1795 result = MBSTRG(http_input_identify_cookie); 1796 break; 1797 case 'S': 1798 case 's': 1799 result = MBSTRG(http_input_identify_string); 1800 break; 1801 case 'I': 1802 case 'i': 1803 { 1804 const mbfl_encoding **entry = MBSTRG(http_input_list); 1805 const size_t n = MBSTRG(http_input_list_size); 1806 size_t i; 1807 array_init(return_value); 1808 for (i = 0; i < n; i++) { 1809 add_next_index_string(return_value, (*entry)->name, 1); 1810 entry++; 1811 } 1812 retname = 0; 1813 } 1814 break; 1815 case 'L': 1816 case 'l': 1817 { 1818 const mbfl_encoding **entry = MBSTRG(http_input_list); 1819 const size_t n = MBSTRG(http_input_list_size); 1820 size_t i; 1821 list = NULL; 1822 for (i = 0; i < n; i++) { 1823 if (list) { 1824 temp = list; 1825 spprintf(&list, 0, "%s,%s", temp, (*entry)->name); 1826 efree(temp); 1827 if (!list) { 1828 break; 
1829 } 1830 } else { 1831 list = estrdup((*entry)->name); 1832 } 1833 entry++; 1834 } 1835 } 1836 if (!list) { 1837 RETURN_FALSE; 1838 } 1839 RETVAL_STRING(list, 0); 1840 retname = 0; 1841 break; 1842 default: 1843 result = MBSTRG(http_input_identify); 1844 break; 1845 } 1846 } 1847 1848 if (retname) { 1849 if (result) { 1850 RETVAL_STRING(result->name, 1); 1851 } else { 1852 RETVAL_FALSE; 1853 } 1854 } 1855} 1856/* }}} */ 1857 1858/* {{{ proto string mb_http_output([string encoding]) 1859 Sets the current output_encoding or returns the current output_encoding as a string */ 1860PHP_FUNCTION(mb_http_output) 1861{ 1862 const char *name = NULL; 1863 int name_len; 1864 const mbfl_encoding *encoding; 1865 1866 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) { 1867 RETURN_FALSE; 1868 } 1869 1870 if (name == NULL) { 1871 name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL; 1872 if (name != NULL) { 1873 RETURN_STRING(name, 1); 1874 } else { 1875 RETURN_FALSE; 1876 } 1877 } else { 1878 encoding = mbfl_name2encoding(name); 1879 if (!encoding) { 1880 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); 1881 RETURN_FALSE; 1882 } else { 1883 MBSTRG(current_http_output_encoding) = encoding; 1884 RETURN_TRUE; 1885 } 1886 } 1887} 1888/* }}} */ 1889 1890/* {{{ proto bool|array mb_detect_order([mixed encoding-list]) 1891 Sets the current detect_order or Return the current detect_order as a array */ 1892PHP_FUNCTION(mb_detect_order) 1893{ 1894 zval **arg1 = NULL; 1895 1896 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) { 1897 return; 1898 } 1899 1900 if (!arg1) { 1901 size_t i; 1902 size_t n = MBSTRG(current_detect_order_list_size); 1903 const mbfl_encoding **entry = MBSTRG(current_detect_order_list); 1904 array_init(return_value); 1905 for (i = 0; i < n; i++) { 1906 add_next_index_string(return_value, (*entry)->name, 1); 1907 entry++; 
1908 } 1909 } else { 1910 const mbfl_encoding **list = NULL; 1911 size_t size = 0; 1912 switch (Z_TYPE_PP(arg1)) { 1913 case IS_ARRAY: 1914 if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) { 1915 if (list) { 1916 efree(list); 1917 } 1918 RETURN_FALSE; 1919 } 1920 break; 1921 default: 1922 convert_to_string_ex(arg1); 1923 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) { 1924 if (list) { 1925 efree(list); 1926 } 1927 RETURN_FALSE; 1928 } 1929 break; 1930 } 1931 1932 if (list == NULL) { 1933 RETURN_FALSE; 1934 } 1935 1936 if (MBSTRG(current_detect_order_list)) { 1937 efree(MBSTRG(current_detect_order_list)); 1938 } 1939 MBSTRG(current_detect_order_list) = list; 1940 MBSTRG(current_detect_order_list_size) = size; 1941 RETURN_TRUE; 1942 } 1943} 1944/* }}} */ 1945 1946/* {{{ proto mixed mb_substitute_character([mixed substchar]) 1947 Sets the current substitute_character or returns the current substitute_character */ 1948PHP_FUNCTION(mb_substitute_character) 1949{ 1950 zval **arg1 = NULL; 1951 1952 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) { 1953 return; 1954 } 1955 1956 if (!arg1) { 1957 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { 1958 RETURN_STRING("none", 1); 1959 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) { 1960 RETURN_STRING("long", 1); 1961 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) { 1962 RETURN_STRING("entity", 1); 1963 } else { 1964 RETURN_LONG(MBSTRG(current_filter_illegal_substchar)); 1965 } 1966 } else { 1967 RETVAL_TRUE; 1968 1969 switch (Z_TYPE_PP(arg1)) { 1970 case IS_STRING: 1971 if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) { 1972 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; 1973 } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) 
{ 1974 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG; 1975 } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) { 1976 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY; 1977 } else { 1978 convert_to_long_ex(arg1); 1979 1980 if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) { 1981 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1982 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1); 1983 } else { 1984 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character."); 1985 RETURN_FALSE; 1986 } 1987 } 1988 break; 1989 default: 1990 convert_to_long_ex(arg1); 1991 if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) { 1992 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; 1993 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1); 1994 } else { 1995 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character."); 1996 RETURN_FALSE; 1997 } 1998 break; 1999 } 2000 } 2001} 2002/* }}} */ 2003 2004/* {{{ proto string mb_preferred_mime_name(string encoding) 2005 Return the preferred MIME name (charset) as a string */ 2006PHP_FUNCTION(mb_preferred_mime_name) 2007{ 2008 enum mbfl_no_encoding no_encoding; 2009 char *name = NULL; 2010 int name_len; 2011 2012 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) { 2013 return; 2014 } else { 2015 no_encoding = mbfl_name2no_encoding(name); 2016 if (no_encoding == mbfl_no_encoding_invalid) { 2017 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); 2018 RETVAL_FALSE; 2019 } else { 2020 const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding); 2021 if (preferred_name == NULL || *preferred_name == '\0') { 2022 php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name); 2023 RETVAL_FALSE; 2024 } else { 2025 RETVAL_STRING((char *)preferred_name, 1); 2026 } 2027 } 2028 } 2029} 
2030/* }}} */ 2031 2032#define IS_SJIS1(c) ((((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xf5)) ? 1 : 0) 2033#define IS_SJIS2(c) ((((c)>=0x40 && (c)<=0x7e) || ((c)>=0x80 && (c)<=0xfc)) ? 1 : 0) 2034 2035/* {{{ proto bool mb_parse_str(string encoded_string [, array result]) 2036 Parses GET/POST/COOKIE data and sets global variables */ 2037PHP_FUNCTION(mb_parse_str) 2038{ 2039 zval *track_vars_array = NULL; 2040 char *encstr = NULL; 2041 int encstr_len; 2042 php_mb_encoding_handler_info_t info; 2043 const mbfl_encoding *detected; 2044 2045 track_vars_array = NULL; 2046 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) { 2047 return; 2048 } 2049 2050 if (track_vars_array != NULL) { 2051 /* Clear out the array */ 2052 zval_dtor(track_vars_array); 2053 array_init(track_vars_array); 2054 } 2055 2056 encstr = estrndup(encstr, encstr_len); 2057 2058 info.data_type = PARSE_STRING; 2059 info.separator = PG(arg_separator).input; 2060 info.report_errors = 1; 2061 info.to_encoding = MBSTRG(current_internal_encoding); 2062 info.to_language = MBSTRG(language); 2063 info.from_encodings = MBSTRG(http_input_list); 2064 info.num_from_encodings = MBSTRG(http_input_list_size); 2065 info.from_language = MBSTRG(language); 2066 2067 if (track_vars_array != NULL) { 2068 detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC); 2069 } else { 2070 zval tmp; 2071 if (!EG(active_symbol_table)) { 2072 zend_rebuild_symbol_table(TSRMLS_C); 2073 } 2074 Z_ARRVAL(tmp) = EG(active_symbol_table); 2075 detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr TSRMLS_CC); 2076 } 2077 2078 MBSTRG(http_input_identify) = detected; 2079 2080 RETVAL_BOOL(detected); 2081 2082 if (encstr != NULL) efree(encstr); 2083} 2084/* }}} */ 2085 2086/* {{{ proto string mb_output_handler(string contents, int status) 2087 Returns string in output buffer converted to the http_output encoding */ 2088PHP_FUNCTION(mb_output_handler) 
2089{ 2090 char *arg_string; 2091 int arg_string_len; 2092 long arg_status; 2093 mbfl_string string, result; 2094 const char *charset; 2095 char *p; 2096 const mbfl_encoding *encoding; 2097 int last_feed, len; 2098 unsigned char send_text_mimetype = 0; 2099 char *s, *mimetype = NULL; 2100 2101 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) { 2102 return; 2103 } 2104 2105 encoding = MBSTRG(current_http_output_encoding); 2106 2107 /* start phase only */ 2108 if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) { 2109 /* delete the converter just in case. */ 2110 if (MBSTRG(outconv)) { 2111 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv)); 2112 mbfl_buffer_converter_delete(MBSTRG(outconv)); 2113 MBSTRG(outconv) = NULL; 2114 } 2115 if (encoding == &mbfl_encoding_pass) { 2116 RETURN_STRINGL(arg_string, arg_string_len, 1); 2117 } 2118 2119 /* analyze mime type */ 2120 if (SG(sapi_headers).mimetype && 2121 _php_mb_match_regex( 2122 MBSTRG(http_output_conv_mimetypes), 2123 SG(sapi_headers).mimetype, 2124 strlen(SG(sapi_headers).mimetype))) { 2125 if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){ 2126 mimetype = estrdup(SG(sapi_headers).mimetype); 2127 } else { 2128 mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype); 2129 } 2130 send_text_mimetype = 1; 2131 } else if (SG(sapi_headers).send_default_content_type) { 2132 mimetype = SG(default_mimetype) ? 
SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE; 2133 } 2134 2135 /* if content-type is not yet set, set it and activate the converter */ 2136 if (SG(sapi_headers).send_default_content_type || send_text_mimetype) { 2137 charset = encoding->mime_name; 2138 if (charset) { 2139 len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset ); 2140 if (sapi_add_header(p, len, 0) != FAILURE) { 2141 SG(sapi_headers).send_default_content_type = 0; 2142 } 2143 } 2144 /* activate the converter */ 2145 MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0); 2146 if (send_text_mimetype){ 2147 efree(mimetype); 2148 } 2149 } 2150 } 2151 2152 /* just return if the converter is not activated. */ 2153 if (MBSTRG(outconv) == NULL) { 2154 RETURN_STRINGL(arg_string, arg_string_len, 1); 2155 } 2156 2157 /* flag */ 2158 last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0); 2159 /* mode */ 2160 mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode)); 2161 mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar)); 2162 2163 /* feed the string */ 2164 mbfl_string_init(&string); 2165 string.no_language = MBSTRG(language); 2166 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2167 string.val = (unsigned char *)arg_string; 2168 string.len = arg_string_len; 2169 mbfl_buffer_converter_feed(MBSTRG(outconv), &string); 2170 if (last_feed) { 2171 mbfl_buffer_converter_flush(MBSTRG(outconv)); 2172 } 2173 /* get the converter output, and return it */ 2174 mbfl_buffer_converter_result(MBSTRG(outconv), &result); 2175 RETVAL_STRINGL((char *)result.val, result.len, 0); /* the string is already strdup()'ed */ 2176 2177 /* delete the converter if it is the last feed. 
*/ 2178 if (last_feed) { 2179 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv)); 2180 mbfl_buffer_converter_delete(MBSTRG(outconv)); 2181 MBSTRG(outconv) = NULL; 2182 } 2183} 2184/* }}} */ 2185 2186/* {{{ proto int mb_strlen(string str [, string encoding]) 2187 Get character numbers of a string */ 2188PHP_FUNCTION(mb_strlen) 2189{ 2190 int n; 2191 mbfl_string string; 2192 char *enc_name = NULL; 2193 int enc_name_len; 2194 2195 mbfl_string_init(&string); 2196 2197 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) { 2198 RETURN_FALSE; 2199 } 2200 2201 string.no_language = MBSTRG(language); 2202 if (enc_name == NULL) { 2203 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2204 } else { 2205 string.no_encoding = mbfl_name2no_encoding(enc_name); 2206 if (string.no_encoding == mbfl_no_encoding_invalid) { 2207 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2208 RETURN_FALSE; 2209 } 2210 } 2211 2212 n = mbfl_strlen(&string); 2213 if (n >= 0) { 2214 RETVAL_LONG(n); 2215 } else { 2216 RETVAL_FALSE; 2217 } 2218} 2219/* }}} */ 2220 2221/* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]]) 2222 Find position of first occurrence of a string within another */ 2223PHP_FUNCTION(mb_strpos) 2224{ 2225 int n, reverse = 0; 2226 long offset; 2227 mbfl_string haystack, needle; 2228 char *enc_name = NULL; 2229 int enc_name_len; 2230 2231 mbfl_string_init(&haystack); 2232 mbfl_string_init(&needle); 2233 haystack.no_language = MBSTRG(language); 2234 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2235 needle.no_language = MBSTRG(language); 2236 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2237 offset = 0; 2238 2239 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, 
&offset, &enc_name, &enc_name_len) == FAILURE) { 2240 RETURN_FALSE; 2241 } 2242 2243 if (enc_name != NULL) { 2244 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); 2245 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2246 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2247 RETURN_FALSE; 2248 } 2249 } 2250 2251 if (offset < 0 || offset > mbfl_strlen(&haystack)) { 2252 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string"); 2253 RETURN_FALSE; 2254 } 2255 if (needle.len == 0) { 2256 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); 2257 RETURN_FALSE; 2258 } 2259 2260 n = mbfl_strpos(&haystack, &needle, offset, reverse); 2261 if (n >= 0) { 2262 RETVAL_LONG(n); 2263 } else { 2264 switch (-n) { 2265 case 1: 2266 break; 2267 case 2: 2268 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length"); 2269 break; 2270 case 4: 2271 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error"); 2272 break; 2273 case 8: 2274 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty"); 2275 break; 2276 default: 2277 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos"); 2278 break; 2279 } 2280 RETVAL_FALSE; 2281 } 2282} 2283/* }}} */ 2284 2285/* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]]) 2286 Find position of last occurrence of a string within another */ 2287PHP_FUNCTION(mb_strrpos) 2288{ 2289 int n; 2290 mbfl_string haystack, needle; 2291 char *enc_name = NULL; 2292 int enc_name_len; 2293 zval **zoffset = NULL; 2294 long offset = 0, str_flg; 2295 char *enc_name2 = NULL; 2296 int enc_name_len2; 2297 2298 mbfl_string_init(&haystack); 2299 mbfl_string_init(&needle); 2300 haystack.no_language = MBSTRG(language); 2301 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2302 needle.no_language = MBSTRG(language); 2303 needle.no_encoding = 
MBSTRG(current_internal_encoding)->no_encoding; 2304 2305 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) { 2306 RETURN_FALSE; 2307 } 2308 2309 if (zoffset) { 2310 if (Z_TYPE_PP(zoffset) == IS_STRING) { 2311 enc_name2 = Z_STRVAL_PP(zoffset); 2312 enc_name_len2 = Z_STRLEN_PP(zoffset); 2313 str_flg = 1; 2314 2315 if (enc_name2 != NULL) { 2316 switch (*enc_name2) { 2317 case '0': 2318 case '1': 2319 case '2': 2320 case '3': 2321 case '4': 2322 case '5': 2323 case '6': 2324 case '7': 2325 case '8': 2326 case '9': 2327 case ' ': 2328 case '-': 2329 case '.': 2330 break; 2331 default : 2332 str_flg = 0; 2333 break; 2334 } 2335 } 2336 2337 if (str_flg) { 2338 convert_to_long_ex(zoffset); 2339 offset = Z_LVAL_PP(zoffset); 2340 } else { 2341 enc_name = enc_name2; 2342 enc_name_len = enc_name_len2; 2343 } 2344 } else { 2345 convert_to_long_ex(zoffset); 2346 offset = Z_LVAL_PP(zoffset); 2347 } 2348 } 2349 2350 if (enc_name != NULL) { 2351 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); 2352 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2353 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2354 RETURN_FALSE; 2355 } 2356 } 2357 2358 if (haystack.len <= 0) { 2359 RETURN_FALSE; 2360 } 2361 if (needle.len <= 0) { 2362 RETURN_FALSE; 2363 } 2364 2365 { 2366 int haystack_char_len = mbfl_strlen(&haystack); 2367 if ((offset > 0 && offset > haystack_char_len) || 2368 (offset < 0 && -offset > haystack_char_len)) { 2369 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string"); 2370 RETURN_FALSE; 2371 } 2372 } 2373 2374 n = mbfl_strpos(&haystack, &needle, offset, 1); 2375 if (n >= 0) { 2376 RETVAL_LONG(n); 2377 } else { 2378 RETVAL_FALSE; 2379 } 2380} 2381/* }}} */ 2382 2383/* {{{ proto int mb_stripos(string haystack, string needle [, int 
offset [, string encoding]]) 2384 Finds position of first occurrence of a string within another, case insensitive */ 2385PHP_FUNCTION(mb_stripos) 2386{ 2387 int n; 2388 long offset; 2389 mbfl_string haystack, needle; 2390 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; 2391 int from_encoding_len; 2392 n = -1; 2393 offset = 0; 2394 2395 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) { 2396 RETURN_FALSE; 2397 } 2398 if (needle.len == 0) { 2399 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); 2400 RETURN_FALSE; 2401 } 2402 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC); 2403 2404 if (n >= 0) { 2405 RETVAL_LONG(n); 2406 } else { 2407 RETVAL_FALSE; 2408 } 2409} 2410/* }}} */ 2411 2412/* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]]) 2413 Finds position of last occurrence of a string within another, case insensitive */ 2414PHP_FUNCTION(mb_strripos) 2415{ 2416 int n; 2417 long offset; 2418 mbfl_string haystack, needle; 2419 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; 2420 int from_encoding_len; 2421 n = -1; 2422 offset = 0; 2423 2424 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) { 2425 RETURN_FALSE; 2426 } 2427 2428 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC); 2429 2430 if (n >= 0) { 2431 RETVAL_LONG(n); 2432 } else { 2433 RETVAL_FALSE; 2434 } 2435} 2436/* }}} */ 2437 2438/* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]]) 2439 Finds first 
occurrence of a string within another */ 2440PHP_FUNCTION(mb_strstr) 2441{ 2442 int n, len, mblen; 2443 mbfl_string haystack, needle, result, *ret = NULL; 2444 char *enc_name = NULL; 2445 int enc_name_len; 2446 zend_bool part = 0; 2447 2448 mbfl_string_init(&haystack); 2449 mbfl_string_init(&needle); 2450 haystack.no_language = MBSTRG(language); 2451 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2452 needle.no_language = MBSTRG(language); 2453 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2454 2455 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) { 2456 RETURN_FALSE; 2457 } 2458 2459 if (enc_name != NULL) { 2460 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); 2461 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2462 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2463 RETURN_FALSE; 2464 } 2465 } 2466 2467 if (needle.len <= 0) { 2468 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); 2469 RETURN_FALSE; 2470 } 2471 n = mbfl_strpos(&haystack, &needle, 0, 0); 2472 if (n >= 0) { 2473 mblen = mbfl_strlen(&haystack); 2474 if (part) { 2475 ret = mbfl_substr(&haystack, &result, 0, n); 2476 if (ret != NULL) { 2477 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2478 } else { 2479 RETVAL_FALSE; 2480 } 2481 } else { 2482 len = (mblen - n); 2483 ret = mbfl_substr(&haystack, &result, n, len); 2484 if (ret != NULL) { 2485 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2486 } else { 2487 RETVAL_FALSE; 2488 } 2489 } 2490 } else { 2491 RETVAL_FALSE; 2492 } 2493} 2494/* }}} */ 2495 2496/* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]]) 2497 Finds the last occurrence of a character in a string within another */ 2498PHP_FUNCTION(mb_strrchr) 2499{ 2500 int n, len, mblen; 
2501 mbfl_string haystack, needle, result, *ret = NULL; 2502 char *enc_name = NULL; 2503 int enc_name_len; 2504 zend_bool part = 0; 2505 2506 mbfl_string_init(&haystack); 2507 mbfl_string_init(&needle); 2508 haystack.no_language = MBSTRG(language); 2509 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2510 needle.no_language = MBSTRG(language); 2511 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2512 2513 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) { 2514 RETURN_FALSE; 2515 } 2516 2517 if (enc_name != NULL) { 2518 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); 2519 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2520 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2521 RETURN_FALSE; 2522 } 2523 } 2524 2525 if (haystack.len <= 0) { 2526 RETURN_FALSE; 2527 } 2528 if (needle.len <= 0) { 2529 RETURN_FALSE; 2530 } 2531 n = mbfl_strpos(&haystack, &needle, 0, 1); 2532 if (n >= 0) { 2533 mblen = mbfl_strlen(&haystack); 2534 if (part) { 2535 ret = mbfl_substr(&haystack, &result, 0, n); 2536 if (ret != NULL) { 2537 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2538 } else { 2539 RETVAL_FALSE; 2540 } 2541 } else { 2542 len = (mblen - n); 2543 ret = mbfl_substr(&haystack, &result, n, len); 2544 if (ret != NULL) { 2545 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2546 } else { 2547 RETVAL_FALSE; 2548 } 2549 } 2550 } else { 2551 RETVAL_FALSE; 2552 } 2553} 2554/* }}} */ 2555 2556/* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]]) 2557 Finds first occurrence of a string within another, case insensitive */ 2558PHP_FUNCTION(mb_stristr) 2559{ 2560 zend_bool part = 0; 2561 unsigned int from_encoding_len, len, mblen; 2562 int n; 2563 mbfl_string haystack, needle, result, *ret = NULL; 2564 
const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; 2565 mbfl_string_init(&haystack); 2566 mbfl_string_init(&needle); 2567 haystack.no_language = MBSTRG(language); 2568 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2569 needle.no_language = MBSTRG(language); 2570 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2571 2572 2573 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) { 2574 RETURN_FALSE; 2575 } 2576 2577 if (!needle.len) { 2578 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter"); 2579 RETURN_FALSE; 2580 } 2581 2582 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding); 2583 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2584 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding); 2585 RETURN_FALSE; 2586 } 2587 2588 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC); 2589 2590 if (n <0) { 2591 RETURN_FALSE; 2592 } 2593 2594 mblen = mbfl_strlen(&haystack); 2595 2596 if (part) { 2597 ret = mbfl_substr(&haystack, &result, 0, n); 2598 if (ret != NULL) { 2599 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2600 } else { 2601 RETVAL_FALSE; 2602 } 2603 } else { 2604 len = (mblen - n); 2605 ret = mbfl_substr(&haystack, &result, n, len); 2606 if (ret != NULL) { 2607 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2608 } else { 2609 RETVAL_FALSE; 2610 } 2611 } 2612} 2613/* }}} */ 2614 2615/* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]]) 2616 Finds the last occurrence of a character in a string within another, case insensitive */ 2617PHP_FUNCTION(mb_strrichr) 2618{ 2619 zend_bool part = 0; 2620 int n, from_encoding_len, len, mblen; 2621 mbfl_string haystack, needle, result, *ret 
= NULL; 2622 const char *from_encoding = MBSTRG(current_internal_encoding)->name; 2623 mbfl_string_init(&haystack); 2624 mbfl_string_init(&needle); 2625 haystack.no_language = MBSTRG(language); 2626 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2627 needle.no_language = MBSTRG(language); 2628 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2629 2630 2631 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) { 2632 RETURN_FALSE; 2633 } 2634 2635 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding); 2636 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2637 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding); 2638 RETURN_FALSE; 2639 } 2640 2641 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC); 2642 2643 if (n <0) { 2644 RETURN_FALSE; 2645 } 2646 2647 mblen = mbfl_strlen(&haystack); 2648 2649 if (part) { 2650 ret = mbfl_substr(&haystack, &result, 0, n); 2651 if (ret != NULL) { 2652 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2653 } else { 2654 RETVAL_FALSE; 2655 } 2656 } else { 2657 len = (mblen - n); 2658 ret = mbfl_substr(&haystack, &result, n, len); 2659 if (ret != NULL) { 2660 RETVAL_STRINGL((char *)ret->val, ret->len, 0); 2661 } else { 2662 RETVAL_FALSE; 2663 } 2664 } 2665} 2666/* }}} */ 2667 2668/* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding]) 2669 Count the number of substring occurrences */ 2670PHP_FUNCTION(mb_substr_count) 2671{ 2672 int n; 2673 mbfl_string haystack, needle; 2674 char *enc_name = NULL; 2675 int enc_name_len; 2676 2677 mbfl_string_init(&haystack); 2678 mbfl_string_init(&needle); 2679 haystack.no_language = MBSTRG(language); 2680 haystack.no_encoding = 
MBSTRG(current_internal_encoding)->no_encoding; 2681 needle.no_language = MBSTRG(language); 2682 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2683 2684 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) { 2685 return; 2686 } 2687 2688 if (enc_name != NULL) { 2689 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name); 2690 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 2691 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2692 RETURN_FALSE; 2693 } 2694 } 2695 2696 if (needle.len <= 0) { 2697 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring"); 2698 RETURN_FALSE; 2699 } 2700 2701 n = mbfl_substr_count(&haystack, &needle); 2702 if (n >= 0) { 2703 RETVAL_LONG(n); 2704 } else { 2705 RETVAL_FALSE; 2706 } 2707} 2708/* }}} */ 2709 2710/* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]]) 2711 Returns part of a string */ 2712PHP_FUNCTION(mb_substr) 2713{ 2714 size_t argc = ZEND_NUM_ARGS(); 2715 char *str, *encoding; 2716 long from, len; 2717 int mblen, str_len, encoding_len; 2718 zval **z_len = NULL; 2719 mbfl_string string, result, *ret; 2720 2721 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", &str, &str_len, &from, &z_len, &encoding, &encoding_len) == FAILURE) { 2722 return; 2723 } 2724 2725 mbfl_string_init(&string); 2726 string.no_language = MBSTRG(language); 2727 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2728 2729 if (argc == 4) { 2730 string.no_encoding = mbfl_name2no_encoding(encoding); 2731 if (string.no_encoding == mbfl_no_encoding_invalid) { 2732 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding); 2733 RETURN_FALSE; 2734 } 2735 } 2736 2737 string.val = (unsigned char *)str; 2738 string.len = str_len; 2739 2740 if (argc < 3 || Z_TYPE_PP(z_len) 
== IS_NULL) { 2741 len = str_len; 2742 } else { 2743 convert_to_long_ex(z_len); 2744 len = Z_LVAL_PP(z_len); 2745 } 2746 2747 /* measures length */ 2748 mblen = 0; 2749 if (from < 0 || len < 0) { 2750 mblen = mbfl_strlen(&string); 2751 } 2752 2753 /* if "from" position is negative, count start position from the end 2754 * of the string 2755 */ 2756 if (from < 0) { 2757 from = mblen + from; 2758 if (from < 0) { 2759 from = 0; 2760 } 2761 } 2762 2763 /* if "length" position is negative, set it to the length 2764 * needed to stop that many chars from the end of the string 2765 */ 2766 if (len < 0) { 2767 len = (mblen - from) + len; 2768 if (len < 0) { 2769 len = 0; 2770 } 2771 } 2772 2773 if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING) 2774 && (from >= mbfl_strlen(&string))) { 2775 RETURN_FALSE; 2776 } 2777 2778 ret = mbfl_substr(&string, &result, from, len); 2779 if (NULL == ret) { 2780 RETURN_FALSE; 2781 } 2782 2783 RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ 2784} 2785/* }}} */ 2786 2787/* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]]) 2788 Returns part of a string */ 2789PHP_FUNCTION(mb_strcut) 2790{ 2791 size_t argc = ZEND_NUM_ARGS(); 2792 char *encoding; 2793 long from, len; 2794 int encoding_len; 2795 zval **z_len = NULL; 2796 mbfl_string string, result, *ret; 2797 2798 mbfl_string_init(&string); 2799 string.no_language = MBSTRG(language); 2800 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2801 2802 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", (char **)&string.val, (int **)&string.len, &from, &z_len, &encoding, &encoding_len) == FAILURE) { 2803 return; 2804 } 2805 2806 if (argc == 4) { 2807 string.no_encoding = mbfl_name2no_encoding(encoding); 2808 if (string.no_encoding == mbfl_no_encoding_invalid) { 2809 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding); 2810 RETURN_FALSE; 2811 } 2812 
} 2813 2814 if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) { 2815 len = string.len; 2816 } else { 2817 convert_to_long_ex(z_len); 2818 len = Z_LVAL_PP(z_len); 2819 } 2820 2821 /* if "from" position is negative, count start position from the end 2822 * of the string 2823 */ 2824 if (from < 0) { 2825 from = string.len + from; 2826 if (from < 0) { 2827 from = 0; 2828 } 2829 } 2830 2831 /* if "length" position is negative, set it to the length 2832 * needed to stop that many chars from the end of the string 2833 */ 2834 if (len < 0) { 2835 len = (string.len - from) + len; 2836 if (len < 0) { 2837 len = 0; 2838 } 2839 } 2840 2841 if ((unsigned int)from > string.len) { 2842 RETURN_FALSE; 2843 } 2844 2845 ret = mbfl_strcut(&string, &result, from, len); 2846 if (ret == NULL) { 2847 RETURN_FALSE; 2848 } 2849 2850 RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ 2851} 2852/* }}} */ 2853 2854/* {{{ proto int mb_strwidth(string str [, string encoding]) 2855 Gets terminal width of a string */ 2856PHP_FUNCTION(mb_strwidth) 2857{ 2858 int n; 2859 mbfl_string string; 2860 char *enc_name = NULL; 2861 int enc_name_len; 2862 2863 mbfl_string_init(&string); 2864 2865 string.no_language = MBSTRG(language); 2866 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2867 2868 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) { 2869 return; 2870 } 2871 2872 if (enc_name != NULL) { 2873 string.no_encoding = mbfl_name2no_encoding(enc_name); 2874 if (string.no_encoding == mbfl_no_encoding_invalid) { 2875 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name); 2876 RETURN_FALSE; 2877 } 2878 } 2879 2880 n = mbfl_strwidth(&string); 2881 if (n >= 0) { 2882 RETVAL_LONG(n); 2883 } else { 2884 RETVAL_FALSE; 2885 } 2886} 2887/* }}} */ 2888 2889/* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string 
encoding]]) 2890 Trim the string in terminal width */ 2891PHP_FUNCTION(mb_strimwidth) 2892{ 2893 char *str, *trimmarker, *encoding; 2894 long from, width; 2895 int str_len, trimmarker_len, encoding_len; 2896 mbfl_string string, result, marker, *ret; 2897 2898 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) { 2899 return; 2900 } 2901 2902 mbfl_string_init(&string); 2903 mbfl_string_init(&marker); 2904 string.no_language = MBSTRG(language); 2905 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2906 marker.no_language = MBSTRG(language); 2907 marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 2908 marker.val = NULL; 2909 marker.len = 0; 2910 2911 if (ZEND_NUM_ARGS() == 5) { 2912 string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding); 2913 if (string.no_encoding == mbfl_no_encoding_invalid) { 2914 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding); 2915 RETURN_FALSE; 2916 } 2917 } 2918 2919 string.val = (unsigned char *)str; 2920 string.len = str_len; 2921 2922 if (from < 0 || from > str_len) { 2923 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range"); 2924 RETURN_FALSE; 2925 } 2926 2927 if (width < 0) { 2928 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value"); 2929 RETURN_FALSE; 2930 } 2931 2932 if (ZEND_NUM_ARGS() >= 4) { 2933 marker.val = (unsigned char *)trimmarker; 2934 marker.len = trimmarker_len; 2935 } 2936 2937 ret = mbfl_strimwidth(&string, &marker, &result, from, width); 2938 2939 if (ret == NULL) { 2940 RETURN_FALSE; 2941 } 2942 2943 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ 2944} 2945/* }}} */ 2946 2947/* {{{ MBSTRING_API char *php_mb_convert_encoding() */ 2948MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const 
char *_from_encodings, size_t *output_len TSRMLS_DC) 2949{ 2950 mbfl_string string, result, *ret; 2951 const mbfl_encoding *from_encoding, *to_encoding; 2952 mbfl_buffer_converter *convd; 2953 size_t size; 2954 const mbfl_encoding **list; 2955 char *output=NULL; 2956 2957 if (output_len) { 2958 *output_len = 0; 2959 } 2960 if (!input) { 2961 return NULL; 2962 } 2963 /* new encoding */ 2964 if (_to_encoding && strlen(_to_encoding)) { 2965 to_encoding = mbfl_name2encoding(_to_encoding); 2966 if (!to_encoding) { 2967 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding); 2968 return NULL; 2969 } 2970 } else { 2971 to_encoding = MBSTRG(current_internal_encoding); 2972 } 2973 2974 /* initialize string */ 2975 mbfl_string_init(&string); 2976 mbfl_string_init(&result); 2977 from_encoding = MBSTRG(current_internal_encoding); 2978 string.no_encoding = from_encoding->no_encoding; 2979 string.no_language = MBSTRG(language); 2980 string.val = (unsigned char *)input; 2981 string.len = length; 2982 2983 /* pre-conversion encoding */ 2984 if (_from_encodings) { 2985 list = NULL; 2986 size = 0; 2987 php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC); 2988 if (size == 1) { 2989 from_encoding = *list; 2990 string.no_encoding = from_encoding->no_encoding; 2991 } else if (size > 1) { 2992 /* auto detect */ 2993 from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection)); 2994 if (from_encoding) { 2995 string.no_encoding = from_encoding->no_encoding; 2996 } else { 2997 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding"); 2998 from_encoding = &mbfl_encoding_pass; 2999 to_encoding = from_encoding; 3000 string.no_encoding = from_encoding->no_encoding; 3001 } 3002 } else { 3003 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified"); 3004 } 3005 if (list != NULL) { 3006 efree((void *)list); 3007 } 3008 } 3009 3010 /* initialize 
converter */ 3011 convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len); 3012 if (convd == NULL) { 3013 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter"); 3014 return NULL; 3015 } 3016 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); 3017 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); 3018 3019 /* do it */ 3020 ret = mbfl_buffer_converter_feed_result(convd, &string, &result); 3021 if (ret) { 3022 if (output_len) { 3023 *output_len = ret->len; 3024 } 3025 output = (char *)ret->val; 3026 } 3027 3028 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); 3029 mbfl_buffer_converter_delete(convd); 3030 return output; 3031} 3032/* }}} */ 3033 3034/* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding]) 3035 Returns converted string in desired encoding */ 3036PHP_FUNCTION(mb_convert_encoding) 3037{ 3038 char *arg_str, *arg_new; 3039 int str_len, new_len; 3040 zval *arg_old; 3041 int i; 3042 size_t size, l, n; 3043 char *_from_encodings = NULL, *ret, *s_free = NULL; 3044 3045 zval **hash_entry; 3046 HashTable *target_hash; 3047 3048 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) { 3049 return; 3050 } 3051 3052 if (ZEND_NUM_ARGS() == 3) { 3053 switch (Z_TYPE_P(arg_old)) { 3054 case IS_ARRAY: 3055 target_hash = Z_ARRVAL_P(arg_old); 3056 zend_hash_internal_pointer_reset(target_hash); 3057 i = zend_hash_num_elements(target_hash); 3058 _from_encodings = NULL; 3059 3060 while (i > 0) { 3061 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) { 3062 break; 3063 } 3064 3065 convert_to_string_ex(hash_entry); 3066 3067 if ( _from_encodings) { 3068 l = strlen(_from_encodings); 3069 n = strlen(Z_STRVAL_PP(hash_entry)); 3070 _from_encodings = erealloc(_from_encodings, l+n+2); 3071 strcpy(_from_encodings+l, 
","); 3072 strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry)); 3073 } else { 3074 _from_encodings = estrdup(Z_STRVAL_PP(hash_entry)); 3075 } 3076 3077 zend_hash_move_forward(target_hash); 3078 i--; 3079 } 3080 3081 if (_from_encodings != NULL && !strlen(_from_encodings)) { 3082 efree(_from_encodings); 3083 _from_encodings = NULL; 3084 } 3085 s_free = _from_encodings; 3086 break; 3087 default: 3088 convert_to_string(arg_old); 3089 _from_encodings = Z_STRVAL_P(arg_old); 3090 break; 3091 } 3092 } 3093 3094 /* new encoding */ 3095 ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC); 3096 if (ret != NULL) { 3097 RETVAL_STRINGL(ret, size, 0); /* the string is already strdup()'ed */ 3098 } else { 3099 RETVAL_FALSE; 3100 } 3101 3102 if ( s_free) { 3103 efree(s_free); 3104 } 3105} 3106/* }}} */ 3107 3108/* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding]) 3109 Returns a case-folded version of sourcestring */ 3110PHP_FUNCTION(mb_convert_case) 3111{ 3112 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; 3113 char *str; 3114 int str_len, from_encoding_len; 3115 long case_mode = 0; 3116 char *newstr; 3117 size_t ret_len; 3118 3119 RETVAL_FALSE; 3120 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len, 3121 &case_mode, &from_encoding, &from_encoding_len) == FAILURE) 3122 RETURN_FALSE; 3123 3124 newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC); 3125 3126 if (newstr) { 3127 RETVAL_STRINGL(newstr, ret_len, 0); 3128 } 3129} 3130/* }}} */ 3131 3132/* {{{ proto string mb_strtoupper(string sourcestring [, string encoding]) 3133 * Returns a uppercased version of sourcestring 3134 */ 3135PHP_FUNCTION(mb_strtoupper) 3136{ 3137 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; 3138 char *str; 3139 int str_len, from_encoding_len; 3140 char *newstr; 3141 size_t ret_len; 3142 3143 if 
(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len, 3144 &from_encoding, &from_encoding_len) == FAILURE) { 3145 return; 3146 } 3147 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC); 3148 3149 if (newstr) { 3150 RETURN_STRINGL(newstr, ret_len, 0); 3151 } 3152 RETURN_FALSE; 3153} 3154/* }}} */ 3155 3156/* {{{ proto string mb_strtolower(string sourcestring [, string encoding]) 3157 * Returns a lowercased version of sourcestring 3158 */ 3159PHP_FUNCTION(mb_strtolower) 3160{ 3161 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name; 3162 char *str; 3163 int str_len, from_encoding_len; 3164 char *newstr; 3165 size_t ret_len; 3166 3167 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len, 3168 &from_encoding, &from_encoding_len) == FAILURE) { 3169 return; 3170 } 3171 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC); 3172 3173 if (newstr) { 3174 RETURN_STRINGL(newstr, ret_len, 0); 3175 } 3176 RETURN_FALSE; 3177} 3178/* }}} */ 3179 3180/* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]]) 3181 Encodings of the given string is returned (as a string) */ 3182PHP_FUNCTION(mb_detect_encoding) 3183{ 3184 char *str; 3185 int str_len; 3186 zend_bool strict=0; 3187 zval *encoding_list; 3188 3189 mbfl_string string; 3190 const mbfl_encoding *ret; 3191 const mbfl_encoding **elist, **list; 3192 size_t size; 3193 3194 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) { 3195 return; 3196 } 3197 3198 /* make encoding list */ 3199 list = NULL; 3200 size = 0; 3201 if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) { 3202 switch (Z_TYPE_P(encoding_list)) { 3203 case IS_ARRAY: 3204 if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) { 3205 if (list) { 3206 
efree(list); 3207 list = NULL; 3208 size = 0; 3209 } 3210 } 3211 break; 3212 default: 3213 convert_to_string(encoding_list); 3214 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) { 3215 if (list) { 3216 efree(list); 3217 list = NULL; 3218 size = 0; 3219 } 3220 } 3221 break; 3222 } 3223 if (size <= 0) { 3224 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument"); 3225 } 3226 } 3227 3228 if (ZEND_NUM_ARGS() < 3) { 3229 strict = (zend_bool)MBSTRG(strict_detection); 3230 } 3231 3232 if (size > 0 && list != NULL) { 3233 elist = list; 3234 } else { 3235 elist = MBSTRG(current_detect_order_list); 3236 size = MBSTRG(current_detect_order_list_size); 3237 } 3238 3239 mbfl_string_init(&string); 3240 string.no_language = MBSTRG(language); 3241 string.val = (unsigned char *)str; 3242 string.len = str_len; 3243 ret = mbfl_identify_encoding2(&string, elist, size, strict); 3244 3245 if (list != NULL) { 3246 efree((void *)list); 3247 } 3248 3249 if (ret == NULL) { 3250 RETURN_FALSE; 3251 } 3252 3253 RETVAL_STRING((char *)ret->name, 1); 3254} 3255/* }}} */ 3256 3257/* {{{ proto mixed mb_list_encodings() 3258 Returns an array of all supported entity encodings */ 3259PHP_FUNCTION(mb_list_encodings) 3260{ 3261 const mbfl_encoding **encodings; 3262 const mbfl_encoding *encoding; 3263 int i; 3264 3265 array_init(return_value); 3266 i = 0; 3267 encodings = mbfl_get_supported_encodings(); 3268 while ((encoding = encodings[i++]) != NULL) { 3269 add_next_index_string(return_value, (char *) encoding->name, 1); 3270 } 3271} 3272/* }}} */ 3273 3274/* {{{ proto array mb_encoding_aliases(string encoding) 3275 Returns an array of the aliases of a given encoding name */ 3276PHP_FUNCTION(mb_encoding_aliases) 3277{ 3278 const mbfl_encoding *encoding; 3279 char *name = NULL; 3280 int name_len; 3281 3282 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) { 3283 RETURN_FALSE; 3284 } 
3285 3286 encoding = mbfl_name2encoding(name); 3287 if (!encoding) { 3288 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name); 3289 RETURN_FALSE; 3290 } 3291 3292 array_init(return_value); 3293 if (encoding->aliases != NULL) { 3294 const char **alias; 3295 for (alias = *encoding->aliases; *alias; ++alias) { 3296 add_next_index_string(return_value, (char *)*alias, 1); 3297 } 3298 } 3299} 3300/* }}} */ 3301 3302/* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]]) 3303 Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */ 3304PHP_FUNCTION(mb_encode_mimeheader) 3305{ 3306 enum mbfl_no_encoding charset, transenc; 3307 mbfl_string string, result, *ret; 3308 char *charset_name = NULL; 3309 int charset_name_len; 3310 char *trans_enc_name = NULL; 3311 int trans_enc_name_len; 3312 char *linefeed = "\r\n"; 3313 int linefeed_len; 3314 long indent = 0; 3315 3316 mbfl_string_init(&string); 3317 string.no_language = MBSTRG(language); 3318 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 3319 3320 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) { 3321 return; 3322 } 3323 3324 charset = mbfl_no_encoding_pass; 3325 transenc = mbfl_no_encoding_base64; 3326 3327 if (charset_name != NULL) { 3328 charset = mbfl_name2no_encoding(charset_name); 3329 if (charset == mbfl_no_encoding_invalid) { 3330 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name); 3331 RETURN_FALSE; 3332 } 3333 } else { 3334 const mbfl_language *lang = mbfl_no2language(MBSTRG(language)); 3335 if (lang != NULL) { 3336 charset = lang->mail_charset; 3337 transenc = lang->mail_header_encoding; 3338 } 3339 } 3340 3341 if (trans_enc_name != NULL) { 3342 if 
(*trans_enc_name == 'B' || *trans_enc_name == 'b') { 3343 transenc = mbfl_no_encoding_base64; 3344 } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') { 3345 transenc = mbfl_no_encoding_qprint; 3346 } 3347 } 3348 3349 mbfl_string_init(&result); 3350 ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent); 3351 if (ret != NULL) { 3352 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ 3353 } else { 3354 RETVAL_FALSE; 3355 } 3356} 3357/* }}} */ 3358 3359/* {{{ proto string mb_decode_mimeheader(string string) 3360 Decodes the MIME "encoded-word" in the string */ 3361PHP_FUNCTION(mb_decode_mimeheader) 3362{ 3363 mbfl_string string, result, *ret; 3364 3365 mbfl_string_init(&string); 3366 string.no_language = MBSTRG(language); 3367 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 3368 3369 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) { 3370 return; 3371 } 3372 3373 mbfl_string_init(&result); 3374 ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding); 3375 if (ret != NULL) { 3376 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ 3377 } else { 3378 RETVAL_FALSE; 3379 } 3380} 3381/* }}} */ 3382 3383/* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding]) 3384 Conversion between full-width character and half-width character (Japanese) */ 3385PHP_FUNCTION(mb_convert_kana) 3386{ 3387 int opt, i; 3388 mbfl_string string, result, *ret; 3389 char *optstr = NULL; 3390 int optstr_len; 3391 char *encname = NULL; 3392 int encname_len; 3393 3394 mbfl_string_init(&string); 3395 string.no_language = MBSTRG(language); 3396 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 3397 3398 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, 
&encname_len) == FAILURE) { 3399 return; 3400 } 3401 3402 /* option */ 3403 if (optstr != NULL) { 3404 char *p = optstr; 3405 int n = optstr_len; 3406 i = 0; 3407 opt = 0; 3408 while (i < n) { 3409 i++; 3410 switch (*p++) { 3411 case 'A': 3412 opt |= 0x1; 3413 break; 3414 case 'a': 3415 opt |= 0x10; 3416 break; 3417 case 'R': 3418 opt |= 0x2; 3419 break; 3420 case 'r': 3421 opt |= 0x20; 3422 break; 3423 case 'N': 3424 opt |= 0x4; 3425 break; 3426 case 'n': 3427 opt |= 0x40; 3428 break; 3429 case 'S': 3430 opt |= 0x8; 3431 break; 3432 case 's': 3433 opt |= 0x80; 3434 break; 3435 case 'K': 3436 opt |= 0x100; 3437 break; 3438 case 'k': 3439 opt |= 0x1000; 3440 break; 3441 case 'H': 3442 opt |= 0x200; 3443 break; 3444 case 'h': 3445 opt |= 0x2000; 3446 break; 3447 case 'V': 3448 opt |= 0x800; 3449 break; 3450 case 'C': 3451 opt |= 0x10000; 3452 break; 3453 case 'c': 3454 opt |= 0x20000; 3455 break; 3456 case 'M': 3457 opt |= 0x100000; 3458 break; 3459 case 'm': 3460 opt |= 0x200000; 3461 break; 3462 } 3463 } 3464 } else { 3465 opt = 0x900; 3466 } 3467 3468 /* encoding */ 3469 if (encname != NULL) { 3470 string.no_encoding = mbfl_name2no_encoding(encname); 3471 if (string.no_encoding == mbfl_no_encoding_invalid) { 3472 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname); 3473 RETURN_FALSE; 3474 } 3475 } 3476 3477 ret = mbfl_ja_jp_hantozen(&string, &result, opt); 3478 if (ret != NULL) { 3479 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */ 3480 } else { 3481 RETVAL_FALSE; 3482 } 3483} 3484/* }}} */ 3485 3486#define PHP_MBSTR_STACK_BLOCK_SIZE 32 3487 3488/* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...]) 3489 Converts the string resource in variables to desired encoding */ 3490PHP_FUNCTION(mb_convert_variables) 3491{ 3492 zval ***args, ***stack, **var, **hash_entry, **zfrom_enc; 3493 HashTable *target_hash; 3494 mbfl_string string, result, *ret; 3495 const 
mbfl_encoding *from_encoding, *to_encoding; 3496 mbfl_encoding_detector *identd; 3497 mbfl_buffer_converter *convd; 3498 int n, to_enc_len, argc, stack_level, stack_max; 3499 size_t elistsz; 3500 const mbfl_encoding **elist; 3501 char *to_enc; 3502 void *ptmp; 3503 3504 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) { 3505 return; 3506 } 3507 3508 /* new encoding */ 3509 to_encoding = mbfl_name2encoding(to_enc); 3510 if (!to_encoding) { 3511 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc); 3512 efree(args); 3513 RETURN_FALSE; 3514 } 3515 3516 /* initialize string */ 3517 mbfl_string_init(&string); 3518 mbfl_string_init(&result); 3519 from_encoding = MBSTRG(current_internal_encoding); 3520 string.no_encoding = from_encoding->no_encoding; 3521 string.no_language = MBSTRG(language); 3522 3523 /* pre-conversion encoding */ 3524 elist = NULL; 3525 elistsz = 0; 3526 switch (Z_TYPE_PP(zfrom_enc)) { 3527 case IS_ARRAY: 3528 php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC); 3529 break; 3530 default: 3531 convert_to_string_ex(zfrom_enc); 3532 php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC); 3533 break; 3534 } 3535 if (elistsz <= 0) { 3536 from_encoding = &mbfl_encoding_pass; 3537 } else if (elistsz == 1) { 3538 from_encoding = *elist; 3539 } else { 3540 /* auto detect */ 3541 from_encoding = NULL; 3542 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE; 3543 stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0); 3544 stack_level = 0; 3545 identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection)); 3546 if (identd != NULL) { 3547 n = 0; 3548 while (n < argc || stack_level > 0) { 3549 if (stack_level <= 0) { 3550 var = args[n++]; 3551 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) { 3552 target_hash = HASH_OF(*var); 3553 if (target_hash != NULL) { 3554 
zend_hash_internal_pointer_reset(target_hash); 3555 } 3556 } 3557 } else { 3558 stack_level--; 3559 var = stack[stack_level]; 3560 } 3561 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) { 3562 target_hash = HASH_OF(*var); 3563 if (target_hash != NULL) { 3564 while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) { 3565 zend_hash_move_forward(target_hash); 3566 if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) { 3567 if (stack_level >= stack_max) { 3568 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE; 3569 ptmp = erealloc(stack, sizeof(zval **)*stack_max); 3570 stack = (zval ***)ptmp; 3571 } 3572 stack[stack_level] = var; 3573 stack_level++; 3574 var = hash_entry; 3575 target_hash = HASH_OF(*var); 3576 if (target_hash != NULL) { 3577 zend_hash_internal_pointer_reset(target_hash); 3578 continue; 3579 } 3580 } else if (Z_TYPE_PP(hash_entry) == IS_STRING) { 3581 string.val = (unsigned char *)Z_STRVAL_PP(hash_entry); 3582 string.len = Z_STRLEN_PP(hash_entry); 3583 if (mbfl_encoding_detector_feed(identd, &string)) { 3584 goto detect_end; /* complete detecting */ 3585 } 3586 } 3587 } 3588 } 3589 } else if (Z_TYPE_PP(var) == IS_STRING) { 3590 string.val = (unsigned char *)Z_STRVAL_PP(var); 3591 string.len = Z_STRLEN_PP(var); 3592 if (mbfl_encoding_detector_feed(identd, &string)) { 3593 goto detect_end; /* complete detecting */ 3594 } 3595 } 3596 } 3597detect_end: 3598 from_encoding = mbfl_encoding_detector_judge2(identd); 3599 mbfl_encoding_detector_delete(identd); 3600 } 3601 efree(stack); 3602 3603 if (!from_encoding) { 3604 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding"); 3605 from_encoding = &mbfl_encoding_pass; 3606 } 3607 } 3608 if (elist != NULL) { 3609 efree((void *)elist); 3610 } 3611 /* create converter */ 3612 convd = NULL; 3613 if (from_encoding != &mbfl_encoding_pass) { 3614 convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0); 3615 if (convd == NULL) { 3616 
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter"); 3617 RETURN_FALSE; 3618 } 3619 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); 3620 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); 3621 } 3622 3623 /* convert */ 3624 if (convd != NULL) { 3625 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE; 3626 stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0); 3627 stack_level = 0; 3628 n = 0; 3629 while (n < argc || stack_level > 0) { 3630 if (stack_level <= 0) { 3631 var = args[n++]; 3632 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) { 3633 target_hash = HASH_OF(*var); 3634 if (target_hash != NULL) { 3635 zend_hash_internal_pointer_reset(target_hash); 3636 } 3637 } 3638 } else { 3639 stack_level--; 3640 var = stack[stack_level]; 3641 } 3642 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) { 3643 target_hash = HASH_OF(*var); 3644 if (target_hash != NULL) { 3645 while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) { 3646 zend_hash_move_forward(target_hash); 3647 if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) { 3648 if (stack_level >= stack_max) { 3649 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE; 3650 ptmp = erealloc(stack, sizeof(zval **)*stack_max); 3651 stack = (zval ***)ptmp; 3652 } 3653 stack[stack_level] = var; 3654 stack_level++; 3655 var = hash_entry; 3656 SEPARATE_ZVAL(hash_entry); 3657 target_hash = HASH_OF(*var); 3658 if (target_hash != NULL) { 3659 zend_hash_internal_pointer_reset(target_hash); 3660 continue; 3661 } 3662 } else if (Z_TYPE_PP(hash_entry) == IS_STRING) { 3663 string.val = (unsigned char *)Z_STRVAL_PP(hash_entry); 3664 string.len = Z_STRLEN_PP(hash_entry); 3665 ret = mbfl_buffer_converter_feed_result(convd, &string, &result); 3666 if (ret != NULL) { 3667 if (Z_REFCOUNT_PP(hash_entry) > 1) { 3668 Z_DELREF_PP(hash_entry); 3669 MAKE_STD_ZVAL(*hash_entry); 3670 } else { 
3671 zval_dtor(*hash_entry); 3672 } 3673 ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0); 3674 } 3675 } 3676 } 3677 } 3678 } else if (Z_TYPE_PP(var) == IS_STRING) { 3679 string.val = (unsigned char *)Z_STRVAL_PP(var); 3680 string.len = Z_STRLEN_PP(var); 3681 ret = mbfl_buffer_converter_feed_result(convd, &string, &result); 3682 if (ret != NULL) { 3683 zval_dtor(*var); 3684 ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0); 3685 } 3686 } 3687 } 3688 efree(stack); 3689 3690 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); 3691 mbfl_buffer_converter_delete(convd); 3692 } 3693 3694 efree(args); 3695 3696 if (from_encoding) { 3697 RETURN_STRING(from_encoding->name, 1); 3698 } else { 3699 RETURN_FALSE; 3700 } 3701} 3702/* }}} */ 3703 3704/* {{{ HTML numeric entity */ 3705/* {{{ static void php_mb_numericentity_exec() */ 3706static void 3707php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type) 3708{ 3709 char *str, *encoding; 3710 int str_len, encoding_len; 3711 zval *zconvmap, **hash_entry; 3712 HashTable *target_hash; 3713 size_t argc = ZEND_NUM_ARGS(); 3714 int i, *convmap, *mapelm, mapsize=0; 3715 zend_bool is_hex = 0; 3716 mbfl_string string, result, *ret; 3717 enum mbfl_no_encoding no_encoding; 3718 3719 if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) { 3720 return; 3721 } 3722 3723 mbfl_string_init(&string); 3724 string.no_language = MBSTRG(language); 3725 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 3726 string.val = (unsigned char *)str; 3727 string.len = str_len; 3728 3729 /* encoding */ 3730 if ((argc == 3 || argc == 4) && encoding_len > 0) { 3731 no_encoding = mbfl_name2no_encoding(encoding); 3732 if (no_encoding == mbfl_no_encoding_invalid) { 3733 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding); 3734 RETURN_FALSE; 3735 } else { 3736 string.no_encoding = no_encoding; 3737 } 3738 } 3739 3740 if (argc 
== 4) {
		if (type == 0 && is_hex) {
			type = 2; /* output in hex format */
		}
	}

	/* conversion map */
	convmap = NULL;
	if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
		target_hash = Z_ARRVAL_P(zconvmap);
		zend_hash_internal_pointer_reset(target_hash);
		i = zend_hash_num_elements(target_hash);
		if (i > 0) {
			/* copy every array element, coerced to long, into a flat int array */
			convmap = (int *)safe_emalloc(i, sizeof(int), 0);
			mapelm = convmap;
			mapsize = 0;
			while (i > 0) {
				if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
					break;
				}
				convert_to_long_ex(hash_entry);
				*mapelm++ = Z_LVAL_PP(hash_entry);
				mapsize++;
				i--;
				zend_hash_move_forward(target_hash);
			}
		}
	}
	/* a non-array or empty map yields FALSE */
	if (convmap == NULL) {
		RETURN_FALSE;
	}
	/* the map is consumed in groups of four ints; a trailing partial group is dropped */
	mapsize /= 4;

	ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
	if (ret != NULL) {
		RETVAL_STRINGL((char *)ret->val, ret->len, 0);
	} else {
		RETVAL_FALSE;
	}
	efree((void *)convmap);
}
/* }}} */

/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
   Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)
{
	/* type 0 selects encoding; the shared worker switches to type 2 (hex)
	 * when four arguments are given and is_hex is true */
	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
/* }}} */

/* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
   Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)
{
	/* type 1 selects decoding in the shared worker */
	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */
/* }}} */

/* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
 *  Sends an email message with MIME scheme
 */

/* When str[pos] starts a CRLF that is followed by a space or tab, advance
 * pos over the CRLF and over all but the last of the following spaces/tabs,
 * then `continue` the enclosing loop. Must be used inside a loop; both
 * arguments are evaluated several times. */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)										\
	if (str[pos] == '\r' && str[pos + 1] == '\n' &&
(str[pos + 2] == ' ' || str[pos + 2] == '\t')) {	\
		pos += 2;											\
		while (str[pos + 1] == ' ' || str[pos + 1] == '\t') {							\
			pos++;										\
		}												\
		continue;											\
	}

/* Replaces every NUL byte in the `len` bytes starting at `str` with a space.
 * Relies on the caller having `char *pp, *ee;` in scope. */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)			\
	pp = str;					\
	ee = pp + len;					\
	while ((pp = memchr(pp, '\0', (ee - pp)))) {	\
		*pp = ' ';				\
	}						\

/* Adds one character to the local smart_str `token`. When token.a is 0 the
 * character is not stored and only token.len is incremented (presumably
 * token.c still points into the header text being scanned -- confirm);
 * otherwise the character is appended to token's own buffer. */
#define APPEND_ONE_CHAR(ch) do { \
	if (token.a > 0) { \
		smart_str_appendc(&token, ch); \
	} else {\
		token.len++; \
	} \
} while (0)

/* If the smart_str has no allocation recorded (a == 0), gives it its own
 * buffer: the capacity is the smallest power of two that is >= len, and
 * the first len bytes of the old data are copied in. */
#define SEPARATE_SMART_STR(str) do {\
	if ((str)->a == 0) { \
		char *tmp_ptr; \
		(str)->a = 1; \
		while ((str)->a < (str)->len) { \
			(str)->a <<= 1; \
		} \
		tmp_ptr = emalloc((str)->a + 1); \
		memcpy(tmp_ptr, (str)->c, (str)->len); \
		(str)->c = tmp_ptr; \
	} \
} while (0)

/* Frees a smart_str only when it has an allocation recorded (a > 0), so
 * strings that merely point at borrowed memory are left alone. */
static void my_smart_str_dtor(smart_str *s)
{
	if (s->a > 0) {
		smart_str_free(s);
	}
}

/*
 * Scans `str_len` bytes of header text in `str` and stores each
 * "Name: value" field in `ht`. The key is the field name upper-cased with
 * php_strtoupper(); the value is a smart_str copied into the table.
 * Scanning stops at an empty line. Returns the final parser state.
 */
static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
{
	const char *ps;
	size_t icnt;
	int state = 0;
	int crlf_state = -1;

	smart_str token = { 0, 0, 0 };
	smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };

	ps = str;
	icnt = str_len;

	/*
	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
	 *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
	 *      state  0            1           2          3
	 *
	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
	 *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
	 * crlf_state -1                       0                     1 -1
	 *
	 */

	while (icnt > 0) {
		switch (*ps) {
			case ':':
				/* a pending lone CR is emitted before anything else */
				if (crlf_state == 1) {
					APPEND_ONE_CHAR('\r');
				}

				/* the first ':' ends the field name; later ones belong to the value */
				if (state == 0 || state == 1) {
					fld_name = token;

					state = 2;
				} else {
					APPEND_ONE_CHAR(*ps);
				}

				crlf_state = 0;
				break;

			case '\n':
				/* a newline right after a newline is an empty line: stop */
				if (crlf_state == -1) {
					goto out;
				}
				crlf_state = -1;
				break;

			case '\r':
				if (crlf_state == 1) {
					APPEND_ONE_CHAR('\r');
				} else {
					crlf_state = 1;
				}
				break;

			case ' ': case '\t':
				if (crlf_state == -1) {
					if (state == 3) {
						/* continuing from the previous line */
						SEPARATE_SMART_STR(&token);
						state = 4;
					} else {
						/* simply skipping this new line */
						state = 5;
					}
				} else {
					if (crlf_state == 1) {
						APPEND_ONE_CHAR('\r');
					}
					if (state == 1 || state == 3) {
						APPEND_ONE_CHAR(*ps);
					}
				}
				crlf_state = 0;
				break;

			default:
				switch (state) {
					case 0:
						/* first character of a field name */
						token.c = (char *)ps;
						token.len = 0;
						token.a = 0;
						state = 1;
						break;

					case 2:
						if (crlf_state != -1) {
							/* first character of the field value */
							token.c = (char *)ps;
							token.len = 0;
							token.a = 0;

							state = 3;
							break;
						}
						/* break is missing intentionally */

					case 3:
						if (crlf_state == -1) {
							/* a new line starts here: commit the finished field */
							fld_val = token;

							if (fld_name.c != NULL && fld_val.c != NULL) {
								char *dummy;

								/* FIXME: some locale free implementation is
								 * really required here,,, */
								SEPARATE_SMART_STR(&fld_name);
								php_strtoupper(fld_name.c, fld_name.len);

								zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);

								my_smart_str_dtor(&fld_name);
							}

							memset(&fld_name, 0, sizeof(smart_str));
							memset(&fld_val, 0, sizeof(smart_str));

							/* the current character begins the next field name */
							token.c = (char *)ps;
							token.len = 0;
							token.a = 0;

							state = 1;
						}
						break;

					case 4:
						/* a folded line is joined to the value with one space */
						APPEND_ONE_CHAR(' ');
						state = 3;
						break;
				}

				if (crlf_state == 1) {
					APPEND_ONE_CHAR('\r');
				}

				APPEND_ONE_CHAR(*ps);

				crlf_state = 0;
				break;
		}
		ps++, icnt--;
	}
out:
	/* input ended right after "Name:": treat the value as empty */
	if (state == 2) {
		token.c = "";
		token.len = 0;
		token.a = 0;

		state = 3;
	}
	/* commit the last field, which had no following line to trigger it */
	if (state == 3) {
		fld_val = token;

		if (fld_name.c != NULL && fld_val.c != NULL) {
			void *dummy;

			/* FIXME: some locale free implementation is
			 * really required here,,, */
			SEPARATE_SMART_STR(&fld_name);
			php_strtoupper(fld_name.c, fld_name.len);

			zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);

			my_smart_str_dtor(&fld_name);
		}
	}
	return state;
}

PHP_FUNCTION(mb_send_mail)
{
	int n;
	char *to = NULL;
	int to_len;
	char *message = NULL;
	int message_len;
	char *headers = NULL;
	int headers_len;
	char *subject = NULL;
	int subject_len;
	char *extra_cmd = NULL;
	int extra_cmd_len;
	int i;
	char *to_r = NULL;
	char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
	struct {
		int cnt_type:1;
		int cnt_trans_enc:1;
	} suppressed_hdrs = { 0, 0 };

	char *message_buf = NULL, *subject_buf = NULL, *p;
	mbfl_string orig_str, conv_str;
	mbfl_string *pstr;	/* pointer to mbfl string for return value */
	enum mbfl_no_encoding
	    tran_cs,	/* transfer text charset */
	    head_enc,	/* header transfer encoding */
	    body_enc;	/* body transfer encoding */
	mbfl_memory_device device;	/* automatically growing buffer for additional headers */
	const mbfl_language *lang;
	int err = 0;
	HashTable ht_headers;
	smart_str *s;
	extern void mbfl_memory_device_unput(mbfl_memory_device *device);
	char *pp, *ee;

	/* initialize */
	mbfl_memory_device_init(&device, 0, 0);
	mbfl_string_init(&orig_str);
	mbfl_string_init(&conv_str);

	/* character-set, transfer-encoding */
	tran_cs = mbfl_no_encoding_utf8;
	head_enc = mbfl_no_encoding_base64;
	body_enc = mbfl_no_encoding_base64;
	lang = mbfl_no2language(MBSTRG(language));
	if (lang != NULL) {
		tran_cs = lang->mail_charset;
		head_enc =
lang->mail_header_encoding; 4067 body_enc = lang->mail_body_encoding; 4068 } 4069 4070 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) { 4071 return; 4072 } 4073 4074 /* ASCIIZ check */ 4075 MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len); 4076 MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len); 4077 MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len); 4078 if (headers) { 4079 MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len); 4080 } 4081 if (extra_cmd) { 4082 MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len); 4083 } 4084 4085 zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0); 4086 4087 if (headers != NULL) { 4088 _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len); 4089 } 4090 4091 if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) { 4092 char *tmp; 4093 char *param_name; 4094 char *charset = NULL; 4095 4096 SEPARATE_SMART_STR(s); 4097 smart_str_0(s); 4098 4099 p = strchr(s->c, ';'); 4100 4101 if (p != NULL) { 4102 /* skipping the padded spaces */ 4103 do { 4104 ++p; 4105 } while (*p == ' ' || *p == '\t'); 4106 4107 if (*p != '\0') { 4108 if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) { 4109 if (strcasecmp(param_name, "charset") == 0) { 4110 enum mbfl_no_encoding _tran_cs = tran_cs; 4111 4112 charset = php_strtok_r(NULL, "= \"", &tmp); 4113 if (charset != NULL) { 4114 _tran_cs = mbfl_name2no_encoding(charset); 4115 } 4116 4117 if (_tran_cs == mbfl_no_encoding_invalid) { 4118 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset); 4119 _tran_cs = mbfl_no_encoding_ascii; 4120 } 4121 tran_cs = _tran_cs; 4122 } 4123 } 4124 } 4125 } 4126 suppressed_hdrs.cnt_type = 1; 4127 } 4128 4129 if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void 
**)&s) == SUCCESS) { 4130 enum mbfl_no_encoding _body_enc; 4131 SEPARATE_SMART_STR(s); 4132 smart_str_0(s); 4133 4134 _body_enc = mbfl_name2no_encoding(s->c); 4135 switch (_body_enc) { 4136 case mbfl_no_encoding_base64: 4137 case mbfl_no_encoding_7bit: 4138 case mbfl_no_encoding_8bit: 4139 body_enc = _body_enc; 4140 break; 4141 4142 default: 4143 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c); 4144 body_enc = mbfl_no_encoding_8bit; 4145 break; 4146 } 4147 suppressed_hdrs.cnt_trans_enc = 1; 4148 } 4149 4150 /* To: */ 4151 if (to != NULL) { 4152 if (to_len > 0) { 4153 to_r = estrndup(to, to_len); 4154 for (; to_len; to_len--) { 4155 if (!isspace((unsigned char) to_r[to_len - 1])) { 4156 break; 4157 } 4158 to_r[to_len - 1] = '\0'; 4159 } 4160 for (i = 0; to_r[i]; i++) { 4161 if (iscntrl((unsigned char) to_r[i])) { 4162 /* According to RFC 822, section 3.1.1 long headers may be separated into 4163 * parts using CRLF followed at least one linear-white-space character ('\t' or ' '). 4164 * To prevent these separators from being replaced with a space, we use the 4165 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them. 
4166 */ 4167 SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i); 4168 to_r[i] = ' '; 4169 } 4170 } 4171 } else { 4172 to_r = to; 4173 } 4174 } else { 4175 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field"); 4176 err = 1; 4177 } 4178 4179 /* Subject: */ 4180 if (subject != NULL && subject_len >= 0) { 4181 orig_str.no_language = MBSTRG(language); 4182 orig_str.val = (unsigned char *)subject; 4183 orig_str.len = subject_len; 4184 orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 4185 if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) { 4186 const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); 4187 orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid; 4188 } 4189 pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]")); 4190 if (pstr != NULL) { 4191 subject_buf = subject = (char *)pstr->val; 4192 } 4193 } else { 4194 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field"); 4195 err = 1; 4196 } 4197 4198 /* message body */ 4199 if (message != NULL) { 4200 orig_str.no_language = MBSTRG(language); 4201 orig_str.val = (unsigned char *)message; 4202 orig_str.len = (unsigned int)message_len; 4203 orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 4204 4205 if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) { 4206 const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); 4207 orig_str.no_encoding = encoding ? 
encoding->no_encoding: mbfl_no_encoding_invalid; 4208 } 4209 4210 pstr = NULL; 4211 { 4212 mbfl_string tmpstr; 4213 4214 if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) { 4215 tmpstr.no_encoding=mbfl_no_encoding_8bit; 4216 pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc); 4217 efree(tmpstr.val); 4218 } 4219 } 4220 if (pstr != NULL) { 4221 message_buf = message = (char *)pstr->val; 4222 } 4223 } else { 4224 /* this is not really an error, so it is allowed. */ 4225 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body"); 4226 message = NULL; 4227 } 4228 4229 /* other headers */ 4230#define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0" 4231#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain" 4232#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset=" 4233#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: " 4234 if (headers != NULL) { 4235 p = headers; 4236 n = headers_len; 4237 mbfl_memory_device_strncat(&device, p, n); 4238 if (n > 0 && p[n - 1] != '\n') { 4239 mbfl_memory_device_strncat(&device, "\n", 1); 4240 } 4241 } 4242 4243 if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) { 4244 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1); 4245 mbfl_memory_device_strncat(&device, "\n", 1); 4246 } 4247 4248 if (!suppressed_hdrs.cnt_type) { 4249 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1); 4250 4251 p = (char *)mbfl_no2preferred_mime_name(tran_cs); 4252 if (p != NULL) { 4253 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1); 4254 mbfl_memory_device_strcat(&device, p); 4255 } 4256 mbfl_memory_device_strncat(&device, "\n", 1); 4257 } 4258 if (!suppressed_hdrs.cnt_trans_enc) { 4259 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1); 4260 p = (char 
*)mbfl_no2preferred_mime_name(body_enc); 4261 if (p == NULL) { 4262 p = "7bit"; 4263 } 4264 mbfl_memory_device_strcat(&device, p); 4265 mbfl_memory_device_strncat(&device, "\n", 1); 4266 } 4267 4268 mbfl_memory_device_unput(&device); 4269 mbfl_memory_device_output('\0', &device); 4270 headers = (char *)device.buffer; 4271 4272 if (force_extra_parameters) { 4273 extra_cmd = php_escape_shell_cmd(force_extra_parameters); 4274 } else if (extra_cmd) { 4275 extra_cmd = php_escape_shell_cmd(extra_cmd); 4276 } 4277 4278 if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) { 4279 RETVAL_TRUE; 4280 } else { 4281 RETVAL_FALSE; 4282 } 4283 4284 if (extra_cmd) { 4285 efree(extra_cmd); 4286 } 4287 if (to_r != to) { 4288 efree(to_r); 4289 } 4290 if (subject_buf) { 4291 efree((void *)subject_buf); 4292 } 4293 if (message_buf) { 4294 efree((void *)message_buf); 4295 } 4296 mbfl_memory_device_clear(&device); 4297 zend_hash_destroy(&ht_headers); 4298} 4299 4300#undef SKIP_LONG_HEADER_SEP_MBSTRING 4301#undef MAIL_ASCIIZ_CHECK_MBSTRING 4302#undef APPEND_ONE_CHAR 4303#undef SEPARATE_SMART_STR 4304#undef PHP_MBSTR_MAIL_MIME_HEADER1 4305#undef PHP_MBSTR_MAIL_MIME_HEADER2 4306#undef PHP_MBSTR_MAIL_MIME_HEADER3 4307#undef PHP_MBSTR_MAIL_MIME_HEADER4 4308/* }}} */ 4309 4310/* {{{ proto mixed mb_get_info([string type]) 4311 Returns the current settings of mbstring */ 4312PHP_FUNCTION(mb_get_info) 4313{ 4314 char *typ = NULL; 4315 int typ_len; 4316 size_t n; 4317 char *name; 4318 const struct mb_overload_def *over_func; 4319 zval *row1, *row2; 4320 const mbfl_language *lang = mbfl_no2language(MBSTRG(language)); 4321 const mbfl_encoding **entry; 4322 4323 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) { 4324 RETURN_FALSE; 4325 } 4326 4327 if (!typ || !strcasecmp("all", typ)) { 4328 array_init(return_value); 4329 if (MBSTRG(current_internal_encoding)) { 4330 add_assoc_string(return_value, "internal_encoding", (char 
*)MBSTRG(current_internal_encoding)->name, 1); 4331 } 4332 if (MBSTRG(http_input_identify)) { 4333 add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1); 4334 } 4335 if (MBSTRG(current_http_output_encoding)) { 4336 add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1); 4337 } 4338 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) { 4339 add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1); 4340 } 4341 add_assoc_long(return_value, "func_overload", MBSTRG(func_overload)); 4342 if (MBSTRG(func_overload)){ 4343 over_func = &(mb_ovld[0]); 4344 MAKE_STD_ZVAL(row1); 4345 array_init(row1); 4346 while (over_func->type > 0) { 4347 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) { 4348 add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1); 4349 } 4350 over_func++; 4351 } 4352 add_assoc_zval(return_value, "func_overload_list", row1); 4353 } else { 4354 add_assoc_string(return_value, "func_overload_list", "no overload", 1); 4355 } 4356 if (lang != NULL) { 4357 if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) { 4358 add_assoc_string(return_value, "mail_charset", name, 1); 4359 } 4360 if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) { 4361 add_assoc_string(return_value, "mail_header_encoding", name, 1); 4362 } 4363 if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) { 4364 add_assoc_string(return_value, "mail_body_encoding", name, 1); 4365 } 4366 } 4367 add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars)); 4368 if (MBSTRG(encoding_translation)) { 4369 add_assoc_string(return_value, "encoding_translation", "On", 1); 4370 } else { 4371 add_assoc_string(return_value, "encoding_translation", "Off", 1); 4372 } 4373 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) 
!= NULL) { 4374 add_assoc_string(return_value, "language", name, 1); 4375 } 4376 n = MBSTRG(current_detect_order_list_size); 4377 entry = MBSTRG(current_detect_order_list); 4378 if (n > 0) { 4379 size_t i; 4380 MAKE_STD_ZVAL(row2); 4381 array_init(row2); 4382 for (i = 0; i < n; i++) { 4383 add_next_index_string(row2, (*entry)->name, 1); 4384 entry++; 4385 } 4386 add_assoc_zval(return_value, "detect_order", row2); 4387 } 4388 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { 4389 add_assoc_string(return_value, "substitute_character", "none", 1); 4390 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) { 4391 add_assoc_string(return_value, "substitute_character", "long", 1); 4392 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) { 4393 add_assoc_string(return_value, "substitute_character", "entity", 1); 4394 } else { 4395 add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar)); 4396 } 4397 if (MBSTRG(strict_detection)) { 4398 add_assoc_string(return_value, "strict_detection", "On", 1); 4399 } else { 4400 add_assoc_string(return_value, "strict_detection", "Off", 1); 4401 } 4402 } else if (!strcasecmp("internal_encoding", typ)) { 4403 if (MBSTRG(current_internal_encoding)) { 4404 RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1); 4405 } 4406 } else if (!strcasecmp("http_input", typ)) { 4407 if (MBSTRG(http_input_identify)) { 4408 RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1); 4409 } 4410 } else if (!strcasecmp("http_output", typ)) { 4411 if (MBSTRG(current_http_output_encoding)) { 4412 RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1); 4413 } 4414 } else if (!strcasecmp("http_output_conv_mimetypes", typ)) { 4415 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) { 4416 RETVAL_STRING(name, 1); 
4417 } 4418 } else if (!strcasecmp("func_overload", typ)) { 4419 RETVAL_LONG(MBSTRG(func_overload)); 4420 } else if (!strcasecmp("func_overload_list", typ)) { 4421 if (MBSTRG(func_overload)){ 4422 over_func = &(mb_ovld[0]); 4423 array_init(return_value); 4424 while (over_func->type > 0) { 4425 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) { 4426 add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1); 4427 } 4428 over_func++; 4429 } 4430 } else { 4431 RETVAL_STRING("no overload", 1); 4432 } 4433 } else if (!strcasecmp("mail_charset", typ)) { 4434 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) { 4435 RETVAL_STRING(name, 1); 4436 } 4437 } else if (!strcasecmp("mail_header_encoding", typ)) { 4438 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) { 4439 RETVAL_STRING(name, 1); 4440 } 4441 } else if (!strcasecmp("mail_body_encoding", typ)) { 4442 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) { 4443 RETVAL_STRING(name, 1); 4444 } 4445 } else if (!strcasecmp("illegal_chars", typ)) { 4446 RETVAL_LONG(MBSTRG(illegalchars)); 4447 } else if (!strcasecmp("encoding_translation", typ)) { 4448 if (MBSTRG(encoding_translation)) { 4449 RETVAL_STRING("On", 1); 4450 } else { 4451 RETVAL_STRING("Off", 1); 4452 } 4453 } else if (!strcasecmp("language", typ)) { 4454 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) { 4455 RETVAL_STRING(name, 1); 4456 } 4457 } else if (!strcasecmp("detect_order", typ)) { 4458 n = MBSTRG(current_detect_order_list_size); 4459 entry = MBSTRG(current_detect_order_list); 4460 if (n > 0) { 4461 size_t i; 4462 array_init(return_value); 4463 for (i = 0; i < n; i++) { 4464 add_next_index_string(return_value, (*entry)->name, 1); 4465 entry++; 4466 } 4467 } 4468 } else if (!strcasecmp("substitute_character", typ)) { 4469 if (MBSTRG(current_filter_illegal_mode) == 
MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { 4470 RETVAL_STRING("none", 1); 4471 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) { 4472 RETVAL_STRING("long", 1); 4473 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) { 4474 RETVAL_STRING("entity", 1); 4475 } else { 4476 RETVAL_LONG(MBSTRG(current_filter_illegal_substchar)); 4477 } 4478 } else if (!strcasecmp("strict_detection", typ)) { 4479 if (MBSTRG(strict_detection)) { 4480 RETVAL_STRING("On", 1); 4481 } else { 4482 RETVAL_STRING("Off", 1); 4483 } 4484 } else { 4485 RETURN_FALSE; 4486 } 4487} 4488/* }}} */ 4489 4490/* {{{ proto bool mb_check_encoding([string var[, string encoding]]) 4491 Check if the string is valid for the specified encoding */ 4492PHP_FUNCTION(mb_check_encoding) 4493{ 4494 char *var = NULL; 4495 int var_len; 4496 char *enc = NULL; 4497 int enc_len; 4498 mbfl_buffer_converter *convd; 4499 const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); 4500 mbfl_string string, result, *ret = NULL; 4501 long illegalchars = 0; 4502 4503 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) { 4504 RETURN_FALSE; 4505 } 4506 4507 if (var == NULL) { 4508 RETURN_BOOL(MBSTRG(illegalchars) == 0); 4509 } 4510 4511 if (enc != NULL) { 4512 encoding = mbfl_name2encoding(enc); 4513 if (!encoding || encoding == &mbfl_encoding_pass) { 4514 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc); 4515 RETURN_FALSE; 4516 } 4517 } 4518 4519 convd = mbfl_buffer_converter_new2(encoding, encoding, 0); 4520 if (convd == NULL) { 4521 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter"); 4522 RETURN_FALSE; 4523 } 4524 mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); 4525 mbfl_buffer_converter_illegal_substchar(convd, 0); 4526 4527 /* initialize string */ 4528 mbfl_string_init_set(&string, mbfl_no_language_neutral, 
encoding->no_encoding); 4529 mbfl_string_init(&result); 4530 4531 string.val = (unsigned char *)var; 4532 string.len = var_len; 4533 ret = mbfl_buffer_converter_feed_result(convd, &string, &result); 4534 illegalchars = mbfl_buffer_illegalchars(convd); 4535 mbfl_buffer_converter_delete(convd); 4536 4537 RETVAL_FALSE; 4538 if (ret != NULL) { 4539 if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { 4540 RETVAL_TRUE; 4541 } 4542 mbfl_string_clear(&result); 4543 } 4544} 4545/* }}} */ 4546 4547 4548/* {{{ php_mb_populate_current_detect_order_list */ 4549static void php_mb_populate_current_detect_order_list(TSRMLS_D) 4550{ 4551 const mbfl_encoding **entry = 0; 4552 size_t nentries; 4553 4554 if (MBSTRG(current_detect_order_list)) { 4555 return; 4556 } 4557 4558 if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) { 4559 nentries = MBSTRG(detect_order_list_size); 4560 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0); 4561 memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries); 4562 } else { 4563 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); 4564 size_t i; 4565 nentries = MBSTRG(default_detect_order_list_size); 4566 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0); 4567 for (i = 0; i < nentries; i++) { 4568 entry[i] = mbfl_no2encoding(src[i]); 4569 } 4570 } 4571 MBSTRG(current_detect_order_list) = entry; 4572 MBSTRG(current_detect_order_list_size) = nentries; 4573} 4574 4575/* {{{ static int php_mb_encoding_translation() */ 4576static int php_mb_encoding_translation(TSRMLS_D) 4577{ 4578 return MBSTRG(encoding_translation); 4579} 4580/* }}} */ 4581 4582/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */ 4583MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc) 4584{ 4585 if (enc != NULL) { 4586 if (enc->flag & MBFL_ENCTYPE_MBCS) { 4587 if (enc->mblen_table != NULL) { 
4588 if (s != NULL) return enc->mblen_table[*(unsigned char *)s]; 4589 } 4590 } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { 4591 return 2; 4592 } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { 4593 return 4; 4594 } 4595 } 4596 return 1; 4597} 4598/* }}} */ 4599 4600/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */ 4601MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC) 4602{ 4603 return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding)); 4604} 4605/* }}} */ 4606 4607/* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */ 4608MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc) 4609{ 4610 register const char *p = s; 4611 char *last=NULL; 4612 4613 if (nbytes == (size_t)-1) { 4614 size_t nb = 0; 4615 4616 while (*p != '\0') { 4617 if (nb == 0) { 4618 if ((unsigned char)*p == (unsigned char)c) { 4619 last = (char *)p; 4620 } 4621 nb = php_mb_mbchar_bytes_ex(p, enc); 4622 if (nb == 0) { 4623 return NULL; /* something is going wrong! 
*/ 4624 } 4625 } 4626 --nb; 4627 ++p; 4628 } 4629 } else { 4630 register size_t bcnt = nbytes; 4631 register size_t nbytes_char; 4632 while (bcnt > 0) { 4633 if ((unsigned char)*p == (unsigned char)c) { 4634 last = (char *)p; 4635 } 4636 nbytes_char = php_mb_mbchar_bytes_ex(p, enc); 4637 if (bcnt < nbytes_char) { 4638 return NULL; 4639 } 4640 p += nbytes_char; 4641 bcnt -= nbytes_char; 4642 } 4643 } 4644 return last; 4645} 4646/* }}} */ 4647 4648/* {{{ MBSTRING_API char *php_mb_safe_strrchr() */ 4649MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC) 4650{ 4651 return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding)); 4652} 4653/* }}} */ 4654 4655/* {{{ MBSTRING_API int php_mb_stripos() 4656 */ 4657MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC) 4658{ 4659 int n; 4660 mbfl_string haystack, needle; 4661 n = -1; 4662 4663 mbfl_string_init(&haystack); 4664 mbfl_string_init(&needle); 4665 haystack.no_language = MBSTRG(language); 4666 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 4667 needle.no_language = MBSTRG(language); 4668 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding; 4669 4670 do { 4671 size_t len = 0; 4672 haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC); 4673 haystack.len = len; 4674 4675 if (!haystack.val) { 4676 break; 4677 } 4678 4679 if (haystack.len <= 0) { 4680 break; 4681 } 4682 4683 needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC); 4684 needle.len = len; 4685 4686 if (!needle.val) { 4687 break; 4688 } 4689 4690 if (needle.len <= 0) { 4691 break; 4692 } 4693 4694 haystack.no_encoding = needle.no_encoding 
= mbfl_name2no_encoding(from_encoding); 4695 if (haystack.no_encoding == mbfl_no_encoding_invalid) { 4696 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding); 4697 break; 4698 } 4699 4700 { 4701 int haystack_char_len = mbfl_strlen(&haystack); 4702 4703 if (mode) { 4704 if ((offset > 0 && offset > haystack_char_len) || 4705 (offset < 0 && -offset > haystack_char_len)) { 4706 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string"); 4707 break; 4708 } 4709 } else { 4710 if (offset < 0 || offset > haystack_char_len) { 4711 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string"); 4712 break; 4713 } 4714 } 4715 } 4716 4717 n = mbfl_strpos(&haystack, &needle, offset, mode); 4718 } while(0); 4719 4720 if (haystack.val) { 4721 efree(haystack.val); 4722 } 4723 4724 if (needle.val) { 4725 efree(needle.val); 4726 } 4727 4728 return n; 4729} 4730/* }}} */ 4731 4732static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC) /* {{{ */ 4733{ 4734 *list = (const zend_encoding **)MBSTRG(http_input_list); 4735 *list_size = MBSTRG(http_input_list_size); 4736} 4737/* }}} */ 4738 4739static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC) /* {{{ */ 4740{ 4741 MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding; 4742} 4743/* }}} */ 4744 4745#endif /* HAVE_MBSTRING */ 4746 4747/* 4748 * Local variables: 4749 * tab-width: 4 4750 * c-basic-offset: 4 4751 * End: 4752 * vim600: fdm=marker 4753 * vim: noet sw=4 ts=4 4754 */ 4755