1/* 2 * "streamable kanji code filter and converter" 3 * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 4 * 5 * LICENSE NOTICES 6 * 7 * This file is part of "streamable kanji code filter and converter", 8 * which is distributed under the terms of GNU Lesser General Public 9 * License (version 2) as published by the Free Software Foundation. 10 * 11 * This software is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with "streamable kanji code filter and converter"; 18 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, 19 * Suite 330, Boston, MA 02111-1307 USA 20 * 21 * The author of this file: 22 * 23 */ 24/* 25 * The source code included in this files was separated from mbfilter_iso2022_jp_ms.c 26 * by Rui Hirokawa <hirokawa@php.net> on 25 July 2011. 27 * 28 */ 29 30#ifdef HAVE_CONFIG_H 31#include "config.h" 32#endif 33 34#include "mbfilter.h" 35#include "mbfilter_iso2022jp_mobile.h" 36#include "mbfilter_sjis_mobile.h" 37 38#include "unicode_table_cp932_ext.h" 39#include "unicode_table_jis.h" 40#include "cp932_table.h" 41 42extern int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter); 43extern int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter); 44 45static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL}; 46 47const mbfl_encoding mbfl_encoding_2022jp_kddi = { 48 mbfl_no_encoding_2022jp_kddi, 49 "ISO-2022-JP-MOBILE#KDDI", 50 "ISO-2022-JP", 51 mbfl_encoding_2022jp_kddi_aliases, 52 NULL, 53 MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE | MBFL_ENCTYPE_GL_UNSAFE 54}; 55 56const struct mbfl_identify_vtbl vtbl_identify_2022jp_kddi = { 57 mbfl_no_encoding_2022jp_kddi, 58 mbfl_filt_ident_common_ctor, 59 mbfl_filt_ident_common_dtor, 60 mbfl_filt_ident_2022jpms 61}; 62 63const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar = { 64 mbfl_no_encoding_2022jp_kddi, 65 mbfl_no_encoding_wchar, 66 mbfl_filt_conv_common_ctor, 67 mbfl_filt_conv_common_dtor, 68 mbfl_filt_conv_2022jp_mobile_wchar, 69 mbfl_filt_conv_common_flush 70}; 71 72const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi = { 73 mbfl_no_encoding_wchar, 74 mbfl_no_encoding_2022jp_kddi, 75 mbfl_filt_conv_common_ctor, 76 mbfl_filt_conv_common_dtor, 77 mbfl_filt_conv_wchar_2022jp_mobile, 78 mbfl_filt_conv_any_jis_flush 79}; 80 81#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) 82 83#define sjistoidx(c1, c2) \ 84 (((c1) > 0x9f) \ 85 ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \ 86 : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))) 87#define idxtojis1(c) (((c) / 94) + 0x21) 88#define idxtojis2(c) (((c) % 94) + 0x21) 89 90#define SJIS_ENCODE(c1,c2,s1,s2) \ 91 do { \ 92 s1 = c1; \ 93 s1--; \ 94 s1 >>= 1; \ 95 if ((c1) < 0x5f) { \ 96 s1 += 0x71; \ 97 } else { \ 98 s1 += 0xb1; \ 99 } \ 100 s2 = c2; \ 101 if ((c1) & 1) { \ 102 if ((c2) < 0x60) { \ 103 s2--; \ 104 } \ 105 s2 += 0x20; \ 106 } else { \ 107 s2 += 0x7e; \ 108 } \ 109 } while (0) 110 111#define SJIS_DECODE(c1,c2,s1,s2) \ 112 do { \ 113 s1 = c1; \ 114 if (s1 < 0xa0) { \ 115 s1 -= 0x81; \ 116 } else { \ 117 s1 -= 0xc1; \ 118 } \ 119 s1 <<= 1; \ 120 s1 += 0x21; \ 121 s2 = c2; \ 122 if (s2 < 0x9f) { \ 123 if (s2 < 0x7f) { \ 124 s2++; \ 125 } \ 126 s2 -= 0x20; \ 127 } else { \ 128 s1++; \ 129 s2 -= 0x7e; \ 130 } \ 131 } while (0) 132 133#define CODE2JIS(c1,c2,s1,s2) \ 134 c1 = (s1)/94+0x21; \ 135 c2 = (s1)-94*((c1)-0x21)+0x21; \ 136 s1 = ((c1) << 8) | (c2); \ 137 s2 = 1 138 139/* 140 * ISO-2022-JP-Mobile => wchar 141 */ 142int 143mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter) 144{ 145 int c1, s, w, snd; 146 147retry: 148 switch (filter->status & 0xf) { 149/* case 0x00: ASCII */ 150/* case 0x10: X 0201 latin */ 151/* case 0x20: X 0201 kana */ 152/* case 0x80: X 0208 */ 153 case 0: 154 if (c == 0x1b) { 155 filter->status += 2; 156 } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ 157 CK((*filter->output_function)(0xff40 + c, filter->data)); 158 } else if (filter->status == 0x80 && c > 0x20 && c < 0x80) { /* kanji first char */ 159 filter->cache = c; 160 filter->status += 1; 161 } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ 162 CK((*filter->output_function)(c, filter->data)); 163 } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ 164 CK((*filter->output_function)(0xfec0 + c, filter->data)); 165 } else { 166 w = c & MBFL_WCSGROUP_MASK; 167 w |= MBFL_WCSGROUP_THROUGH; 168 CK((*filter->output_function)(w, filter->data)); 169 } 170 break; 171 172/* case 0x81: X 0208 second char */ 173 case 1: 174 w = 0; 175 filter->status &= ~0xf; 176 c1 = filter->cache; 177 if (c > 0x20 && c < 0x7f) { 178 s = (c1 - 0x21)*94 + c - 0x21; 179 180 if (s <= 137) { 181 if (s == 31) { 182 w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ 183 } else if (s == 32) { 184 w = 0xff5e; /* FULLWIDTH TILDE */ 185 } else if (s == 33) { 186 w = 0x2225; /* PARALLEL TO */ 187 } else if (s == 60) { 188 w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ 189 } else if (s == 80) { 190 w = 0xffe0; /* FULLWIDTH CENT SIGN */ 191 } else if (s == 81) { 192 w = 0xffe1; /* FULLWIDTH POUND SIGN */ 193 } else if (s == 137) { 194 w = 0xffe2; /* FULLWIDTH NOT SIGN */ 195 } 196 } 197 198 if (w == 0) { 199 if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ 200 w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; 201 } else if (s >= 0 && s < jisx0208_ucs_table_size) { 202 w = jisx0208_ucs_table[s]; 203 } else { 204 w = 0; 205 } 206 } 207 208 if (s >= (84*94) && s < 91*94) { 209 s += 22*94; 210 if (filter->from->no_encoding == mbfl_no_encoding_2022jp_kddi) { 211 w = mbfilter_sjis_emoji_kddi2unicode(s, &snd); 212 } 213 if (w > 0 && snd > 0) { 214 CK((*filter->output_function)(snd, filter->data)); 215 } 216 } 217 218 if (w <= 0) { 219 w = (c1 << 8) | c; 220 w &= MBFL_WCSPLANE_MASK; 221 w |= MBFL_WCSPLANE_JIS0208; 222 } 223 CK((*filter->output_function)(w, filter->data)); 224 } else if (c == 0x1b) { 225 filter->status += 2; 226 } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ 227 CK((*filter->output_function)(c, filter->data)); 228 } else { 229 w = (c1 << 8) | c; 230 w &= MBFL_WCSGROUP_MASK; 231 w |= MBFL_WCSGROUP_THROUGH; 232 CK((*filter->output_function)(w, filter->data)); 233 } 234 break; 235 236 /* ESC */ 237/* case 0x02: */ 238/* case 0x12: */ 239/* case 0x22: */ 240/* case 0x82: */ 241 case 2: 242 if (c == 0x24) { /* '$' */ 243 filter->status++; 244 } else if (c == 0x28) { /* '(' */ 245 filter->status += 3; 246 } else { 247 filter->status &= ~0xf; 248 CK((*filter->output_function)(0x1b, filter->data)); 249 goto retry; 250 } 251 break; 252 253 /* ESC $ */ 254/* case 0x03: */ 255/* case 0x13: */ 256/* case 0x23: */ 257/* case 0x83: */ 258 case 3: 259 if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ 260 filter->status = 0x80; 261 } else if (c == 0x28) { /* '(' */ 262 filter->status++; 263 } else { 264 filter->status &= ~0xf; 265 CK((*filter->output_function)(0x1b, filter->data)); 266 CK((*filter->output_function)(0x24, filter->data)); 267 goto retry; 268 } 269 break; 270 271 /* ESC $ ( */ 272/* case 0x04: */ 273/* case 0x14: */ 274/* case 0x24: */ 275/* case 0x84: */ 276 case 4: 277 if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ 278 filter->status = 0x80; 279 } else { 280 filter->status &= ~0xf; 281 CK((*filter->output_function)(0x1b, filter->data)); 282 CK((*filter->output_function)(0x24, filter->data)); 283 CK((*filter->output_function)(0x28, filter->data)); 284 goto retry; 285 } 286 break; 287 288 /* ESC ( */ 289/* case 0x05: */ 290/* case 0x15: */ 291/* case 0x25: */ 292/* case 0x85: */ 293 case 5: 294 if (c == 0x42) { /* 'B' */ 295 filter->status = 0; 296 } else if (c == 0x4a) { /* 'J' */ 297 filter->status = 0; 298 } else if (c == 0x49) { /* 'I' */ 299 filter->status = 0x20; 300 } else { 301 filter->status &= ~0xf; 302 CK((*filter->output_function)(0x1b, filter->data)); 303 CK((*filter->output_function)(0x28, filter->data)); 304 goto retry; 305 } 306 break; 307 308 default: 309 filter->status = 0; 310 break; 311 } 312 313 return c; 314} 315 316/* 317 * wchar => ISO-2022-JP-Mobile 318 */ 319int 320mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter) 321{ 322 int c1, c2, s1, s2; 323 324 s1 = 0; 325 s2 = 0; 326 if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { 327 s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; 328 } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { 329 s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; 330 } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { 331 s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; 332 } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { 333 s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; 334 } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */ 335 s1 = c - 0xe000; 336 c1 = s1/94 + 0x7f; 337 c2 = s1%94 + 0x21; 338 s1 = (c1 << 8) | c2; 339 } 340 if (s1 <= 0) { 341 c1 = c & ~MBFL_WCSPLANE_MASK; 342 if (c1 == MBFL_WCSPLANE_WINCP932) { 343 s1 = c & MBFL_WCSPLANE_MASK; 344 s2 = 1; 345 } else if (c1 == MBFL_WCSPLANE_JIS0208) { 346 s1 = c & MBFL_WCSPLANE_MASK; 347 } else if (c1 == MBFL_WCSPLANE_JIS0212) { 348 s1 = c & MBFL_WCSPLANE_MASK; 349 s1 |= 0x8080; 350 } else if (c == 0xa5) { /* YEN SIGN */ 351 s1 = 0x216f; /* FULLWIDTH YEN SIGN */ 352 } else if (c == 0x203e) { /* OVER LINE */ 353 s1 = 0x2131; /* FULLWIDTH MACRON */ 354 } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ 355 s1 = 0x2140; 356 } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ 357 s1 = 0x2141; 358 } else if (c == 0x2225) { /* PARALLEL TO */ 359 s1 = 0x2142; 360 } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ 361 s1 = 0x215d; 362 } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ 363 s1 = 0x2171; 364 } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ 365 s1 = 0x2172; 366 } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ 367 s1 = 0x224c; 368 } 369 } 370 371 if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */ 372 s1 = -1; 373 c1 = 0; 374 c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; 375 while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ 376 if (c == cp932ext1_ucs_table[c1]) { 377 s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); 378 break; 379 } 380 c1++; 381 } 382 if (c == 0) { 383 s1 = 0; 384 } else if (s1 <= 0) { 385 s1 = -1; 386 } 387 } 388 389 if (filter->to->no_encoding == mbfl_no_encoding_2022jp_kddi && 390 mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) { 391 CODE2JIS(c1,c2,s1,s2); 392 s1 -= 0x1600; 393 } 394 395 if (filter->status == 1 && filter->cache > 0) { 396 return c; 397 } 398 399 if (s1 >= 0) { 400 if (s1 < 0x80) { /* latin */ 401 if ((filter->status & 0xff00) != 0) { 402 CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ 403 CK((*filter->output_function)(0x28, filter->data)); /* '(' */ 404 CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ 405 } 406 CK((*filter->output_function)(s1, filter->data)); 407 filter->status = 0; 408 } else if (s1 > 0xa0 && s1 < 0xe0) { /* kana */ 409 if ((filter->status & 0xff00) != 0x100) { 410 CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ 411 CK((*filter->output_function)(0x28, filter->data)); /* '(' */ 412 CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ 413 } 414 filter->status = 0x100; 415 CK((*filter->output_function)(s1 & 0x7f, filter->data)); 416 } else if (s1 < 0x7e7f) { /* X 0208 */ 417 if ((filter->status & 0xff00) != 0x200) { 418 CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ 419 CK((*filter->output_function)(0x24, filter->data)); /* '$' */ 420 CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ 421 } 422 filter->status = 0x200; 423 CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); 424 CK((*filter->output_function)(s1 & 0x7f, filter->data)); 425 } 426 } else { 427 if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { 428 CK(mbfl_filt_conv_illegal_output(c, filter)); 429 } 430 } 431 432 return c; 433} 434