1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2008 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Stig Sæther Bakken <ssb@php.net> |
16 | Thies C. Arntzen <thies@thieso.net> |
17 | Sterling Hughes <sterling@php.net> |
18 +----------------------------------------------------------------------+
19 */
20
21 /* $Id: xml.c,v 1.181 2008/08/24 02:12:51 felipe Exp $ */
22
23 #define IS_EXT_MODULE
24
25 #ifdef HAVE_CONFIG_H
26 #include "config.h"
27 #endif
28
29 #include "php.h"
30
31 #define PHP_XML_INTERNAL
32 #include "zend_variables.h"
33 #include "ext/standard/php_string.h"
34 #include "ext/standard/info.h"
35
36 #if HAVE_XML
37
38 #include "php_xml.h"
39 # include "ext/standard/head.h"
40 #ifdef LIBXML_EXPAT_COMPAT
41 #include "ext/libxml/php_libxml.h"
42 #endif
43
44 /* Short-term TODO list:
45 * - Implement XML_ExternalEntityParserCreate()
46 * - XML_SetCommentHandler
47 * - XML_SetCdataSectionHandler
48 * - XML_SetParamEntityParsing
49 */
50
51 /* Long-term TODO list:
52 * - Fix the expat library so you can install your own memory manager
53 * functions
54 */
55
56 /* Known bugs:
57 * - Weird things happen with <![CDATA[]]> sections.
58 */
59
60 ZEND_DECLARE_MODULE_GLOBALS(xml)
61
62 /* {{{ dynamically loadable module stuff */
63 #ifdef COMPILE_DL_XML
64 ZEND_GET_MODULE(xml)
65 #endif /* COMPILE_DL_XML */
66 /* }}} */
67
68 /* {{{ function prototypes */
69 PHP_MINIT_FUNCTION(xml);
70 PHP_MINFO_FUNCTION(xml);
71 static PHP_GINIT_FUNCTION(xml);
72
73 static void xml_parser_dtor(zend_rsrc_list_entry *rsrc TSRMLS_DC);
74 static void xml_set_handler(zval **, zval ** TSRMLS_DC);
75 inline static unsigned short xml_encode_iso_8859_1(unsigned char);
76 inline static char xml_decode_iso_8859_1(unsigned short);
77 inline static unsigned short xml_encode_us_ascii(unsigned char);
78 inline static char xml_decode_us_ascii(unsigned short);
79 static zval *xml_call_handler(xml_parser *, zval *, zend_function *, int, zval **);
80 static zval *_xml_xmlchar_zval(const XML_Char *, int, const XML_Char *);
81 static int _xml_xmlcharlen(const XML_Char *);
82 static void _xml_add_to_info(xml_parser *parser,char *name);
83 inline static char *_xml_decode_tag(xml_parser *parser, const char *tag);
84
85 void _xml_startElementHandler(void *, const XML_Char *, const XML_Char **);
86 void _xml_endElementHandler(void *, const XML_Char *);
87 void _xml_characterDataHandler(void *, const XML_Char *, int);
88 void _xml_processingInstructionHandler(void *, const XML_Char *, const XML_Char *);
89 void _xml_defaultHandler(void *, const XML_Char *, int);
90 void _xml_unparsedEntityDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
91 void _xml_notationDeclHandler(void *, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
92 int _xml_externalEntityRefHandler(XML_Parser, const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
93
94 void _xml_startNamespaceDeclHandler(void *, const XML_Char *, const XML_Char *);
95 void _xml_endNamespaceDeclHandler(void *, const XML_Char *);
96 /* }}} */
97
98 /* {{{ extension definition structures */
99 static
100 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_parser_create, 0, 0, 0)
101 ZEND_ARG_INFO(0, encoding)
102 ZEND_END_ARG_INFO()
103
104 static
105 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_parser_create_ns, 0, 0, 0)
106 ZEND_ARG_INFO(0, encoding)
107 ZEND_ARG_INFO(0, sep)
108 ZEND_END_ARG_INFO()
109
110 static
111 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_set_object, 0, 0, 2)
112 ZEND_ARG_INFO(0, parser)
113 ZEND_ARG_INFO(1, obj)
114 ZEND_END_ARG_INFO()
115
116 static
117 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_set_element_handler, 0, 0, 3)
118 ZEND_ARG_INFO(0, parser)
119 ZEND_ARG_INFO(0, shdl)
120 ZEND_ARG_INFO(0, ehdl)
121 ZEND_END_ARG_INFO()
122
123 static
124 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_set_character_data_handler, 0, 0, 2)
125 ZEND_ARG_INFO(0, parser)
126 ZEND_ARG_INFO(0, hdl)
127 ZEND_END_ARG_INFO()
128
129 static
130 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_set_processing_instruction_handler, 0, 0, 2)
131 ZEND_ARG_INFO(0, parser)
132 ZEND_ARG_INFO(0, hdl)
133 ZEND_END_ARG_INFO()
134
135 static
136 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_set_default_handler, 0, 0, 2)
137 ZEND_ARG_INFO(0, parser)
138 ZEND_ARG_INFO(0, hdl)
139 ZEND_END_ARG_INFO()
140
141 static
142 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_set_unparsed_entity_decl_handler, 0, 0, 2)
143 ZEND_ARG_INFO(0, parser)
144 ZEND_ARG_INFO(0, hdl)
145 ZEND_END_ARG_INFO()
146
147 static
148 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_set_notation_decl_handler, 0, 0, 2)
149 ZEND_ARG_INFO(0, parser)
150 ZEND_ARG_INFO(0, hdl)
151 ZEND_END_ARG_INFO()
152
153 static
154 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_set_external_entity_ref_handler, 0, 0, 2)
155 ZEND_ARG_INFO(0, parser)
156 ZEND_ARG_INFO(0, hdl)
157 ZEND_END_ARG_INFO()
158
159 static
160 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_set_start_namespace_decl_handler, 0, 0, 2)
161 ZEND_ARG_INFO(0, parser)
162 ZEND_ARG_INFO(0, hdl)
163 ZEND_END_ARG_INFO()
164
165 static
166 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_set_end_namespace_decl_handler, 0, 0, 2)
167 ZEND_ARG_INFO(0, parser)
168 ZEND_ARG_INFO(0, hdl)
169 ZEND_END_ARG_INFO()
170
171 static
172 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_parse, 0, 0, 2)
173 ZEND_ARG_INFO(0, parser)
174 ZEND_ARG_INFO(0, data)
175 ZEND_ARG_INFO(0, isfinal)
176 ZEND_END_ARG_INFO()
177
178 static
179 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_parse_into_struct, 0, 0, 3)
180 ZEND_ARG_INFO(0, parser)
181 ZEND_ARG_INFO(0, data)
182 ZEND_ARG_INFO(1, values)
183 ZEND_ARG_INFO(1, index)
184 ZEND_END_ARG_INFO()
185
186 static
187 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_get_error_code, 0, 0, 1)
188 ZEND_ARG_INFO(0, parser)
189 ZEND_END_ARG_INFO()
190
191 static
192 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_error_string, 0, 0, 1)
193 ZEND_ARG_INFO(0, code)
194 ZEND_END_ARG_INFO()
195
196 static
197 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_get_current_line_number, 0, 0, 1)
198 ZEND_ARG_INFO(0, parser)
199 ZEND_END_ARG_INFO()
200
201 static
202 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_get_current_column_number, 0, 0, 1)
203 ZEND_ARG_INFO(0, parser)
204 ZEND_END_ARG_INFO()
205
206 static
207 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_get_current_byte_index, 0, 0, 1)
208 ZEND_ARG_INFO(0, parser)
209 ZEND_END_ARG_INFO()
210
211 static
212 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_parser_free, 0, 0, 1)
213 ZEND_ARG_INFO(0, parser)
214 ZEND_END_ARG_INFO()
215
216 static
217 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_parser_set_option, 0, 0, 3)
218 ZEND_ARG_INFO(0, parser)
219 ZEND_ARG_INFO(0, option)
220 ZEND_ARG_INFO(0, value)
221 ZEND_END_ARG_INFO()
222
223 static
224 ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_parser_get_option, 0, 0, 2)
225 ZEND_ARG_INFO(0, parser)
226 ZEND_ARG_INFO(0, option)
227 ZEND_END_ARG_INFO()
228
229 static
230 ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_encode, 0, 0, 1)
231 ZEND_ARG_INFO(0, data)
232 ZEND_END_ARG_INFO()
233
234 static
235 ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_decode, 0, 0, 1)
236 ZEND_ARG_INFO(0, data)
237 ZEND_END_ARG_INFO()
238
239 const zend_function_entry xml_functions[] = {
240 PHP_FE(xml_parser_create, arginfo_xml_parser_create)
241 PHP_FE(xml_parser_create_ns, arginfo_xml_parser_create_ns)
242 PHP_FE(xml_set_object, arginfo_xml_set_object)
243 PHP_FE(xml_set_element_handler, arginfo_xml_set_element_handler)
244 PHP_FE(xml_set_character_data_handler, arginfo_xml_set_character_data_handler)
245 PHP_FE(xml_set_processing_instruction_handler, arginfo_xml_set_processing_instruction_handler)
246 PHP_FE(xml_set_default_handler, arginfo_xml_set_default_handler)
247 PHP_FE(xml_set_unparsed_entity_decl_handler,arginfo_xml_set_unparsed_entity_decl_handler)
248 PHP_FE(xml_set_notation_decl_handler, arginfo_xml_set_notation_decl_handler)
249 PHP_FE(xml_set_external_entity_ref_handler, arginfo_xml_set_external_entity_ref_handler)
250 PHP_FE(xml_set_start_namespace_decl_handler,arginfo_xml_set_start_namespace_decl_handler)
251 PHP_FE(xml_set_end_namespace_decl_handler, arginfo_xml_set_end_namespace_decl_handler)
252 PHP_FE(xml_parse, arginfo_xml_parse)
253 PHP_FE(xml_parse_into_struct, arginfo_xml_parse_into_struct)
254 PHP_FE(xml_get_error_code, arginfo_xml_get_error_code)
255 PHP_FE(xml_error_string, arginfo_xml_error_string)
256 PHP_FE(xml_get_current_line_number, arginfo_xml_get_current_line_number)
257 PHP_FE(xml_get_current_column_number, arginfo_xml_get_current_column_number)
258 PHP_FE(xml_get_current_byte_index, arginfo_xml_get_current_byte_index)
259 PHP_FE(xml_parser_free, arginfo_xml_parser_free)
260 PHP_FE(xml_parser_set_option, arginfo_xml_parser_set_option)
261 PHP_FE(xml_parser_get_option, arginfo_xml_parser_get_option)
262 PHP_FE(utf8_encode, arginfo_utf8_encode)
263 PHP_FE(utf8_decode, arginfo_utf8_decode)
264 {NULL, NULL, NULL}
265 };
266
#ifdef LIBXML_EXPAT_COMPAT
/* Module dependencies: only declared when building with LIBXML_EXPAT_COMPAT. */
static const zend_module_dep xml_deps[] = {
	ZEND_MOD_REQUIRED("libxml")
	{NULL, NULL, NULL} /* terminator */
};
#endif /* LIBXML_EXPAT_COMPAT */
273
274 zend_module_entry xml_module_entry = {
275 #ifdef LIBXML_EXPAT_COMPAT
276 STANDARD_MODULE_HEADER_EX, NULL,
277 xml_deps,
278 #else
279 STANDARD_MODULE_HEADER,
280 #endif
281 "xml", /* extension name */
282 xml_functions, /* extension function list */
283 PHP_MINIT(xml), /* extension-wide startup function */
284 NULL, /* extension-wide shutdown function */
285 NULL, /* per-request startup function */
286 NULL, /* per-request shutdown function */
287 PHP_MINFO(xml), /* information function */
288 NO_VERSION_YET,
289 PHP_MODULE_GLOBALS(xml), /* globals descriptor */
290 PHP_GINIT(xml), /* globals ctor */
291 NULL, /* globals dtor */
292 NULL, /* post deactivate */
293 STANDARD_MODULE_PROPERTIES_EX
294 };
295
/* Encodings known to the extension. Each entry names an encoding and its
 * decoding/encoding callbacks; UTF-8 has NULL callbacks, in which case
 * xml_utf8_decode()/xml_utf8_encode() copy the data through unchanged.
 */
299 xml_encoding xml_encodings[] = {
300 { "ISO-8859-1", xml_decode_iso_8859_1, xml_encode_iso_8859_1 },
301 { "US-ASCII", xml_decode_us_ascii, xml_encode_us_ascii },
302 { "UTF-8", NULL, NULL },
303 { NULL, NULL, NULL }
304 };
305
306 static XML_Memory_Handling_Suite php_xml_mem_hdlrs;
307
308 /* True globals, no need for thread safety */
309 static int le_xml_parser;
310
311 /* }}} */
312
313 /* {{{ startup, shutdown and info functions */
314 static PHP_GINIT_FUNCTION(xml)
315 {
316 xml_globals->default_encoding = "UTF-8";
317 }
318
/* malloc_fcn callback for php_xml_mem_hdlrs: allocates sz bytes with emalloc(). */
static void *php_xml_malloc_wrapper(size_t sz)
{
	void *block = emalloc(sz);

	return block;
}
323
/* realloc_fcn callback for php_xml_mem_hdlrs: resizes ptr to sz bytes with erealloc(). */
static void *php_xml_realloc_wrapper(void *ptr, size_t sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
328
/* free_fcn callback for php_xml_mem_hdlrs: releases ptr with efree(); NULL is ignored. */
static void php_xml_free_wrapper(void *ptr)
{
	if (ptr == NULL) {
		return;
	}
	efree(ptr);
}
335
336 PHP_MINIT_FUNCTION(xml)
337 {
338 le_xml_parser = zend_register_list_destructors_ex(xml_parser_dtor, NULL, "xml", module_number);
339
340 REGISTER_LONG_CONSTANT("XML_ERROR_NONE", XML_ERROR_NONE, CONST_CS|CONST_PERSISTENT);
341 REGISTER_LONG_CONSTANT("XML_ERROR_NO_MEMORY", XML_ERROR_NO_MEMORY, CONST_CS|CONST_PERSISTENT);
342 REGISTER_LONG_CONSTANT("XML_ERROR_SYNTAX", XML_ERROR_SYNTAX, CONST_CS|CONST_PERSISTENT);
343 REGISTER_LONG_CONSTANT("XML_ERROR_NO_ELEMENTS", XML_ERROR_NO_ELEMENTS, CONST_CS|CONST_PERSISTENT);
344 REGISTER_LONG_CONSTANT("XML_ERROR_INVALID_TOKEN", XML_ERROR_INVALID_TOKEN, CONST_CS|CONST_PERSISTENT);
345 REGISTER_LONG_CONSTANT("XML_ERROR_UNCLOSED_TOKEN", XML_ERROR_UNCLOSED_TOKEN, CONST_CS|CONST_PERSISTENT);
346 REGISTER_LONG_CONSTANT("XML_ERROR_PARTIAL_CHAR", XML_ERROR_PARTIAL_CHAR, CONST_CS|CONST_PERSISTENT);
347 REGISTER_LONG_CONSTANT("XML_ERROR_TAG_MISMATCH", XML_ERROR_TAG_MISMATCH, CONST_CS|CONST_PERSISTENT);
348 REGISTER_LONG_CONSTANT("XML_ERROR_DUPLICATE_ATTRIBUTE", XML_ERROR_DUPLICATE_ATTRIBUTE, CONST_CS|CONST_PERSISTENT);
349 REGISTER_LONG_CONSTANT("XML_ERROR_JUNK_AFTER_DOC_ELEMENT", XML_ERROR_JUNK_AFTER_DOC_ELEMENT, CONST_CS|CONST_PERSISTENT);
350 REGISTER_LONG_CONSTANT("XML_ERROR_PARAM_ENTITY_REF", XML_ERROR_PARAM_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
351 REGISTER_LONG_CONSTANT("XML_ERROR_UNDEFINED_ENTITY", XML_ERROR_UNDEFINED_ENTITY, CONST_CS|CONST_PERSISTENT);
352 REGISTER_LONG_CONSTANT("XML_ERROR_RECURSIVE_ENTITY_REF", XML_ERROR_RECURSIVE_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
353 REGISTER_LONG_CONSTANT("XML_ERROR_ASYNC_ENTITY", XML_ERROR_ASYNC_ENTITY, CONST_CS|CONST_PERSISTENT);
354 REGISTER_LONG_CONSTANT("XML_ERROR_BAD_CHAR_REF", XML_ERROR_BAD_CHAR_REF, CONST_CS|CONST_PERSISTENT);
355 REGISTER_LONG_CONSTANT("XML_ERROR_BINARY_ENTITY_REF", XML_ERROR_BINARY_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
356 REGISTER_LONG_CONSTANT("XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF", XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, CONST_CS|CONST_PERSISTENT);
357 REGISTER_LONG_CONSTANT("XML_ERROR_MISPLACED_XML_PI", XML_ERROR_MISPLACED_XML_PI, CONST_CS|CONST_PERSISTENT);
358 REGISTER_LONG_CONSTANT("XML_ERROR_UNKNOWN_ENCODING", XML_ERROR_UNKNOWN_ENCODING, CONST_CS|CONST_PERSISTENT);
359 REGISTER_LONG_CONSTANT("XML_ERROR_INCORRECT_ENCODING", XML_ERROR_INCORRECT_ENCODING, CONST_CS|CONST_PERSISTENT);
360 REGISTER_LONG_CONSTANT("XML_ERROR_UNCLOSED_CDATA_SECTION", XML_ERROR_UNCLOSED_CDATA_SECTION, CONST_CS|CONST_PERSISTENT);
361 REGISTER_LONG_CONSTANT("XML_ERROR_EXTERNAL_ENTITY_HANDLING", XML_ERROR_EXTERNAL_ENTITY_HANDLING, CONST_CS|CONST_PERSISTENT);
362
363 REGISTER_LONG_CONSTANT("XML_OPTION_CASE_FOLDING", PHP_XML_OPTION_CASE_FOLDING, CONST_CS|CONST_PERSISTENT);
364 REGISTER_LONG_CONSTANT("XML_OPTION_TARGET_ENCODING", PHP_XML_OPTION_TARGET_ENCODING, CONST_CS|CONST_PERSISTENT);
365 REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_TAGSTART", PHP_XML_OPTION_SKIP_TAGSTART, CONST_CS|CONST_PERSISTENT);
366 REGISTER_LONG_CONSTANT("XML_OPTION_SKIP_WHITE", PHP_XML_OPTION_SKIP_WHITE, CONST_CS|CONST_PERSISTENT);
367
368 /* this object should not be pre-initialised at compile time,
369 as the order of members may vary */
370
371 php_xml_mem_hdlrs.malloc_fcn = php_xml_malloc_wrapper;
372 php_xml_mem_hdlrs.realloc_fcn = php_xml_realloc_wrapper;
373 php_xml_mem_hdlrs.free_fcn = php_xml_free_wrapper;
374
375 #ifdef LIBXML_EXPAT_COMPAT
376 REGISTER_STRING_CONSTANT("XML_SAX_IMPL", "libxml", CONST_CS|CONST_PERSISTENT);
377 #else
378 REGISTER_STRING_CONSTANT("XML_SAX_IMPL", "expat", CONST_CS|CONST_PERSISTENT);
379 #endif
380
381 return SUCCESS;
382 }
383
/* Prints the extension's info table, including the backing parser library version. */
PHP_MINFO_FUNCTION(xml)
{
	php_info_print_table_start();

	php_info_print_table_row(2, "XML Support", "active");
	php_info_print_table_row(2, "XML Namespace Support", "active");

	/* Exactly one version row is printed, depending on the backend in use. */
#if defined(LIBXML_DOTTED_VERSION) && defined(LIBXML_EXPAT_COMPAT)
	php_info_print_table_row(2, "libxml2 Version", LIBXML_DOTTED_VERSION);
#else
	php_info_print_table_row(2, "EXPAT Version", XML_ExpatVersion());
#endif

	php_info_print_table_end();
}
396 /* }}} */
397
398 /* {{{ extension-internal functions */
399 static zval *_xml_resource_zval(long value)
400 {
401 zval *ret;
402 TSRMLS_FETCH();
403
404 MAKE_STD_ZVAL(ret);
405
406 Z_TYPE_P(ret) = IS_RESOURCE;
407 Z_LVAL_P(ret) = value;
408
409 zend_list_addref(value);
410
411 return ret;
412 }
413
414 static zval *_xml_string_zval(const char *str)
415 {
416 zval *ret;
417 TSRMLS_FETCH();
418
419 MAKE_STD_ZVAL(ret);
420
421 ZVAL_UTF8_STRING(ret, (char *)str, ZSTR_DUPLICATE);
422 return ret;
423 }
424
425 static zval *_xml_xmlchar_zval(const XML_Char *s, int len, const XML_Char *encoding)
426 {
427 zval *ret;
428 TSRMLS_FETCH();
429
430 MAKE_STD_ZVAL(ret);
431
432 if (s == NULL) {
433 ZVAL_FALSE(ret);
434 return ret;
435 }
436 if (len == 0) {
437 len = _xml_xmlcharlen(s);
438 }
439
440 if (UG(unicode)) {
441 ZVAL_UTF8_STRINGL(ret, (char *)s, len, ZSTR_DUPLICATE);
442 } else {
443 Z_TYPE_P(ret) = IS_STRING;
444 Z_STRVAL_P(ret) = xml_utf8_decode(s, len, &Z_STRLEN_P(ret), encoding);
445 }
446
447 return ret;
448 }
449 /* }}} */
450
451 /* {{{ xml_parser_dtor() */
452 static void xml_parser_dtor(zend_rsrc_list_entry *rsrc TSRMLS_DC)
453 {
454 xml_parser *parser = (xml_parser *)rsrc->ptr;
455
456 if (parser->parser) {
457 XML_ParserFree(parser->parser);
458 }
459 if (parser->ltags) {
460 int inx;
461 for (inx = 0; inx < parser->level; inx++)
462 efree(parser->ltags[ inx ]);
463 efree(parser->ltags);
464 }
465 if (parser->startElementHandler) {
466 zval_ptr_dtor(&parser->startElementHandler);
467 }
468 if (parser->endElementHandler) {
469 zval_ptr_dtor(&parser->endElementHandler);
470 }
471 if (parser->characterDataHandler) {
472 zval_ptr_dtor(&parser->characterDataHandler);
473 }
474 if (parser->processingInstructionHandler) {
475 zval_ptr_dtor(&parser->processingInstructionHandler);
476 }
477 if (parser->defaultHandler) {
478 zval_ptr_dtor(&parser->defaultHandler);
479 }
480 if (parser->unparsedEntityDeclHandler) {
481 zval_ptr_dtor(&parser->unparsedEntityDeclHandler);
482 }
483 if (parser->notationDeclHandler) {
484 zval_ptr_dtor(&parser->notationDeclHandler);
485 }
486 if (parser->externalEntityRefHandler) {
487 zval_ptr_dtor(&parser->externalEntityRefHandler);
488 }
489 if (parser->unknownEncodingHandler) {
490 zval_ptr_dtor(&parser->unknownEncodingHandler);
491 }
492 if (parser->startNamespaceDeclHandler) {
493 zval_ptr_dtor(&parser->startNamespaceDeclHandler);
494 }
495 if (parser->endNamespaceDeclHandler) {
496 zval_ptr_dtor(&parser->endNamespaceDeclHandler);
497 }
498 if (parser->baseURI) {
499 efree(parser->baseURI);
500 }
501 if (parser->object) {
502 zval_ptr_dtor(&parser->object);
503 }
504
505 efree(parser);
506 }
507 /* }}} */
508
509 /* {{{ xml_set_handler() */
510 static void xml_set_handler(zval **handler, zval **data TSRMLS_DC)
511 {
512 /* If we have already a handler, release it */
513 if (*handler) {
514 zval_ptr_dtor(handler);
515 }
516
517 /* IS_ARRAY might indicate that we're using array($obj, 'method') syntax */
518 if (Z_TYPE_PP(data) != IS_ARRAY && Z_TYPE_PP(data) != IS_OBJECT) {
519 convert_to_text_ex(data);
520 if (((Z_TYPE_PP(data)==IS_UNICODE) && (Z_USTRLEN_PP(data) == 0)) ||
521 ((Z_TYPE_PP(data)==IS_STRING) && (Z_STRLEN_PP(data) == 0))) {
522
523 *handler = NULL;
524 return;
525 }
526 }
527
528 zval_add_ref(data);
529
530 *handler = *data;
531 }
532 /* }}} */
533
534 /* {{{ xml_call_handler() */
535 static zval *xml_call_handler(xml_parser *parser, zval *handler, zend_function *function_ptr, int argc, zval **argv)
536 {
537 int i;
538 TSRMLS_FETCH();
539
540 if (parser && handler && !EG(exception)) {
541 zval ***args;
542 zval *retval;
543 int result;
544 zend_fcall_info fci;
545
546 args = safe_emalloc(sizeof(zval **), argc, 0);
547 for (i = 0; i < argc; i++) {
548 args[i] = &argv[i];
549 }
550
551 fci.size = sizeof(fci);
552 fci.function_table = EG(function_table);
553 fci.function_name = handler;
554 fci.symbol_table = NULL;
555 fci.object_pp = &parser->object;
556 fci.retval_ptr_ptr = &retval;
557 fci.param_count = argc;
558 fci.params = args;
559 fci.no_separation = 0;
560 /*fci.function_handler_cache = &function_ptr;*/
561
562 result = zend_call_function(&fci, NULL TSRMLS_CC);
563 if (result == FAILURE) {
564 zval **method;
565 zval **obj;
566
567 if (Z_TYPE_P(handler) == IS_STRING || Z_TYPE_P(handler) == IS_UNICODE) {
568 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call handler %R()", Z_TYPE_P(handler), Z_UNIVAL_P(handler));
569 } else if (zend_hash_index_find(Z_ARRVAL_P(handler), 0, (void **) &obj) == SUCCESS &&
570 zend_hash_index_find(Z_ARRVAL_P(handler), 1, (void **) &method) == SUCCESS &&
571 Z_TYPE_PP(obj) == IS_OBJECT &&
572 (Z_TYPE_PP(method) == IS_STRING || Z_TYPE_PP(method) == IS_UNICODE)) {
573 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call handler %v::%R()", Z_OBJCE_PP(obj)->name, Z_TYPE_PP(method), Z_UNIVAL_PP(method));
574 } else
575 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call handler");
576 }
577
578 for (i = 0; i < argc; i++) {
579 zval_ptr_dtor(args[i]);
580 }
581 efree(args);
582
583 if (result == FAILURE) {
584 return NULL;
585 } else {
586 return EG(exception) ? NULL : retval;
587 }
588 } else {
589 for (i = 0; i < argc; i++) {
590 zval_ptr_dtor(&argv[i]);
591 }
592 return NULL;
593 }
594 }
595 /* }}} */
596
597 /* {{{ xml_encode_iso_8859_1() */
/* Maps an ISO-8859-1 byte to its code point; the value is passed through unchanged. */
inline static unsigned short xml_encode_iso_8859_1(unsigned char c)
{
	unsigned short code_point = c;

	return code_point;
}
602 /* }}} */
603
604 /* {{{ xml_decode_iso_8859_1() */
/* Maps a code point to an ISO-8859-1 byte; anything above 0xff becomes '?'. */
inline static char xml_decode_iso_8859_1(unsigned short c)
{
	if (c > 0xff) {
		return '?';
	}
	return (char)c;
}
609 /* }}} */
610
611 /* {{{ xml_encode_us_ascii() */
/* Maps a US-ASCII byte to its code point; the value is passed through unchanged. */
inline static unsigned short xml_encode_us_ascii(unsigned char c)
{
	unsigned short code_point = c;

	return code_point;
}
616 /* }}} */
617
618 /* {{{ xml_decode_us_ascii() */
/* Maps a code point to a US-ASCII byte; anything above 0x7f becomes '?'. */
inline static char xml_decode_us_ascii(unsigned short c)
{
	if (c > 0x7f) {
		return '?';
	}
	return (char)c;
}
623 /* }}} */
624
625 /* {{{ xml_get_encoding() */
626 static xml_encoding *xml_get_encoding(const XML_Char *name)
627 {
628 xml_encoding *enc = &xml_encodings[0];
629
630 while (enc && enc->name) {
631 if (strcasecmp(name, enc->name) == 0) {
632 return enc;
633 }
634 enc++;
635 }
636 return NULL;
637 }
638 /* }}} */
639
640 /* {{{ xml_utf8_encode */
641 PHPAPI char *xml_utf8_encode(const char *s, int len, int *newlen, const XML_Char *encoding)
642 {
643 int pos = len;
644 char *newbuf;
645 unsigned int c;
646 unsigned short (*encoder)(unsigned char) = NULL;
647 xml_encoding *enc = xml_get_encoding(encoding);
648
649 *newlen = 0;
650 if (enc) {
651 encoder = enc->encoding_function;
652 } else {
653 /* If the target encoding was unknown, fail */
654 return NULL;
655 }
656 if (encoder == NULL) {
657 /* If no encoder function was specified, return the data as-is.
658 */
659 newbuf = emalloc(len + 1);
660 memcpy(newbuf, s, len);
661 *newlen = len;
662 newbuf[*newlen] = '\0';
663 return newbuf;
664 }
665 /* This is the theoretical max (will never get beyond len * 2 as long
666 * as we are converting from single-byte characters, though) */
667 newbuf = safe_emalloc(len, 4, 1);
668 while (pos > 0) {
669 c = encoder ? encoder((unsigned char)(*s)) : (unsigned short)(*s);
670 if (c < 0x80) {
671 newbuf[(*newlen)++] = (char) c;
672 } else if (c < 0x800) {
673 newbuf[(*newlen)++] = (0xc0 | (c >> 6));
674 newbuf[(*newlen)++] = (0x80 | (c & 0x3f));
675 } else if (c < 0x10000) {
676 newbuf[(*newlen)++] = (0xe0 | (c >> 12));
677 newbuf[(*newlen)++] = (0xc0 | ((c >> 6) & 0x3f));
678 newbuf[(*newlen)++] = (0x80 | (c & 0x3f));
679 } else if (c < 0x200000) {
680 newbuf[(*newlen)++] = (0xf0 | (c >> 18));
681 newbuf[(*newlen)++] = (0xe0 | ((c >> 12) & 0x3f));
682 newbuf[(*newlen)++] = (0xc0 | ((c >> 6) & 0x3f));
683 newbuf[(*newlen)++] = (0x80 | (c & 0x3f));
684 }
685 pos--;
686 s++;
687 }
688 newbuf[*newlen] = 0;
689 newbuf = erealloc(newbuf, (*newlen)+1);
690 return newbuf;
691 }
692 /* }}} */
693
694 /* {{{ xml_utf8_decode */
695 PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding)
696 {
697 int pos = len;
698 char *newbuf = emalloc(len + 1);
699 unsigned short c;
700 char (*decoder)(unsigned short) = NULL;
701 xml_encoding *enc = NULL;
702
703 *newlen = 0;
704
705 if (encoding) {
706 enc = xml_get_encoding(encoding);
707 if (enc) {
708 decoder = enc->decoding_function;
709 }
710 }
711 if (decoder == NULL) {
712 /* If the target encoding was unknown, or no decoder function
713 * was specified, return the UTF-8-encoded data as-is.
714 */
715 memcpy(newbuf, s, len);
716 *newlen = len;
717 newbuf[*newlen] = '\0';
718 return newbuf;
719 }
720 while (pos > 0) {
721 c = (unsigned char)(*s);
722 if (c >= 0xf0) { /* four bytes encoded, 21 bits */
723 if(pos-4 >= 0) {
724 c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
725 } else {
726 c = '?';
727 }
728 s += 4;
729 pos -= 4;
730 } else if (c >= 0xe0) { /* three bytes encoded, 16 bits */
731 if(pos-3 >= 0) {
732 c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63);
733 } else {
734 c = '?';
735 }
736 s += 3;
737 pos -= 3;
738 } else if (c >= 0xc0) { /* two bytes encoded, 11 bits */
739 if(pos-2 >= 0) {
740 c = ((s[0]&63)<<6) | (s[1]&63);
741 } else {
742 c = '?';
743 }
744 s += 2;
745 pos -= 2;
746 } else {
747 s++;
748 pos--;
749 }
750 newbuf[*newlen] = decoder ? decoder(c) : c;
751 ++*newlen;
752 }
753 if (*newlen < len) {
754 newbuf = erealloc(newbuf, *newlen + 1);
755 }
756 newbuf[*newlen] = '\0';
757 return newbuf;
758 }
759 /* }}} */
760
761 /* {{{ _xml_xmlcharlen() */
762 static int _xml_xmlcharlen(const XML_Char *s)
763 {
764 int len = 0;
765
766 while (*s) {
767 len++;
768 s++;
769 }
770 return len;
771 }
772 /* }}} */
773
774 /* {{{ _xml_zval_strdup() */
775 PHPAPI char *_xml_zval_strdup(zval *val)
776 {
777 if (Z_TYPE_P(val) == IS_STRING) {
778 char *buf = emalloc(Z_STRLEN_P(val) + 1);
779 memcpy(buf, Z_STRVAL_P(val), Z_STRLEN_P(val));
780 buf[Z_STRLEN_P(val)] = '\0';
781 return buf;
782 }
783 return NULL;
784 }
785 /* }}} */
786
787 /* {{{ _xml_add_to_info */
788 static void _xml_add_to_info(xml_parser *parser,char *name)
789 {
790 zval **element, *values;
791
792 if (! parser->info) {
793 return;
794 }
795
796 if (zend_hash_find(Z_ARRVAL_P(parser->info),name,strlen(name) + 1,(void **) &element) == FAILURE) {
797 MAKE_STD_ZVAL(values);
798
799 array_init(values);
800
801 zend_hash_update(Z_ARRVAL_P(parser->info), name, strlen(name)+1, (void *) &values, sizeof(zval*), (void **) &element);
802 }
803
804 add_next_index_long(*element,parser->curtag);
805
806 parser->curtag++;
807 }
808 /* }}} */
809
810 /* {{{ _xml_decode_tag() */
811 static char *_xml_decode_tag(xml_parser *parser, const char *tag)
812 {
813 char *newstr;
814 int out_len;
815
816 TSRMLS_FETCH();
817
818 if (UG(unicode)) {
819 newstr = xml_utf8_decode(tag, strlen(tag), &out_len, NULL);
820 } else {
821 newstr = xml_utf8_decode(tag, strlen(tag), &out_len, parser->target_encoding);
822 }
823
824 if (parser->case_folding) {
825 php_strtoupper(newstr, out_len);
826 }
827
828 return newstr;
829 }
830 /* }}} */
831
832 /* {{{ _xml_startElementHandler() */
833 void _xml_startElementHandler(void *userData, const XML_Char *name, const XML_Char **attributes)
834 {
835 xml_parser *parser = (xml_parser *)userData;
836 const char **attrs = (const char **) attributes;
837 char *tag_name;
838 char *att, *val;
839 int val_len;
840 zval *retval, *args[3];
841
842 if (parser) {
843 TSRMLS_FETCH();
844
845 parser->level++;
846
847 tag_name = _xml_decode_tag(parser, name);
848
849 if (parser->startElementHandler) {
850 args[0] = _xml_resource_zval(parser->index);
851 args[1] = _xml_string_zval(tag_name);
852 MAKE_STD_ZVAL(args[2]);
853 array_init(args[2]);
854
855 while (attributes && *attributes) {
856 att = _xml_decode_tag(parser, attributes[0]);
857 if (UG(unicode)) {
858 val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, NULL);
859 } else {
860 val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, parser->target_encoding);
861 }
862
863 add_utf8_assoc_utf8_stringl(args[2], att, val, val_len, ZSTR_AUTOFREE);
864
865 attributes += 2;
866
867 efree(att);
868 }
869
870 if ((retval = xml_call_handler(parser, parser->startElementHandler, parser->startElementPtr, 3, args))) {
871 zval_ptr_dtor(&retval);
872 }
873 }
874
875 if (parser->data) {
876 zval *tag, *atr;
877 int atcnt = 0;
878
879 MAKE_STD_ZVAL(tag);
880 MAKE_STD_ZVAL(atr);
881
882 array_init(tag);
883 array_init(atr);
884
885 _xml_add_to_info(parser,((char *) tag_name) + parser->toffset);
886
887 add_ascii_assoc_utf8_string(tag,"tag",((char *) tag_name) + parser->toffset,1); /* cast to avoid gcc-warning */
888 add_ascii_assoc_ascii_string(tag,"type","open",1);
889 add_ascii_assoc_long(tag,"level",parser->level);
890
891 parser->ltags[parser->level-1] = estrdup(tag_name);
892 parser->lastwasopen = 1;
893
894 attributes = (const XML_Char **) attrs;
895
896 while (attributes && *attributes) {
897 att = _xml_decode_tag(parser, attributes[0]);
898 if (UG(unicode)) {
899 val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, NULL);
900 } else {
901 val = xml_utf8_decode(attributes[1], strlen(attributes[1]), &val_len, parser->target_encoding);
902 }
903
904 add_utf8_assoc_utf8_stringl(atr, att, val, val_len, ZSTR_AUTOFREE);
905
906 atcnt++;
907 attributes += 2;
908
909 efree(att);
910 }
911
912 if (atcnt) {
913 zend_ascii_hash_add(Z_ARRVAL_P(tag),"attributes",sizeof("attributes"),&atr,sizeof(zval*),NULL);
914 } else {
915 zval_ptr_dtor(&atr);
916 }
917
918 zend_hash_next_index_insert(Z_ARRVAL_P(parser->data),&tag,sizeof(zval*),(void *) &parser->ctag);
919 }
920
921 efree(tag_name);
922 }
923 }
924 /* }}} */
925
926 /* {{{ _xml_endElementHandler() */
927 void _xml_endElementHandler(void *userData, const XML_Char *name)
928 {
929 xml_parser *parser = (xml_parser *)userData;
930 char *tag_name;
931
932 if (parser) {
933 zval *retval, *args[2];
934
935 TSRMLS_FETCH();
936
937 tag_name = _xml_decode_t