Parser.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667
  1. <?php
  2. //
  3. // +----------------------------------------------------------------------+
  4. // | PHP Version 4 |
  5. // +----------------------------------------------------------------------+
  6. // | Copyright (c) 1997-2004 The PHP Group |
  7. // +----------------------------------------------------------------------+
  8. // | This source file is subject to version 3.0 of the PHP license, |
  9. // | that is bundled with this package in the file LICENSE, and is |
  10. // | available at through the world-wide-web at |
  11. // | http://www.php.net/license/3_0.txt. |
  12. // | If you did not receive a copy of the PHP license and are unable to |
  13. // | obtain it through the world-wide-web, please send a note to |
  14. // | license@php.net so we can mail you a copy immediately. |
  15. // +----------------------------------------------------------------------+
  16. // | Author: Stig Bakken <ssb@fast.no> |
  17. // | Tomas V.V.Cox <cox@idecnet.com> |
  18. // | Stephan Schmidt <schst@php-tools.net> |
  19. // +----------------------------------------------------------------------+
  20. //
  21. // $Id: Parser.php,v 1.25 2005/03/25 17:13:10 schst Exp $
  22. /**
  23. * XML Parser class.
  24. *
  25. * This is an XML parser based on PHP's "xml" extension,
  26. * based on the bundled expat library.
  27. *
  28. * @category XML
  29. * @package XML_Parser
  30. * @author Stig Bakken <ssb@fast.no>
  31. * @author Tomas V.V.Cox <cox@idecnet.com>
  32. * @author Stephan Schmidt <schst@php-tools.net>
  33. */
  34. /**
  35. * uses PEAR's error handling
  36. */
  37. require_once('PEAR.php');
  38. /**
  39. * resource could not be created
  40. */
  41. define('XML_PARSER_ERROR_NO_RESOURCE', 200);
  42. /**
  43. * unsupported mode
  44. */
  45. define('XML_PARSER_ERROR_UNSUPPORTED_MODE', 201);
  46. /**
  47. * invalid encoding was given
  48. */
  49. define('XML_PARSER_ERROR_INVALID_ENCODING', 202);
  50. /**
  51. * specified file could not be read
  52. */
  53. define('XML_PARSER_ERROR_FILE_NOT_READABLE', 203);
  54. /**
  55. * invalid input
  56. */
  57. define('XML_PARSER_ERROR_INVALID_INPUT', 204);
  58. /**
  59. * remote file cannot be retrieved in safe mode
  60. */
  61. define('XML_PARSER_ERROR_REMOTE', 205);
  62. /**
  63. * XML Parser class.
  64. *
  65. * This is an XML parser based on PHP's "xml" extension,
  66. * based on the bundled expat library.
  67. *
  68. * Notes:
  69. * - It requires PHP 4.0.4pl1 or greater
  70. * - From revision 1.17, the function names used by the 'func' mode
  71. * are in the format "xmltag_$elem", for example: use "xmltag_name"
  72. * to handle the <name></name> tags of your xml file.
  73. *
  74. * @category XML
  75. * @package XML_Parser
  76. * @author Stig Bakken <ssb@fast.no>
  77. * @author Tomas V.V.Cox <cox@idecnet.com>
  78. * @author Stephan Schmidt <schst@php-tools.net>
  79. * @todo create XML_Parser_Namespace to parse documents with namespaces
  80. * @todo create XML_Parser_Pull
  81. * @todo Tests that need to be made:
  82. * - mixing character encodings
  83. * - a test using all expat handlers
  84. * - options (folding, output charset)
  85. * - different parsing modes
  86. */
  87. class XML_Parser extends PEAR
  88. {
  89. // {{{ properties
  90. /**
  91. * XML parser handle
  92. *
  93. * @var resource
  94. * @see xml_parser_create()
  95. */
  96. var $parser;
  97. /**
  98. * File handle if parsing from a file
  99. *
  100. * @var resource
  101. */
  102. var $fp;
  103. /**
  104. * Whether to do case folding
  105. *
  106. * If set to true, all tag and attribute names will
  107. * be converted to UPPER CASE.
  108. *
  109. * @var boolean
  110. */
  111. var $folding = true;
  112. /**
  113. * Mode of operation, one of "event" or "func"
  114. *
  115. * @var string
  116. */
  117. var $mode;
  118. /**
  119. * Mapping from expat handler function to class method.
  120. *
  121. * @var array
  122. */
  123. var $handler = array(
  124. 'character_data_handler' => 'cdataHandler',
  125. 'default_handler' => 'defaultHandler',
  126. 'processing_instruction_handler' => 'piHandler',
  127. 'unparsed_entity_decl_handler' => 'unparsedHandler',
  128. 'notation_decl_handler' => 'notationHandler',
  129. 'external_entity_ref_handler' => 'entityrefHandler'
  130. );
  131. /**
  132. * source encoding
  133. *
  134. * @var string
  135. */
  136. var $srcenc;
  137. /**
  138. * target encoding
  139. *
  140. * @var string
  141. */
  142. var $tgtenc;
  143. /**
  144. * handler object
  145. *
  146. * @var object
  147. */
  148. var $_handlerObj;
  149. // }}}
  150. /**
  151. * PHP5 constructor
  152. *
  153. * @param string $srcenc source charset encoding, use NULL (default) to use
  154. * whatever the document specifies
  155. * @param string $mode how this parser object should work, "event" for
  156. * startelement/endelement-type events, "func"
  157. * to have it call functions named after elements
  158. * @param string $tgenc a valid target encoding
  159. */
  160. function __construct($srcenc = null, $mode = 'event', $tgtenc = null)
  161. {
  162. $this->PEAR('XML_Parser_Error');
  163. $this->mode = $mode;
  164. $this->srcenc = $srcenc;
  165. $this->tgtenc = $tgtenc;
  166. }
  167. // }}}
  168. /**
  169. * Sets the mode of the parser.
  170. *
  171. * Possible modes are:
  172. * - func
  173. * - event
  174. *
  175. * You can set the mode using the second parameter
  176. * in the constructor.
  177. *
  178. * This method is only needed, when switching to a new
  179. * mode at a later point.
  180. *
  181. * @access public
  182. * @param string mode, either 'func' or 'event'
  183. * @return boolean|object true on success, PEAR_Error otherwise
  184. */
  185. function setMode($mode)
  186. {
  187. if ($mode != 'func' && $mode != 'event') {
  188. $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
  189. }
  190. $this->mode = $mode;
  191. return true;
  192. }
  193. /**
  194. * Sets the object, that will handle the XML events
  195. *
  196. * This allows you to create a handler object independent of the
  197. * parser object that you are using and easily switch the underlying
  198. * parser.
  199. *
  200. * If no object will be set, XML_Parser assumes that you
  201. * extend this class and handle the events in $this.
  202. *
  203. * @access public
  204. * @param object object to handle the events
  205. * @return boolean will always return true
  206. * @since v1.2.0beta3
  207. */
  208. function setHandlerObj(&$obj)
  209. {
  210. $this->_handlerObj = &$obj;
  211. return true;
  212. }
  213. /**
  214. * Init the element handlers
  215. *
  216. * @access private
  217. */
  218. function _initHandlers()
  219. {
  220. if (!is_resource($this->parser)) {
  221. return false;
  222. }
  223. if (!is_object($this->_handlerObj)) {
  224. $this->_handlerObj = &$this;
  225. }
  226. switch ($this->mode) {
  227. case 'func':
  228. xml_set_object($this->parser, $this->_handlerObj);
  229. xml_set_element_handler($this->parser, array(&$this, 'funcStartHandler'), array(&$this, 'funcEndHandler'));
  230. break;
  231. case 'event':
  232. xml_set_object($this->parser, $this->_handlerObj);
  233. xml_set_element_handler($this->parser, 'startHandler', 'endHandler');
  234. break;
  235. default:
  236. return $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
  237. break;
  238. }
  239. /**
  240. * set additional handlers for character data, entities, etc.
  241. */
  242. foreach ($this->handler as $xml_func => $method) {
  243. if (method_exists($this->_handlerObj, $method)) {
  244. $xml_func = 'xml_set_' . $xml_func;
  245. $xml_func($this->parser, $method);
  246. }
  247. }
  248. }
  249. // {{{ _create()
  250. /**
  251. * create the XML parser resource
  252. *
  253. * Has been moved from the constructor to avoid
  254. * problems with object references.
  255. *
  256. * Furthermore it allows us returning an error
  257. * if something fails.
  258. *
  259. * @access private
  260. * @return boolean|object true on success, PEAR_Error otherwise
  261. *
  262. * @see xml_parser_create
  263. */
  264. function _create()
  265. {
  266. if ($this->srcenc === null) {
  267. $xp = @xml_parser_create();
  268. } else {
  269. $xp = @xml_parser_create($this->srcenc);
  270. }
  271. if (is_resource($xp)) {
  272. if ($this->tgtenc !== null) {
  273. if (!@xml_parser_set_option($xp, XML_OPTION_TARGET_ENCODING,
  274. $this->tgtenc)) {
  275. return $this->raiseError('invalid target encoding', XML_PARSER_ERROR_INVALID_ENCODING);
  276. }
  277. }
  278. $this->parser = $xp;
  279. $result = $this->_initHandlers($this->mode);
  280. if ($this->isError($result)) {
  281. return $result;
  282. }
  283. xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, $this->folding);
  284. return true;
  285. }
  286. return $this->raiseError('Unable to create XML parser resource.', XML_PARSER_ERROR_NO_RESOURCE);
  287. }
  288. // }}}
  289. // {{{ reset()
  290. /**
  291. * Reset the parser.
  292. *
  293. * This allows you to use one parser instance
  294. * to parse multiple XML documents.
  295. *
  296. * @access public
  297. * @return boolean|object true on success, PEAR_Error otherwise
  298. */
  299. function reset()
  300. {
  301. $result = $this->_create();
  302. if ($this->isError( $result )) {
  303. return $result;
  304. }
  305. return true;
  306. }
  307. // }}}
  308. // {{{ setInputFile()
  309. /**
  310. * Sets the input xml file to be parsed
  311. *
  312. * @param string Filename (full path)
  313. * @return resource fopen handle of the given file
  314. * @throws XML_Parser_Error
  315. * @see setInput(), setInputString(), parse()
  316. * @access public
  317. */
  318. function setInputFile($file)
  319. {
  320. /**
  321. * check, if file is a remote file
  322. */
  323. if (preg_match('[^(http|ftp)://]', substr($file, 0, 10))) {
  324. if (!ini_get('allow_url_fopen')) {
  325. return $this->raiseError('Remote files cannot be parsed, as safe mode is enabled.', XML_PARSER_ERROR_REMOTE);
  326. }
  327. }
  328. $fp = fopen($file, 'rb');
  329. if (is_resource($fp)) {
  330. $this->fp = $fp;
  331. return $fp;
  332. }
  333. return $this->raiseError('File could not be opened.', XML_PARSER_ERROR_FILE_NOT_READABLE);
  334. }
  335. // }}}
  336. // {{{ setInputString()
  337. /**
  338. * XML_Parser::setInputString()
  339. *
  340. * Sets the xml input from a string
  341. *
  342. * @param string $data a string containing the XML document
  343. * @return null
  344. **/
  345. function setInputString($data)
  346. {
  347. $this->fp = $data;
  348. return null;
  349. }
  350. // }}}
  351. // {{{ setInput()
  352. /**
  353. * Sets the file handle to use with parse().
  354. *
  355. * You should use setInputFile() or setInputString() if you
  356. * pass a string
  357. *
  358. * @param mixed $fp Can be either a resource returned from fopen(),
  359. * a URL, a local filename or a string.
  360. * @access public
  361. * @see parse()
  362. * @uses setInputString(), setInputFile()
  363. */
  364. function setInput($fp)
  365. {
  366. if (is_resource($fp)) {
  367. $this->fp = $fp;
  368. return true;
  369. }
  370. // see if it's an absolute URL (has a scheme at the beginning)
  371. elseif (eregi('^[a-z]+://', substr($fp, 0, 10))) {
  372. return $this->setInputFile($fp);
  373. }
  374. // see if it's a local file
  375. elseif (file_exists($fp)) {
  376. return $this->setInputFile($fp);
  377. }
  378. // it must be a string
  379. else {
  380. $this->fp = $fp;
  381. return true;
  382. }
  383. return $this->raiseError('Illegal input format', XML_PARSER_ERROR_INVALID_INPUT);
  384. }
  385. // }}}
  386. // {{{ parse()
  387. /**
  388. * Central parsing function.
  389. *
  390. * @return true|object PEAR error returns true on success, or a PEAR_Error otherwise
  391. * @access public
  392. */
  393. function parse()
  394. {
  395. /**
  396. * reset the parser
  397. */
  398. $result = $this->reset();
  399. if ($this->isError($result)) {
  400. return $result;
  401. }
  402. // if $this->fp was fopened previously
  403. if (is_resource($this->fp)) {
  404. while ($data = fread($this->fp, 4096)) {
  405. if (!$this->_parseString($data, feof($this->fp))) {
  406. $error = &$this->raiseError();
  407. $this->free();
  408. return $error;
  409. }
  410. }
  411. // otherwise, $this->fp must be a string
  412. } else {
  413. if (!$this->_parseString($this->fp, true)) {
  414. $error = &$this->raiseError();
  415. $this->free();
  416. return $error;
  417. }
  418. }
  419. $this->free();
  420. return true;
  421. }
  422. /**
  423. * XML_Parser::_parseString()
  424. *
  425. * @param string $data
  426. * @param boolean $eof
  427. * @return bool
  428. * @access private
  429. * @see parseString()
  430. **/
  431. function _parseString($data, $eof = false)
  432. {
  433. return xml_parse($this->parser, $data, $eof);
  434. }
  435. // }}}
  436. // {{{ parseString()
  437. /**
  438. * XML_Parser::parseString()
  439. *
  440. * Parses a string.
  441. *
  442. * @param string $data XML data
  443. * @param boolean $eof If set and TRUE, data is the last piece of data sent in this parser
  444. * @throws XML_Parser_Error
  445. * @return Pear Error|true true on success or a PEAR Error
  446. * @see _parseString()
  447. */
  448. function parseString($data, $eof = false)
  449. {
  450. if (!isset($this->parser) || !is_resource($this->parser)) {
  451. $this->reset();
  452. }
  453. if (!$this->_parseString($data, $eof)) {
  454. $error = &$this->raiseError();
  455. $this->free();
  456. return $error;
  457. }
  458. if ($eof === true) {
  459. $this->free();
  460. }
  461. return true;
  462. }
  463. /**
  464. * XML_Parser::free()
  465. *
  466. * Free the internal resources associated with the parser
  467. *
  468. * @return null
  469. **/
  470. function free()
  471. {
  472. if (isset($this->parser) && is_resource($this->parser)) {
  473. xml_parser_free($this->parser);
  474. unset( $this->parser );
  475. }
  476. if (isset($this->fp) && is_resource($this->fp)) {
  477. fclose($this->fp);
  478. }
  479. unset($this->fp);
  480. return null;
  481. }
  482. /**
  483. * XML_Parser::raiseError()
  484. *
  485. * Throws a XML_Parser_Error
  486. *
  487. * @param string $msg the error message
  488. * @param integer $ecode the error message code
  489. * @return XML_Parser_Error
  490. **/
  491. function raiseError($msg = null, $ecode = 0,$mode = null,
  492. $options = null,
  493. $userinfo = null,
  494. $error_class = null,
  495. $skipmsg = false)
  496. {
  497. $msg = !is_null($msg) ? $msg : $this->parser;
  498. $err = new XML_Parser_Error($msg, $ecode);
  499. return parent::raiseError($err);
  500. }
  501. // }}}
  502. // {{{ funcStartHandler()
  503. function funcStartHandler($xp, $elem, $attribs)
  504. {
  505. $func = 'xmltag_' . $elem;
  506. if (strchr($func, '.')) {
  507. $func = str_replace('.', '_', $func);
  508. }
  509. if (method_exists($this->_handlerObj, $func)) {
  510. call_user_func(array(&$this->_handlerObj, $func), $xp, $elem, $attribs);
  511. } elseif (method_exists($this->_handlerObj, 'xmltag')) {
  512. call_user_func(array(&$this->_handlerObj, 'xmltag'), $xp, $elem, $attribs);
  513. }
  514. }
  515. // }}}
  516. // {{{ funcEndHandler()
  517. function funcEndHandler($xp, $elem)
  518. {
  519. $func = 'xmltag_' . $elem . '_';
  520. if (strchr($func, '.')) {
  521. $func = str_replace('.', '_', $func);
  522. }
  523. if (method_exists($this->_handlerObj, $func)) {
  524. call_user_func(array(&$this->_handlerObj, $func), $xp, $elem);
  525. } elseif (method_exists($this->_handlerObj, 'xmltag_')) {
  526. call_user_func(array(&$this->_handlerObj, 'xmltag_'), $xp, $elem);
  527. }
  528. }
  529. // }}}
  530. // {{{ startHandler()
  531. /**
  532. *
  533. * @abstract
  534. */
  535. function startHandler($xp, $elem, $attribs)
  536. {
  537. return NULL;
  538. }
  539. // }}}
  540. // {{{ endHandler()
  541. /**
  542. *
  543. * @abstract
  544. */
  545. function endHandler($xp, $elem)
  546. {
  547. return NULL;
  548. }
  549. // }}}me
  550. }
  551. /**
  552. * error class, replaces PEAR_Error
  553. *
  554. * An instance of this class will be returned
  555. * if an error occurs inside XML_Parser.
  556. *
  557. * There are three advantages over using the standard PEAR_Error:
  558. * - All messages will be prefixed
  559. * - check for XML_Parser error, using is_a( $error, 'XML_Parser_Error' )
  560. * - messages can be generated from the xml_parser resource
  561. *
  562. * @package XML_Parser
  563. * @access public
  564. * @see PEAR_Error
  565. */
  566. class XML_Parser_Error extends PEAR_Error
  567. {
  568. // {{{ properties
  569. /**
  570. * prefix for all messages
  571. *
  572. * @var string
  573. */
  574. var $error_message_prefix = 'XML_Parser: ';
  575. // }}}
  576. // {{{ constructor()
  577. /**
  578. * construct a new error instance
  579. *
  580. * You may either pass a message or an xml_parser resource as first
  581. * parameter. If a resource has been passed, the last error that
  582. * happened will be retrieved and returned.
  583. *
  584. * @access public
  585. * @param string|resource message or parser resource
  586. * @param integer error code
  587. * @param integer error handling
  588. * @param integer error level
  589. */
  590. function XML_Parser_Error($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
  591. {
  592. if (is_resource($msgorparser)) {
  593. $code = xml_get_error_code($msgorparser);
  594. $msgorparser = sprintf('%s at XML input line %d',
  595. xml_error_string($code),
  596. xml_get_current_line_number($msgorparser));
  597. }
  598. $this->PEAR_Error($msgorparser, $code, $mode, $level);
  599. }
  600. // }}}
  601. }
  602. ?>