Parser.php 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755
  1. <?php
  2. /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
  3. /**
  4. * XML_Parser
  5. *
  6. * XML Parser package
  7. *
  8. * PHP versions 4 and 5
  9. *
  10. * LICENSE:
  11. *
  12. * Copyright (c) 2002-2008 The PHP Group
  13. * All rights reserved.
  14. *
  15. * Redistribution and use in source and binary forms, with or without
  16. * modification, are permitted provided that the following conditions
  17. * are met:
  18. *
  19. * * Redistributions of source code must retain the above copyright
  20. * notice, this list of conditions and the following disclaimer.
  21. * * Redistributions in binary form must reproduce the above copyright
  22. * notice, this list of conditions and the following disclaimer in the
  23. * documentation and/or other materials provided with the distribution.
  24. * * The name of the author may not be used to endorse or promote products
  25. * derived from this software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  28. * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  29. * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  30. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  31. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  32. * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  33. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  34. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  35. * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  36. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  37. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  38. *
  39. * @category XML
  40. * @package XML_Parser
  41. * @author Stig Bakken <ssb@fast.no>
  42. * @author Tomas V.V.Cox <cox@idecnet.com>
  43. * @author Stephan Schmidt <schst@php.net>
  44. * @copyright 2002-2008 The PHP Group
  45. * @license http://opensource.org/licenses/bsd-license New BSD License
  46. * @version CVS: $Id: Parser.php 302733 2010-08-24 01:09:09Z clockwerx $
  47. * @link http://pear.php.net/package/XML_Parser
  48. */
  49. /**
  50. * uses PEAR's error handling
  51. */
  52. require_once 'PEAR.php';
  53. /**
  54. * resource could not be created
  55. */
  56. define('XML_PARSER_ERROR_NO_RESOURCE', 200);
  57. /**
  58. * unsupported mode
  59. */
  60. define('XML_PARSER_ERROR_UNSUPPORTED_MODE', 201);
  61. /**
  62. * invalid encoding was given
  63. */
  64. define('XML_PARSER_ERROR_INVALID_ENCODING', 202);
  65. /**
  66. * specified file could not be read
  67. */
  68. define('XML_PARSER_ERROR_FILE_NOT_READABLE', 203);
  69. /**
  70. * invalid input
  71. */
  72. define('XML_PARSER_ERROR_INVALID_INPUT', 204);
  73. /**
  74. * remote file cannot be retrieved in safe mode
  75. */
  76. define('XML_PARSER_ERROR_REMOTE', 205);
  77. /**
  78. * XML Parser class.
  79. *
  80. * This is an XML parser based on PHP's "xml" extension,
  81. * based on the bundled expat library.
  82. *
  83. * Notes:
  84. * - It requires PHP 4.0.4pl1 or greater
  85. * - From revision 1.17, the function names used by the 'func' mode
  86. * are in the format "xmltag_$elem", for example: use "xmltag_name"
  87. * to handle the <name></name> tags of your xml file.
  88. * - different parsing modes
  89. *
  90. * @category XML
  91. * @package XML_Parser
  92. * @author Stig Bakken <ssb@fast.no>
  93. * @author Tomas V.V.Cox <cox@idecnet.com>
  94. * @author Stephan Schmidt <schst@php.net>
  95. * @copyright 2002-2008 The PHP Group
  96. * @license http://opensource.org/licenses/bsd-license New BSD License
  97. * @version Release: @package_version@
  98. * @link http://pear.php.net/package/XML_Parser
  99. * @todo create XML_Parser_Namespace to parse documents with namespaces
  100. * @todo create XML_Parser_Pull
  101. * @todo Tests that need to be made:
  102. * - mixing character encodings
  103. * - a test using all expat handlers
  104. * - options (folding, output charset)
  105. */
  106. class XML_Parser extends PEAR
  107. {
  108. // {{{ properties
  109. /**
  110. * XML parser handle
  111. *
  112. * @var resource
  113. * @see xml_parser_create()
  114. */
  115. var $parser;
  116. /**
  117. * File handle if parsing from a file
  118. *
  119. * @var resource
  120. */
  121. var $fp;
  122. /**
  123. * Whether to do case folding
  124. *
  125. * If set to true, all tag and attribute names will
  126. * be converted to UPPER CASE.
  127. *
  128. * @var boolean
  129. */
  130. var $folding = true;
  131. /**
  132. * Mode of operation, one of "event" or "func"
  133. *
  134. * @var string
  135. */
  136. var $mode;
  137. /**
  138. * Mapping from expat handler function to class method.
  139. *
  140. * @var array
  141. */
  142. var $handler = array(
  143. 'character_data_handler' => 'cdataHandler',
  144. 'default_handler' => 'defaultHandler',
  145. 'processing_instruction_handler' => 'piHandler',
  146. 'unparsed_entity_decl_handler' => 'unparsedHandler',
  147. 'notation_decl_handler' => 'notationHandler',
  148. 'external_entity_ref_handler' => 'entityrefHandler'
  149. );
  150. /**
  151. * source encoding
  152. *
  153. * @var string
  154. */
  155. var $srcenc;
  156. /**
  157. * target encoding
  158. *
  159. * @var string
  160. */
  161. var $tgtenc;
  162. /**
  163. * handler object
  164. *
  165. * @var object
  166. */
  167. var $_handlerObj;
  168. /**
  169. * valid encodings
  170. *
  171. * @var array
  172. */
  173. var $_validEncodings = array('ISO-8859-1', 'UTF-8', 'US-ASCII');
  174. // }}}
  175. // {{{ php5 constructor
  176. /**
  177. * PHP5 constructor
  178. *
  179. * @param string $srcenc source charset encoding, use NULL (default) to use
  180. * whatever the document specifies
  181. * @param string $mode how this parser object should work, "event" for
  182. * startelement/endelement-type events, "func"
  183. * to have it call functions named after elements
  184. * @param string $tgtenc a valid target encoding
  185. */
  186. function __construct($srcenc = null, $mode = 'event', $tgtenc = null)
  187. {
  188. $this->PEAR('XML_Parser_Error');
  189. $this->mode = $mode;
  190. $this->srcenc = $srcenc;
  191. $this->tgtenc = $tgtenc;
  192. }
  193. // }}}
  194. /**
  195. * Sets the mode of the parser.
  196. *
  197. * Possible modes are:
  198. * - func
  199. * - event
  200. *
  201. * You can set the mode using the second parameter
  202. * in the constructor.
  203. *
  204. * This method is only needed, when switching to a new
  205. * mode at a later point.
  206. *
  207. * @param string $mode mode, either 'func' or 'event'
  208. *
  209. * @return boolean|object true on success, PEAR_Error otherwise
  210. * @access public
  211. */
  212. function setMode($mode)
  213. {
  214. if ($mode != 'func' && $mode != 'event') {
  215. $this->raiseError('Unsupported mode given',
  216. XML_PARSER_ERROR_UNSUPPORTED_MODE);
  217. }
  218. $this->mode = $mode;
  219. return true;
  220. }
  221. /**
  222. * Sets the object, that will handle the XML events
  223. *
  224. * This allows you to create a handler object independent of the
  225. * parser object that you are using and easily switch the underlying
  226. * parser.
  227. *
  228. * If no object will be set, XML_Parser assumes that you
  229. * extend this class and handle the events in $this.
  230. *
  231. * @param object &$obj object to handle the events
  232. *
  233. * @return boolean will always return true
  234. * @access public
  235. * @since v1.2.0beta3
  236. */
  237. function setHandlerObj(&$obj)
  238. {
  239. $this->_handlerObj = &$obj;
  240. return true;
  241. }
  242. /**
  243. * Init the element handlers
  244. *
  245. * @return mixed
  246. * @access private
  247. */
  248. function _initHandlers()
  249. {
  250. if (!is_resource($this->parser)) {
  251. return false;
  252. }
  253. if (!is_object($this->_handlerObj)) {
  254. $this->_handlerObj = &$this;
  255. }
  256. switch ($this->mode) {
  257. case 'func':
  258. xml_set_object($this->parser, $this->_handlerObj);
  259. xml_set_element_handler($this->parser,
  260. array(&$this, 'funcStartHandler'), array(&$this, 'funcEndHandler'));
  261. break;
  262. case 'event':
  263. xml_set_object($this->parser, $this->_handlerObj);
  264. xml_set_element_handler($this->parser, 'startHandler', 'endHandler');
  265. break;
  266. default:
  267. return $this->raiseError('Unsupported mode given',
  268. XML_PARSER_ERROR_UNSUPPORTED_MODE);
  269. break;
  270. }
  271. /**
  272. * set additional handlers for character data, entities, etc.
  273. */
  274. foreach ($this->handler as $xml_func => $method) {
  275. if (method_exists($this->_handlerObj, $method)) {
  276. $xml_func = 'xml_set_' . $xml_func;
  277. $xml_func($this->parser, $method);
  278. }
  279. }
  280. }
  281. // {{{ _create()
  282. /**
  283. * create the XML parser resource
  284. *
  285. * Has been moved from the constructor to avoid
  286. * problems with object references.
  287. *
  288. * Furthermore it allows us returning an error
  289. * if something fails.
  290. *
  291. * NOTE: uses '@' error suppresion in this method
  292. *
  293. * @return bool|PEAR_Error true on success, PEAR_Error otherwise
  294. * @access private
  295. * @see xml_parser_create
  296. */
  297. function _create()
  298. {
  299. if ($this->srcenc === null) {
  300. $xp = @xml_parser_create();
  301. } else {
  302. $xp = @xml_parser_create($this->srcenc);
  303. }
  304. if (is_resource($xp)) {
  305. if ($this->tgtenc !== null) {
  306. if (!@xml_parser_set_option($xp, XML_OPTION_TARGET_ENCODING,
  307. $this->tgtenc)
  308. ) {
  309. return $this->raiseError('invalid target encoding',
  310. XML_PARSER_ERROR_INVALID_ENCODING);
  311. }
  312. }
  313. $this->parser = $xp;
  314. $result = $this->_initHandlers($this->mode);
  315. if (PEAR::isError($result)) {
  316. return $result;
  317. }
  318. xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, $this->folding);
  319. return true;
  320. }
  321. if (!in_array(strtoupper($this->srcenc), $this->_validEncodings)) {
  322. return $this->raiseError('invalid source encoding',
  323. XML_PARSER_ERROR_INVALID_ENCODING);
  324. }
  325. return $this->raiseError('Unable to create XML parser resource.',
  326. XML_PARSER_ERROR_NO_RESOURCE);
  327. }
  328. // }}}
  329. // {{{ reset()
  330. /**
  331. * Reset the parser.
  332. *
  333. * This allows you to use one parser instance
  334. * to parse multiple XML documents.
  335. *
  336. * @access public
  337. * @return boolean|object true on success, PEAR_Error otherwise
  338. */
  339. function reset()
  340. {
  341. $result = $this->_create();
  342. if (PEAR::isError($result)) {
  343. return $result;
  344. }
  345. return true;
  346. }
  347. // }}}
  348. // {{{ setInputFile()
  349. /**
  350. * Sets the input xml file to be parsed
  351. *
  352. * @param string $file Filename (full path)
  353. *
  354. * @return resource fopen handle of the given file
  355. * @access public
  356. * @throws XML_Parser_Error
  357. * @see setInput(), setInputString(), parse()
  358. */
  359. function setInputFile($file)
  360. {
  361. /**
  362. * check, if file is a remote file
  363. */
  364. if (preg_match('/^(http|ftp):\/\//i', substr($file, 0, 10))) {
  365. if (!ini_get('allow_url_fopen')) {
  366. return $this->
  367. raiseError('Remote files cannot be parsed, as safe mode is enabled.',
  368. XML_PARSER_ERROR_REMOTE);
  369. }
  370. }
  371. $fp = @fopen($file, 'rb');
  372. if (is_resource($fp)) {
  373. $this->fp = $fp;
  374. return $fp;
  375. }
  376. return $this->raiseError('File could not be opened.',
  377. XML_PARSER_ERROR_FILE_NOT_READABLE);
  378. }
  379. // }}}
  380. // {{{ setInputString()
  381. /**
  382. * XML_Parser::setInputString()
  383. *
  384. * Sets the xml input from a string
  385. *
  386. * @param string $data a string containing the XML document
  387. *
  388. * @return null
  389. */
  390. function setInputString($data)
  391. {
  392. $this->fp = $data;
  393. return null;
  394. }
  395. // }}}
  396. // {{{ setInput()
  397. /**
  398. * Sets the file handle to use with parse().
  399. *
  400. * You should use setInputFile() or setInputString() if you
  401. * pass a string
  402. *
  403. * @param mixed $fp Can be either a resource returned from fopen(),
  404. * a URL, a local filename or a string.
  405. *
  406. * @return mixed
  407. * @access public
  408. * @see parse()
  409. * @uses setInputString(), setInputFile()
  410. */
  411. function setInput($fp)
  412. {
  413. if (is_resource($fp)) {
  414. $this->fp = $fp;
  415. return true;
  416. } elseif (preg_match('/^[a-z]+:\/\//i', substr($fp, 0, 10))) {
  417. // see if it's an absolute URL (has a scheme at the beginning)
  418. return $this->setInputFile($fp);
  419. } elseif (file_exists($fp)) {
  420. // see if it's a local file
  421. return $this->setInputFile($fp);
  422. } else {
  423. // it must be a string
  424. $this->fp = $fp;
  425. return true;
  426. }
  427. return $this->raiseError('Illegal input format',
  428. XML_PARSER_ERROR_INVALID_INPUT);
  429. }
  430. // }}}
  431. // {{{ parse()
  432. /**
  433. * Central parsing function.
  434. *
  435. * @return bool|PEAR_Error returns true on success, or a PEAR_Error otherwise
  436. * @access public
  437. */
  438. function parse()
  439. {
  440. /**
  441. * reset the parser
  442. */
  443. $result = $this->reset();
  444. if (PEAR::isError($result)) {
  445. return $result;
  446. }
  447. // if $this->fp was fopened previously
  448. if (is_resource($this->fp)) {
  449. while ($data = fread($this->fp, 4096)) {
  450. if (!$this->_parseString($data, feof($this->fp))) {
  451. $error = &$this->raiseError();
  452. $this->free();
  453. return $error;
  454. }
  455. }
  456. } else {
  457. // otherwise, $this->fp must be a string
  458. if (!$this->_parseString($this->fp, true)) {
  459. $error = &$this->raiseError();
  460. $this->free();
  461. return $error;
  462. }
  463. }
  464. $this->free();
  465. return true;
  466. }
  467. /**
  468. * XML_Parser::_parseString()
  469. *
  470. * @param string $data data
  471. * @param bool $eof end-of-file flag
  472. *
  473. * @return bool
  474. * @access private
  475. * @see parseString()
  476. **/
  477. function _parseString($data, $eof = false)
  478. {
  479. return xml_parse($this->parser, $data, $eof);
  480. }
  481. // }}}
  482. // {{{ parseString()
  483. /**
  484. * XML_Parser::parseString()
  485. *
  486. * Parses a string.
  487. *
  488. * @param string $data XML data
  489. * @param boolean $eof If set and TRUE, data is the last piece
  490. * of data sent in this parser
  491. *
  492. * @return bool|PEAR_Error true on success or a PEAR Error
  493. * @throws XML_Parser_Error
  494. * @see _parseString()
  495. */
  496. function parseString($data, $eof = false)
  497. {
  498. if (!isset($this->parser) || !is_resource($this->parser)) {
  499. $this->reset();
  500. }
  501. if (!$this->_parseString($data, $eof)) {
  502. $error = &$this->raiseError();
  503. $this->free();
  504. return $error;
  505. }
  506. if ($eof === true) {
  507. $this->free();
  508. }
  509. return true;
  510. }
  511. /**
  512. * XML_Parser::free()
  513. *
  514. * Free the internal resources associated with the parser
  515. *
  516. * @return null
  517. **/
  518. function free()
  519. {
  520. if (isset($this->parser) && is_resource($this->parser)) {
  521. xml_parser_free($this->parser);
  522. unset( $this->parser );
  523. }
  524. if (isset($this->fp) && is_resource($this->fp)) {
  525. fclose($this->fp);
  526. }
  527. unset($this->fp);
  528. return null;
  529. }
  530. /**
  531. * XML_Parser::raiseError()
  532. *
  533. * Throws a XML_Parser_Error
  534. *
  535. * @param string $msg the error message
  536. * @param integer $ecode the error message code
  537. *
  538. * @return XML_Parser_Error reference to the error object
  539. **/
  540. static function &raiseError($message = null,
  541. $code = 0,
  542. $mode = null,
  543. $options = null,
  544. $userinfo = null,
  545. $error_class = null,
  546. $skipmsg = false)
  547. {
  548. $msg = !is_null($msg) ? $msg : $this->parser;
  549. $err = new XML_Parser_Error($msg, $ecode);
  550. return parent::raiseError($err);
  551. }
  552. // }}}
  553. // {{{ funcStartHandler()
  554. /**
  555. * derives and calls the Start Handler function
  556. *
  557. * @param mixed $xp ??
  558. * @param mixed $elem ??
  559. * @param mixed $attribs ??
  560. *
  561. * @return void
  562. */
  563. function funcStartHandler($xp, $elem, $attribs)
  564. {
  565. $func = 'xmltag_' . $elem;
  566. $func = str_replace(array('.', '-', ':'), '_', $func);
  567. if (method_exists($this->_handlerObj, $func)) {
  568. call_user_func(array(&$this->_handlerObj, $func), $xp, $elem, $attribs);
  569. } elseif (method_exists($this->_handlerObj, 'xmltag')) {
  570. call_user_func(array(&$this->_handlerObj, 'xmltag'),
  571. $xp, $elem, $attribs);
  572. }
  573. }
  574. // }}}
  575. // {{{ funcEndHandler()
  576. /**
  577. * derives and calls the End Handler function
  578. *
  579. * @param mixed $xp ??
  580. * @param mixed $elem ??
  581. *
  582. * @return void
  583. */
  584. function funcEndHandler($xp, $elem)
  585. {
  586. $func = 'xmltag_' . $elem . '_';
  587. $func = str_replace(array('.', '-', ':'), '_', $func);
  588. if (method_exists($this->_handlerObj, $func)) {
  589. call_user_func(array(&$this->_handlerObj, $func), $xp, $elem);
  590. } elseif (method_exists($this->_handlerObj, 'xmltag_')) {
  591. call_user_func(array(&$this->_handlerObj, 'xmltag_'), $xp, $elem);
  592. }
  593. }
  594. // }}}
  595. // {{{ startHandler()
  596. /**
  597. * abstract method signature for Start Handler
  598. *
  599. * @param mixed $xp ??
  600. * @param mixed $elem ??
  601. * @param mixed &$attribs ??
  602. *
  603. * @return null
  604. * @abstract
  605. */
  606. function startHandler($xp, $elem, &$attribs)
  607. {
  608. return null;
  609. }
  610. // }}}
  611. // {{{ endHandler()
  612. /**
  613. * abstract method signature for End Handler
  614. *
  615. * @param mixed $xp ??
  616. * @param mixed $elem ??
  617. *
  618. * @return null
  619. * @abstract
  620. */
  621. function endHandler($xp, $elem)
  622. {
  623. return null;
  624. }
  625. // }}}me
  626. }
  627. /**
  628. * error class, replaces PEAR_Error
  629. *
  630. * An instance of this class will be returned
  631. * if an error occurs inside XML_Parser.
  632. *
  633. * There are three advantages over using the standard PEAR_Error:
  634. * - All messages will be prefixed
  635. * - check for XML_Parser error, using is_a( $error, 'XML_Parser_Error' )
  636. * - messages can be generated from the xml_parser resource
  637. *
  638. * @category XML
  639. * @package XML_Parser
  640. * @author Stig Bakken <ssb@fast.no>
  641. * @author Tomas V.V.Cox <cox@idecnet.com>
  642. * @author Stephan Schmidt <schst@php.net>
  643. * @copyright 2002-2008 The PHP Group
  644. * @license http://opensource.org/licenses/bsd-license New BSD License
  645. * @version Release: @package_version@
  646. * @link http://pear.php.net/package/XML_Parser
  647. * @see PEAR_Error
  648. */
  649. class XML_Parser_Error extends PEAR_Error
  650. {
  651. // {{{ properties
  652. /**
  653. * prefix for all messages
  654. *
  655. * @var string
  656. */
  657. var $error_message_prefix = 'XML_Parser: ';
  658. // }}}
  659. // {{{ constructor()
  660. /**
  661. * construct a new error instance
  662. *
  663. * You may either pass a message or an xml_parser resource as first
  664. * parameter. If a resource has been passed, the last error that
  665. * happened will be retrieved and returned.
  666. *
  667. * @param string|resource $msgorparser message or parser resource
  668. * @param integer $code error code
  669. * @param integer $mode error handling
  670. * @param integer $level error level
  671. *
  672. * @access public
  673. * @todo PEAR CS - can't meet 85char line limit without arg refactoring
  674. */
  675. function XML_Parser_Error($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
  676. {
  677. if (is_resource($msgorparser)) {
  678. $code = xml_get_error_code($msgorparser);
  679. $msgorparser = sprintf('%s at XML input line %d:%d',
  680. xml_error_string($code),
  681. xml_get_current_line_number($msgorparser),
  682. xml_get_current_column_number($msgorparser));
  683. }
  684. $this->PEAR_Error($msgorparser, $code, $mode, $level);
  685. }
  686. // }}}
  687. }
  688. ?>