page.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. <?php
  2. /**
  3. * Base include file for SimpleTest
  4. * @package SimpleTest
  5. * @subpackage WebTester
  6. * @version $Id: page.php 1938 2009-08-05 17:16:23Z dgheath $
  7. */
  8. /**#@+
  9. * include other SimpleTest class files
  10. */
  11. require_once(dirname(__FILE__) . '/http.php');
  12. require_once(dirname(__FILE__) . '/php_parser.php');
  13. require_once(dirname(__FILE__) . '/tag.php');
  14. require_once(dirname(__FILE__) . '/form.php');
  15. require_once(dirname(__FILE__) . '/selector.php');
  16. /**#@-*/
  17. /**
  18. * A wrapper for a web page.
  19. * @package SimpleTest
  20. * @subpackage WebTester
  21. */
  22. class SimplePage {
  23. private $links = array();
  24. private $title = false;
  25. private $last_widget;
  26. private $label;
  27. private $forms = array();
  28. private $frames = array();
  29. private $transport_error;
  30. private $raw;
  31. private $text = false;
  32. private $sent;
  33. private $headers;
  34. private $method;
  35. private $url;
  36. private $base = false;
  37. private $request_data;
  38. /**
  39. * Parses a page ready to access it's contents.
  40. * @param SimpleHttpResponse $response Result of HTTP fetch.
  41. * @access public
  42. */
  43. function __construct($response = false) {
  44. if ($response) {
  45. $this->extractResponse($response);
  46. } else {
  47. $this->noResponse();
  48. }
  49. }
  50. /**
  51. * Extracts all of the response information.
  52. * @param SimpleHttpResponse $response Response being parsed.
  53. * @access private
  54. */
  55. protected function extractResponse($response) {
  56. $this->transport_error = $response->getError();
  57. $this->raw = $response->getContent();
  58. $this->sent = $response->getSent();
  59. $this->headers = $response->getHeaders();
  60. $this->method = $response->getMethod();
  61. $this->url = $response->getUrl();
  62. $this->request_data = $response->getRequestData();
  63. }
  64. /**
  65. * Sets up a missing response.
  66. * @access private
  67. */
  68. protected function noResponse() {
  69. $this->transport_error = 'No page fetched yet';
  70. $this->raw = false;
  71. $this->sent = false;
  72. $this->headers = false;
  73. $this->method = 'GET';
  74. $this->url = false;
  75. $this->request_data = false;
  76. }
  77. /**
  78. * Original request as bytes sent down the wire.
  79. * @return mixed Sent content.
  80. * @access public
  81. */
  82. function getRequest() {
  83. return $this->sent;
  84. }
  85. /**
  86. * Accessor for raw text of page.
  87. * @return string Raw unparsed content.
  88. * @access public
  89. */
  90. function getRaw() {
  91. return $this->raw;
  92. }
  93. /**
  94. * Accessor for plain text of page as a text browser
  95. * would see it.
  96. * @return string Plain text of page.
  97. * @access public
  98. */
  99. function getText() {
  100. if (! $this->text) {
  101. $this->text = SimplePage::normalise($this->raw);
  102. }
  103. return $this->text;
  104. }
  105. /**
  106. * Accessor for raw headers of page.
  107. * @return string Header block as text.
  108. * @access public
  109. */
  110. function getHeaders() {
  111. if ($this->headers) {
  112. return $this->headers->getRaw();
  113. }
  114. return false;
  115. }
  116. /**
  117. * Original request method.
  118. * @return string GET, POST or HEAD.
  119. * @access public
  120. */
  121. function getMethod() {
  122. return $this->method;
  123. }
  124. /**
  125. * Original resource name.
  126. * @return SimpleUrl Current url.
  127. * @access public
  128. */
  129. function getUrl() {
  130. return $this->url;
  131. }
  132. /**
  133. * Base URL if set via BASE tag page url otherwise
  134. * @return SimpleUrl Base url.
  135. * @access public
  136. */
  137. function getBaseUrl() {
  138. return $this->base;
  139. }
  140. /**
  141. * Original request data.
  142. * @return mixed Sent content.
  143. * @access public
  144. */
  145. function getRequestData() {
  146. return $this->request_data;
  147. }
  148. /**
  149. * Accessor for last error.
  150. * @return string Error from last response.
  151. * @access public
  152. */
  153. function getTransportError() {
  154. return $this->transport_error;
  155. }
  156. /**
  157. * Accessor for current MIME type.
  158. * @return string MIME type as string; e.g. 'text/html'
  159. * @access public
  160. */
  161. function getMimeType() {
  162. if ($this->headers) {
  163. return $this->headers->getMimeType();
  164. }
  165. return false;
  166. }
  167. /**
  168. * Accessor for HTTP response code.
  169. * @return integer HTTP response code received.
  170. * @access public
  171. */
  172. function getResponseCode() {
  173. if ($this->headers) {
  174. return $this->headers->getResponseCode();
  175. }
  176. return false;
  177. }
  178. /**
  179. * Accessor for last Authentication type. Only valid
  180. * straight after a challenge (401).
  181. * @return string Description of challenge type.
  182. * @access public
  183. */
  184. function getAuthentication() {
  185. if ($this->headers) {
  186. return $this->headers->getAuthentication();
  187. }
  188. return false;
  189. }
  190. /**
  191. * Accessor for last Authentication realm. Only valid
  192. * straight after a challenge (401).
  193. * @return string Name of security realm.
  194. * @access public
  195. */
  196. function getRealm() {
  197. if ($this->headers) {
  198. return $this->headers->getRealm();
  199. }
  200. return false;
  201. }
  202. /**
  203. * Accessor for current frame focus. Will be
  204. * false as no frames.
  205. * @return array Always empty.
  206. * @access public
  207. */
  208. function getFrameFocus() {
  209. return array();
  210. }
  211. /**
  212. * Sets the focus by index. The integer index starts from 1.
  213. * @param integer $choice Chosen frame.
  214. * @return boolean Always false.
  215. * @access public
  216. */
  217. function setFrameFocusByIndex($choice) {
  218. return false;
  219. }
  220. /**
  221. * Sets the focus by name. Always fails for a leaf page.
  222. * @param string $name Chosen frame.
  223. * @return boolean False as no frames.
  224. * @access public
  225. */
  226. function setFrameFocus($name) {
  227. return false;
  228. }
  229. /**
  230. * Clears the frame focus. Does nothing for a leaf page.
  231. * @access public
  232. */
  233. function clearFrameFocus() {
  234. }
  235. /**
  236. * TODO: write docs
  237. */
  238. function setFrames($frames) {
  239. $this->frames = $frames;
  240. }
  241. /**
  242. * Test to see if link is an absolute one.
  243. * @param string $url Url to test.
  244. * @return boolean True if absolute.
  245. * @access protected
  246. */
  247. protected function linkIsAbsolute($url) {
  248. $parsed = new SimpleUrl($url);
  249. return (boolean)($parsed->getScheme() && $parsed->getHost());
  250. }
  251. /**
  252. * Adds a link to the page.
  253. * @param SimpleAnchorTag $tag Link to accept.
  254. */
  255. function addLink($tag) {
  256. $this->links[] = $tag;
  257. }
  258. /**
  259. * Set the forms
  260. * @param array $forms An array of SimpleForm objects
  261. */
  262. function setForms($forms) {
  263. $this->forms = $forms;
  264. }
  265. /**
  266. * Test for the presence of a frameset.
  267. * @return boolean True if frameset.
  268. * @access public
  269. */
  270. function hasFrames() {
  271. return count($this->frames) > 0;
  272. }
  273. /**
  274. * Accessor for frame name and source URL for every frame that
  275. * will need to be loaded. Immediate children only.
  276. * @return boolean/array False if no frameset or
  277. * otherwise a hash of frame URLs.
  278. * The key is either a numerical
  279. * base one index or the name attribute.
  280. * @access public
  281. */
  282. function getFrameset() {
  283. if (! $this->hasFrames()) {
  284. return false;
  285. }
  286. $urls = array();
  287. for ($i = 0; $i < count($this->frames); $i++) {
  288. $name = $this->frames[$i]->getAttribute('name');
  289. $url = new SimpleUrl($this->frames[$i]->getAttribute('src'));
  290. $urls[$name ? $name : $i + 1] = $this->expandUrl($url);
  291. }
  292. return $urls;
  293. }
  294. /**
  295. * Fetches a list of loaded frames.
  296. * @return array/string Just the URL for a single page.
  297. * @access public
  298. */
  299. function getFrames() {
  300. $url = $this->expandUrl($this->getUrl());
  301. return $url->asString();
  302. }
  303. /**
  304. * Accessor for a list of all links.
  305. * @return array List of urls with scheme of
  306. * http or https and hostname.
  307. * @access public
  308. */
  309. function getUrls() {
  310. $all = array();
  311. foreach ($this->links as $link) {
  312. $url = $this->getUrlFromLink($link);
  313. $all[] = $url->asString();
  314. }
  315. return $all;
  316. }
  317. /**
  318. * Accessor for URLs by the link label. Label will match
  319. * regardess of whitespace issues and case.
  320. * @param string $label Text of link.
  321. * @return array List of links with that label.
  322. * @access public
  323. */
  324. function getUrlsByLabel($label) {
  325. $matches = array();
  326. foreach ($this->links as $link) {
  327. if ($link->getText() == $label) {
  328. $matches[] = $this->getUrlFromLink($link);
  329. }
  330. }
  331. return $matches;
  332. }
  333. /**
  334. * Accessor for a URL by the id attribute.
  335. * @param string $id Id attribute of link.
  336. * @return SimpleUrl URL with that id of false if none.
  337. * @access public
  338. */
  339. function getUrlById($id) {
  340. foreach ($this->links as $link) {
  341. if ($link->getAttribute('id') === (string)$id) {
  342. return $this->getUrlFromLink($link);
  343. }
  344. }
  345. return false;
  346. }
  347. /**
  348. * Converts a link tag into a target URL.
  349. * @param SimpleAnchor $link Parsed link.
  350. * @return SimpleUrl URL with frame target if any.
  351. * @access private
  352. */
  353. protected function getUrlFromLink($link) {
  354. $url = $this->expandUrl($link->getHref());
  355. if ($link->getAttribute('target')) {
  356. $url->setTarget($link->getAttribute('target'));
  357. }
  358. return $url;
  359. }
  360. /**
  361. * Expands expandomatic URLs into fully qualified
  362. * URLs.
  363. * @param SimpleUrl $url Relative URL.
  364. * @return SimpleUrl Absolute URL.
  365. * @access public
  366. */
  367. function expandUrl($url) {
  368. if (! is_object($url)) {
  369. $url = new SimpleUrl($url);
  370. }
  371. $location = $this->getBaseUrl() ? $this->getBaseUrl() : new SimpleUrl();
  372. return $url->makeAbsolute($location->makeAbsolute($this->getUrl()));
  373. }
  374. /**
  375. * Sets the base url for the page.
  376. * @param string $url Base URL for page.
  377. */
  378. function setBase($url) {
  379. $this->base = new SimpleUrl($url);
  380. }
  381. /**
  382. * Sets the title tag contents.
  383. * @param SimpleTitleTag $tag Title of page.
  384. */
  385. function setTitle($tag) {
  386. $this->title = $tag;
  387. }
  388. /**
  389. * Accessor for parsed title.
  390. * @return string Title or false if no title is present.
  391. * @access public
  392. */
  393. function getTitle() {
  394. if ($this->title) {
  395. return $this->title->getText();
  396. }
  397. return false;
  398. }
  399. /**
  400. * Finds a held form by button label. Will only
  401. * search correctly built forms.
  402. * @param SimpleSelector $selector Button finder.
  403. * @return SimpleForm Form object containing
  404. * the button.
  405. * @access public
  406. */
  407. function getFormBySubmit($selector) {
  408. for ($i = 0; $i < count($this->forms); $i++) {
  409. if ($this->forms[$i]->hasSubmit($selector)) {
  410. return $this->forms[$i];
  411. }
  412. }
  413. return null;
  414. }
  415. /**
  416. * Finds a held form by image using a selector.
  417. * Will only search correctly built forms.
  418. * @param SimpleSelector $selector Image finder.
  419. * @return SimpleForm Form object containing
  420. * the image.
  421. * @access public
  422. */
  423. function getFormByImage($selector) {
  424. for ($i = 0; $i < count($this->forms); $i++) {
  425. if ($this->forms[$i]->hasImage($selector)) {
  426. return $this->forms[$i];
  427. }
  428. }
  429. return null;
  430. }
  431. /**
  432. * Finds a held form by the form ID. A way of
  433. * identifying a specific form when we have control
  434. * of the HTML code.
  435. * @param string $id Form label.
  436. * @return SimpleForm Form object containing the matching ID.
  437. * @access public
  438. */
  439. function getFormById($id) {
  440. for ($i = 0; $i < count($this->forms); $i++) {
  441. if ($this->forms[$i]->getId() == $id) {
  442. return $this->forms[$i];
  443. }
  444. }
  445. return null;
  446. }
  447. /**
  448. * Sets a field on each form in which the field is
  449. * available.
  450. * @param SimpleSelector $selector Field finder.
  451. * @param string $value Value to set field to.
  452. * @return boolean True if value is valid.
  453. * @access public
  454. */
  455. function setField($selector, $value, $position=false) {
  456. $is_set = false;
  457. for ($i = 0; $i < count($this->forms); $i++) {
  458. if ($this->forms[$i]->setField($selector, $value, $position)) {
  459. $is_set = true;
  460. }
  461. }
  462. return $is_set;
  463. }
  464. /**
  465. * Accessor for a form element value within a page.
  466. * @param SimpleSelector $selector Field finder.
  467. * @return string/boolean A string if the field is
  468. * present, false if unchecked
  469. * and null if missing.
  470. * @access public
  471. */
  472. function getField($selector) {
  473. for ($i = 0; $i < count($this->forms); $i++) {
  474. $value = $this->forms[$i]->getValue($selector);
  475. if (isset($value)) {
  476. return $value;
  477. }
  478. }
  479. return null;
  480. }
  481. /**
  482. * Turns HTML into text browser visible text. Images
  483. * are converted to their alt text and tags are supressed.
  484. * Entities are converted to their visible representation.
  485. * @param string $html HTML to convert.
  486. * @return string Plain text.
  487. * @access public
  488. */
  489. static function normalise($html) {
  490. $text = preg_replace('#<!--.*?-->#si', '', $html);
  491. $text = preg_replace('#<(script|option|textarea)[^>]*>.*?</\1>#si', '', $text);
  492. $text = preg_replace('#<img[^>]*alt\s*=\s*("([^"]*)"|\'([^\']*)\'|([a-zA-Z_]+))[^>]*>#', ' \2\3\4 ', $text);
  493. $text = preg_replace('#<[^>]*>#', '', $text);
  494. $text = html_entity_decode($text, ENT_QUOTES);
  495. $text = preg_replace('#\s+#', ' ', $text);
  496. return trim(trim($text), "\xA0"); // TODO: The \xAO is a &nbsp;. Add a test for this.
  497. }
  498. }
  499. ?>