123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542 |
- <?php
- /**
- * Base include file for SimpleTest
- * @package SimpleTest
- * @subpackage WebTester
- * @version $Id: page.php 1938 2009-08-05 17:16:23Z dgheath $
- */
- /**#@+
- * include other SimpleTest class files
- */
- require_once(dirname(__FILE__) . '/http.php');
- require_once(dirname(__FILE__) . '/php_parser.php');
- require_once(dirname(__FILE__) . '/tag.php');
- require_once(dirname(__FILE__) . '/form.php');
- require_once(dirname(__FILE__) . '/selector.php');
- /**#@-*/
- /**
- * A wrapper for a web page.
- * @package SimpleTest
- * @subpackage WebTester
- */
class SimplePage {
    /** @var array Link tags collected through addLink(). */
    private $links = array();
    /** @var mixed Title tag set through setTitle(), or false if none. */
    private $title = false;
    /** @var mixed Not referenced by any method of this class. */
    private $last_widget;
    /** @var mixed Not referenced by any method of this class. */
    private $label;
    /** @var array Form objects supplied through setForms(). */
    private $forms = array();
    /** @var array Frame tags supplied through setFrames(). */
    private $frames = array();
    /** @var mixed Error reported by the response, or a placeholder message. */
    private $transport_error;
    /** @var mixed Raw page content, or false when nothing was fetched. */
    private $raw;
    /** @var mixed Cached result of getText(); false until first computed. */
    private $text = false;
    /** @var mixed Request as sent, or false when nothing was fetched. */
    private $sent;
    /** @var mixed Response headers object, or false when nothing was fetched. */
    private $headers;
    /** @var mixed Request method; 'GET' when nothing was fetched. */
    private $method;
    /** @var mixed URL of the page, or false when nothing was fetched. */
    private $url;
    /** @var mixed SimpleUrl set through setBase(), or false if none. */
    private $base = false;
    /** @var mixed Request data, or false when nothing was fetched. */
    private $request_data;

    /**
     *    Builds a page from a response. When no response is
     *    given the page is initialised to a "nothing fetched
     *    yet" state.
     *    @param SimpleHttpResponse $response  Result of HTTP fetch.
     *    @access public
     */
    public function __construct($response = false) {
        if ($response) {
            $this->extractResponse($response);
        } else {
            $this->noResponse();
        }
    }

    /**
     *    Copies the error, content, sent request, headers,
     *    method, URL and request data out of the response.
     *    @param SimpleHttpResponse $response  Response being read.
     *    @access protected
     */
    protected function extractResponse($response) {
        $this->transport_error = $response->getError();
        $this->raw = $response->getContent();
        $this->sent = $response->getSent();
        $this->headers = $response->getHeaders();
        $this->method = $response->getMethod();
        $this->url = $response->getUrl();
        $this->request_data = $response->getRequestData();
    }

    /**
     *    Sets up the state for a missing response: a fixed
     *    transport error message, method 'GET' and false for
     *    everything else.
     *    @access protected
     */
    protected function noResponse() {
        $this->transport_error = 'No page fetched yet';
        $this->raw = false;
        $this->sent = false;
        $this->headers = false;
        $this->method = 'GET';
        $this->url = false;
        $this->request_data = false;
    }

    /**
     *    Original request as bytes sent down the wire.
     *    @return mixed              Sent content.
     *    @access public
     */
    public function getRequest() {
        return $this->sent;
    }

    /**
     *    Accessor for raw text of page.
     *    @return string        Raw unparsed content.
     *    @access public
     */
    public function getRaw() {
        return $this->raw;
    }

    /**
     *    Accessor for plain text of page as a text browser
     *    would see it. The normalised text is computed on the
     *    first call and cached afterwards.
     *    @return string        Plain text of page.
     *    @access public
     */
    public function getText() {
        // Compare against the false sentinel rather than truthiness so
        // that an empty (or '0') result is cached as well.
        if ($this->text === false) {
            $this->text = SimplePage::normalise($this->raw);
        }
        return $this->text;
    }

    /**
     *    Accessor for raw headers of page.
     *    @return string/boolean    Header block as text, or false
     *                              if there are no headers.
     *    @access public
     */
    public function getHeaders() {
        if ($this->headers) {
            return $this->headers->getRaw();
        }
        return false;
    }

    /**
     *    Original request method.
     *    @return string        GET, POST or HEAD.
     *    @access public
     */
    public function getMethod() {
        return $this->method;
    }

    /**
     *    Original resource name.
     *    @return SimpleUrl        Current url.
     *    @access public
     */
    public function getUrl() {
        return $this->url;
    }

    /**
     *    Base URL as set through setBase().
     *    @return SimpleUrl/boolean    Base url, or false if none
     *                                 has been set.
     *    @access public
     */
    public function getBaseUrl() {
        return $this->base;
    }

    /**
     *    Original request data.
     *    @return mixed              Sent content.
     *    @access public
     */
    public function getRequestData() {
        return $this->request_data;
    }

    /**
     *    Accessor for last error.
     *    @return string        Error from last response.
     *    @access public
     */
    public function getTransportError() {
        return $this->transport_error;
    }

    /**
     *    Accessor for current MIME type.
     *    @return string/boolean    MIME type as string; e.g. 'text/html',
     *                              or false if there are no headers.
     *    @access public
     */
    public function getMimeType() {
        if ($this->headers) {
            return $this->headers->getMimeType();
        }
        return false;
    }

    /**
     *    Accessor for HTTP response code.
     *    @return integer/boolean    HTTP response code received,
     *                               or false if there are no headers.
     *    @access public
     */
    public function getResponseCode() {
        if ($this->headers) {
            return $this->headers->getResponseCode();
        }
        return false;
    }

    /**
     *    Accessor for last Authentication type. Only valid
     *    straight after a challenge (401).
     *    @return string/boolean    Description of challenge type,
     *                              or false if there are no headers.
     *    @access public
     */
    public function getAuthentication() {
        if ($this->headers) {
            return $this->headers->getAuthentication();
        }
        return false;
    }

    /**
     *    Accessor for last Authentication realm. Only valid
     *    straight after a challenge (401).
     *    @return string/boolean    Name of security realm, or
     *                              false if there are no headers.
     *    @access public
     */
    public function getRealm() {
        if ($this->headers) {
            return $this->headers->getRealm();
        }
        return false;
    }

    /**
     *    Accessor for current frame focus. A single page has
     *    no frames to focus on, so the list is always empty.
     *    @return array     Always empty.
     *    @access public
     */
    public function getFrameFocus() {
        return array();
    }

    /**
     *    Sets the focus by index. The integer index starts from 1.
     *    Always fails for a leaf page.
     *    @param integer $choice    Chosen frame.
     *    @return boolean           Always false.
     *    @access public
     */
    public function setFrameFocusByIndex($choice) {
        return false;
    }

    /**
     *    Sets the focus by name. Always fails for a leaf page.
     *    @param string $name    Chosen frame.
     *    @return boolean        False as no frames.
     *    @access public
     */
    public function setFrameFocus($name) {
        return false;
    }

    /**
     *    Clears the frame focus. Does nothing for a leaf page.
     *    @access public
     */
    public function clearFrameFocus() {
    }

    /**
     *    Stores the frame tags of the page. They are later read
     *    by hasFrames() and getFrameset().
     *    @param array $frames    Frame tags; each is expected to
     *                            answer getAttribute().
     *    @access public
     */
    public function setFrames($frames) {
        $this->frames = $frames;
    }

    /**
     *    Test to see if link is an absolute one, that is, it
     *    has both a scheme and a host.
     *    @param string $url     Url to test.
     *    @return boolean        True if absolute.
     *    @access protected
     */
    protected function linkIsAbsolute($url) {
        $parsed = new SimpleUrl($url);
        return (boolean)($parsed->getScheme() && $parsed->getHost());
    }

    /**
     *    Adds a link to the page.
     *    @param SimpleAnchorTag $tag      Link to accept.
     *    @access public
     */
    public function addLink($tag) {
        $this->links[] = $tag;
    }

    /**
     *    Set the forms
     *    @param array $forms           An array of SimpleForm objects
     *    @access public
     */
    public function setForms($forms) {
        $this->forms = $forms;
    }

    /**
     *    Test for the presence of a frameset.
     *    @return boolean        True if frameset.
     *    @access public
     */
    public function hasFrames() {
        return count($this->frames) > 0;
    }

    /**
     *    Accessor for frame name and source URL for every frame that
     *    will need to be loaded. Immediate children only.
     *    @return boolean/array     False if no frameset or
     *                              otherwise a hash of frame URLs.
     *                              The key is either a numerical
     *                              base one index or the name attribute.
     *    @access public
     */
    public function getFrameset() {
        if (! $this->hasFrames()) {
            return false;
        }
        $urls = array();
        $position = 0;
        foreach ($this->frames as $frame) {
            // One-based position, used as the key for unnamed frames.
            $position++;
            $name = $frame->getAttribute('name');
            $url = new SimpleUrl($frame->getAttribute('src'));
            $urls[$name ? $name : $position] = $this->expandUrl($url);
        }
        return $urls;
    }

    /**
     *    Fetches a list of loaded frames. For a single page
     *    this is just the expanded page URL.
     *    @return string        The URL of this page as a string.
     *    @access public
     */
    public function getFrames() {
        $url = $this->expandUrl($this->getUrl());
        return $url->asString();
    }

    /**
     *    Accessor for a list of all links.
     *    @return array   The expanded URL of every link added
     *                    to the page, each as a string.
     *    @access public
     */
    public function getUrls() {
        $all = array();
        foreach ($this->links as $link) {
            $url = $this->getUrlFromLink($link);
            $all[] = $url->asString();
        }
        return $all;
    }

    /**
     *    Accessor for URLs by the link label. The label is
     *    compared as an exact string against the text reported
     *    by each link tag.
     *    @param string $label    Text of link.
     *    @return array           List of links with that label.
     *    @access public
     */
    public function getUrlsByLabel($label) {
        $matches = array();
        foreach ($this->links as $link) {
            // Strict string comparison: a loose == would treat numeric
            // looking labels such as '1' and '01' as the same label.
            if ((string)$link->getText() === (string)$label) {
                $matches[] = $this->getUrlFromLink($link);
            }
        }
        return $matches;
    }

    /**
     *    Accessor for a URL by the id attribute.
     *    @param string $id           Id attribute of link.
     *    @return SimpleUrl/boolean   URL with that id or false if none.
     *    @access public
     */
    public function getUrlById($id) {
        foreach ($this->links as $link) {
            if ($link->getAttribute('id') === (string)$id) {
                return $this->getUrlFromLink($link);
            }
        }
        return false;
    }

    /**
     *    Converts a link tag into a target URL.
     *    @param SimpleAnchor $link    Parsed link.
     *    @return SimpleUrl            URL with frame target if any.
     *    @access protected
     */
    protected function getUrlFromLink($link) {
        $url = $this->expandUrl($link->getHref());
        if ($link->getAttribute('target')) {
            $url->setTarget($link->getAttribute('target'));
        }
        return $url;
    }

    /**
     *    Expands a possibly relative URL into a fully qualified
     *    one. The base URL (or an empty SimpleUrl if no base is
     *    set) is first made absolute against the page URL, and
     *    the given URL is then made absolute against the result.
     *    @param SimpleUrl/string $url    Relative URL; anything that
     *                                    is not an object is wrapped
     *                                    in a SimpleUrl first.
     *    @return SimpleUrl               Absolute URL.
     *    @access public
     */
    public function expandUrl($url) {
        if (! is_object($url)) {
            $url = new SimpleUrl($url);
        }
        $location = $this->getBaseUrl() ? $this->getBaseUrl() : new SimpleUrl();
        return $url->makeAbsolute($location->makeAbsolute($this->getUrl()));
    }

    /**
     *    Sets the base url for the page.
     *    @param string $url    Base URL for page.
     *    @access public
     */
    public function setBase($url) {
        $this->base = new SimpleUrl($url);
    }

    /**
     *    Sets the title tag contents.
     *    @param SimpleTitleTag $tag    Title of page.
     *    @access public
     */
    public function setTitle($tag) {
        $this->title = $tag;
    }

    /**
     *    Accessor for parsed title.
     *    @return string/boolean    Title or false if no title is present.
     *    @access public
     */
    public function getTitle() {
        if ($this->title) {
            return $this->title->getText();
        }
        return false;
    }

    /**
     *    Finds a held form by button label. Will only
     *    search correctly built forms.
     *    @param SimpleSelector $selector       Button finder.
     *    @return SimpleForm                    First form containing
     *                                          the button, or null.
     *    @access public
     */
    public function getFormBySubmit($selector) {
        foreach ($this->forms as $form) {
            if ($form->hasSubmit($selector)) {
                return $form;
            }
        }
        return null;
    }

    /**
     *    Finds a held form by image using a selector.
     *    Will only search correctly built forms.
     *    @param SimpleSelector $selector  Image finder.
     *    @return SimpleForm               First form containing
     *                                     the image, or null.
     *    @access public
     */
    public function getFormByImage($selector) {
        foreach ($this->forms as $form) {
            if ($form->hasImage($selector)) {
                return $form;
            }
        }
        return null;
    }

    /**
     *    Finds a held form by the form ID. A way of
     *    identifying a specific form when we have control
     *    of the HTML code.
     *    @param string $id     Form label.
     *    @return SimpleForm    First form whose getId() equals
     *                          the given ID as a string, or null.
     *    @access public
     */
    public function getFormById($id) {
        foreach ($this->forms as $form) {
            // Strict string comparison, as in getUrlById(). A loose ==
            // would let a form reporting a false id match '0' or 0.
            if ((string)$form->getId() === (string)$id) {
                return $form;
            }
        }
        return null;
    }

    /**
     *    Sets a field on each form in which the field is
     *    available. Every form is tried, even after one has
     *    accepted the value.
     *    @param SimpleSelector $selector    Field finder.
     *    @param string $value               Value to set field to.
     *    @param mixed $position             Passed through unchanged
     *                                       to each form's setField().
     *    @return boolean                    True if at least one form
     *                                       accepted the value.
     *    @access public
     */
    public function setField($selector, $value, $position=false) {
        $is_set = false;
        foreach ($this->forms as $form) {
            if ($form->setField($selector, $value, $position)) {
                $is_set = true;
            }
        }
        return $is_set;
    }

    /**
     *    Accessor for a form element value within a page.
     *    The first non-null value reported by a form wins.
     *    @param SimpleSelector $selector    Field finder.
     *    @return string/boolean             A string if the field is
     *                                       present, false if unchecked
     *                                       and null if missing.
     *    @access public
     */
    public function getField($selector) {
        foreach ($this->forms as $form) {
            $value = $form->getValue($selector);
            if ($value !== null) {
                return $value;
            }
        }
        return null;
    }

    /**
     *    Turns HTML into text browser visible text. Comments and
     *    script, option and textarea elements are dropped, images
     *    are converted to their alt text and remaining tags are
     *    suppressed. Entities are converted to their visible
     *    representation, runs of whitespace are collapsed to a
     *    single space, and surrounding whitespace followed by
     *    surrounding non-breaking spaces is trimmed.
     *    @param string $html        HTML to convert.
     *    @return string             Plain text.
     *    @access public
     */
    public static function normalise($html) {
        $text = preg_replace('#<!--.*?-->#si', '', $html);
        $text = preg_replace('#<(script|option|textarea)[^>]*>.*?</\1>#si', '', $text);
        $text = preg_replace('#<img[^>]*alt\s*=\s*("([^"]*)"|\'([^\']*)\'|([a-zA-Z_]+))[^>]*>#', ' \2\3\4 ', $text);
        $text = preg_replace('#<[^>]*>#', '', $text);
        $text = html_entity_decode($text, ENT_QUOTES);
        $text = preg_replace('#\s+#', ' ', $text);
        $text = trim($text);
        // The \xA0 is a non-breaking space (&nbsp;). In valid UTF-8 text it
        // is the two byte sequence C2 A0, and a bare A0 byte can be the tail
        // of another character (e.g. C3 A0), so a byte-wise trim would
        // corrupt the text. Trim it as a code point there instead.
        if (preg_match('//u', $text) === 1) {
            return preg_replace('#^\x{A0}+|\x{A0}+$#u', '', $text);
        }
        // Not valid UTF-8 (e.g. a single byte encoding): trim the raw byte.
        return trim($text, "\xA0");
    }
}
- ?>
|