123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550 |
- <?php
- /**
- * base include file for SimpleTest
- * @package SimpleTest
- * @subpackage WebTester
- * @version $Id: url.php 2011 2011-04-29 08:22:48Z pp11 $
- */
- /**#@+
- * include other SimpleTest class files
- */
- require_once(dirname(__FILE__) . '/encoding.php');
- /**#@-*/
- /**
- * URL parser to replace parse_url() PHP function which
- * got broken in PHP 4.3.0. Adds some browser specific
- * functionality such as expandomatics.
- * Guesses a bit trying to separate the host from
- * the path and tries to keep a raw, possibly unparsable,
- * request string as long as possible.
- * @package SimpleTest
- * @subpackage WebTester
- */
class SimpleUrl {
    private $scheme;
    private $username;
    private $password;
    private $host;
    private $port;
    public $path;
    private $request;
    private $fragment;
    private $x;
    private $y;
    private $target;
    private $raw = false;

    /**
     * Constructor. Parses the URL into sections by chomping one
     * part at a time off the front (or, for image coordinates,
     * the end) of the string.
     * @param string $url        Incoming URL.
     * @access public
     */
    public function __construct($url = '') {
        list($x, $y) = $this->chompCoordinates($url);
        $this->setCoordinates($x, $y);
        $this->scheme = $this->chompScheme($url);
        if ($this->scheme === 'file') {
            // Unescaped backslashes not used in directory separator context
            // will get caught by this, but they should have been urlencoded
            // anyway so we don't care. If this ends up being a problem, the
            // host regexp must be modified to match for backslashes when
            // the scheme is file.
            $url = str_replace('\\', '/', $url);
        }
        list($this->username, $this->password) = $this->chompLogin($url);
        $this->host = $this->chompHost($url);
        $this->port = false;
        // Only a host that was actually found can carry a ":port" suffix.
        if ($this->host !== false && preg_match('/(.*?):(.*)/', $this->host, $host_parts)) {
            if ($this->scheme === 'file' && strlen($this->host) === 2) {
                // DOS drive was placed in authority; promote it to path.
                $url = '/' . $this->host . $url;
                $this->host = false;
            } else {
                $this->host = $host_parts[1];
                $this->port = (int)$host_parts[2];
            }
        }
        $this->path = $this->chompPath($url);
        $this->request = $this->parseRequest($this->chompRequest($url));
        // Whatever is left is either "#fragment" or nothing.
        $this->fragment = (strncmp($url, "#", 1) == 0 ? substr($url, 1) : false);
        $this->target = false;
    }

    /**
     * Extracts the X, Y coordinate pair from an image map, i.e.
     * a trailing "?x,y" made only of digits.
     * @param string $url   URL so far. The coordinates will be
     *                      removed.
     * @return array        X, Y as a pair of integers, or a pair
     *                      of false values if none are present.
     * @access private
     */
    protected function chompCoordinates(&$url) {
        if (preg_match('/(.*)\?(\d+),(\d+)$/', $url, $matches)) {
            $url = $matches[1];
            return array((int)$matches[2], (int)$matches[3]);
        }
        return array(false, false);
    }

    /**
     * Extracts the scheme part of an incoming URL. Only a scheme
     * followed by "://" is recognised; the "//" is left on the URL.
     * @param string $url   URL so far. The scheme will be
     *                      removed.
     * @return string       Scheme part or false.
     * @access private
     */
    protected function chompScheme(&$url) {
        if (preg_match('#^([^/:]*):(//)(.*)#', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return $matches[1];
        }
        return false;
    }

    /**
     * Extracts the username and password from the
     * incoming URL. The // prefix will be reattached
     * to the URL after the doublet is extracted.
     * @param string $url    URL so far. The username and
     *                       password are removed.
     * @return array         Two item list of username and
     *                       password. Will urldecode() them.
     *                       The password is false if there is
     *                       no ":" and both are false if there
     *                       is no "@" before the first "/".
     * @access private
     */
    protected function chompLogin(&$url) {
        $prefix = '';
        if (preg_match('#^(//)(.*)#', $url, $matches)) {
            $prefix = $matches[1];
            $url = $matches[2];
        }
        if (preg_match('#^([^/]*)@(.*)#', $url, $matches)) {
            $url = $prefix . $matches[2];
            $parts = explode(":", $matches[1]);
            return array(
                    urldecode($parts[0]),
                    isset($parts[1]) ? urldecode($parts[1]) : false);
        }
        $url = $prefix . $url;
        return array(false, false);
    }

    /**
     * Extracts the host part of an incoming URL.
     * Includes the port number part. Will extract
     * the host if it starts with // or it has
     * a top level domain or it has at least two
     * dots.
     * @param string $url    URL so far. The host will be
     *                       removed.
     * @return string        Host part guess or false.
     * @access private
     */
    protected function chompHost(&$url) {
        // Explicit authority: everything between "//" and the next
        // "/", "?", "#" or the end of the string.
        if (preg_match('!^(//)(.*?)(/.*|\?.*|#.*|$)!', $url, $matches)) {
            $url = $matches[3];
            return $matches[2];
        }
        // No "//": guess whether the leading section looks like a host.
        if (preg_match('!(.*?)(\.\./|\./|/|\?|#|$)(.*)!', $url, $matches)) {
            $tlds = SimpleUrl::getAllTopLevelDomains();
            if (preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $matches[1])) {
                $url = $matches[2] . $matches[3];
                return $matches[1];
            } elseif (preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $matches[1])) {
                $url = $matches[2] . $matches[3];
                return $matches[1];
            }
        }
        return false;
    }

    /**
     * Extracts the path information from the incoming
     * URL. Strips this path from the URL.
     * @param string $url     URL so far. The path will be
     *                        removed.
     * @return string         Path part or '' if there is none.
     * @access private
     */
    protected function chompPath(&$url) {
        if (preg_match('/(.*?)(\?|#|$)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return ($matches[1] ? $matches[1] : '');
        }
        return '';
    }

    /**
     * Strips off the request data, i.e. the text between
     * "?" and either "#" or the end of the string.
     * @param string $url  URL so far. The request will be
     *                     removed.
     * @return string      Raw request part without the "?".
     * @access private
     */
    protected function chompRequest(&$url) {
        if (preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return $matches[1];
        }
        return '';
    }

    /**
     * Breaks the request down into an object. The raw string
     * is remembered so that it can be replayed untouched.
     * @param string $raw           Raw request.
     * @return SimpleFormEncoding   Parsed data.
     * @access private
     */
    protected function parseRequest($raw) {
        $this->raw = $raw;
        $request = new SimpleGetEncoding();
        foreach (explode("&", $raw) as $pair) {
            if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
                $request->add(urldecode($matches[1]), urldecode($matches[2]));
            } elseif ($pair !== '') {
                // Compare against '' rather than testing truthiness so
                // that a bare key of "0" is kept, just as "0=1" is.
                $request->add(urldecode($pair), '');
            }
        }
        return $request;
    }

    /**
     * Accessor for protocol part.
     * @param string $default    Value to use if not present.
     * @return string            Scheme name, e.g "http".
     * @access public
     */
    public function getScheme($default = false) {
        return $this->scheme ? $this->scheme : $default;
    }

    /**
     * Accessor for user name.
     * @return string    Username preceding host, or false.
     * @access public
     */
    public function getUsername() {
        return $this->username;
    }

    /**
     * Accessor for password.
     * @return string    Password preceding host, or false.
     * @access public
     */
    public function getPassword() {
        return $this->password;
    }

    /**
     * Accessor for hostname. Any port has already been
     * split off by the constructor; see getPort().
     * @param string $default    Value to use if not present.
     * @return string            Hostname only.
     * @access public
     */
    public function getHost($default = false) {
        return $this->host ? $this->host : $default;
    }

    /**
     * Accessor for top level domain.
     * @return string       Text after the last dot of the host,
     *                      or false if the host has no dot.
     * @access public
     */
    public function getTld() {
        // pathinfo() treats the text after the final "." as the extension.
        $path_parts = pathinfo($this->getHost());
        return (isset($path_parts['extension']) ? $path_parts['extension'] : false);
    }

    /**
     * Accessor for port number.
     * @return integer    TCP/IP port number, or false if the
     *                    URL did not name one.
     * @access public
     */
    public function getPort() {
        return $this->port;
    }

    /**
     * Accessor for path.
     * @return string    Full path including leading slash if implied.
     * @access public
     */
    public function getPath() {
        // A host with no path implies the root document.
        if (! $this->path && $this->host) {
            return '/';
        }
        return $this->path;
    }

    /**
     * Accessor for page if any. This may be a
     * directory name if ambiguous.
     * @return string    Page name, i.e. the text after the
     *                   last "/" of the path.
     * @access public
     */
    public function getPage() {
        if (! preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) {
            return false;
        }
        return $matches[1];
    }

    /**
     * Gets the path to the page.
     * @return string      Path less the page, ending in "/", or
     *                     false if the path contains no "/".
     * @access public
     */
    public function getBasePath() {
        if (! preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) {
            return false;
        }
        return $matches[1];
    }

    /**
     * Accessor for fragment at end of URL after the "#".
     * @return string    Part after "#", or false if none.
     * @access public
     */
    public function getFragment() {
        return $this->fragment;
    }

    /**
     * Sets image coordinates. Set to false to clear
     * them. Clearing either one clears both.
     * @param integer $x    Horizontal position.
     * @param integer $y    Vertical position.
     * @access public
     */
    public function setCoordinates($x = false, $y = false) {
        if (($x === false) || ($y === false)) {
            $this->x = $this->y = false;
            return;
        }
        $this->x = (int)$x;
        $this->y = (int)$y;
    }

    /**
     * Accessor for horizontal image coordinate.
     * @return integer        X value, or false if unset.
     * @access public
     */
    public function getX() {
        return $this->x;
    }

    /**
     * Accessor for vertical image coordinate.
     * @return integer        Y value, or false if unset.
     * @access public
     */
    public function getY() {
        return $this->y;
    }

    /**
     * Accessor for current request parameters
     * in URL string form. Will return the original request
     * if at all possible even if it doesn't make much
     * sense.
     * @return string   Form is string "?a=1&b=2", etc.
     *                  Empty string if there is no request.
     * @access public
     */
    public function getEncodedRequest() {
        if ($this->raw) {
            $encoded = $this->raw;
        } else {
            $encoded = $this->request->asUrlRequest();
        }
        if ($encoded) {
            // Guarantee exactly one leading "?".
            return '?' . preg_replace('/^\?/', '', $encoded);
        }
        return '';
    }

    /**
     * Adds an additional parameter to the request. The raw
     * request string is discarded, so the request will be
     * re-encoded from the parsed parameters from now on.
     * @param string $key            Name of parameter.
     * @param string $value          Value as string.
     * @access public
     */
    public function addRequestParameter($key, $value) {
        $this->raw = false;
        $this->request->add($key, $value);
    }

    /**
     * Adds additional parameters to the request. The raw
     * request string is discarded.
     * @param hash/SimpleFormEncoding $parameters   Additional
     *                                              parameters.
     * @access public
     */
    public function addRequestParameters($parameters) {
        $this->raw = false;
        $this->request->merge($parameters);
    }

    /**
     * Clears down all parameters.
     * @access public
     */
    public function clearRequest() {
        $this->raw = false;
        $this->request = new SimpleGetEncoding();
    }

    /**
     * Gets the frame target if present. Although
     * not strictly part of the URL specification it
     * acts similarly to the browser.
     * @return boolean/string    Frame name or false if none.
     * @access public
     */
    public function getTarget() {
        return $this->target;
    }

    /**
     * Attaches a frame target. Note that this also discards
     * the raw request string.
     * @param string $frame        Name of frame.
     * @access public
     */
    public function setTarget($frame) {
        $this->raw = false;
        $this->target = $frame;
    }

    /**
     * Renders the URL back into a string.
     * @return string        URL in canonical form.
     * @access public
     */
    public function asString() {
        $path = $this->path;
        $scheme = $identity = $host = $port = '';
        // Credentials are only rendered when both halves are present.
        if ($this->username && $this->password) {
            $identity = $this->username . ':' . $this->password . '@';
        }
        if ($this->getHost()) {
            $scheme = $this->getScheme() ? $this->getScheme() : 'http';
            $scheme .= '://';
            $host = $this->getHost();
        } elseif ($this->getScheme() === 'file') {
            // Safest way; otherwise, file URLs on Windows have an extra
            // leading slash. It might be possible to convert file://
            // URIs to local file paths, but that requires more research.
            $scheme = 'file://';
        }
        // Port 80 is omitted from the rendered URL.
        if ($this->getPort() && $this->getPort() != 80 ) {
            $port = ':' . $this->getPort();
        }
        // Only absolute paths are normalised; relative ones are kept as given.
        if (substr($this->path, 0, 1) == '/') {
            $path = $this->normalisePath($this->path);
        }
        $encoded = $this->getEncodedRequest();
        $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
        $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
        return "$scheme$identity$host$port$path$encoded$fragment$coords";
    }

    /**
     * Replaces unknown sections to turn a relative
     * URL into an absolute one. The base URL can
     * be either a string or a SimpleUrl object.
     * This object is not modified.
     * @param string/SimpleUrl $base       Base URL.
     * @return SimpleUrl                   New absolute URL.
     * @access public
     */
    public function makeAbsolute($base) {
        if (! is_object($base)) {
            $base = new SimpleUrl($base);
        }
        if ($this->getHost()) {
            $scheme = $this->getScheme();
            $host = $this->getHost();
            $port = $this->getPort() ? ':' . $this->getPort() : '';
            $identity = $this->getIdentity() ? $this->getIdentity() . '@' : '';
            // Fall back on the base credentials if this URL has none.
            if (! $identity) {
                $identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
            }
        } else {
            $scheme = $base->getScheme();
            $host = $base->getHost();
            $port = $base->getPort() ? ':' . $base->getPort() : '';
            $identity = $base->getIdentity() ? $base->getIdentity() . '@' : '';
        }
        $path = $this->normalisePath($this->extractAbsolutePath($base));
        $encoded = $this->getEncodedRequest();
        $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
        $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
        return new SimpleUrl("$scheme://$identity$host$port$path$encoded$fragment$coords");
    }

    /**
     * Replaces unknown sections of the path with base parts
     * to return a complete absolute one.
     * @param SimpleUrl $base       Base URL.
     * @return string               Absolute path.
     * @access private
     */
    protected function extractAbsolutePath($base) {
        if ($this->getHost()) {
            return $this->path;
        }
        if (! $this->isRelativePath($this->path)) {
            return $this->path;
        }
        if ($this->path) {
            return $base->getBasePath() . $this->path;
        }
        // No path at all, so this URL refers to the base page itself.
        return $base->getPath();
    }

    /**
     * Simple test to see if a path part is relative.
     * @param string $path        Path to test.
     * @return boolean            True if the path does not
     *                            start with a "/".
     * @access private
     */
    protected function isRelativePath($path) {
        return (substr($path, 0, 1) != '/');
    }

    /**
     * Extracts the username and password for use in rendering
     * a URL.
     * @return string/boolean    Form of username:password or false
     *                           unless both are present.
     * @access public
     */
    public function getIdentity() {
        if ($this->username && $this->password) {
            return $this->username . ':' . $this->password;
        }
        return false;
    }

    /**
     * Replaces . and .. sections of the path. The substitutions
     * are repeated until the path stops changing, because a single
     * pass cannot collapse adjacent sections such as "/a/b/../../c"
     * or "/a/././b". Intended for the path component of a URL;
     * if handed a whole URL whose ".." sections climb above the
     * root, the repeated pass can also collapse the host part.
     * @param string $path   Unoptimised path.
     * @return string        Path with dots removed if possible.
     * @access public
     */
    public function normalisePath($path) {
        do {
            $previous = $path;
            $path = preg_replace('|/\./|', '/', $path);
            $path = preg_replace('|/[^/]+/\.\./|', '/', $path);
            if ($path === null) {
                // PCRE failure; hand back the last good value.
                return $previous;
            }
        } while ($path !== $previous);
        return $path;
    }

    /**
     * A pipe separated list of all TLDs that result in two part
     * domain names.
     * @return string        Pipe separated list.
     * @access public
     */
    public static function getAllTopLevelDomains() {
        return 'com|edu|net|org|gov|mil|int|biz|info|name|pro|aero|coop|museum';
    }
}
- ?>
|