Bgbot — a MediaWiki robot Code
Status: Beta
Brought to you by:
bmanolov
--- a/trunk/browser.php +++ b/trunk/browser.php @@ -1,6 +1,4 @@ <?php -# $Id$ -# # browser utils # # Copyright (C) 2004 Borislav Manolov @@ -20,46 +18,51 @@ # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # http://www.gnu.org/copyleft/gpl.html # -# Author: Borislav Manolov <b.manolov at web dot de> -# http://purl.oclc.org/NET/borislav/ -# -# This program uses portions of +# Author: Borislav Manolov <b.manolov at gmail dot com> +# http://purl.org/NET/borislav/ +# +# This program uses portions of # Snoopy - the PHP net client # Author: Monte Ohrt <monte@ispi.net> # Copyright (c): 1999-2000 ispi, all rights reserved # Version: 1.01 # http://snoopy.sourceforge.net/ ############################################################################# +require_once('utils.php'); class Browser { - var $host = ''; // host for connection - var $agent = 'Mozilla/5.0 (PHPBrowser)'; // user agent - var $cookies = array(); // cookies - var $print_cookies = false; // whether to print cookies + var $host = ''; // host for connection + var $agent = 'Mozilla/5.0 (PHPBrowser)'; // user agent + var $cookies = array(); // cookies + var $print_cookies = false; // whether to print cookies var $cookies_file = 'cookies.txt'; // cookies - - var $content = ''; // content returned from server - var $headers = array(); // headers returned from server - - var $error = ''; // error messages - var $conn_timeout = 120; // timeout for socket connection - var $is_redirect = false; // true if the fetched page is a redirect - - var $fetch_method = 'GET'; // fetch method - var $submit_method = 'POST'; // submit method - var $http_version = 'HTTP/1.1';// http version - var $content_type = array( // content types - 'text' => 'application/x-www-form-urlencoded', - 'binary' => 'multipart/form-data' - ); - var $mime_boundary = ''; // MIME boundary for 
binary submission + + # data for basic HTTP Authentication + var $user = ''; + var $pass = ''; + + var $content = ''; // content returned from server + var $headers = array(); // headers returned from server + + var $error = ''; // error messages + var $conn_timeout = 120; // timeout for socket connection + var $is_redirect = false; // true if the fetched page is a redirect + + var $fetch_method = 'GET'; // fetch method + var $submit_method = 'POST'; // submit method + var $http_version = 'HTTP/1.1';// http version + var $content_type = array( // content types + 'text' => 'application/x-www-form-urlencoded', + 'binary' => 'multipart/form-data' + ); + var $mime_boundary = ''; // MIME boundary for binary submission # constructor # $params - assoc array (name => value) # return nothing - function Browser($params) { + function Browser($params = array()) { settype($params, 'array'); foreach ( $params as $field => $value ) { if ( isset($this->$field) ) { @@ -73,9 +76,10 @@ # fetch a page # $uri - location of the page + # $do_auth:boolean - add an authentication header # return true by success - function fetch($uri) { - return $this->make_request($uri, $this->fetch_method); + function fetch($uri, $do_auth = false) { + return $this->make_request($uri, $this->fetch_method, '', '', $do_auth); } @@ -84,8 +88,9 @@ # $vars - assoc array with form fields and their values # $file - assoc array (field name => file name) # set only by upload + # $do_auth:boolean - add an authentication header # return true by success - function submit( $uri, $vars, $file = array() ) { + function submit( $uri, $vars, $file = array(), $do_auth = false ) { $postdata = ''; if ( empty($file) ) { foreach ( $vars as $key => $val ) { @@ -110,8 +115,8 @@ $base_name = basename($file_name); $postdata .= '--'. $this->mime_boundary ."\r\n"; - $postdata .= '"Content-Disposition: form-data; name="'. - $field_name . '"; filename="' . $base_name . 
"\"\r\n\r\n"; + $postdata .= 'Content-Disposition: form-data; name="'. $field_name . + '"; filename="' . $base_name . "\"\r\n\r\n"; $postdata .= $file_content . "\r\n"; $postdata .= '--'. $this->mime_boundary ."--\r\n"; } @@ -121,7 +126,7 @@ : $this->content_type['binary'] ; return $this->make_request($uri, $this->submit_method, - $content_type, $postdata); + $content_type, $postdata, $do_auth); } @@ -130,8 +135,9 @@ # $request_method - GET / POST # $content_type - content type (for POST submission) # $postdata - data (for POST submission) + # $do_auth:boolean - add an authentication header based on $this->user and $this->pass # return true if the request succeeded, false otherwise - function make_request($uri, $request_method, $content_type = '', $postdata = '') { + function make_request($uri, $request_method, $content_type = '', $postdata = '', $do_auth = false) { $uri_parts = parse_url($uri); if ( $uri_parts['scheme'] != 'http') { // not a valid protocol $this->error = "Invalid protocol: $uri_parts[scheme]"; @@ -156,12 +162,18 @@ if ( empty($path) ) { $path = '/'; } $headers = "$request_method $path $this->http_version\r\n" . "User-Agent: $this->agent\r\nHost: $this->host\r\nAccept: */*\r\n"; + + if ($do_auth) { + $headers .= 'Authorization: Basic '. + base64_encode($this->user.':'.$this->pass) . "\r\n"; + } + if ( isset($this->cookies[$this->host]) ) { $cookie_headers .= 'Cookie: '; foreach ($this->cookies[$this->host] as $cookie_name => $cookie_data) { $cookie_headers .= $cookie_name .'='. urlencode($cookie_data[0]) .'; '; } - # add $cookie_headers w/o last 2 chars + "\r\n" + # add $cookie_headers w/o last 2 chars $headers .= substr($cookie_headers, 0, -2) . 
"\r\n"; } @@ -209,45 +221,45 @@ # read cookies from file function read_cookies() { - $curr_time = time(); - $lines = file($this->cookies_file); - foreach ($lines as $line) { - $line = trim($line); - if ( empty($line) ) { continue; } - list($host, $cookie_expire, $cookie_name, $cookie_val) = explode("\t", $line); - # add cookie if not expired - if ($curr_time < $cookie_expire) { - $this->cookies[$host][$cookie_name] = array($cookie_val, $cookie_expire); + if (file_exists($this->cookies_file)) { + $curr_time = time(); + $lines = file($this->cookies_file); + foreach ($lines as $line) { + $line = trim($line); + if ( empty($line) ) { continue; } + list($host, $cookie_expire, $cookie_name, $cookie_val) = explode("\t", $line); + # add cookie if not expired + if ($curr_time < $cookie_expire) { + $this->cookies[$host][$cookie_name] = array($cookie_val, $cookie_expire); + } } - } - # write not expired cookies back to file - $cookies_str = ''; - foreach ($this->cookies as $host => $cookie_data) { - foreach ($cookie_data as $cookie_name => $cookie_subdata) { - $cookies_str .= "$host\t$cookie_subdata[1]\t$cookie_name\t$cookie_subdata[0]\n"; + # write not expired cookies back to file + $cookies_str = ''; + foreach ($this->cookies as $host => $cookie_data) { + foreach ($cookie_data as $cookie_name => $cookie_subdata) { + $cookies_str .= "$host\t$cookie_subdata[1]\t$cookie_name\t$cookie_subdata[0]\n"; + } } } my_fwrite($this->cookies_file, $cookies_str, 'w'); } - # set cookies for a redirection - function set_cookies() { + # set cookies + function set_cookies() { $len = count($this->headers); $cookies_str = ''; - for ($i = 0; $i < $len; $i++) { - if (preg_match('/^Set-Cookie:\s+([^=]+)=([^;]+);\s+expires=([^;]+)/i', + for ($i = 0; $i < $len; $i++) { + if (preg_match('/^Set-Cookie:\s+([^=]+)=([^;]+);\s+(expires=([^;]+))?/i', $this->headers[$i], $matches)) { - $exp_time = strtotime($matches[3]); + $exp_time = isset($matches[4]) ? 
strtotime($matches[4]) : time() + 60*60*24*30; $cookies_str .= "$this->host\t$exp_time\t$matches[1]\t$matches[2]\n"; - $this->cookies[$this->host][$matches[1]] = $matches[2]; - if ( $this->print_cookies ) { - echo "$matches[1] = $matches[2]; expires at $matches[3]\n"; - } - } - } + $this->cookies[$this->host][$matches[1]] = array($matches[2], $matches[4]); + if ( $this->print_cookies ) { + echo "$matches[1] = $matches[2]; expires at $matches[4]\n"; + } + } + } my_fwrite($this->cookies_file, $cookies_str); - } + } } // end of class Browser - -?>