<?php
# $Id$
#
# A Wikimedia bot - used for automated editing of pages on Wikipedia
# and its sister projects
#
# Copyright (C) 2004 Borislav Manolov
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html
#
# Author: Borislav Manolov <b.manolov at web dot de>
# http://purl.oclc.org/NET/borislav/
# Bot's home page: http://phpwikibot.sourceforge.net/
#############################################################################
require_once('browser.php');
require_once('utils.php');
require_once('wikis.php');
# Bot for automated editing of pages on Wikipedia and its sister projects.
#
# The bot talks to a wiki through a Browser object (browser.php) and keeps
# the state of the most recently fetched page in the $page_* properties.
# Wiki domains and the localized names of the "Special" namespace are taken
# from the globals $wiki_sites and $wiki_special_nss (wikis.php).
#
# The code avoids constructs removed in PHP 8 (each(), create_function(),
# curly-brace string offsets, PHP 4 constructors) and sticks to syntax that
# is also valid on PHP 5.3.
class Bgbot {
    public $wiki; // main wiki on which the bot will work
    public $user; // user name
    public $pass; // password for this user
    public $browser; // browser object
    public $agent = 'Mozilla/5.0 (Bgbot 0.3)';

    // state of the page most recently read by fetch()
    public $page_name;
    public $page_is_redirect;
    public $page_content;
    public $page_timestamp;

    // filled by fetch2local(): page name => array(content, timestamp)
    public $content_many = array();

    public $log = 'bot.log'; // log file name handed to my_fwrite()
    public $echo_log = true; // echo every log message as well
    public $localdb = 'local.txt';
    public $special_nss; // translations of "Special" namespace
    public $sites; // all wiki sites

    // URL templates; {{DOMAIN}}, {{SPECIAL}} and {{PAGE}} are placeholders
    // filled in by build_url()
    public $base_url;
    public $page_url;
    public $special_url;
    public $export_url;
    public $fetch_url;
    public $submit_url;
    public $delete_url;
    public $history_url;
    public $diff_url;
    public $move_url;
    public $login_url;
    public $upload_url;

    # create a bot
    # $wiki - code of the main wiki on which the bot will work
    # $user - user name
    # $pass - password for this user
    public function __construct($wiki='bg', $user='', $pass='') {
        global $wiki_sites, $wiki_special_nss;

        $this->wiki = $wiki;
        $this->user = $user;
        $this->pass = $pass;
        // keep references so that later changes to the globals stay visible
        $this->special_nss = & $wiki_special_nss;
        $this->sites = & $wiki_sites;

        $this->base_url = 'http://{{DOMAIN}}/w/wiki.phtml?title=';
        $this->page_url = $this->base_url .'{{PAGE}}';
        $this->special_url = $this->base_url .'{{SPECIAL}}';
        $this->export_url = $this->special_url .':Export';
        $this->fetch_url = $this->export_url .'/{{PAGE}}';
        $this->submit_url = $this->page_url .'&action=submit';
        $this->delete_url = $this->page_url .'&action=delete';
        $this->history_url = $this->page_url .'&action=history';
        $this->diff_url = $this->page_url .'&diff=0';
        $this->move_url = $this->special_url .':Movepage&action=submit';
        $this->login_url = $this->special_url .':Userlogin&action=submit';
        $this->upload_url = $this->special_url .':Upload';

        $params = array(
            'agent' => $this->agent,
        );
        $this->browser = new Browser($params);
    }

    # PHP 4 style constructor, kept only so that existing code which calls
    # it explicitly (e.g. parent::Bgbot() in a subclass) keeps working.
    # PHP 8 no longer treats it as a constructor, hence __construct() above.
    public function Bgbot($wiki='bg', $user='', $pass='') {
        $this->__construct($wiki, $user, $pass);
    }

    # expand a URL template for a wiki
    # $template - one of the *_url templates
    # $wiki - a wiki code
    # $page - value for {{PAGE}}; null leaves the placeholder untouched
    # return the expanded URL
    private function build_url($template, $wiki, $page = null) {
        $search = array('{{DOMAIN}}', '{{SPECIAL}}');
        $replace = array(
            isset($this->sites[$wiki]) ? $this->sites[$wiki] : '',
            isset($this->special_nss[$wiki]) ? $this->special_nss[$wiki] : '',
        );
        if ($page !== null) {
            // substituted last, so placeholders inside $page are not expanded
            $search[] = '{{PAGE}}';
            $replace[] = $page;
        }
        return str_replace($search, $replace, $template);
    }

    # log in on $wiki unless the bot is already logged in there
    private function ensure_login($wiki) {
        if ( !$this->is_logged($wiki) ) {
            $this->login($wiki);
        }
    }

    # fetch a page
    # $page - page name (in proper encoding for the wiki or urlencoded)
    # $wiki - a wiki code
    # $get_time - whether to store the timestamp of the page
    # $follow_redirect - whether to fetch the target if the page is a redirect
    # On success the result is stored in $page_name, $page_content,
    # $page_timestamp and $page_is_redirect.
    # return true on success, false if the page could not be retrieved
    public function fetch($page, $wiki='', $get_time = true, $follow_redirect = true) {
        if ( empty($wiki) ) { $wiki = $this->wiki; }
        $url = $this->build_url($this->fetch_url, $wiki, my_urlencode($page));

        if ( !$this->browser->fetch($url) ) {
            $this->log('Fetch error: '. $this->browser->error);
            return false;
        }

        $text_content = getCDATA('text', $this->browser->content);

        if ($follow_redirect) {
            $this->page_is_redirect = false;
            // a redirect without a link target is treated as a normal page
            if ( $this->is_redirect($text_content)
                    && preg_match('/\[\[([^]]+)\]\]/', $text_content, $matches) ) {
                $new_page = $matches[1];
                $this->log("$wiki: [[". urldecode($page) .
                    "]] redirects to [[$new_page]]");
                $this->page_is_redirect = true;
                // follow one level only, so redirect loops cannot recurse forever
                return $this->fetch($new_page, $wiki, $get_time, false);
            }
        }

        if ($get_time) {
            $this->page_timestamp = getCDATA('timestamp', $this->browser->content);
            $this->page_timestamp = preg_replace('/\D/', '', $this->page_timestamp);
        }
        $this->page_content = html_entity_decode((string) $text_content);
        $this->page_name = urldecode($page);
        if ( empty($this->page_content) ) {
            $this->page_timestamp = 0;
        }
        return true;
    }

    # return true if given page content is a redirect, false otherwise
    # The check is a heuristic: any content starting with "#" counts as
    # a redirect, because the redirect keyword differs between wikis.
    public function is_redirect($content) {
        return is_string($content) && $content !== '' && $content[0] === '#';
    }

    # fetch a set of pages
    # $pages - array with page names (in proper encoding for the wiki)
    # $wiki - a wiki code
    # $get_time - whether to store the timestamps too
    # The pages are stored in $content_many as
    # page name => array(content, timestamp); the timestamp is null when
    # it was not requested or not found.
    # NOTE(review): results are matched to $pages by position - this assumes
    # the export lists the pages in the requested order; confirm.
    # return true on success, false if the request failed
    public function fetch2local($pages, $wiki='', $get_time = true) {
        if ( empty($wiki) ) { $wiki = $this->wiki; }
        $url = $this->build_url($this->export_url, $wiki);

        $page_list = '';
        foreach ($pages as $page) {
            $page_list .= "$page\n";
        }
        $vars = array(
            'curonly' => 'true',
            'action' => 'submit',
            'pages' => $page_list,
        );

        if ( !$this->browser->submit($url, $vars) ) {
            $this->log('Fetch_m error: '. $this->browser->error);
            return false;
        }

        $content = $this->browser->content;

        $timestamps = array();
        if ($get_time) {
            // reduce every extracted timestamp to its digits (the same
            // normalization fetch() applies)
            foreach ((array) getCDATAs('timestamp', $content) as $stamp) {
                $timestamps[] = preg_replace('/\D/', '', $stamp);
            }
        }
        $contents = array_map('html_entity_decode', (array) getCDATAs('text', $content));

        $i = 0;
        foreach ($pages as $page) {
            $this->content_many[$page] = array(
                isset($contents[$i]) ? $contents[$i] : null,
                isset($timestamps[$i]) ? $timestamps[$i] : null,
            );
            $i++;
        }
        return true;
    }

    # submit a page
    # $page - page name (in proper encoding for the wiki or urlencoded)
    # $content - content to be submitted
    # $summary - summary text
    # $wiki - wiki code
    # $isminor - should the edit be marked as minor
    # return false if the request itself failed, true otherwise; a request
    # that went through but does not look like a saved page is only logged
    public function submit($page, $content, $summary, $wiki='', $isminor = true) {
        global $latin;

        if ( empty($wiki) ) { $wiki = $this->wiki; }
        $url = $this->build_url($this->submit_url, $wiki, my_urlencode($page));

        // wikis listed in the global $latin get the content run through utf8_decode()
        if ( is_array($latin) && in_array($wiki, $latin) ) {
            $content = utf8_decode($content);
        }
        $vars = array(
            'wpTextbox1' => $content,
            'wpSummary' => $summary,
            'wpEdittime' => $this->page_timestamp,
        );
        if ($isminor) { $vars['wpMinoredit'] = 1; }

        $this->ensure_login($wiki);

        $dpage = urldecode($page);
        if ( !$this->browser->submit($url, $vars) ) {
            $this->log('Submit error: '. $this->browser->error);
            return false;
        }

        // the page title in a heading of the response is taken as success;
        // the title is quoted because it may contain regex metacharacters
        $heading = '/<h1.+'. preg_quote($dpage, '/') .'.*<\/h1>/';
        if ( preg_match($heading, $this->browser->content) ) {
            $this->log("$wiki: [[$dpage]] was submitted.");
        } else {
            $this->log("$wiki: [[$dpage]] was NOT submitted. Probably the server is down or there were some other problems.");
        }
        return true;
    }

    # upload a file on wiki
    # $file - file to be uploaded
    # $desc - description for this file
    # $wiki - wiki code
    # return true on success, false if the request failed
    public function upload($file, $desc, $wiki='') {
        if ( empty($wiki) ) { $wiki = $this->wiki; }
        $url = $this->build_url($this->upload_url, $wiki);
        $vars = array(
            'wpUploadDescription' => $desc,
            'wpUploadAffirm' => 1,
        );
        $file_field = array('wpUploadFile' => $file);

        $this->ensure_login($wiki);

        if ( !$this->browser->submit($url, $vars, $file_field) ) {
            $this->log('Upload error: '. $this->browser->error);
            return false;
        }
        $this->log("$wiki: $file was uploaded.");
        return true;
    }

    # move a page to other name on wiki
    # $page - page to be moved
    # $new_page - new page name (in proper encoding)
    # $wiki - wiki code
    # return true on success, false if the request failed
    public function move($page, $new_page, $wiki='') {
        if ( empty($wiki) ) { $wiki = $this->wiki; }
        $url = $this->build_url($this->move_url, $wiki);

        // old title is sent with underscores, new title with spaces
        $page = str_replace(' ', '_', $page);
        $new_page = str_replace('_', ' ', $new_page);
        $vars = array(
            'wpOldTitle' => $page,
            'wpNewTitle' => $new_page,
            'wpMovetalk' => 1, // move the talk page (discussion) too
        );

        $this->ensure_login($wiki);

        if ( !$this->browser->submit($url, $vars) ) {
            $this->log('Move error: '. $this->browser->error);
            return false;
        }
        $this->log("$wiki: [[$page]] was moved to [[$new_page]].");
        return true;
    }

    # delete a page
    # $page - page to be deleted (in proper encoding for the wiki)
    # $reason - reason for deletion (Unicode)
    # $wiki - wiki code
    # return true on success, false if the request failed
    public function delete($page, $reason, $wiki='') {
        if ( empty($wiki) ) { $wiki = $this->wiki; }
        $url = $this->build_url($this->delete_url, $wiki, my_urlencode($page));
        $vars = array(
            'wpReason' => $reason,
            'wpConfirm' => 1, // confirm deletion
        );

        $this->ensure_login($wiki);

        if ( !$this->browser->submit($url, $vars) ) {
            $this->log('Delete error: '. $this->browser->error);
            return false;
        }
        $this->log("$wiki: [[". urldecode($page) .']] was deleted.');
        return true;
    }

    # return true if bot is logged on $wiki, false otherwise
    # The bot counts as logged in when the first value of any cookie stored
    # for the wiki's domain equals the user name.
    # $wiki - wiki code
    # $user - user name
    public function is_logged($wiki = '', $user = '') {
        if ( empty($wiki) ) { $wiki = $this->wiki; }
        if ( empty($user) ) { $user = $this->user; }

        $domain = isset($this->sites[$wiki]) ? $this->sites[$wiki] : '';
        if ( !isset($this->browser->cookies[$domain]) ) {
            return false;
        }
        foreach ($this->browser->cookies[$domain] as $cookie_vals) {
            // compare as strings, so numeric-looking names are not mixed up
            if ( isset($cookie_vals[0]) && is_scalar($cookie_vals[0])
                    && (string) $user === (string) $cookie_vals[0] ) {
                return true;
            }
        }
        return false;
    }

    # login to wiki
    # $wiki - wiki code
    # $user - user name
    # $pass - user password
    # return false if wiki, user or password is missing or the request
    # failed, true otherwise
    public function login($wiki = '', $user = '', $pass = '') {
        if (empty($wiki)) { $wiki = $this->wiki; }
        if (empty($user)) { $user = $this->user; }
        if (empty($pass)) { $pass = $this->pass; }
        if ( empty($wiki) || empty($user) || empty($pass) ) { return false; }

        $url = $this->build_url($this->login_url, $wiki);
        $vars = array(
            'wpName' => $user,
            'wpPassword' => $pass,
            'wpRemember' => 1,
        );
        if ( !$this->browser->submit($url, $vars) ) {
            $this->log("Login error: ". $this->browser->error);
            return false;
        }
        $this->log('['. date('Y/m/d') .'] '. $user . " is logged on $wiki:");
        return true;
    }

    # signup to wiki
    # $wiki - wiki code
    # $user - chosen user name
    # $pass - chosen user password
    # return true on success, false if the request failed
    public function signup($wiki, $user='', $pass='') {
        if ( empty($user) ) { $user = $this->user; }
        if ( empty($pass) ) { $pass = $this->pass; }

        $url = $this->build_url($this->login_url, $wiki);
        $vars = array(
            'wpName' => $user,
            'wpPassword' => $pass,
            'wpRetype' => $pass,
            'wpCreateaccount' => 1
        );
        if ( !$this->browser->submit($url, $vars) ) {
            $this->log("Signup error: ". $this->browser->error);
            return false;
        }
        $this->log('['. date('Y/m/d') .'] '. $user . " is signuped on $wiki:");
        return true;
    }

    # write a message, prefixed with the current time, to the log file
    # and echo it if $echo_log is set
    # $msg - message text
    public function log($msg) {
        $msg = '['. date('H:i:s') .'] '. $msg ."\n";
        if ($this->echo_log) { echo $msg; }
        my_fwrite($this->log, $msg);
    }

    # collect the interwikis of a page by following the interwiki links
    # found in $content and, transitively, in the linked pages
    # $page - page name (not used by the implementation)
    # $content - page content on the main wiki
    # The linked pages are read with fetch(); the $page_* state of the bot
    # is restored afterwards, so a following submit() still sends the
    # timestamp of the page the caller fetched.
    # return string with all collected interwikis, one per line
    public function update_interwikis($page, $content) {
        $start = (string) $this->wiki;
        $langs = array($start);
        $tmp = array($start => $this->get_interwikis($content));
        $wikis = array();

        $saved_name = $this->page_name;
        $saved_is_redirect = $this->page_is_redirect;
        $saved_content = $this->page_content;
        $saved_timestamp = $this->page_timestamp;

        // $langs grows while it is walked, hence count() on every pass
        for ($i = 0; $i < count($langs); $i++) {
            $lang = $langs[$i];
            foreach ($tmp[$lang] as $lang2 => $target) {
                $lang2 = (string) $lang2;
                if ( in_array($lang2, $langs, true) ) {
                    continue;
                }
                if ( !$this->fetch($target, $lang2, false) ) {
                    continue;
                }
                if ( !empty($this->page_content) ) {
                    $tmp[$lang2] = $this->get_interwikis($this->page_content);
                    $langs[] = $lang2;
                    $wikis[$lang2] = $target;
                }
            }
        }

        $this->page_name = $saved_name;
        $this->page_is_redirect = $saved_is_redirect;
        $this->page_content = $saved_content;
        $this->page_timestamp = $saved_timestamp;

        ksort($wikis);
        $str = "\n\n";
        foreach ($wikis as $wiki => $target) {
            $str .= "\n[[$wiki:$target]]";
        }
        return $str;
    }

    # extract interwikis from a page
    # Only links whose prefix is a key of $special_nss count as interwikis.
    # $content - content of a page
    # return assoc array of interwikis (wikicode => pagename), sorted by
    # wiki code; spaces in page names are replaced with underscores
    public function get_interwikis($content) {
        $known = is_array($this->special_nss) ? $this->special_nss : array();
        // replace html numeric entities with UTF-8
        $to_utf8 = function ($m) {
            return unicode2utf8($m[1]);
        };

        $wikis = array();
        // [\w-] so that hyphenated wiki codes are recognized as well
        preg_match_all('/\[\[([\w-]+):(.+)\]\]/U', (string) $content, $links, PREG_SET_ORDER);
        foreach ($links as $link) {
            $wiki = $link[1];
            if ( !array_key_exists($wiki, $known) ) {
                continue;
            }
            // NOTE(review): the original also replaced '&' with '&', which
            // changes nothing and was dropped; possibly '&amp;' was meant - confirm.
            $page = str_replace(' ', '_', $link[2]);
            $wikis[$wiki] = preg_replace_callback('/&#(\d+);/', $to_utf8, $page);
        }
        ksort($wikis);
        return $wikis;
    }

    # strip interwikis from a page
    # Only links whose prefix is a key of $special_nss are removed (the same
    # rule get_interwikis() uses); other prefixed links, e.g. categories
    # or images, are kept.
    # $content - content of a page
    # return trimmed content without interwikis
    public function strip_interwikis($content) {
        $known = is_array($this->special_nss) ? $this->special_nss : array();
        $content = preg_replace_callback(
            '/\[\[([\w-]+):(.+)\]\]/U',
            function ($m) use ($known) {
                return array_key_exists($m[1], $known) ? '' : $m[0];
            },
            (string) $content
        );
        return trim($content);
    }

    # add an interwiki to a page
    # $local_wiki - code of the wiki the content belongs to
    # $content - page content
    # $page - page name to be added
    # $wiki - wiki code of the interwiki to add
    # $replace - force replacement if the interwiki $wiki exists
    # A new interwiki is inserted in front of the first interwiki whose
    # wiki code comes after $wiki in $sites, or appended to the content
    # if there is none.
    # return content with the new interwiki
    public function add_interwiki($local_wiki, $content, $page, $wiki='', $replace = false) {
        if ( empty($wiki) ) { $wiki = $this->wiki; }
        $page = trim($page);
        $new_interwiki = "[[$wiki:$page]]";

        $existing = '/\[\['. preg_quote($wiki, '/') .':([^]]+)\]\]/';
        if ( preg_match($existing, $content, $matches) ) {
            $cur_page = trim($matches[1]);
            if ( $replace && $page != $cur_page ) {
                $this->log("The interwiki [[$wiki:$cur_page]] already exists on $local_wiki:. Replacing it with [[$wiki:$page]].");
                // a callback keeps "$1" or "\1" inside a page name literal
                return preg_replace_callback(
                    $existing,
                    function ($m) use ($new_interwiki) { return $new_interwiki; },
                    $content
                );
            }
            $this->log("The interwiki [[$wiki:$cur_page]] already exists on $local_wiki:.");
            return $content;
        }

        // wikis on which interwikis are not separated by line breaks
        $one_line_interwikis = array('fr', 'hu', 'pl');

        $passed_own_code = false;
        foreach ((array) $this->sites as $wiki_code => $unused) {
            if ( !$passed_own_code ) {
                // skip every site up to and including $wiki itself
                $passed_own_code = ((string) $wiki === (string) $wiki_code);
                continue;
            }
            $pos = strpos($content, "[[$wiki_code:");
            if ( $pos === false ) {
                continue;
            }
            if ( !in_array($local_wiki, $one_line_interwikis) ) {
                $new_interwiki .= "\n";
            }
            return substr_replace($content, $new_interwiki, $pos, 0);
        }

        return $content . "\n\n$new_interwiki";
    }

    # $page - page name (in proper encoding for the wiki)
    # $wiki - wiki code
    # return last diff url for page
    public function get_diff_url($page, $wiki='') {
        if ( empty($wiki) ) { $wiki = $this->wiki; }
        return $this->build_url($this->diff_url, $wiki, $page);
    }
} // end of class Bgbot
?>