<?php
# $Id$
#
# A Wikimedia bot - used for automated editing of pages on Wikipedia
# and its sister projects
#
# Copyright (C) 2004 Borislav Manolov
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html
#
# Author: Borislav Manolov <b.manolov at web.de>
# http://purl.oclc.org/NET/manolov/
# Bot's home page: http://phpwikibot.sourceforge.net/
#############################################################################
require_once('browser.php');
require_once('utils.inc');
require_once('wikies.php');
require_once('cyrillic2unicode.inc');
include_once('html2wiki-tables.inc');
# Bgbot - a bot client for Wikimedia wikis.
#
# Drives a Browser object (see browser.php) to log in to a wiki and to
# fetch, submit, upload, move and delete pages through wiki.phtml.
# It also offers helpers for reading and editing interwiki links.
#
# All URLs are built from templates holding the placeholders {{DOMAIN}},
# {{SPECIAL}} and {{PAGE}}, which each method fills in from $sites,
# $special_nss and the page name.
class Bgbot {
    var $wiki; // main wiki on which the bot will work
    var $user; // user name
    var $pass; // password for this user
    var $browser; // browser object
    var $agent = 'Mozilla/5.0 (Bgbot/0.2)'; // agent string handed to Browser
    var $content; // text of the page loaded by the last fetch()
    var $timestamp; // digits-only timestamp of that page, 0 if the page was empty
    var $content_many = array(); // fetch2local() results: page => array(text, timestamp)
    var $log = 'bgbot.log'; // log file name passed to my_fwrite()
    var $echo_log = true; // whether log messages are echoed too
    var $localdb = 'local';
    var $special_nss; // translations of "Special" namespace
    var $sites; // all wiki sites

    // URL templates; filled in by the constructor
    var $base_url;
    var $page_url;
    var $special_url;
    var $export_url;
    var $fetch_url;
    var $submit_url;
    var $delete_url;
    var $history_url;
    var $move_url;
    var $login_url;
    var $upload_url;

    # create the bot and log in to the main wiki
    # $wiki - code of the main wiki
    # $user - user name
    # $pass - password for this user
    function __construct($wiki, $user, $pass) {
        global $wiki_sites, $wiki_special_nss;

        $this->wiki = $wiki;
        $this->user = $user;
        $this->pass = $pass;
        // bound by reference to the global tables (from wikies.php, presumably)
        $this->special_nss = & $wiki_special_nss;
        $this->sites = & $wiki_sites;

        $this->base_url    = 'http://{{DOMAIN}}/w/wiki.phtml?title=';
        $this->page_url    = $this->base_url .'{{PAGE}}';
        $this->special_url = $this->base_url .'{{SPECIAL}}';
        $this->export_url  = $this->special_url .':Export';
        $this->fetch_url   = $this->export_url .'/{{PAGE}}';
        $this->submit_url  = $this->page_url .'&action=submit';
        $this->delete_url  = $this->page_url .'&action=delete';
        $this->history_url = $this->page_url .'&diff=0';
        $this->move_url    = $this->special_url .':Movepage&action=submit';
        $this->login_url   = $this->special_url .':Userlogin&action=submit';
        $this->upload_url  = $this->special_url .':Upload';

        $params = array(
            'agent' => $this->agent,
        );
        $this->browser = new Browser($params);
        $this->login();
    }

    # PHP 4 style constructor; kept for engines that do not know
    # __construct() and for code that calls it by name.
    # Declared after __construct() on purpose, so that __construct()
    # is the one picked as constructor wherever both are recognised.
    function Bgbot($wiki, $user, $pass) {
        $this->__construct($wiki, $user, $pass);
    }

    # fetch a page; the text lands in $this->content
    # $page - name of the page
    # $wiki - a wiki code (default: the main wiki)
    # $get_time - whether to store the page timestamp in $this->timestamp
    # return true on success, false (after logging) if the request failed
    function fetch($page, $wiki = '_empty_', $get_time = true) {
        if ($wiki === '_empty_') { $wiki = $this->wiki; }
        $s = array('{{DOMAIN}}', '{{SPECIAL}}', '{{PAGE}}');
        $r = array($this->sites[$wiki], $this->special_nss[$wiki], $page);
        $url = str_replace( $s, $r, $this->fetch_url );

        if ( !$this->browser->fetch($url) ) {
            $this->log('Fetch error: '. $this->browser->error);
            return false;
        }

        $content = $this->browser->content;
        if ($get_time) {
            // keep only the digits of the exported timestamp
            $this->timestamp = preg_replace('/\D/', '', getCDATA('timestamp', $content));
        }
        $this->content = html_entity_decode( getCDATA('text', $content) );
        if ( empty($this->content) ) {
            // an empty page gets timestamp 0, even when $get_time is false
            $this->timestamp = 0;
        }
        return true;
    }

    # fetch a set of pages; results land in $this->content_many as
    # page name => array(text, timestamp)
    # $pages - array with page names (in proper encoding for the wiki)
    # $wiki - a wiki code (default: the main wiki)
    # $get_time - whether to store timestamps (0 is stored otherwise)
    # return true on success, false (after logging) if the request failed
    function fetch2local($pages, $wiki = '_empty_', $get_time = true) {
        if ($wiki === '_empty_') { $wiki = $this->wiki; }
        $s = array('{{DOMAIN}}', '{{SPECIAL}}');
        $r = array($this->sites[$wiki], $this->special_nss[$wiki]);
        $url = str_replace( $s, $r, $this->export_url );

        // one page name per line
        $page_list = '';
        foreach ($pages as $page) {
            $page_list .= "$page\n";
        }
        $vars = array(
            'curonly' => 'true',
            'action' => 'submit',
            'pages' => $page_list,
        );

        if ( !$this->browser->submit($url, $vars) ) {
            $this->log('Fetch_m error: '. $this->browser->error);
            return false;
        }

        $content = $this->browser->content;

        $timestamps = array();
        if ($get_time) {
            // extract first, then reduce each timestamp to its digits
            $raw = getCDATAs('timestamp', $content);
            if ( is_array($raw) ) {
                $timestamps = array_values( preg_replace('/\D/', '', $raw) );
            }
        }

        $contents = getCDATAs('text', $content);
        $contents = is_array($contents)
            ? array_values( array_map('html_entity_decode', $contents) )
            : array();

        // NOTE(review): results are matched to $pages by position, which
        // assumes the export lists every requested page in request order
        // - confirm against the export format.
        $i = 0;
        foreach ($pages as $page) {
            $this->content_many[$page] = array(
                isset($contents[$i]) ? $contents[$i] : '',
                isset($timestamps[$i]) ? $timestamps[$i] : 0,
            );
            $i++;
        }
        return true;
    }

    # submit a page
    # $page - page name (in proper encoding for the wiki)
    # $content - content to be submitted
    # $summary - summary text
    # $wiki - wiki code (default: the main wiki)
    # $isminor - should the edit be marked as minor
    # return true on success, false (after logging) if the request failed
    function submit($page, $content, $summary, $wiki = '_empty_', $isminor = true) {
        global $latin;
        if ($wiki === '_empty_') { $wiki = $this->wiki; }
        $s = array('{{DOMAIN}}', '{{PAGE}}');
        $r = array($this->sites[$wiki], $page);
        $url = str_replace( $s, $r, $this->submit_url );

        // wikis listed in the global $latin get the text converted from UTF-8
        if ( is_array($latin) && in_array($wiki, $latin) ) {
            $content = utf8_decode($content);
        }

        $vars = array(
            'wpTextbox1' => $content,
            'wpSummary' => $summary,
            'wpEdittime' => $this->timestamp, // set by the last fetch()
        );
        $vars['wpMinoredit'] = $isminor ? 1 : 0;

        if ( !$this->browser->submit($url, $vars) ) {
            $this->log('Submit error: '. $this->browser->error);
            return false;
        }
        $this->log("$wiki: $page was submitted.");
        return true;
    }

    # upload a file on wiki
    # $file - file to be uploaded
    # $desc - description for this file
    # $wiki - wiki code (default: the main wiki)
    # return true on success, false (after logging) if the request failed
    function upload($file, $desc, $wiki = '_empty_') {
        if ($wiki === '_empty_') { $wiki = $this->wiki; }
        $s = array('{{DOMAIN}}', '{{SPECIAL}}');
        $r = array($this->sites[$wiki], $this->special_nss[$wiki]);
        $url = str_replace( $s, $r, $this->upload_url );

        $vars = array(
            'wpUploadDescription' => $desc,
            'wpUploadAffirm' => 1,
        );
        $file_field = array('wpUploadFile' => $file);

        if ( !$this->browser->submit($url, $vars, $file_field) ) {
            $this->log('Upload error: '. $this->browser->error);
            return false;
        }
        $this->log("$wiki: $file was uploaded.");
        return true;
    }

    # move a page to other name on wiki
    # $page - page which will be moved
    # $new_page - new page name (in proper encoding)
    # $wiki - wiki code (default: the main wiki)
    # return true on success, false (after logging) if the request failed
    function move($page, $new_page, $wiki = '_empty_') {
        if ($wiki === '_empty_') { $wiki = $this->wiki; }
        $s = array('{{DOMAIN}}', '{{SPECIAL}}');
        $r = array($this->sites[$wiki], $this->special_nss[$wiki]);
        $url = str_replace( $s, $r, $this->move_url );

        // the old title is sent with underscores, the new one with spaces
        $page = str_replace(' ', '_', $page);
        $new_page = str_replace('_', ' ', $new_page);
        $vars = array(
            'wpOldTitle' => $page,
            'wpNewTitle' => $new_page,
            'wpMovetalk' => 1, // move the discussion page too
        );

        if ( !$this->browser->submit($url, $vars) ) {
            $this->log('Move error: '. $this->browser->error);
            return false;
        }
        $this->log("$wiki: $page was moved to $new_page.");
        return true;
    }

    # delete a page
    # $page - page to be deleted (in proper encoding for the wiki);
    #         it is URL-encoded before being put into the URL
    # $reason - reason for deletion (Unicode)
    # $wiki - wiki code (default: the main wiki)
    # return true on success, false (after logging) if the request failed
    function delete($page, $reason, $wiki = '_empty_') {
        if ($wiki === '_empty_') { $wiki = $this->wiki; }
        $s = array('{{DOMAIN}}', '{{PAGE}}');
        $r = array($this->sites[$wiki], urlencode($page));
        $url = str_replace( $s, $r, $this->delete_url );

        $vars = array(
            'wpReason' => $reason,
            'wpConfirm' => 1, // confirm deletion
        );

        if ( !$this->browser->submit($url, $vars) ) {
            $this->log('Delete error: '. $this->browser->error);
            return false;
        }
        $this->log("$wiki: $page was deleted.");
        return true;
    }

    # login to wiki
    # $wiki - wiki code (default: the main wiki)
    # return true on success, false (after logging) if the request failed
    function login($wiki = '_empty_') {
        if ($wiki === '_empty_') { $wiki = $this->wiki; }
        $s = array('{{DOMAIN}}', '{{SPECIAL}}');
        $r = array($this->sites[$wiki], $this->special_nss[$wiki]);
        $url = str_replace( $s, $r, $this->login_url );

        $vars = array(
            'wpName' => $this->user,
            'wpPassword' => $this->pass,
            'wpRemember' => 1,
        );

        if ( !$this->browser->submit($url, $vars) ) {
            $this->log("Login error: ". $this->browser->error);
            return false;
        }
        $this->log('['. date('Y/m/d') .'] '. $this->user . " is logged on $wiki:");
        return true;
    }

    # signup to wiki (posts the account creation form to the login URL)
    # $wiki - wiki code
    # return true on success, false (after logging) if the request failed
    function signup($wiki) {
        $s = array('{{DOMAIN}}', '{{SPECIAL}}');
        $r = array($this->sites[$wiki], $this->special_nss[$wiki]);
        $url = str_replace( $s, $r, $this->login_url );

        $vars = array(
            'wpName' => $this->user,
            'wpPassword' => $this->pass,
            'wpRetype' => $this->pass,
            'wpCreateaccount' => 1
        );

        if ( !$this->browser->submit($url, $vars) ) {
            $this->log("Signup error: ". $this->browser->error);
            return false;
        }
        $this->log('['. date('Y/m/d') .'] '. $this->user . " is signuped on $wiki:");
        return true;
    }

    # write a time-stamped message to the log file
    # (and echo it when $this->echo_log is set)
    # $msg - message text
    function log($msg) {
        $msg = '['. date('H:i:s') .'] '. $msg ."\n";
        if ($this->echo_log) { echo $msg; }
        my_fwrite($this->log, $msg);
    }

    # collect the interwikies of a page by following its interwiki links
    # from wiki to wiki; every linked page that can be fetched and is not
    # empty contributes its own interwikies to the search
    # $page - page name (not used by the crawl itself)
    # $content - page content
    # return string with all updated interwikies
    function update_interwikies($page, $content) {
        // fetch() overwrites these; a later submit() needs the caller's values
        $saved_content = $this->content;
        $saved_timestamp = $this->timestamp;

        $langs = array($this->wiki); // wikis visited so far; grows during the loop
        $links = array($this->wiki => $this->get_interwikies($content));
        $wikies = array();

        // count() is re-evaluated on purpose: newly found wikis are visited too
        for ($i = 0; $i < count($langs); $i++) {
            $lang = $langs[$i];
            foreach ($links[$lang] as $lang2 => $linked_page) {
                if ( in_array($lang2, $langs) ) {
                    continue;
                }
                if ( !$this->fetch($linked_page, $lang2, false) ) {
                    continue;
                }
                if ( empty($this->content) ) {
                    continue;
                }
                $links[$lang2] = $this->get_interwikies($this->content);
                $langs[] = $lang2;
                $wikies[$lang2] = $linked_page;
            }
        }

        $this->content = $saved_content;
        $this->timestamp = $saved_timestamp;

        ksort($wikies);
        $str = "\n\n";
        foreach ($wikies as $wiki => $linked_page) {
            $str .= "\n[[$wiki:$linked_page]]";
        }
        return $str;
    }

    # extract interwikies from a page
    # only links whose prefix is a key of $this->special_nss are taken;
    # the prefix may contain hyphens (e.g. zh-min-nan)
    # $content - content of a page
    # return assoc array of interwikies (wikicode => pagename), sorted by
    # wiki code; spaces in page names are replaced with underscores
    function get_interwikies($content) {
        $count = preg_match_all('/\[\[([\w-]+):(.+)\]\]/U', $content, $matches);
        $wikies = array();
        for ($i = 0; $i < $count; $i++) {
            $wiki = $matches[1][$i];
            if ( !array_key_exists( $wiki, $this->special_nss ) ) {
                continue;
            }
            // NOTE(review): the previous code also "replaced" '&' with '&',
            // a no-op - possibly a mangled '&amp;' => '&'. Confirm the
            // intent before decoding named entities here.
            $page = str_replace(' ', '_', $matches[2][$i]);
            // replace html numeric entities with UTF-8
            $page = preg_replace_callback('/&#(\d+);/',
                array($this, '_numeric_entity_to_utf8'), $page);
            $wikies[$wiki] = $page;
        }
        ksort($wikies);
        return $wikies;
    }

    # preg callback: turn the code point of a numeric entity into UTF-8
    function _numeric_entity_to_utf8($matches) {
        return unicode2utf8($matches[1]);
    }

    # strip interwikies from a page
    # only links whose prefix is a key of $this->special_nss are removed;
    # other prefixed links (categories, images, ...) are left alone
    # $content - content of a page
    # return trimmed content without interwikies
    function strip_interwikies($content) {
        $content = preg_replace_callback('/\[\[([\w-]+):(.+)\]\]/U',
            array($this, '_drop_known_interwiki'), $content);
        return trim($content);
    }

    # preg callback: blank out a link if its prefix is a known wiki code
    function _drop_known_interwiki($matches) {
        return array_key_exists($matches[1], $this->special_nss) ? '' : $matches[0];
    }

    # add an interwiki to a page
    # the link is put in front of the first existing interwiki whose code
    # comes after $wiki in $this->sites; if there is none, it is appended
    # at the end of the page
    # $content - page content
    # $page - page name to be added
    # $wiki - wiki code (default: the main wiki)
    # return content with the new interwiki (unchanged if the page already
    # has an interwiki for $wiki)
    function add_interwiki($content, $page, $wiki = '_empty_') {
        if ($wiki === '_empty_') { $wiki = $this->wiki; }
        if ( strpos($content, '[['.$wiki.':') !== false ) {
            $this->log("The interwiki $wiki already exists");
            return $content;
        }
        $new_interwiki = "[[$wiki:$page]]\n";

        $passed = false; // becomes true once $wiki itself has been passed
        foreach (array_keys($this->sites) as $wiki_code) {
            if ( !$passed ) {
                if ( $wiki == $wiki_code ) { $passed = true; }
                continue;
            }
            $pos = strpos($content, "[[$wiki_code:");
            if ( $pos !== false ) {
                return substr_replace($content, $new_interwiki, $pos, 0);
            }
        }

        // no later interwiki to insert before: append on a line of its own
        if ( $content !== '' && substr($content, -1) !== "\n" ) {
            $content .= "\n";
        }
        return $content . $new_interwiki;
    }

    # $page - page name (in proper encoding for the wiki)
    # $wiki - wiki code (default: the main wiki)
    # return history url (diff=0) for page
    function get_history_url($page, $wiki = '_empty_') {
        if ($wiki === '_empty_') { $wiki = $this->wiki; }
        $s = array('{{DOMAIN}}', '{{PAGE}}');
        $r = array($this->sites[$wiki], $page);
        return str_replace( $s, $r, $this->history_url );
    }

    # insert a text before some other text
    # $content - main string
    # $text2insert - text to be inserted
    # $before - before this text will be added
    # return main string with added text, passed through html_entity_decode()
    function insert($content, $text2insert, $before) {
        $pos = strpos($content, $before);
        if ( $pos === false ) {
            // anchor not found: the text goes to the very beginning,
            // which is what the implicit false => 0 conversion always did
            $pos = 0;
        }
        $content = substr_replace($content, $text2insert, $pos, 0);
        return html_entity_decode($content);
    }

    # replace a text with some other text
    # $content - main string
    # $toreplace - text to be replaced
    # $replacement - replacement text
    # return main string with the replaced text, passed through
    # html_entity_decode()
    function replace($content, $toreplace, $replacement) {
        $content = str_replace($toreplace, $replacement, $content);
        return html_entity_decode($content);
    }
} // end of class Bgbot
?>