<?php
#
# A MediaWiki bot - used for automated editing of pages on sites
# powered by MediaWiki
#
# Copyright (C) 2004 Borislav Manolov
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html
#
# Author: Borislav Manolov <b.manolov at gmail dot com>
# http://purl.org/NET/borislav/
# Bot's home page: http://phpwikibot.sourceforge.net/
#
# Vandalised by Grigor Gatchev - grigor: gatchev.info, www.gatchev.info
#############################################################################
require_once 'browser.php';
require_once 'utils.php';
require_once 'wikis.php';
class Bgbot {
var $wiki; // main wiki on which the bot will work
var $user; // user name
var $pass; // password for this user
var $is_anonymous = true;
var $browser; // browser object
var $agent = 'Mozilla/5.0 (Bgbot 0.5)';
var $page_name;
var $page_is_redirect;
var $page_is_watched;
var $page_content;
var $page_timestamp;
var $edit_token = 0;
var $language = 'bg';
var $log = 'bot.log';
var $echo_log = true;
var $localdb = 'local.txt';
var $special_nss; // translations of "Special" namespace
var $sites; // all wiki sites
# Create a bot for one wiki and, optionally, one user account.
# All URL templates are built here; their {{DOMAIN}}, {{PAGE}}, {{SPECIAL}}
# and {{FILE}} placeholders are filled in later by the individual actions.
# $wiki - code of the main wiki the bot works on
# $user - user name (leave empty to work anonymously)
# $pass - password for $user (leave empty to work anonymously)
function __construct($wiki='bg', $user='', $pass='') {
    global $wiki_sites, $wiki_special_nss, $wiki_base_pages;
    $this->wiki = $wiki;
    $this->user = $user;
    $this->pass = $pass;
    # credentials are only usable when both parts are present
    if ( !empty($this->user) && !empty($this->pass) ) {
        $this->is_anonymous = false;
    }
    $this->special_nss =& $wiki_special_nss;
    $this->sites =& $wiki_sites;
    $this->base_pages =& $wiki_base_pages;
    # a wiki may override the default script path through $wiki_base_pages
    $this->base_url = 'http://{{DOMAIN}}/'.
        (isset($this->base_pages[$wiki]) ? $this->base_pages[$wiki] : 'w/index.php') .
        '?title=';
    $this->page_url = $this->base_url .'{{PAGE}}';
    $this->special_url = $this->base_url .'{{SPECIAL}}';
    $this->export_url = $this->special_url .':Export';
    $this->fetch_url = $this->export_url .'/{{PAGE}}';
    $this->edit_url = $this->page_url .'&action=edit';
    $this->submit_url = $this->page_url .'&action=submit';
    $this->delete_url = $this->page_url .'&action=delete';
    $this->delete_file_url = $this->page_url .'&action=delete&image={{FILE}}';
    $this->history_url = $this->page_url .'&action=history';
    $this->diff_url = $this->page_url .'&diff=0';
    $this->move_url = $this->special_url .':Movepage&action=submit';
    $this->login_url = $this->special_url .':Userlogin&action=submitlogin';
    $this->upload_url = $this->special_url .':Upload';
    $params = array(
        'agent' => $this->agent,
    );
    $this->browser = new Browser($params);
    //$this->set_lang($this->language);
}
# PHP 4-style constructor, kept so that explicit calls such as
# $bot->Bgbot(...) or parent::Bgbot(...) keep working. PHP 8 no longer treats
# a method named after its class as the constructor, hence __construct().
function Bgbot($wiki='bg', $user='', $pass='') {
    $this->__construct($wiki, $user, $pass);
}
# Retrieve the wiki text of a page from its edit form.
# On success the bot remembers the page: page_content, page_name,
# page_timestamp (wpEdittime), page_starttime (wpStarttime), page_is_watched
# and, for a non-anonymous bot, edit_token.
# $page - page name (in proper encoding for the wiki or urlencoded)
# $wiki - a wiki code; defaults to the bot's main wiki
# $follow_redirect - whether to follow a redirect page (a single hop)
# return true on success, false on failure (the reason is logged)
function edit($page, $wiki='', $follow_redirect = true) {
    if ( empty($wiki) ) { $wiki = $this->wiki; }
    $page = (string) $page;
    $s = array('{{DOMAIN}}', '{{PAGE}}');
    $r = array($this->sites[$wiki], my_urlencode($page));
    $url = str_replace($s, $r, $this->edit_url);
    if (!$this->is_logged($wiki)) { $this->login($wiki); }
    if ( !$this->browser->fetch($url) ) {
        $this->log('Fetch edit error: '. $this->browser->error, true);
        return false;
    }
    # without the edit form there is nothing to read the text from
    if (strpos($this->browser->content, 'id="editform"') === false) {
        $this->log("[[$wiki:$page]] was NOT fetched for editing.", true);
        return false;
    }
    $this->log("Fetching [[$wiki:$page]] for editing...");
    $content = trim(getCDATA('textarea', $this->browser->content));
    if ($follow_redirect) {
        # '#'-prefixed text without any [[link]] has no target to follow,
        # so it is handled as an ordinary page
        if ( $this->is_redirect($content)
            && preg_match('/\[\[([^]]+)\]\]/', $content, $matches) ) {
            $new_page = $matches[1];
            $this->log("$wiki: [[". urldecode($page) . "]] redirects to [[$new_page]]");
            $this->page_is_redirect = true;
            # the target is fetched with redirects off, so the flag stays set
            return $this->edit($new_page, $wiki, false);
        }
        $this->page_is_redirect = false;
    }
    $this->page_timestamp = $this->get_form_field('wpEdittime');
    $this->page_starttime = $this->get_form_field('wpStarttime');
    if (!$this->is_anonymous) {
        $this->edit_token = $this->get_form_field('wpEditToken');
        if ( empty($this->edit_token) ) {
            # keep the offending response on disk for later inspection
            $errlog = 'edit_page_'.time().'.html';
            file_put_contents($errlog, $this->browser->content);
            $this->log('Fetch edit error: There is no edit token! See '.$errlog, true);
            return false;
        }
    }
    $this->page_is_watched = preg_match(
        '/name=[\'"]wpWatchthis[\'"] checked=[\'"]checked[\'"]/', $this->browser->content);
    $this->page_content = html_entity_decode($content);
    $this->page_name = urldecode($page);
    return true;
}
# Fetch a page through the wiki's export page.
# On success the bot remembers page_content, page_name and page_timestamp
# (digits only; 0 when the page text is empty).
# $page - page name (in proper encoding for the wiki or urlencoded)
# $wiki - a wiki code; defaults to the bot's main wiki
# $follow_redirect - whether to follow a redirect page (a single hop)
# return true on success, false on failure (the reason is logged)
function fetch($page, $wiki='', $follow_redirect = true) {
    if ( empty($wiki) ) { $wiki = $this->wiki; }
    $page = (string) $page;
    $s = array('{{DOMAIN}}', '{{SPECIAL}}', '{{PAGE}}');
    $r = array($this->sites[$wiki], $this->special_nss[$wiki], my_urlencode($page));
    $url = str_replace($s, $r, $this->fetch_url);
    if ( !$this->browser->fetch($url) ) {
        $this->log('Fetch error: '. $this->browser->error, true);
        return false;
    }
    # a complete export always carries the closing root tag
    if (strpos($this->browser->content, '</mediawiki>') === false) {
        $this->log("[[$wiki:$page]] was NOT fetched.", true);
        return false;
    }
    $this->log("Fetching [[$wiki:$page]]...");
    $text_content = getCDATA('text', $this->browser->content);
    if ($follow_redirect) {
        # '#'-prefixed text without any [[link]] has no target to follow,
        # so it is handled as an ordinary page
        if ( $this->is_redirect($text_content)
            && preg_match('/\[\[([^]]+)\]\]/', $text_content, $matches) ) {
            $new_page = $matches[1];
            $this->log("$wiki: ".
                $this->msg('redirects_to', array(urldecode($page), $new_page)));
            $this->page_is_redirect = true;
            # the target is fetched with redirects off, so the flag stays set
            return $this->fetch($new_page, $wiki, false);
        }
        $this->page_is_redirect = false;
    }
    $this->page_timestamp = getCDATA('timestamp', $this->browser->content);
    $this->page_timestamp = preg_replace('/\D/', '', $this->page_timestamp);
    $this->page_content = html_entity_decode($text_content);
    $this->page_name = urldecode($page);
    if ( empty($this->page_content) ) {
        $this->page_timestamp = 0;
    }
    return true;
}
# Get page history. (Smuggled in by Grigor Gatchev.)
# The raw response stays available in $this->browser->content.
# NOTE(review): the page_* fields are filled from 'text' and 'timestamp'
# elements exactly as fetch() does for an export; whether a history page
# contains such elements is not visible here - confirm against getCDATA().
# $page - page name (in proper encoding for the wiki or urlencoded)
# $wiki - a wiki code; defaults to the bot's main wiki
# $follow_redirect - whether to follow a redirect page
# $offset - when not empty, appended to the URL as "&offset="
# $limit - when not empty, appended to the URL as "&limit="
# return true on success, false on failure (the reason is logged)
function history($page, $wiki='', $follow_redirect = true, $offset='', $limit='100' ) {
    if ( empty($wiki) ) { $wiki = $this->wiki; }
    $page = (string) $page;
    # the history URL only contains the domain and page placeholders
    $s = array('{{DOMAIN}}', '{{PAGE}}');
    $r = array($this->sites[$wiki], my_urlencode($page));
    $url = str_replace($s, $r, $this->history_url);
    if ( ! empty ( $offset ) ) { $url .= "&offset=" . $offset; }
    if ( ! empty ( $limit ) ) { $url .= "&limit=" . $limit; }
    if ( !$this->browser->fetch($url) ) {
        $this->log('History fetch error: '. $this->browser->error, true);
        return false;
    }
    if (strpos($this->browser->content, '</html>') === false) {
        $this->log("[[$wiki:$page]] history was NOT fetched.", true);
        return false;
    }
    $log = "Fetching [[$wiki:$page]] history";
    if ( ! empty ( $offset ) ) { $log .= " at offset " . $offset; }
    $this->log($log . "...");
    $text_content = getCDATA('text', $this->browser->content);
    if ($follow_redirect) {
        # '#'-prefixed text without any [[link]] has no target to follow
        if ( $this->is_redirect($text_content)
            && preg_match('/\[\[([^]]+)\]\]/', $text_content, $matches) ) {
            $new_page = $matches[1];
            $this->log("$wiki: ".
                $this->msg('redirects_to', array(urldecode($page), $new_page)));
            $this->page_is_redirect = true;
            # NOTE(review): this loads the target page itself, not its
            # history - possibly history() was intended here; confirm
            return $this->fetch($new_page, $wiki, false);
        }
        $this->page_is_redirect = false;
    }
    $this->page_timestamp = getCDATA('timestamp', $this->browser->content);
    $this->page_timestamp = preg_replace('/\D/', '', $this->page_timestamp);
    $this->page_content = html_entity_decode($text_content);
    $this->page_name = urldecode($page);
    if ( empty($this->page_content) ) {
        $this->page_timestamp = 0;
    }
    return true;
}
# Tell whether wiki text looks like a redirect.
# Only the first character is examined: any text starting with '#' counts,
# so other '#'-prefixed text is reported as a redirect too.
# $content - wiki text of a page
# return true if $content starts with '#', false otherwise (also when empty)
function is_redirect($content) {
    # isset() covers the empty string; the [] offset replaces the {} string
    # offset syntax, which PHP 8 no longer accepts
    return isset($content[0]) && $content[0] === '#';
}
# Fetch a set of pages with one export request.
# The result is stored in $this->content_many, keyed by page name; every entry
# is array(page text, timestamp). Texts and timestamps are matched to $pages
# by position, so the export is expected to return the pages in request order.
# $pages - array with page names (in proper encoding for the wiki)
# $wiki - a wiki code; defaults to the bot's main wiki
# $get_time - whether to collect timestamps (they are null otherwise)
# return true on success, false on failure (the reason is logged)
function fetch2local($pages, $wiki='', $get_time = true) {
    if ( empty($wiki) ) { $wiki = $this->wiki; }
    $s = array('{{DOMAIN}}', '{{SPECIAL}}');
    $r = array($this->sites[$wiki], $this->special_nss[$wiki]);
    $url = str_replace( $s, $r, $this->export_url );
    $vars = array(
        'curonly' => 'true',
        'action' => 'submit',
        'pages' => '',
    );
    # the export form takes one page name per line
    foreach ($pages as $page) {
        $vars['pages'] .= "$page\n";
    }
    if ( !$this->browser->submit($url, $vars) ) {
        $this->log('Fetch_m error: '. $this->browser->error, true);
        return false;
    }
    $content = $this->browser->content;
    $timestamps = array();
    if ($get_time) {
        # extract the elements first, then reduce each value to its digits
        $timestamps = preg_replace('/\D/', '', getCDATAs('timestamp', $content));
    }
    $contents = getCDATAs('text', $content);
    $contents = array_map('html_entity_decode', $contents);
    $i = 0;
    foreach ($pages as $page) {
        $this->content_many[$page] = array(
            isset($contents[$i]) ? $contents[$i] : null,
            isset($timestamps[$i]) ? $timestamps[$i] : null,
        );
        $i++;
    }
    return true;
}
# Submit new content for the page most recently loaded with edit().
# The target page, edit/start times and edit token are taken from the bot's
# current-page state.
# $content - content to be submitted
# $summary - summary text
# $wiki - wiki code; defaults to the bot's main wiki
# $isminor - should the edit be marked as minor
# $purge - should the page be submitted when no content changes are made
# return true if the page was submitted or there was nothing to change,
#        false otherwise (the reason is logged)
function submit($content, $summary, $wiki='', $isminor = true, $purge = false) {
    global $latin;
    # compare as strings: a loose == would call "1e3" and "1000" equal
    if ( !$purge && (string) $content === (string) $this->page_content ) {
        $this->log('There were made NO CHANGES to the page content.');
        return true;
    }
    if ( empty($wiki) ) { $wiki = $this->wiki; }
    if (!$this->is_logged($wiki)) { $this->login($wiki); }
    $s = array('{{DOMAIN}}', '{{PAGE}}');
    $r = array($this->sites[$wiki], my_urlencode($this->page_name));
    $url = str_replace( $s, $r, $this->submit_url );
    # wikis listed in the global $latin get their text converted from UTF-8
    if ( is_array($latin) && in_array($wiki, $latin) ) {
        $content = utf8_decode($content);
    }
    $vars = array(
        'wpTextbox1' => $content,
        'wpSummary' => $summary,
        'wpStarttime' => $this->page_starttime,
        'wpEdittime' => $this->page_timestamp,
        'wpEditToken' => $this->edit_token,
    );
    if ($isminor) { $vars['wpMinoredit'] = 1; }
    if ($this->page_is_watched) { $vars['wpWatchthis'] = 1; }
    $dpage = urldecode($this->page_name);
    if ( !$this->browser->submit($url, $vars) ) {
        $this->log('Submit error: '. $this->browser->error, true);
        return false;
    }
    # a response marked noindex,nofollow is taken as "the edit was not saved"
    if ( !preg_match('/content *= *"noindex,nofollow"/', $this->browser->content) ) {
        $this->log("[[$wiki:$dpage]] was submitted.");
        return true;
    }
    if ( strpos($this->browser->content, 'pt-login') !== false ) {
        $this->log("[[$wiki:$dpage]] was NOT submitted - user is NOT logged in.", true);
    } else {
        $this->log("[[$wiki:$dpage]] was NOT submitted.", true);
    }
    return false;
}
# Upload a file to a wiki.
# $file - file to be uploaded
# $desc - description for this file
# $wiki - wiki code; defaults to the bot's main wiki
# return true when the response shows no error, false otherwise
function upload($file, $desc, $wiki='') {
    $wiki = empty($wiki) ? $this->wiki : $wiki;
    $url = str_replace(
        array('{{DOMAIN}}', '{{SPECIAL}}'),
        array($this->sites[$wiki], $this->special_nss[$wiki]),
        $this->upload_url
    );
    $form = array(
        'wpUploadDescription' => $desc,
        'wpUploadAffirm' => 1,
        'wpUpload' => 'Upload',
    );
    $attachment = array('wpUploadFile' => $file);
    if ( !$this->is_logged($wiki) ) {
        $this->login($wiki);
    }
    if ( !$this->browser->submit($url, $form, $attachment) ) {
        $this->log('Upload error: '. $this->browser->error, true);
        return false;
    }
    # an element with an "error" attribute value carries MediaWiki's complaint
    $found = preg_match('/error[\'"]>(.+)</', $this->browser->content, $hit);
    if ($found) {
        $this->log("$wiki: $file was NOT uploaded. MediaWiki says: ". $hit[1], true);
        return false;
    }
    $this->log("$wiki: $file was uploaded.");
    return true;
}
# Move (rename) a page on a wiki; the talk page is moved along with it.
# $page - current name of the page (sent with spaces turned into underscores)
# $new_page - new page name, in proper encoding (sent with underscores
#             turned into spaces)
# $reason - reason for the move
# $wiki - wiki code; defaults to the bot's main wiki
# return true when the response shows no error, false otherwise
function move($page, $new_page, $reason = '', $wiki='') {
    $wiki = empty($wiki) ? $this->wiki : $wiki;
    $url = str_replace(
        array('{{DOMAIN}}', '{{SPECIAL}}'),
        array($this->sites[$wiki], $this->special_nss[$wiki]),
        $this->move_url
    );
    $page = str_replace(' ', '_', $page);
    $new_page = str_replace('_', ' ', $new_page);
    $form = array(
        'wpOldTitle' => $page,
        'wpNewTitle' => $new_page,
        'wpReason' => $reason,
        'wpMovetalk' => 1, // move the talk page (discussion) too
        'wpMove' => 1,
        'wpEditToken' => $this->edit_token,
    );
    if ( !$this->is_logged($wiki) ) {
        $this->login($wiki);
    }
    if ( !$this->browser->submit($url, $form) ) {
        $this->log('Move error: '. $this->browser->error, true);
        return false;
    }
    # an element with an "error" attribute value carries MediaWiki's complaint
    $found = preg_match('/[\'"]error[\'"]>(.+)</', $this->browser->content, $hit);
    if ($found) {
        $this->log("$wiki: [[$page]] was NOT moved to [[$new_page]]. ".
            'MediaWiki says: '. $hit[1], true);
        return false;
    }
    $this->log("$wiki: [[$page]] was moved to [[$new_page]].");
    return true;
}
# Delete a page or, with $full_del, a file together with its description page.
# $page - page to be deleted (in proper encoding for the wiki)
# $reason - reason for deletion (Unicode)
# $wiki - wiki code; defaults to the bot's main wiki
# $full_del - delete all file versions (for media deletion); $page is then
#             the bare file name and "Image:" is prepended to it
# return true if the confirmation form is gone from the response,
#        false otherwise
function delete($page, $reason, $wiki='', $full_del = false) {
    global $latin;
    $wiki = empty($wiki) ? $this->wiki : $wiki;
    $domain = $this->sites[$wiki];
    if ($full_del) {
        $file = my_urlencode($page);
        $page = 'Image:'.$file;
        $url = str_replace(
            array('{{DOMAIN}}', '{{PAGE}}', '{{FILE}}'),
            array($domain, $page, $file),
            $this->delete_file_url
        );
    } else {
        $url = str_replace(
            array('{{DOMAIN}}', '{{PAGE}}'),
            array($domain, my_urlencode($page)),
            $this->delete_url
        );
    }
    $form = array(
        'wpReason' => $reason,
        'wpConfirm' => 1, # confirm deletion
        'wpConfirmB' => 'Confirm', # confirm button
        'wpEditToken' => $this->edit_token,
    );
    if ( !$this->is_logged($wiki) ) {
        $this->login($wiki);
    }
    if ( !$this->browser->submit($url, $form) ) {
        $this->log('Delete error: '. $this->browser->error, true);
        return false;
    }
    # getting the confirmation form back means nothing was deleted
    $still_asking = strpos($this->browser->content, 'deleteconfirm') !== false;
    if ($still_asking) {
        $this->log("$wiki: [[". urldecode($page) .']] was NOT deleted!', true);
        return false;
    }
    $this->log("$wiki: [[". urldecode($page) .']] was deleted.');
    return true;
}
# Tell whether a login is in place for $user on $wiki.
# An anonymous bot always counts as logged in, so callers never try to log
# it in. Otherwise the stored cookies for the wiki's domain are searched for
# one whose first value equals the urlencoded user name.
# $wiki - wiki code; defaults to the bot's main wiki
# $user - user name; defaults to the bot's user
# return true or false
function is_logged($wiki = '', $user = '') {
    $logged = $this->is_anonymous ? true : false;
    $wiki = empty($wiki) ? $this->wiki : $wiki;
    $user = empty($user) ? $this->user : $user;
    /*if ( strpos($this->browser->content, 'pt-logout') !== false ) {
        $is_logged = true;
    }*/
    $domain = $this->sites[$wiki];
    if ( !isset($this->browser->cookies[$domain]) ) {
        return $logged;
    }
    $wanted = urlencode($user);
    foreach ($this->browser->cookies[$domain] as $cookie_vals) {
        if ($wanted == $cookie_vals[0]) {
            $logged = true;
        }
    }
    return $logged;
}
# Log in to a wiki. Any cookies stored for the wiki's domain are dropped
# before the attempt.
# $wiki - wiki code; defaults to the bot's main wiki
# $user - user name; defaults to the bot's user
# $pass - user password; defaults to the bot's password
# return true if the response shows a user page link, false otherwise
#        (also when wiki, user or password is missing)
function login($wiki = '', $user = '', $pass = '') {
    $wiki = empty($wiki) ? $this->wiki : $wiki;
    $user = empty($user) ? $this->user : $user;
    $pass = empty($pass) ? $this->pass : $pass;
    if ( empty($wiki) || empty($user) || empty($pass) ) {
        return false;
    }
    $domain = $this->sites[$wiki];
    unset($this->browser->cookies[$domain]);
    $url = str_replace(
        array('{{DOMAIN}}', '{{SPECIAL}}'),
        array($domain, $this->special_nss[$wiki]),
        $this->login_url
    );
    $form = array(
        'wpName' => $user,
        'wpPassword' => $pass,
        'wpRemember' => '1',
        'wpLoginattempt' => '1',
    );
    if ( !$this->browser->submit($url, $form) ) {
        $this->log("Login error: ". $this->browser->error, true);
        return false;
    }
    # the personal "user page" link only shows up for a logged-in user
    $succeeded = strpos($this->browser->content, 'id="pt-userpage"') !== false;
    if ($succeeded) {
        $this->log('['. date('Y/m/d') .'] '. $user . " is logged on $wiki:");
        return true;
    }
    $this->log('['. date('Y/m/d') .'] '. $user . " could not log in on $wiki:", true);
    $error = getCDATA('div', $this->browser->content, array('class'=>'errorbox'));
    $this->log('['. date('Y/m/d') .'] '. $error, true);
    #echo $this->browser->content;
    return false;
}
# Sign up (create an account) on a wiki. The response body is not inspected:
# any completed request is reported as a success.
# $wiki - wiki code
# $user - chosen user name; defaults to the bot's user
# $pass - chosen user password; defaults to the bot's password
# return true if the request went through, false otherwise
function signup($wiki, $user='', $pass='') {
    $user = empty($user) ? $this->user : $user;
    $pass = empty($pass) ? $this->pass : $pass;
    $url = str_replace(
        array('{{DOMAIN}}', '{{SPECIAL}}'),
        array($this->sites[$wiki], $this->special_nss[$wiki]),
        $this->login_url
    );
    $form = array(
        'wpName' => $user,
        'wpPassword' => $pass,
        'wpRetype' => $pass,
        'wpCreateaccount' => 1
    );
    if ( !$this->browser->submit($url, $form) ) {
        $this->log("Sign-up error: ". $this->browser->error, true);
        return false;
    }
    $this->log('['. date('Y/m/d') .'] '. $user . " is signed up on $wiki:");
    return true;
}
# Write a time-stamped line to the bot's log file and, when echo_log is on,
# to the standard output as well.
# $msg - message text
# $is_error - whether the message is an error (echoed in red)
function log($msg, $is_error = false) {
    $line = '['. date('H:i:s') .'] '. $msg ."\n";
    if ($this->echo_log) {
        if ($is_error) {
            # ANSI escape codes: switch to red, then reset
            echo "\033[31m". $line ."\033[0m";
        } else {
            echo $line;
        }
    }
    my_fwrite($this->log, $line);
}
# Read the value of a form field out of HTML.
# Only fields written as value="..." name="..." (in this order, with double
# quotes) are recognised.
# $name - name of the form field
# $content - HTML to search; defaults to the last response of the browser
# return the field value, or '' if the field is missing or has no value
function get_form_field($name, $content = '') {
    if ( empty($content) ) {
        $content = $this->browser->content;
    }
    # the name is matched literally, whatever characters it contains
    $re = '/value="([^"]+)" name="'. preg_quote($name, '/') .'"/U';
    if ( !preg_match($re, $content, $m) ) {
        return '';
    }
    return $m[1];
}
# Collect the interwikis of a page by following them from wiki to wiki.
# Starting from the interwikis found in $content, every linked page is
# fetched and its own interwikis are followed in turn; each wiki is visited
# at most once and the bot's main wiki is never added to the result.
# The bot's current-page state is put back afterwards, so a later submit()
# still targets the page that was being edited.
# $page - page name (currently not used)
# $content - page content
# return string with all updated interwikis, one per line, sorted by wiki
#        code and preceded by blank lines
function update_interwikis($page, $content) {
    $start = $this->wiki;
    $langs = array($start);
    $tmp = array($start => $this->get_interwikis($content));
    $wikis = array();
    # fetch() overwrites these fields on every call
    $saved = array(
        'page_name' => $this->page_name,
        'page_content' => $this->page_content,
        'page_timestamp' => $this->page_timestamp,
        'page_is_redirect' => $this->page_is_redirect,
    );
    # $langs grows while it is walked, hence count() in the loop condition
    for ($i = 0; $i < count($langs); $i++ ) {
        $lang = $langs[$i];
        foreach ($tmp[$lang] as $lang2 => $linked_page) {
            if ( in_array($lang2, $langs) ) {
                continue;
            }
            if ( !$this->fetch($linked_page, $lang2, false) ) {
                continue;
            }
            $linked_content = $this->page_content;
            if ( !empty($linked_content) ) {
                $tmp[$lang2] = $this->get_interwikis($linked_content);
                $langs[] = $lang2;
                $wikis[$lang2] = $linked_page;
            }
        }
    }
    foreach ($saved as $field => $value) {
        $this->$field = $value;
    }
    ksort($wikis);
    $str = "\n\n";
    foreach ($wikis as $wiki => $linked_page) {
        $str .= "\n[[$wiki:$linked_page]]";
    }
    return $str;
}
# Extract interwikis from a page.
# A [[prefix:name]] link counts as an interwiki when the prefix is a key of
# the special namespaces table. If a wiki is linked more than once, the last
# link wins.
# $content - content of a page
# return assoc array of interwikis (wikicode => pagename), sorted by wiki
#        code; page names have underscores turned into spaces
function get_interwikis($content) {
    global $latin;
    $latin_wikis = is_array($latin) ? $latin : array();
    $count = preg_match_all('/\[\[([\w-]+):(.+)\]\]/U', $content, $matches);
    $wikis = array();
    for ($i = 0; $i < $count; $i++) {
        $wiki = $matches[1][$i];
        if ( !array_key_exists( $wiki, $this->special_nss ) ) {
            continue;
        }
        # NOTE(review): presumably meant to undo a doubly encoded ampersand
        # before the entities are decoded below - confirm
        $page = str_replace('&amp;', '&', $matches[2][$i]);
        // convert all html entities into unicode chars
        $page = html_entity_decode($page, ENT_NOQUOTES, 'UTF-8');
        $page = str_replace('_', ' ', $page);
        # wikis listed in the global $latin get their names converted from UTF-8
        $wikis[$wiki] = in_array($wiki, $latin_wikis) ? utf8_decode($page) : $page;
    }
    ksort($wikis);
    return $wikis;
}
# Strip interwikis from a page.
# Every [[code:...]] link whose code is a key of the special namespaces table
# (keys containing ':' are left out) is removed together with the whitespace
# that follows it; the result is trimmed.
# $content - content of a page
# return content without interwikis
function strip_interwikis($content) {
    $codes = array();
    foreach (array_keys($this->special_nss) as $code) {
        if ( strpos($code, ':') === false ) {
            $codes[] = $code;
        }
    }
    $pattern = '/\[\[('. implode('|', $codes) .'):.+\]\]\s*/U';
    return trim(preg_replace($pattern, '', $content));
}
# Add an interwiki to a page.
# If the page already links to $wiki, the content is returned unchanged
# unless $replace is set and the link points elsewhere. A new link is put
# right before the first existing interwiki whose wiki code comes after
# $wiki in the order of the sites table, or appended to the end of the
# content if there is none.
# $local_wiki - code of the wiki the content belongs to; used in log
#               messages, and for 'fr', 'hu' and 'pl' the new link is
#               followed by a space instead of a line break
# $content - page content
# $page - page name to be added
# $wiki - wiki code; defaults to the bot's main wiki
# $replace - force replacement if the interwiki $wiki exists
# return content with the new interwiki
function add_interwiki($local_wiki, $content, $page, $wiki='', $replace = false) {
    if ( empty($wiki) ) { $wiki = $this->wiki; }
    $page = trim($page);
    $new_interwiki = "[[$wiki:$page]]";
    $existing_re = '/\[\['. preg_quote($wiki, '/') .':([^]]+)\]\]/';
    if ( preg_match($existing_re, $content, $matches) ) {
        $cur_page = trim($matches[1]);
        if ( $replace && $page != $cur_page ) {
            $this->log("The interwiki [[$wiki:$cur_page]] already exists on $local_wiki:. Replacing it with [[$wiki:$page]].");
            # keep '$' and '\' in the page name from being read as
            # backreferences by preg_replace()
            $replacement = strtr($new_interwiki, array('\\' => '\\\\', '$' => '\\$'));
            return preg_replace($existing_re, $replacement, $content);
        }
        $this->log("The interwiki [[$wiki:$cur_page]] already exists on $local_wiki:.");
        return $content;
    }
    $one_line_interwikis = array('fr', 'hu', 'pl');
    $separator = in_array($local_wiki, $one_line_interwikis) ? ' ' : "\n";
    # only wiki codes listed after $wiki in the sites table are candidates
    $after_own_code = false;
    foreach (array_keys($this->sites) as $wiki_code) {
        if ( !$after_own_code ) {
            if ( $wiki == $wiki_code ) { $after_own_code = true; }
            continue;
        }
        $pos = strpos($content, "[[$wiki_code:");
        if ( $pos !== false ) {
            return substr_replace($content, $new_interwiki . $separator, $pos, 0);
        }
    }
    return $content . "\n\n$new_interwiki";
}
# Build the URL of the last diff of a page.
# $page - page name (in proper encoding for the wiki); it is put into the
#         URL as given
# $wiki - wiki code; defaults to the bot's main wiki
# return last diff url for page
function get_diff_url($page, $wiki='') {
    $code = empty($wiki) ? $this->wiki : $wiki;
    return str_replace(
        array('{{DOMAIN}}', '{{PAGE}}'),
        array($this->sites[$code], $page),
        $this->diff_url
    );
}
# Load the messages for a language from "language<LANG>.php" (language code
# in upper case) in the current directory. Nothing changes if that file does
# not exist.
# NOTE(review): assumes the language file defines $messages in the including
# scope - confirm against the language files.
# $lang - language code
function set_lang($lang = 'bg') {
    # require_once runs a file only once per request, so what it defined is
    # kept here for later calls (and for other bot objects)
    static $loaded = array();
    $language_file = 'language'. strtoupper($lang) .'.php';
    if ( !array_key_exists($language_file, $loaded) ) {
        if ( !file_exists($language_file) ) {
            return;
        }
        require_once $language_file;
        $loaded[$language_file] = isset($messages) ? $messages : null;
    }
    $this->messages = $loaded[$language_file];
}
/**
 * Build a message from the loaded language messages.
 * The placeholders $1 ... $9 in the message are replaced by the elements of
 * $msg_elements in order; placeholders without an element are removed.
 * If there is no message for $msg_id (for example because no language has
 * been loaded), the id itself followed by the elements is returned, so that
 * log lines stay readable.
 *
 * $msg_id - string
 * $msg_elements - array (a single string is used for every placeholder)
 */
function msg($msg_id, $msg_elements = array()) {
    if ( !isset($this->messages[$msg_id]) ) {
        $elements = is_array($msg_elements) ? $msg_elements : array($msg_elements);
        return empty($elements) ? $msg_id : $msg_id .': '. implode(', ', $elements);
    }
    $values = is_array($msg_elements) ? array_values($msg_elements) : null;
    $pairs = array();
    for ($i = 1; $i <= 9; $i++) {
        if ($values === null) {
            $pairs['$'.$i] = $msg_elements;
        } else {
            $pairs['$'.$i] = isset($values[$i - 1]) ? $values[$i - 1] : '';
        }
    }
    # strtr() substitutes in one pass, so a placeholder that happens to be
    # part of an inserted value is left alone
    return strtr($this->messages[$msg_id], $pairs);
}
} // end of class Bgbot