<?php
# Copyright (C) 2007 Grigor Gatchev
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html
#
# Author: Grigor Gatchev, grigor: gatchev.info, www.gatchev.info
#############################################################################
# Tested on MediaWiki 1.10. #
require_once 'utils.php';
require_once 'wikis.php';
require_once 'bgbot.php';
# Default bounds for the "since"/"until" timestamp filters used below.
# Both are numeric strings and are compared against revision timestamps
# with the ordinary <, <=, >, >= operators.
define ( 'MIN_DATETIME', '0' );           # lower bound: matches from the epoch on
define ( 'MAX_DATETIME', '9999999999' );  # upper bound: larger than any expected timestamp
# Extracts the value of the "oldid" parameter from a MediaWiki URL.
#
# The pattern captures everything that follows the literal "&oldid=" up to
# the end of the string, so the parameter is expected to be the last one
# in the URL.
#
# $url - the URL to examine.
# Returns the captured text, or null when the URL has no "&oldid=" part
# (previously this case read an undefined array offset).
function mw_url_oldid ( $url ) {
    if ( preg_match ( '/&oldid=(.*)$/Us', $url, $matches ) ) {
        return $matches[1];
    }
    return null;
}
# Converts a MediaWiki history timestamp string into a Unix timestamp.
#
# The patterns below expect the layout "HH:MM, <day> <month name> <year>".
#
# $mwtime   - the timestamp text as shown on the history page.
# $language - key into the global $wiki_month_names table, selecting the
#             list of month names to look the month up in.
# Returns the result of mktime() with the seconds set to 0.
#
# NOTE(review): the month passed to mktime() is the key returned by
# array_search(), so this assumes $wiki_month_names[$language] is keyed
# 1..12 - confirm against wikis.php. An unknown month name yields false.
function mwtime2time ( $mwtime, $language ) {
    global $wiki_month_names;

    $hour       = extract_element ( '/^(.*)\:/Us', $mwtime );
    $min        = extract_element ( '/\:(.*)\,\ /Us', $mwtime );
    // Fix: the day used to be stored in $date while mktime() was handed an
    // undefined $day, so the day of month was never taken from the input.
    $day        = extract_element ( '/\,\ (.*)\ /Us', $mwtime );
    $month_name = extract_element ( '/\,\ .*\ (.*)\ /Us', $mwtime );
    $year       = extract_element ( '/\,\ .*\ .*\ (.*)$/Us', $mwtime );

    $month = array_search ( $month_name, $wiki_month_names[$language] );

    return mktime ( $hour, $min, 0, $month, $day, $year );
}
# Holds the parsed revision history of one wiki page and offers accessors
# and summary queries over it.
#
# The history is downloaded through a bot object (see the constructor) and
# every history line is parsed into an associative array ("revision
# record") appended to $revisions.
#
# NOTE(review): several methods walk $revisions from the last index down
# to reach "first" edits, i.e. they assume index 0 is the newest revision
# (the order of the history page) - confirm against the bot's output.
class Page_History {
    var $wiki;       # copied from $Bgbot->wiki; passed to mwtime2time() as the language key
    var $revisions;  # list of revision records, in the order they were parsed
    var $active;     # true if the last history request succeeded, false otherwise

    # --- Constructor --- #

    # Downloads and parses the complete history of $pagename.
    #
    # $Bgbot    - bot object; must provide ->wiki, ->history() and
    #             ->browser->content.
    # $pagename - title of the page whose history is wanted.
    #
    # History pages are requested one after another: each parsed page
    # reports the offset of the following page in $hasmore['first'], and
    # the loop stops when that offset is empty or a request fails.
    function __construct ( $Bgbot, $pagename ) {
        $this->wiki = $Bgbot->wiki;
        $this->revisions = array();
        $this->active = false;
        // Offset of the next history page. null for the first request: this
        // is the value the old code passed implicitly by reading the not yet
        // defined $hasmore['first'].
        $offset = null;
        while ( true ) {
            if ( ! $Bgbot->history ( $pagename, '', true, $offset ) ) {
                $this->active = false;
                break;
            }
            $hasmore = $this->parse_history_page ( $Bgbot->browser->content );
            $this->active = true;
            if ( empty ( $hasmore['first'] ) ) {
                break;
            }
            $offset = $hasmore['first'];
        }
    }

    # Old-style (PHP 4) constructor name, kept so that existing explicit
    # calls keep working. PHP 8 no longer treats a method named after its
    # class as a constructor, hence the real work lives in __construct().
    function Page_History ( $Bgbot, $pagename ) {
        $this->__construct ( $Bgbot, $pagename );
    }

    # --- Parsing engine --- #

    # Parses one <li> line of a history page into a revision record.
    #
    # $line - the content of the list item.
    # Returns an array with the keys 'curr_version', 'last_version',
    # 'version', 'user', 'user_talkpage', 'user_contribs', 'minor_edit',
    # 'oldid' and 'datetime', plus 'user_block', 'history_size',
    # 'autocomment' and 'comment' when the line contains them.
    #
    # The line is consumed left to right: each element is extracted and
    # then stripped, so the order of the statements matters.
    function parse_history_line ( $line ) {
        $parsed = array();
        // a parenthesised group that may contain links
        $paren_group = '\(.*(<a href=.*<\/a>.*)*.*\)';

        $link_currver = strip_parentheses (
            extract_first_element ( $paren_group, $line )
        );
        $line = strip_first_element ( $paren_group, $line );
        $parsed['curr_version'] = parse_first_maybe_href ( $link_currver );

        $link_lastver = strip_parentheses (
            extract_first_element ( $paren_group, $line )
        );
        $line = strip_first_element ( $paren_group, $line );
        $parsed['last_version'] = parse_first_maybe_href ( $link_lastver );

        // strpos() results are compared with false explicitly, so that a
        // match at offset 0 is not mistaken for "not found"
        if ( strpos ( $line, '<input ' ) !== false ) { // if radio buttons present,
            $line = strip_first_element ( '<input .*>', $line ); // junk the first
            $line = strip_first_element ( '<input .*>', $line ); // and the second
        }

        $parsed['version'] = parse_first_href ( $line );
        $line = strip_first_href ( $line );

        $line = strip_first_element ( '<span.*>', $line ); // junk the span opening tag
        $parsed['user'] = parse_first_href ( $line );
        $line = strip_first_href ( $line );

        // the parenthesised group after the user name: talk, contribs, maybe block
        $user_attrs = extract_first_element ( $paren_group, $line );
        $line = strip_first_element ( $paren_group, $line );
        $parsed['user_talkpage'] = parse_first_href ( $user_attrs );
        $user_attrs = strip_first_href ( $user_attrs );
        $parsed['user_contribs'] = parse_first_href ( $user_attrs );
        $user_attrs = strip_first_href ( $user_attrs );
        if ( strpos ( $user_attrs, '<a href=' ) !== false ) {
            $parsed['user_block'] = parse_first_href ( $user_attrs );
            $user_attrs = strip_first_href ( $user_attrs );
        }
        $line = strip_first_element ( '<\/span.*>', $line ); // junk the span closing tag

        if ( strpos ( $line, '<span class="minor">' ) !== false ) {
            $line = strip_first_element ( '<span class="minor">.*<\/span>', $line );
            $parsed['minor_edit'] = true;
        } else {
            $parsed['minor_edit'] = false;
        }

        $history_size = extract_first_element ( '<span class="history-size">.*<\/span>', $line );
        if ( $history_size ) {
            $line = strip_first_element ( '<span class="history-size">.*<\/span>', $line );
            // keep only the first word of the parenthesised size text
            $temp = explode ( " ", strip_parentheses ( $history_size ) );
            $parsed['history_size'] = $temp[0];
        }

        if ( strpos ( $line, '<span class="comment">' ) !== false ) {
            // extract the comment span contents and remove it from the line
            $line = strip_first_element ( '<span class="comment">', $line );
            $comment = strip_parentheses ( $line );
            $line = strip_first_element ( '\(.*\)', $line );
            // parse the comment span contents
            // NOTE(review): 'comment' is only stored for autocomment edits; a
            // plain edit summary is dropped here - confirm this is intended.
            if ( strpos ( $comment, 'class="autocomment"' ) !== false ) {
                $autocomment = extract_span_content ( $comment );
                $parsed['autocomment'] = parse_first_href ( $autocomment );
                $parsed['comment'] = strip_first_href ( $autocomment );
            }
            $line = strip_first_element ( '<\/span>', $line );
        }

        $parsed['oldid'] = mw_url_oldid ( $parsed['version']['url'] );
        $parsed['datetime'] = mwtime2time ( $parsed['version']['text'], $this->wiki );
        return $parsed;
    }

    # Parses one downloaded history page.
    #
    # Every <li> inside the page's <form> is parsed and appended to
    # $this->revisions; the paging links after the form are then examined.
    #
    # $page_text - the HTML of the history page.
    # Returns array ( 'last' => ..., 'first' => ... ) holding the trimmed
    # "offset" values found in the second and third parenthesised groups
    # after "</form>". The constructor treats an empty 'first' as "no more
    # pages".
    function parse_history_page ( $page_text ) {
        $paging_group = '/\((.*(<a href=".*<\/a>)*.*)\)/Us';

        $text_content = getCDATA('form', $page_text);
        $lines = getCDATAs ( 'li', $text_content );
        if ( is_array ( $lines ) ) { // nothing to parse on a page without list items
            foreach ( $lines as $line ) {
                $this->revisions[] = $this->parse_history_line ( $line );
            }
        }

        // NOTE(review): the offset is advanced by the length of each extracted
        // group, not by the position where it was found - confirm that this
        // is what extract_element() expects.
        $offset = strpos ( $page_text, "</form>" ) + 7; // 7 == strlen ( "</form>" )
        $lastfirst = extract_element ( $paging_group, $page_text, "", $offset );
        $offset += strlen ( $lastfirst );
        $prevN = extract_element ( $paging_group, $page_text, "", $offset );
        $offset += strlen ( $prevN );
        $nextN = extract_element ( $paging_group, $page_text, "", $offset );

        $hasmore = array();
        $hasmore['last'] = trim (
            extract_element ( '/\&offset=(.*)\&limit=/Us', $prevN ) );
        $hasmore['first'] = trim (
            extract_element ( '/\&offset=(.*)\&limit=/Us', $nextN ) );
        return $hasmore;
    }

    # --- Revisions global info access --- #

    # Counts the revisions whose timestamp lies within
    # [$sinceDateTime, $untilDateTime] (both bounds inclusive).
    function revisions_count (
            $sinceDateTime = MIN_DATETIME, $untilDateTime = MAX_DATETIME ) {
        $count = 0;
        foreach ( $this->revisions as $revision ) {
            if ( ( $revision['datetime'] <= $untilDateTime ) &&
                 ( $revision['datetime'] >= $sinceDateTime ) ) {
                $count++;
            }
        }
        return $count;
    }

    # Same as revisions_count(), but counts only revisions marked as minor.
    function minor_revisions_count (
            $sinceDateTime = MIN_DATETIME, $untilDateTime = MAX_DATETIME ) {
        $count = 0;
        foreach ( $this->revisions as $revision ) {
            if ( ( $revision['datetime'] <= $untilDateTime ) &&
                 ( $revision['datetime'] >= $sinceDateTime ) &&
                 ( $revision['minor_edit'] ) ) {
                $count++;
            }
        }
        return $count;
    }

    # Counts the revisions that are not marked as minor. The optional time
    # window works as in revisions_count(); without arguments the whole
    # history is counted, as before.
    function major_revisions_count (
            $sinceDateTime = MIN_DATETIME, $untilDateTime = MAX_DATETIME ) {
        return $this->revisions_count ( $sinceDateTime, $untilDateTime )
             - $this->minor_revisions_count ( $sinceDateTime, $untilDateTime );
    }

    # --- Revisions by-revision access --- #

    # Shared lookup behind all rev_*() accessors (internal helper).
    #
    # $revNo  - index into $this->revisions.
    # $key    - key of the revision record.
    # $subkey - optional key inside a link record ('url', 'title', 'text').
    # Returns the stored value, or null when the revision, the key or the
    # sub-key is absent. Several keys are optional (see
    # parse_history_line()) and the *_no() searches return -1 for "not
    # found", so a missing entry is a normal case and must not raise
    # notices.
    function _rev_field ( $revNo, $key, $subkey = null ) {
        if ( ! isset ( $this->revisions[$revNo][$key] ) ) {
            return null;
        }
        $value = $this->revisions[$revNo][$key];
        if ( $subkey === null ) {
            return $value;
        }
        if ( is_array ( $value ) && isset ( $value[$subkey] ) ) {
            return $value[$subkey];
        }
        return null;
    }

    # Link to compare with the current version. These three used to read
    # the key 'current_version', which the parser never sets; the parser
    # stores the link under 'curr_version'.
    function rev_currver_link ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'curr_version', 'url' );
    }
    function rev_currver_title ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'curr_version', 'title' );
    }
    function rev_currver_text ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'curr_version', 'text' );
    }

    # Link to compare with the preceding version.
    function rev_lastver_link ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'last_version', 'url' );
    }
    function rev_lastver_title ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'last_version', 'title' );
    }
    function rev_lastver_text ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'last_version', 'text' );
    }

    # Link to the revision itself; its text is the revision timestamp.
    function rev_version_link ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'version', 'url' );
    }
    function rev_version_title ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'version', 'title' );
    }
    function rev_version_text ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'version', 'text' );
    }

    # Link to the editor's user page; its text is the user name.
    function rev_user_link ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'user', 'url' );
    }
    function rev_user_title ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'user', 'title' );
    }
    function rev_user_text ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'user', 'text' );
    }

    # Link to the editor's talk page.
    function rev_usertalk_link ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'user_talkpage', 'url' );
    }
    function rev_usertalk_title ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'user_talkpage', 'title' );
    }
    function rev_usertalk_text ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'user_talkpage', 'text' );
    }

    # Link to the editor's contributions.
    function rev_usercontribs_link ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'user_contribs', 'url' );
    }
    function rev_usercontribs_title ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'user_contribs', 'title' );
    }
    function rev_usercontribs_text ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'user_contribs', 'text' );
    }

    # Link for blocking the editor (null when the line had no such link).
    function rev_userblock_link ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'user_block', 'url' );
    }
    function rev_userblock_title ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'user_block', 'title' );
    }
    function rev_userblock_text ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'user_block', 'text' );
    }

    # True when the revision is marked as a minor edit.
    function rev_isminoredit ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'minor_edit' );
    }
    # First word of the size note of the revision (null when absent).
    function rev_historysize ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'history_size' );
    }

    # Link found inside the automatic comment (null when absent).
    function rev_autocomment_link ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'autocomment', 'url' );
    }
    function rev_autocomment_title ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'autocomment', 'title' );
    }
    function rev_autocomment_text ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'autocomment', 'text' );
    }
    # Remainder of the automatic comment after its link (null when absent).
    function rev_comment ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'comment' );
    }

    # The "oldid" value taken from the revision link.
    function rev_oldid ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'oldid' );
    }
    # The revision timestamp as produced by mwtime2time().
    function rev_datetime ( $revNo = "0" ) {
        return $this->_rev_field ( $revNo, 'datetime' );
    }

    # --- General data --- #

    # Searching from the last index down, returns the index of the first
    # revision made strictly after $sinceDateTime, or -1 if there is none.
    function page_first_updated_no ( $sinceDateTime = MIN_DATETIME ) {
        for ( $rev = count ( $this->revisions ) - 1; $rev > -1; $rev-- ) {
            if ( $this->rev_datetime ( $rev ) > $sinceDateTime ) {
                return $rev;
            }
        }
        return -1;
    }

    # Searching from index 0 up, returns the index of the first revision
    # made strictly before $untilDateTime, or -1 if there is none.
    function page_last_updated_no ( $untilDateTime = MAX_DATETIME ) {
        $total = count ( $this->revisions ); // counted once, not on every pass
        for ( $rev = 0; $rev < $total; $rev++ ) {
            if ( $this->rev_datetime ( $rev ) < $untilDateTime ) {
                return $rev;
            }
        }
        return -1;
    }

    # Timestamp / user name of the revision found by
    # page_first_updated_no(); null when no revision qualifies.
    function page_first_updated_on ( $sinceDateTime = MIN_DATETIME ) {
        return $this->rev_datetime ( $this->page_first_updated_no ( $sinceDateTime ) );
    }
    function page_first_updated_by ( $sinceDateTime = MIN_DATETIME ) {
        return $this->rev_user_text ( $this->page_first_updated_no ( $sinceDateTime ) );
    }

    # Timestamp / user name of the revision found by
    # page_last_updated_no(); null when no revision qualifies.
    function page_last_updated_on ( $untilDateTime = MAX_DATETIME ) {
        return $this->rev_datetime ( $this->page_last_updated_no ( $untilDateTime ) );
    }
    function page_last_updated_by ( $untilDateTime = MAX_DATETIME ) {
        return $this->rev_user_text ( $this->page_last_updated_no ( $untilDateTime ) );
    }

    # Timestamp / author of the first update overall, i.e. page creation.
    function page_created_on () {
        return $this->page_first_updated_on();
    }
    function page_created_by () {
        return $this->page_first_updated_by();
    }

    # --- Summary data --- #

    # Lists the distinct user names that edited the page within
    # [$sinceDateTime, $untilDateTime], walking from the last index down.
    #
    # $majorOnly - when true, minor edits do not qualify their editor.
    # Returns an array of user names in order of first appearance.
    function editors_names (
            $sinceDateTime = MIN_DATETIME, $untilDateTime = MAX_DATETIME,
            $majorOnly = false ) {
        $editors = array();
        for ( $rev = count ( $this->revisions ) - 1; $rev > -1; $rev-- ) {
            $when = $this->rev_datetime ( $rev );
            $in_window = ( $when <= $untilDateTime ) && ( $when >= $sinceDateTime );
            if ( ! $in_window ) {
                continue;
            }
            // Fix: this used to call the non-existent rev_minor_edit().
            if ( $majorOnly && $this->rev_isminoredit ( $rev ) ) {
                continue;
            }
            $name = $this->rev_user_text ( $rev );
            // strict comparison: user names such as "007" and "7" must stay apart
            if ( ! in_array ( $name, $editors, true ) ) {
                $editors[] = $name;
            }
        }
        return $editors;
    }

    # Lists the editors of this page up to the moment another page was
    # created.
    #
    # $bgbot_copy    - bot object used to fetch the other page's history.
    # $pagename_copy - title of the other page.
    # $majorOnly     - passed on to editors_names().
    # Returns the array from editors_names(), or false when the history of
    # the other page could not be fetched.
    function editors_names_until_page_creation ( $bgbot_copy,
            $pagename_copy, $majorOnly = false ) {
        $copyhist = new Page_History ( $bgbot_copy, $pagename_copy );
        if ( $copyhist->active ) { // the page is present at all...
            $attime = $copyhist->page_created_on();
            return $this->editors_names ( MIN_DATETIME, $attime, $majorOnly );
        }
        return false;
    }

    # Lists the indexes of the revisions made by $editor within
    # [$sinceDateTime, $untilDateTime], walking from the last index down.
    #
    # $majorOnly - when true, minor edits are left out.
    function editor_editNos ( $editor,
            $sinceDateTime = MIN_DATETIME, $untilDateTime = MAX_DATETIME,
            $majorOnly = false ) {
        $editNos = array();
        for ( $rev = count ( $this->revisions ) - 1; $rev > -1; $rev-- ) {
            $when = $this->rev_datetime ( $rev );
            $in_window = ( $when <= $untilDateTime ) && ( $when >= $sinceDateTime );
            if ( ! $in_window ) {
                continue;
            }
            // Fix: the editor of revision $rev is checked; the old code called
            // rev_user_text() without an argument and so always looked at
            // revision 0.
            if ( $this->rev_user_text ( $rev ) != $editor ) {
                continue;
            }
            // Fix: this used to call the non-existent rev_minor_edit().
            if ( $majorOnly && $this->rev_isminoredit ( $rev ) ) {
                continue;
            }
            $editNos[] = $rev;
        }
        return $editNos;
    }
}