Szerkesztő:UTF46/src/utf46.pl

utf46.pl forráskódja szerkesztés

#!/usr/bin/perl -T
use utf8;
use strict;
use warnings;

our $VERSION='1.0';

#	UTF46-bot
#		developer: UTF48
#	Version history
#		Testing phase
#			1.0	release, fix devel
#		Under developement:
#			0.11	lintlog++, origfix
#		2022-11-21
#			0.10	maxlag handling, wikitext chomping
#		2022-11-20
#			0.9	extended logging, difffix
#		2022-11-18
#			0.8:	bot source upload
#		2022-11-16
#			0.7:	bot summary, manual, half automated support
#		2022-11-15
#			0.6:	edit timeouts
#		2022-11-11
#			0.5:	task management (generator, editor, chekker)
#		2022-11-10
#			0.4:	fixing assert
#		2022-11-09
#			0.3:	edit page
#		2022-11-07
#			0.2:	read page (wikitext)
#			0.1:	try login
#			0.0:	try query
#		2022-11-06

our $setup; #global config parameters
our $bot; #bot singleton

package setup;
use utf8;
use strict;
use warnings;
use lib './';

use private::setup;
# private/setup.pm contains secret config values including password.
# this file is not public 

sub init {
	my $def={ # defaults
		pub=>{
			wiki_host=>'hu.wikipedia.org',
			# no support for different host yet
			api_url=>'/w/api.php',
		},
		sec=>{
			# defaults
			username=>'UTF46',
			password=>not('cleartext; use private/setup.pm'),
			#hidden details in separate file
			%{(eval{private::setup->init})},
		},
	};
	die $@ if $@;
	my $setup={
		can_edit=>1,
		wiki_host=>$def->{pub}->{wiki_host},
		url=>'https://'.$def->{pub}->{wiki_host}.$def->{pub}->{api_url},
		username=>($def->{sec}->{username}||die "username required"),
		password=>($def->{sec}->{password}||die "password required"),
		
		bot_status=>(
			(
				$main::VERSION =~ /^0/
			)?(
				'under_developement'
			):(
				$def->{sec}->{registered}
			)?(
				'registered_bot'
			):(
				'unregistered_bot'
			)
		),
		
		unregistered_bot_lag=>60, # bot evaluation period, time between edits
		registered_bot_lag=>10, # approved bot, expected time between edits
		min_wait=>1, # minimum wait between edits.
		nice_wait=>2, # wait between api calls (too high value may destroys tokens
		interactive=>2, # minimum interactive time
	};
	die "Minimum wait time cannot be 0" if not $setup->{min_wait};
	return $setup;
};

package page;
use utf8;
use strict;
use warnings;

sub new {
	my $class=shift;
	my $self={};
	bless $self,$class;
	return $self;
};

sub init {
	my $self=shift;
	my %params=@_;
	$self->{orig}={%params};
	$self->{loaded}=0;
	return $self;
};

sub cached {
	my $self=shift;
	return 0;
	# TODO check if the page content is already cached
};

sub load_from_cache {
	my $self=shift;
	die "unimplemented"; # TODO load page from cache
	$self->{loaded}=1;
};
sub save_to_cache {
	my $self=shift;
	# TODO save page to cache
};

sub load_from_api {
	my $self=shift;
	my $page=shift;
	
	$self->{page}=$page;
	$self->{loaded}=1;
	
	$page->{revisions}->[0]->{slots}->{main}->{content} =~ s/\r//sg;
};

sub edit {
	my $self=shift;
	my $editor=shift;
	my $taskmode=shift;

	my $rev=$self->{page}->{revisions}->[0];
	my $text=$rev->{slots}->{main}->{content};

	my $before=time;
	$self->{new}=$editor->edit($self->{page}->{title},$text);
	my $after=time;

	my $ok;
	if ($editor->automata or (($after-$before) >=$main::setup->{interactive})) {
		#ok
		$ok=1;
	} else {
		$ok=0;
	};
	

	$self->{new}->{baserevid}=$rev->{revid};
	$self->{new}->{contentformat}=$rev->{slots}->{main}->{contentformat};
	$self->{new}->{contentmodel}=$rev->{slots}->{main}->{contentmodel};
	$self->{new}->{pageid}=$self->{page}->{pageid};
	$self->{new}->{summary}="(bot v$VERSION task:$taskmode) ".$self->{new}->{summary};

	if ($self->{new}->{contentformat} eq 'text/x-wiki') {
		$self->{new}->{text} =~ s/\n+$//s;
	};

};

sub get_new {
	my $self=shift;
	return $self->{new};
};

sub check {
	my $self=shift;
	my $checker=shift;

	my $rev=$self->{page}->{revisions}->[0];
	my $text=$rev->{slots}->{main}->{content};
	
	my $before=time;
	my $ok=$checker->check($self->{page}->{title},$text,$self->{new}->{text},$self->{new}->{summary});
	my $after=time;
	if ($checker->automata or (($after-$before) >=$main::setup->{interactive})) {
		#ok
		return $ok;
	} else {
		return 0; # if too fast, then fail, slow human check required
	};
};

sub title {
	my $self=shift;
	return $self->{page}->{title};
};

sub orig {
	my $self=shift;
	return $self->{orig};
};

package task_common;
use utf8;
use strict;
use warnings;

sub new {
	my $class=shift;
	my $self={};
	bless $self,$class;
	$self->{taskmodules}=$self->init;
	$self->build;
	return $self;
};

sub init {
	my $self=shift;
	die "You must implement the init()";
};

sub automata {
	my $self=shift;
	return 1;
};

sub name {
	my $self=shift;
	die "You must implement the name()";
};

sub build {
	my $self=shift;
	my $tm=$self->{taskmodules};
	my $enabled={map {($_=>1)} qw{generator editor checker logger}};
	my $interactor={%{$enabled}};
	delete $interactor->{logger};
	my $interactive=0;
	my $automated=0;
	for my $module_key (grep {exists($enabled->{$_})} keys %{$tm}) {
		my $module=$tm->{$module_key};
		my $module_obj;
		if (ref($module)) {
			$module_obj=$module;
		} else {
			my $module_name;
			if (-f $module_key.'/'.$module.'.pm') {
				$module_name=$module_key.'::'.$module; # correct class name
			} else {
				die "$module_key/$module.pm does not exists";
			};
			eval("use $module_name");
			$module_obj=$module_name->new;
		};
		$self->{module}->{$module_key}=$module_obj;
		if ($interactor->{$module_key}) {
			my $automata=1;
			$automata=$module_obj->automata if ref($module_obj);
			$automated||=$automata;
			$interactive||=not $automata;
		};
	};
	die "Fully automated bot not supported yet" if not $interactive;
	$self->{botmode}=((
		$interactive and $automated
	)?(
		"kézi félautomata"
	):(
		$interactive and not $automated
	)?(
		"kézi"
	):(
		$automated and not $interactive
	)?(
		"automata"
	):(
		not $automated and not $interactive
	)?(
		"hibás"
	):(
		"hibás"
	));
};

sub get_botmode {
	my $self=shift;
	die "hibás működés" if $self->{botmode} eq 'hibás';
	return $self->{botmode};
};

our $AUTOLOAD;
sub AUTOLOAD {
	my $self=shift;
	my $sub=$AUTOLOAD;
	$sub=~ s/.*:://;
	if (exists $self->{module}->{$sub}) {
		return $self->{module}->{$sub}; # object
	} else {
		die "Cannot call method $sub";
	};
};

sub DESTROY {};

package bot;
use utf8;
use strict;
use warnings;
#use setup;
use LWP::UserAgent;
use Data::Dumper;
use JSON;

# LWP layer

sub _log {
	my $text=shift;
	open FOUT,'>>:utf8','log.txt';
	print FOUT $text;
	close FOUT;
};

sub debug {
	my $data=shift;
	my $dp=Data::Dumper->new([$data]);
	my $dumped= $dp->Dump;
	_log($dumped);
	print $dumped;
};

sub new {
	my $class=shift;
	my $self={
		setup=>$main::setup,
		logged_in=>0,
		min_wait=>$main::setup->{min_wait}, # per edit
		nice_wait=>$main::setup->{nice_wait}, # per api call
		can_edit=>$main::setup->{can_edit},
		wait_preferences=>{
			0=>{
				'under_developement'=>0,
				'unregistered_bot'=>$main::setup->{unregistered_bot_lag},
				'registered_bot'=>$main::setup->{registered_bot_lag},
			}->{$main::setup->{bot_status}},
			2=>2,
			4=>{
				'under_developement'=>$main::setup->{unregistered_bot_lag},
				'unregistered_bot'=>$main::setup->{unregistered_bot_lag},
				'registered_bot'=>$main::setup->{registered_bot_lag},
			},
			''=>0,
		},
	};
	$self->{ua}=LWP::UserAgent->new(
		agent=>'UTF46-bot/'.$main::VERSION.' (https://hu.wikipedia.org/wiki/User:UTF48)',
		cookie_jar=>{},
	);
	bless $self,$class;
	return $self;
};

# API layer

sub lag {
	my $self=shift;
	my $lag_method=shift;
	my $value=shift;
	
	if ($lag_method eq 'ns') {
		my $lag=$self->{wait_preferences}->{$value};
		if ($lag) {
			sleep $lag;
		} else {
			die "No editing enabled in '$value' namespace";
		};
	} elsif ($lag_method eq 'query') {
		sleep $self->{min_wait};
	} elsif ($lag_method eq 'nice') {
		sleep $self->{nice_wait};
	} else {
		die "Unexpected lag method: '$lag_method'";
	};
}; # end of lag

sub api {
	my $self=shift;
	my $method=shift; # GET POST etc
	my $content=shift; # POST
	my %params=@_; # GET

	$self->lag('nice');;
	my $url=$self->{setup}->{url};

	my $add={
		format=>'json',
		maxlag=>1,
		assert=>(($main::setup->{bot_status} eq 'registered_bot')?'bot':'user'),
	};
	$add->{assertuser}=$main::setup->{username} if not delete $params{noassertuser};

	my $ua=$self->{ua};
	my $result;
	if ($method eq 'get') {
		%params=(%{$add},%params);
		$url.='?'.join('&',map { ($_.'='.$params{$_}) } keys %params);
debug($url);
		$result=$ua->get($url);
	} elsif ($method eq 'post') {
		my %merged=(
			%{$add},
			%{$content},
		);
#		debug({%merged});
		$result=$ua->post($url,{%merged});
	} else {
		die "Bad http method: '$method'";
	};
	if ($result->is_success) {
		$result=JSON->new->utf8->decode($result->content);
	} else {
		debug($result);
		die "FAILED check log: ",$result->status_line;
	};
	if ($result->{error} and ($result->{error}->{code} eq 'maxlag')) {
		print "\n=== L A G ===\n\n";
		unshift @_,$content;
		unshift @_,$method;
		unshift @_,$self;
		$self->{nice_wait}++;
		goto &api;
	} else {
		$self->{nice_wait}-- if ($self->{nice_wait} > 2 * $main::setup->{nice_wait}) and  not(int(rand(100)));
	};
	return $result;
}; # end of api

sub api_get {
	my $self=shift;
	$self->api('get',{},@_);
};
sub api_post {
	my $self=shift;
	my $content=shift;
	$self->api('post',$content,@_);
};


# Above API layer; action layer, etc

sub query {
	my $self=shift;
	my $result=$self->api_get(action=>'query',@_);
	if (exists($result->{batchcomplete})) {
		# NOP
	} else {
		debug($result);
		die "Batch not completed";
	};
	return $result;
}; # end of qyery

sub login {
	my $self=shift;
#--------------------------
	my $ami_r=$self->query(
		meta=>'authmanagerinfo',
		amirequestsfor=>'login',
		assert=>'anon',
		noassertuser=>-1,
	);
	my $rqs=$ami_r->{query}->{authmanagerinfo}->{requests};
	my $ami_f={};
	if (ref($rqs) eq 'ARRAY') {
		for my $rq (@{$rqs}) {
			if ($rq->{required} eq 'primary-required') {
				$ami_f->{id}=$rq->{id};
				for my $key (keys %{$rq->{fields}}) {
					$ami_f->{$key}=1;
				};
			};
		};
	} else {
		debug($ami_r);
		die "How to login? Help me!";
	};
#----------------------------
	my $token_r=$self->query(
		meta=>'tokens',
		type=>'login',
		assert=>'anon',
		noassertuser=>-1,
	);
#debug([$ami_r,$token_r,$ami_f]);
	# checking...
	if (
		$token_r->{query}->{tokens}->{logintoken} and
		$ami_f->{id} and
		$ami_f->{username} and
		$ami_f->{password} and
		exists($ami_r->{query}->{authmanagerinfo}->{canauthenticatenow})
	) {
		#NOP
	} else {
		debug([$token_r,$ami_r,$ami_f]);
		die "Cannot login, no login token";
	};
#	print $result;
#--------------------------
	my $login_r=$self->api_post(
		{
			logintoken=>$token_r->{query}->{tokens}->{logintoken},
			action=>'clientlogin',
			username=>$main::setup->{username},
			password=>$main::setup->{password},
			loginreturnurl=>'https://'.$main::setup->{wiki_host}.'/wiki/User:'.$main::setup->{username},
			assert=>'anon',
		},
		noassertuser=>-1,
	);
	if (
		($login_r->{clientlogin}->{status} eq 'PASS') and
		($login_r->{clientlogin}->{username} eq $main::setup->{username})
	) {
		$self->{logged_in}=1;
	} else {
		debug($login_r);
		die "Login failed or other thing happened";
	};
	
	my $success=$bot->query(
		meta=>'userinfo',
		uiprop=>'*',
	);
	if ($success->{query}->{userinfo}->{name} eq $main::setup->{username}) {
#		debug($success);
	} else {
		debug($success);
		die "Finally login failed. Can't beleive";
	};
}; # end of login

sub get_page {
	my $self=shift;
	my $todo=shift;
	my $title;
	if (ref($todo) eq 'HASH') {
		$title=$todo->{title};
	} else {
		$title=$todo;
	};
#	my $revision=$page_r->{query}->{pages}->[0]->{revisions}->[0];
#	my $page_text=$revision->{slots}->{main}->{content};
#	print $page_text;
#	die "EDDIG";
	my $page_o=page->new->init(title=>$title,todo=>$todo);
	if ($page_o->cached) {
		$page_o->load_from_cache;
	} else {
		my $page_r=$self->query(
			prop=>'revisions',
			titles=>$title,
			formatversion=>2,
			rvprop=>join('|',qw[ ids timestamp flags comment user content ]),
			rvslots=>'*',
		);
#		debug($page_r);
		$page_o->load_from_api($page_r->{query}->{pages}->[0]);
		$page_o->save_to_cache;
	};
	return $page_o;
}; # end of get_page

sub lag_edit {
	my $self=shift;
	my $page=shift;
	my $ns=$page->{page}->{ns};
	my $lag=$self->{wait_preferences}->{$ns};
	if ($self->{lastedit}) {
		die "Editing ns: '$ns' not allowed" if not $lag;
		my $now=time;
		my $elapsed=$now-$self->{lastedit};
		if ($elapsed<0) {
			die "Time machine detected. This bug was fixed in the future";
		} elsif ($elapsed>=0 and $elapsed <$lag ) {
			sleep ($lag-$elapsed);
		} elsif ($elapsed >=$lag ) {
			# NOP
		} else {
			die "Trichotom error";
		};
	} else {
		die "Editing ns: '$ns' not allowed" if not $lag;
	};
};

sub edit_page {
	my $self=shift;
	my $page=shift;
#debug($page);
#die "DISAB";

	$self->lag_edit($page);
	
	my $new_page=$page->get_new;
	my $token_r=$self->query(
		meta=>'tokens',
		type=>'csrf',
	);
	debug($token_r);
	my $edit={
		action=>'edit',
		%{$new_page},
		watchlist=>'nochange',
		token=>$token_r->{query}->{tokens}->{csrftoken},
	};
#debug($edit);
	if ($self->{can_edit}) {
		my $edit_r=$self->api_post($edit);
#debug($edit_r);
	} else {
		#NOP
	};
	$self->{elapsed}=time;

}; # end of edit_page

sub tasklog {
	my $self=shift;
	my $log=shift;
	open FOUT, ">>:utf8","tasklog.txt";
	print FOUT time.":".$log."\n";
	close FOUT;
};

sub pagelog {
	my $self=shift;
	my $page=shift;
	my $ok=shift||0;
	$self->tasklog("Title $ok:".$page->title);
	if ($self->{logger}) {
		$self->{logger}->log($page,$ok);
	};
};

sub load_task {
	my $self=shift;
	my $taskname=shift @ARGV; # add taskname as script parameter
	if (not $taskname) {
		local $ENV{PATH}='/bin:/usr/bin';
		print qx{ls task/*.pm};
		local $|=1;
		print "Which task:";
		$taskname=<STDIN>; # or enter it into terminal prompt
	};
	if ($taskname =~ /^(\w+)$/) {
		$taskname=$1;
	};
	$taskname='task::'.$taskname;
#	if ($taskname =~ /^((generator|editor|checker)::\w+)$/) {
#		$taskname=$1;
#	};
	eval("use $taskname");
	die $@ if $@;
	$self->{task}=$taskname->new;
#	$self->{taskname}=$self->{task}->name.' ('.$taskname.'.pm';
};

sub process_task {
	my $self=shift;
	my $task=$self->{task};
	
bot::debug($task);
	my $generator=$task->generator;
	my $editor=$task->editor;
	my $checker=$task->checker;
	$self->{logger}=$task->logger;
	
	my $style=$task->style;
	
	$self->{todolist}=$generator->todolist;

	$self->tasklog('Task: '.$task->name);

	if ($style eq 'one_by_one') {

		for my $todo (@{$self->{todolist}}) {
			my $page=$self->get_page($todo);
			$page->edit($editor,$self->{task}->name.' ('.$self->{task}->get_botmode.')');
			my $ok=$page->check($checker);
			chomp($ok); # dont enatble empty terminal input az true
			$ok =~ s/\s//sg; #neither enable any whitespace
			if ($ok) {
				$self->edit_page($page);
			};
			$self->pagelog($page,$ok);
		};
	} elsif ($style eq 'batch') {
		my $pages=[];
		for my $todo (@{$self->{todolist}}) {
			push @{$pages},$self->get_page($todo);
		};
		for my $page (@{$pages}) {
			$page->edit($editor,$self->{task}->name.' ('.$self->{task}->get_botmode.')');
		};
		my $okpages=[];
		for my $page (@{$pages}) {
			my $ok=$page->check($checker);
			chomp($ok);
			if ($ok) {
				push @{$okpages},$page;
			} else {
				$self->pagelog($page,0);
			};
		};
		for my $page (@{$okpages}) {
			$self->edit_page($page);
			$self->pagelog($page,1);
		};
	} else {
		die "Unknown style: '$style'";
	};
	
};



package main;
use utf8;
use strict;
use warnings;
#use bot;


sub main {
	binmode(STDIN,':utf8');
	binmode(STDOUT,':utf8');
	binmode(STDERR,':utf8');
	$setup=setup->init;
	$bot=bot->new; # bot starts
	$bot->load_task; # loading task to do.
	$bot->login; # login to wikipedia via api calls
#	$bot->log_start; # user page: task started 
	$bot->process_task; # execute the task
#	$bot->log_end; # user page: task finished
	#$bot->logout; # unimplemented
	
};

main;

en:WTFPL