Rewrite parse control using pywikibot.bot classes

Use the default pywikibot.bot classes, which should make life easier at some point.
This also makes argument handling conform to the pywikibot standard.

Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72]
This commit is contained in:
2016-08-23 21:53:44 +02:00
parent a8605bcee6
commit 6cb92c1da7

View File

@@ -22,11 +22,15 @@
# #
# #
""" """
Script to parse all redpages in configured categories Script to parse all reddisc pages in configured categories
""" """
import os
import sys
import pywikibot import pywikibot
from pywikibot import pagegenerators from pywikibot import pagegenerators
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
import jogobot import jogobot
@@ -34,51 +38,56 @@ import redpage
import redfam import redfam
def get_cat_pages( cat ): class DiscussionParserBot(
# CurrentPageBot, # via next two sets 'current_page' on each treat()
ExistingPageBot, # CurrentPageBot only treats existing pages
NoRedirectPageBot ): # class which only treats non-redirects
""" """
Generates a iteratable generator-object with all pages listet in given Botclass witch initialises the parsing process of Redundancy Discussions
category
@param cat Category to request
@type cat str
@returns generator Iteratable object with pages of given category
""" """
# Get site to work on from pywikibot config def __init__( self, generator ):
site = pywikibot.Site()
# Retrieve the content of given category
category = pywikibot.Category( site, cat )
# Build an iteratable generator object with page objects for given category
generator = pagegenerators.CategorizedPageGenerator( category )
return generator
def main(*args):
"""
Handles process
""" """
Constructor
Parameters:
@param generator: The page generator that determines on which pages
to work.
@type generator: generator.
"""
super( DiscussionParserBot, self ).__init__(generator=generator)
def run( self ):
"""
Controls the overal parsing process, using super class for page switch
Needed to do things before/after treating pages is done
"""
try: try:
jogobot.output( "BEGINN parser-pages.py" )
# Iterate over configured categories super( DiscussionParserBot, self ).run()
for cat in ( jogobot.config["redundances"]["redpage_cats"] ):
# Iterate over pages in current cat except:
for page in get_cat_pages( cat ): raise
# For pages configured to exclude, go on with next page else:
if page.title() in (
# If successfully parsed all pages in cat, flush db write cache
redpage.RedPage.flush_db_cache()
def treat_page( self ):
"""
Handles work on current page
"""
# Short circuit excluded pages
if self.current_page.title() in (
jogobot.config["redundances"]["redpage_exclude"] ): jogobot.config["redundances"]["redpage_exclude"] ):
continue return
# Initiate RedPage object # Initiate RedPage object
red_page = redpage.RedPage( page ) red_page = redpage.RedPage( self.current_page )
# Check whether parsing is needed # Check whether parsing is needed
if red_page.is_parsing_needed(): if red_page.is_parsing_needed():
@@ -93,15 +102,96 @@ def main(*args):
# If successfully parsed whole page, flush # If successfully parsed whole page, flush
# db write cache # db write cache
redfam.RedFamParser.flush_db_cache() redfam.RedFamParser.flush_db_cache()
jogobot.output( "Page '%s' parsed" % jogobot.output( "Page [[{redisc}]] parsed".format(
red_page.page.title() ) reddisc=red_page.page.title() ) )
else:
# If successfully parsed all pages in cat, flush db write cache
redpage.RedPage.flush_db_cache()
finally:
jogobot.output( "END parser-pages.py" ) def main(*args): # noqa
pywikibot.stopme() """
Process command line arguments and invoke bot.
If args is an empty list, sys.argv is used.
@param args: command line arguments
@type args: list of unicode
"""
# Process global arguments to determine desired site
local_args = pywikibot.handle_args(args)
# Get the jogobot-task_slug (basename of current file without ending)
task_slug = os.path.basename(__file__)[:-len(".py")]
# Before run, we need to check wether we are currently active or not
try:
# Will throw Exception if disabled/blocked
# jogobot.is_active( task_slug )
pass
except jogobot.jogobot.Blocked:
(type, value, traceback) = sys.exc_info()
jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ),
"CRITICAL" )
except jogobot.jogobot.Disabled:
(type, value, traceback) = sys.exc_info()
jogobot.output( "\03{red} %s (%s)" % (value, type ),
"ERROR" )
# Bot/Task is active
else:
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
# to work on.
genFactory = pagegenerators.GeneratorFactory()
# The generator gives the pages that should be worked upon.
gen = None
# If always is True, bot won't ask for confirmation of edit (automode)
# always = False
# If force_reload is True, bot will always parse Countrylist regardless
# if parsing is needed or not
# force_reload = False
# Parse command line arguments
for arg in local_args:
if arg.startswith("-always"):
# always = True
pass
else:
genFactory.handleArg(arg)
if not gen:
# Check wether there are generators waiting for factoring, if not
# use configured categories
if not genFactory.gens:
# Create Generators for configured Categories
for category in jogobot.config["redundances"]["redpage_cats"]:
cgen = genFactory.getCategoryGen(
category,
gen_func=pagegenerators.CategorizedPageGenerator)
# If there is one, append to genFactory
if cgen:
genFactory.gens.append(cgen)
# Create combined Generator (Union of all Generators)
gen = genFactory.getCombinedGenerator()
if gen:
# Log beginning of parsing
jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) )
# The preloading generator is responsible for downloading multiple
# pages from the wiki simultaneously.
gen = pagegenerators.PreloadingGenerator(gen)
DiscussionParserBot( gen ).run()
else:
pywikibot.showHelp()
if( __name__ == "__main__" ): if( __name__ == "__main__" ):
main() main()