Rewrite parse control using pywikibot.bot classes

Use the default pywikibot.bot classes, which should make life easier at some point.
This also makes argument handling conform to pywikibot's standard.

Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72]
This commit is contained in:
2016-08-23 21:53:44 +02:00
parent a8605bcee6
commit 6cb92c1da7

View File

@@ -22,11 +22,15 @@
# #
# #
""" """
Script to parse all redpages in configured categories Script to parse all reddisc pages in configured categories
""" """
import os
import sys
import pywikibot import pywikibot
from pywikibot import pagegenerators from pywikibot import pagegenerators
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
import jogobot import jogobot
@@ -34,74 +38,160 @@ import redpage
import redfam import redfam
def get_cat_pages( cat ): class DiscussionParserBot(
# CurrentPageBot, # via next two sets 'current_page' on each treat()
ExistingPageBot, # CurrentPageBot only treats existing pages
NoRedirectPageBot ): # class which only treats non-redirects
""" """
Generates a iteratable generator-object with all pages listet in given Botclass witch initialises the parsing process of Redundancy Discussions
category
@param cat Category to request
@type cat str
@returns generator Iteratable object with pages of given category
""" """
# Get site to work on from pywikibot config def __init__( self, generator ):
site = pywikibot.Site() """
Constructor
# Retrieve the content of given category Parameters:
category = pywikibot.Category( site, cat ) @param generator: The page generator that determines on which pages
to work.
@type generator: generator.
"""
super( DiscussionParserBot, self ).__init__(generator=generator)
# Build an iteratable generator object with page objects for given category def run( self ):
generator = pagegenerators.CategorizedPageGenerator( category ) """
Controls the overal parsing process, using super class for page switch
return generator Needed to do things before/after treating pages is done
"""
try:
super( DiscussionParserBot, self ).run()
def main(*args): except:
""" raise
Handles process
"""
try: else:
jogobot.output( "BEGINN parser-pages.py" )
# Iterate over configured categories # If successfully parsed all pages in cat, flush db write cache
for cat in ( jogobot.config["redundances"]["redpage_cats"] ): redpage.RedPage.flush_db_cache()
# Iterate over pages in current cat def treat_page( self ):
for page in get_cat_pages( cat ): """
Handles work on current page
"""
# For pages configured to exclude, go on with next page # Short circuit excluded pages
if page.title() in ( if self.current_page.title() in (
jogobot.config["redundances"]["redpage_exclude"] ): jogobot.config["redundances"]["redpage_exclude"] ):
continue return
# Initiate RedPage object # Initiate RedPage object
red_page = redpage.RedPage( page ) red_page = redpage.RedPage( self.current_page )
# Check whether parsing is needed # Check whether parsing is needed
if red_page.is_parsing_needed(): if red_page.is_parsing_needed():
# Iterate over returned generator with redfam sections # Iterate over returned generator with redfam sections
for fam in red_page.parse(): for fam in red_page.parse():
# Run RedFamParser on section text # Run RedFamParser on section text
redfam.RedFamParser.parser( fam, red_page.page._pageid, redfam.RedFamParser.parser( fam, red_page.page._pageid,
red_page.is_archive() ) red_page.is_archive() )
else:
# If successfully parsed whole page, flush
# db write cache
redfam.RedFamParser.flush_db_cache()
jogobot.output( "Page '%s' parsed" %
red_page.page.title() )
else: else:
# If successfully parsed all pages in cat, flush db write cache # If successfully parsed whole page, flush
redpage.RedPage.flush_db_cache() # db write cache
redfam.RedFamParser.flush_db_cache()
jogobot.output( "Page [[{redisc}]] parsed".format(
reddisc=red_page.page.title() ) )
finally:
jogobot.output( "END parser-pages.py" ) def main(*args): # noqa
pywikibot.stopme() """
Process command line arguments and invoke bot.
If args is an empty list, sys.argv is used.
@param args: command line arguments
@type args: list of unicode
"""
# Process global arguments to determine desired site
local_args = pywikibot.handle_args(args)
# Get the jogobot-task_slug (basename of current file without ending)
task_slug = os.path.basename(__file__)[:-len(".py")]
# Before run, we need to check wether we are currently active or not
try:
# Will throw Exception if disabled/blocked
# jogobot.is_active( task_slug )
pass
except jogobot.jogobot.Blocked:
(type, value, traceback) = sys.exc_info()
jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ),
"CRITICAL" )
except jogobot.jogobot.Disabled:
(type, value, traceback) = sys.exc_info()
jogobot.output( "\03{red} %s (%s)" % (value, type ),
"ERROR" )
# Bot/Task is active
else:
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
# to work on.
genFactory = pagegenerators.GeneratorFactory()
# The generator gives the pages that should be worked upon.
gen = None
# If always is True, bot won't ask for confirmation of edit (automode)
# always = False
# If force_reload is True, bot will always parse Countrylist regardless
# if parsing is needed or not
# force_reload = False
# Parse command line arguments
for arg in local_args:
if arg.startswith("-always"):
# always = True
pass
else:
genFactory.handleArg(arg)
if not gen:
# Check wether there are generators waiting for factoring, if not
# use configured categories
if not genFactory.gens:
# Create Generators for configured Categories
for category in jogobot.config["redundances"]["redpage_cats"]:
cgen = genFactory.getCategoryGen(
category,
gen_func=pagegenerators.CategorizedPageGenerator)
# If there is one, append to genFactory
if cgen:
genFactory.gens.append(cgen)
# Create combined Generator (Union of all Generators)
gen = genFactory.getCombinedGenerator()
if gen:
# Log beginning of parsing
jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) )
# The preloading generator is responsible for downloading multiple
# pages from the wiki simultaneously.
gen = pagegenerators.PreloadingGenerator(gen)
DiscussionParserBot( gen ).run()
else:
pywikibot.showHelp()
if( __name__ == "__main__" ): if( __name__ == "__main__" ):
main() main()