Rewrite parse control using pywikibot.bot classes
Use the default pywikibot.bot classes, which should make life easier at some point, and be standard-conformant with pywikibot in handling args. Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72]
This commit is contained in:
172
reddiscparser.py
172
reddiscparser.py
@@ -22,11 +22,15 @@
|
||||
#
|
||||
#
|
||||
"""
|
||||
Script to parse all redpages in configured categories
|
||||
Script to parse all reddisc pages in configured categories
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pywikibot
|
||||
from pywikibot import pagegenerators
|
||||
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
|
||||
|
||||
import jogobot
|
||||
|
||||
@@ -34,51 +38,56 @@ import redpage
|
||||
import redfam
|
||||
|
||||
|
||||
def get_cat_pages( cat ):
|
||||
class DiscussionParserBot(
|
||||
# CurrentPageBot, # via next two sets 'current_page' on each treat()
|
||||
ExistingPageBot, # CurrentPageBot only treats existing pages
|
||||
NoRedirectPageBot ): # class which only treats non-redirects
|
||||
"""
|
||||
Generates a iteratable generator-object with all pages listet in given
|
||||
category
|
||||
|
||||
@param cat Category to request
|
||||
@type cat str
|
||||
|
||||
@returns generator Iteratable object with pages of given category
|
||||
Botclass witch initialises the parsing process of Redundancy Discussions
|
||||
"""
|
||||
|
||||
# Get site to work on from pywikibot config
|
||||
site = pywikibot.Site()
|
||||
|
||||
# Retrieve the content of given category
|
||||
category = pywikibot.Category( site, cat )
|
||||
|
||||
# Build an iteratable generator object with page objects for given category
|
||||
generator = pagegenerators.CategorizedPageGenerator( category )
|
||||
|
||||
return generator
|
||||
|
||||
|
||||
def main(*args):
|
||||
"""
|
||||
Handles process
|
||||
def __init__( self, generator ):
|
||||
"""
|
||||
Constructor
|
||||
|
||||
Parameters:
|
||||
@param generator: The page generator that determines on which pages
|
||||
to work.
|
||||
@type generator: generator.
|
||||
"""
|
||||
super( DiscussionParserBot, self ).__init__(generator=generator)
|
||||
|
||||
def run( self ):
|
||||
"""
|
||||
Controls the overal parsing process, using super class for page switch
|
||||
|
||||
Needed to do things before/after treating pages is done
|
||||
"""
|
||||
try:
|
||||
jogobot.output( "BEGINN – parser-pages.py" )
|
||||
|
||||
# Iterate over configured categories
|
||||
for cat in ( jogobot.config["redundances"]["redpage_cats"] ):
|
||||
super( DiscussionParserBot, self ).run()
|
||||
|
||||
# Iterate over pages in current cat
|
||||
for page in get_cat_pages( cat ):
|
||||
except:
|
||||
raise
|
||||
|
||||
# For pages configured to exclude, go on with next page
|
||||
if page.title() in (
|
||||
else:
|
||||
|
||||
# If successfully parsed all pages in cat, flush db write cache
|
||||
redpage.RedPage.flush_db_cache()
|
||||
|
||||
def treat_page( self ):
|
||||
"""
|
||||
Handles work on current page
|
||||
"""
|
||||
|
||||
# Short circuit excluded pages
|
||||
if self.current_page.title() in (
|
||||
jogobot.config["redundances"]["redpage_exclude"] ):
|
||||
|
||||
continue
|
||||
return
|
||||
|
||||
# Initiate RedPage object
|
||||
red_page = redpage.RedPage( page )
|
||||
red_page = redpage.RedPage( self.current_page )
|
||||
|
||||
# Check whether parsing is needed
|
||||
if red_page.is_parsing_needed():
|
||||
@@ -93,15 +102,96 @@ def main(*args):
|
||||
# If successfully parsed whole page, flush
|
||||
# db write cache
|
||||
redfam.RedFamParser.flush_db_cache()
|
||||
jogobot.output( "Page '%s' parsed" %
|
||||
red_page.page.title() )
|
||||
else:
|
||||
# If successfully parsed all pages in cat, flush db write cache
|
||||
redpage.RedPage.flush_db_cache()
|
||||
jogobot.output( "Page [[{redisc}]] parsed".format(
|
||||
reddisc=red_page.page.title() ) )
|
||||
|
||||
finally:
|
||||
jogobot.output( "END – parser-pages.py" )
|
||||
pywikibot.stopme()
|
||||
|
||||
def main(*args):  # noqa
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Arguments not consumed by pywikibot.handle_args are passed to a
    pagegenerators.GeneratorFactory. If they do not yield any generator,
    one category generator per entry of
    jogobot.config["redundances"]["redpage_cats"] is used instead.

    @param args: command line arguments
    @type args: list of unicode
    """

    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # Get the jogobot-task_slug (basename of current file without ending)
    # splitext strips whatever extension the file has, not just ".py"
    task_slug = os.path.splitext(os.path.basename(__file__))[0]

    # Before run, we need to check whether we are currently active or not
    try:
        # Will throw Exception if disabled/blocked
        # NOTE(review): the check is currently commented out, so neither
        # except branch below can be reached and the else branch always runs
        # jogobot.is_active( task_slug )
        pass

    except jogobot.jogobot.Blocked as err:
        jogobot.output( "\03{lightpurple} %s (%s)" % (err, type(err)),
                        "CRITICAL" )

    except jogobot.jogobot.Disabled as err:
        jogobot.output( "\03{red} %s (%s)" % (err, type(err)),
                        "ERROR" )

    # Bot/Task is active
    else:

        # This factory is responsible for processing command line arguments
        # that are also used by other scripts and that determine on which
        # pages to work on.
        genFactory = pagegenerators.GeneratorFactory()

        # Parse command line arguments
        for arg in local_args:
            if arg.startswith("-always"):
                # Recognised but currently without effect; it is swallowed
                # here so that it is not handed to the generator factory
                continue

            genFactory.handleArg(arg)

        # Check whether there are generators waiting for factoring, if not
        # use configured categories
        if not genFactory.gens:

            # Create Generators for configured Categories
            for category in jogobot.config["redundances"]["redpage_cats"]:
                cgen = genFactory.getCategoryGen(
                    category,
                    gen_func=pagegenerators.CategorizedPageGenerator)

                # If there is one, append to genFactory
                if cgen:
                    genFactory.gens.append(cgen)

        # Create combined Generator (Union of all Generators);
        # this is the generator giving the pages that should be worked upon
        gen = genFactory.getCombinedGenerator()

        if gen:
            # Log beginning of parsing
            jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) )

            # The preloading generator is responsible for downloading
            # multiple pages from the wiki simultaneously.
            gen = pagegenerators.PreloadingGenerator(gen)
            DiscussionParserBot( gen ).run()
        else:
            pywikibot.showHelp()
|
||||
|
||||
if( __name__ == "__main__" ):
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user