Rewrite parse control using pywikibot.bot classes
Use the default pywikibot classes to make life easier at some point, and be standard-conformant with pywikibot in handling args. Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72]
This commit is contained in:
172
reddiscparser.py
172
reddiscparser.py
@@ -22,11 +22,15 @@
|
|||||||
#
|
#
|
||||||
#
|
#
|
||||||
"""
|
"""
|
||||||
Script to parse all redpages in configured categories
|
Script to parse all reddisc pages in configured categories
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
import pywikibot
|
import pywikibot
|
||||||
from pywikibot import pagegenerators
|
from pywikibot import pagegenerators
|
||||||
|
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
|
||||||
|
|
||||||
import jogobot
|
import jogobot
|
||||||
|
|
||||||
@@ -34,51 +38,56 @@ import redpage
|
|||||||
import redfam
|
import redfam
|
||||||
|
|
||||||
|
|
||||||
def get_cat_pages( cat ):
|
class DiscussionParserBot(
|
||||||
|
# CurrentPageBot, # via next two sets 'current_page' on each treat()
|
||||||
|
ExistingPageBot, # CurrentPageBot only treats existing pages
|
||||||
|
NoRedirectPageBot ): # class which only treats non-redirects
|
||||||
"""
|
"""
|
||||||
Generates a iteratable generator-object with all pages listet in given
|
Botclass witch initialises the parsing process of Redundancy Discussions
|
||||||
category
|
|
||||||
|
|
||||||
@param cat Category to request
|
|
||||||
@type cat str
|
|
||||||
|
|
||||||
@returns generator Iteratable object with pages of given category
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Get site to work on from pywikibot config
|
def __init__( self, generator ):
|
||||||
site = pywikibot.Site()
|
|
||||||
|
|
||||||
# Retrieve the content of given category
|
|
||||||
category = pywikibot.Category( site, cat )
|
|
||||||
|
|
||||||
# Build an iteratable generator object with page objects for given category
|
|
||||||
generator = pagegenerators.CategorizedPageGenerator( category )
|
|
||||||
|
|
||||||
return generator
|
|
||||||
|
|
||||||
|
|
||||||
def main(*args):
|
|
||||||
"""
|
|
||||||
Handles process
|
|
||||||
"""
|
"""
|
||||||
|
Constructor
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
@param generator: The page generator that determines on which pages
|
||||||
|
to work.
|
||||||
|
@type generator: generator.
|
||||||
|
"""
|
||||||
|
super( DiscussionParserBot, self ).__init__(generator=generator)
|
||||||
|
|
||||||
|
def run( self ):
|
||||||
|
"""
|
||||||
|
Controls the overal parsing process, using super class for page switch
|
||||||
|
|
||||||
|
Needed to do things before/after treating pages is done
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
jogobot.output( "BEGINN – parser-pages.py" )
|
|
||||||
|
|
||||||
# Iterate over configured categories
|
super( DiscussionParserBot, self ).run()
|
||||||
for cat in ( jogobot.config["redundances"]["redpage_cats"] ):
|
|
||||||
|
|
||||||
# Iterate over pages in current cat
|
except:
|
||||||
for page in get_cat_pages( cat ):
|
raise
|
||||||
|
|
||||||
# For pages configured to exclude, go on with next page
|
else:
|
||||||
if page.title() in (
|
|
||||||
|
# If successfully parsed all pages in cat, flush db write cache
|
||||||
|
redpage.RedPage.flush_db_cache()
|
||||||
|
|
||||||
|
def treat_page( self ):
|
||||||
|
"""
|
||||||
|
Handles work on current page
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Short circuit excluded pages
|
||||||
|
if self.current_page.title() in (
|
||||||
jogobot.config["redundances"]["redpage_exclude"] ):
|
jogobot.config["redundances"]["redpage_exclude"] ):
|
||||||
|
|
||||||
continue
|
return
|
||||||
|
|
||||||
# Initiate RedPage object
|
# Initiate RedPage object
|
||||||
red_page = redpage.RedPage( page )
|
red_page = redpage.RedPage( self.current_page )
|
||||||
|
|
||||||
# Check whether parsing is needed
|
# Check whether parsing is needed
|
||||||
if red_page.is_parsing_needed():
|
if red_page.is_parsing_needed():
|
||||||
@@ -93,15 +102,96 @@ def main(*args):
|
|||||||
# If successfully parsed whole page, flush
|
# If successfully parsed whole page, flush
|
||||||
# db write cache
|
# db write cache
|
||||||
redfam.RedFamParser.flush_db_cache()
|
redfam.RedFamParser.flush_db_cache()
|
||||||
jogobot.output( "Page '%s' parsed" %
|
jogobot.output( "Page [[{redisc}]] parsed".format(
|
||||||
red_page.page.title() )
|
reddisc=red_page.page.title() ) )
|
||||||
else:
|
|
||||||
# If successfully parsed all pages in cat, flush db write cache
|
|
||||||
redpage.RedPage.flush_db_cache()
|
|
||||||
|
|
||||||
finally:
|
|
||||||
jogobot.output( "END – parser-pages.py" )
|
def main(*args): # noqa
|
||||||
pywikibot.stopme()
|
"""
|
||||||
|
Process command line arguments and invoke bot.
|
||||||
|
|
||||||
|
If args is an empty list, sys.argv is used.
|
||||||
|
|
||||||
|
@param args: command line arguments
|
||||||
|
@type args: list of unicode
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Process global arguments to determine desired site
|
||||||
|
local_args = pywikibot.handle_args(args)
|
||||||
|
|
||||||
|
# Get the jogobot-task_slug (basename of current file without ending)
|
||||||
|
task_slug = os.path.basename(__file__)[:-len(".py")]
|
||||||
|
|
||||||
|
# Before run, we need to check wether we are currently active or not
|
||||||
|
try:
|
||||||
|
# Will throw Exception if disabled/blocked
|
||||||
|
# jogobot.is_active( task_slug )
|
||||||
|
pass
|
||||||
|
|
||||||
|
except jogobot.jogobot.Blocked:
|
||||||
|
(type, value, traceback) = sys.exc_info()
|
||||||
|
jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ),
|
||||||
|
"CRITICAL" )
|
||||||
|
|
||||||
|
except jogobot.jogobot.Disabled:
|
||||||
|
(type, value, traceback) = sys.exc_info()
|
||||||
|
jogobot.output( "\03{red} %s (%s)" % (value, type ),
|
||||||
|
"ERROR" )
|
||||||
|
|
||||||
|
# Bot/Task is active
|
||||||
|
else:
|
||||||
|
|
||||||
|
# This factory is responsible for processing command line arguments
|
||||||
|
# that are also used by other scripts and that determine on which pages
|
||||||
|
# to work on.
|
||||||
|
genFactory = pagegenerators.GeneratorFactory()
|
||||||
|
# The generator gives the pages that should be worked upon.
|
||||||
|
gen = None
|
||||||
|
|
||||||
|
# If always is True, bot won't ask for confirmation of edit (automode)
|
||||||
|
# always = False
|
||||||
|
|
||||||
|
# If force_reload is True, bot will always parse Countrylist regardless
|
||||||
|
# if parsing is needed or not
|
||||||
|
# force_reload = False
|
||||||
|
|
||||||
|
# Parse command line arguments
|
||||||
|
for arg in local_args:
|
||||||
|
if arg.startswith("-always"):
|
||||||
|
# always = True
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
genFactory.handleArg(arg)
|
||||||
|
|
||||||
|
if not gen:
|
||||||
|
|
||||||
|
# Check wether there are generators waiting for factoring, if not
|
||||||
|
# use configured categories
|
||||||
|
if not genFactory.gens:
|
||||||
|
|
||||||
|
# Create Generators for configured Categories
|
||||||
|
for category in jogobot.config["redundances"]["redpage_cats"]:
|
||||||
|
cgen = genFactory.getCategoryGen(
|
||||||
|
category,
|
||||||
|
gen_func=pagegenerators.CategorizedPageGenerator)
|
||||||
|
|
||||||
|
# If there is one, append to genFactory
|
||||||
|
if cgen:
|
||||||
|
genFactory.gens.append(cgen)
|
||||||
|
|
||||||
|
# Create combined Generator (Union of all Generators)
|
||||||
|
gen = genFactory.getCombinedGenerator()
|
||||||
|
|
||||||
|
if gen:
|
||||||
|
# Log beginning of parsing
|
||||||
|
jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) )
|
||||||
|
|
||||||
|
# The preloading generator is responsible for downloading multiple
|
||||||
|
# pages from the wiki simultaneously.
|
||||||
|
gen = pagegenerators.PreloadingGenerator(gen)
|
||||||
|
DiscussionParserBot( gen ).run()
|
||||||
|
else:
|
||||||
|
pywikibot.showHelp()
|
||||||
|
|
||||||
if( __name__ == "__main__" ):
|
if( __name__ == "__main__" ):
|
||||||
main()
|
main()
|
||||||
|
|||||||
Reference in New Issue
Block a user