From 177a8f920f9396a6480efab60fc7c084e0234308 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 10:55:22 +0200 Subject: [PATCH 01/10] Prepare new structure to use subtasks To have only one entry point for the bot we want to have a single file (red.py) which is calling the specfic task class from bots dir with a standardized call Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- reddiscparser.py => bots/reddiscparser.py | 0 red.py | 230 ++++++++++++++++++++++ 2 files changed, 230 insertions(+) rename reddiscparser.py => bots/reddiscparser.py (100%) create mode 100644 red.py diff --git a/reddiscparser.py b/bots/reddiscparser.py similarity index 100% rename from reddiscparser.py rename to bots/reddiscparser.py diff --git a/red.py b/red.py new file mode 100644 index 0000000..f9b2059 --- /dev/null +++ b/red.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# reddiscparser.py +# +# Copyright 2016 GOLDERWEB – Jonathan Golder +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. 
+# +# +""" +Script to parse all reddisc pages in configured categories +""" + +import os +import sys +import re + +import pywikibot +from pywikibot import pagegenerators +from pywikibot.bot import ExistingPageBot, NoRedirectPageBot + +import jogobot + +from lib import redpage +from lib import redfam + + +class DiscussionParserBot( + # CurrentPageBot, # via next two sets 'current_page' on each treat() + ExistingPageBot, # CurrentPageBot only treats existing pages + NoRedirectPageBot ): # class which only treats non-redirects + """ + Botclass witch initialises the parsing process of Redundancy Discussions + """ + + # RegEx to filter wrong pages + onlyinclude_re = re.compile( + jogobot.config["redundances"]["reddiscs_onlyinclude_re"] ) + + def __init__( self, generator ): + """ + Constructor + + Parameters: + @param generator: The page generator that determines on which pages + to work. + @type generator: generator. + """ + super( DiscussionParserBot, self ).__init__(generator=generator) + + def run( self ): + """ + Controls the overal parsing process, using super class for page switch + + Needed to do things before/after treating pages is done + """ + try: + + super( DiscussionParserBot, self ).run() + + except: + raise + + else: + + # If successfully parsed all pages in cat, flush db write cache + redpage.RedPage.flush_db_cache() + + def treat_page( self ): + """ + Handles work on current page + """ + + # Short circuit excluded pages + if self.current_page.title() in ( + jogobot.config["redundances"]["redpage_exclude"] ): + + return + + # Exclude pages which does not match pattern + if not type(self).onlyinclude_re.search( self.current_page.title() ): + + return + + # Initiate RedPage object + red_page = redpage.RedPage( self.current_page ) + + # Check whether parsing is needed + if red_page.is_parsing_needed(): + + # Count families for failure analysis + fam_counter = 0 + + # Iterate over returned generator with redfam sections + for fam in red_page.parse(): + + # 
Run RedFamParser on section text + redfam.RedFamParser.parser( fam, red_page.page, + red_page.is_archive() ) + + fam_counter += 1 + + else: + # If successfully parsed whole page, flush + # db write cache + if( fam_counter ): + redfam.RedFamParser.flush_db_cache() + jogobot.output( "Page [[{reddisc}]] parsed".format( + reddisc=red_page.page.title() ) ) + else: + jogobot.output( + "\03{red}" + "Page [[{reddisc}]], ".format( + reddisc=red_page.page.title() ) + + "containing no redfam, parsed!", + "WARNING" ) + + +def apply_conf_cat_generators( genFactory ): + """ + Builds generators for categories which are read from jogobot.config + + Parameters: + @param genFactory: The GeneratorFactory to which the builded generators + should be added. + @type genFactory: pagegenerators.GeneratorFactory + """ + # Create Generators for configured Categories + for category in jogobot.config["redundances"]["redpage_cats"]: + cgen = genFactory.getCategoryGen( + category, gen_func=pagegenerators.CategorizedPageGenerator) + + # If there is one, append to genFactory + if cgen: + genFactory.gens.append(cgen) + + +def main(*args): + """ + Process command line arguments and invoke bot. + + If args is an empty list, sys.argv is used. 
+ + @param args: command line arguments + @type args: list of unicode + """ + + # Process global arguments to determine desired site + local_args = pywikibot.handle_args(args) + + # Get the jogobot-task_slug (basename of current file without ending) + task_slug = os.path.basename(__file__)[:-len(".py")] + + # Before run, we need to check wether we are currently active or not + try: + # Will throw Exception if disabled/blocked + # jogobot.is_active( task_slug ) + pass + + except jogobot.jogobot.Blocked: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), + "CRITICAL" ) + + except jogobot.jogobot.Disabled: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{red} %s (%s)" % (value, type ), + "ERROR" ) + + # Bot/Task is active + else: + + # This factory is responsible for processing command line arguments + # that are also used by other scripts and that determine on which pages + # to work on. + genFactory = pagegenerators.GeneratorFactory() + # The generator gives the pages that should be worked upon. + gen = None + + # If always is True, bot won't ask for confirmation of edit (automode) + # always = False + + # If force_reload is True, bot will always parse Countrylist regardless + # if parsing is needed or not + # force_reload = False + + # Parse command line arguments + for arg in local_args: + if arg.startswith("-always"): + # always = True + pass + else: + genFactory.handleArg(arg) + + if not gen: + + # Check wether there are generators waiting for factoring, if not + # use configured categories + if not genFactory.gens: + apply_conf_cat_generators( genFactory ) + + # Create combined Generator (Union of all Generators) + gen = genFactory.getCombinedGenerator() + + if gen: + # Log beginning of parsing + jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) ) + + # The preloading generator is responsible for downloading multiple + # pages from the wiki simultaneously. 
+ gen = pagegenerators.PreloadingGenerator(gen) + DiscussionParserBot( gen ).run() + else: + pywikibot.showHelp() + +if( __name__ == "__main__" ): + main() From b88efb6bdde64ea9d1dc736da224c990464eb863 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 12:17:12 +0200 Subject: [PATCH 02/10] Reflect stucture changes in Code Since bot class is moved to separate dir/file we need to do some changes to rebuild functionality Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- bots/__init__.py | 2 + bots/reddiscparser.py | 109 ++---------------------------------------- red.py | 98 +------------------------------------ 3 files changed, 7 insertions(+), 202 deletions(-) create mode 100644 bots/__init__.py diff --git a/bots/__init__.py b/bots/__init__.py new file mode 100644 index 0000000..9327388 --- /dev/null +++ b/bots/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index f9b2059..7f66a2f 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -22,15 +22,13 @@ # # """ -Script to parse all reddisc pages in configured categories +Bot to parse all reddisc pages in given Generator or configured categories """ -import os -import sys import re -import pywikibot -from pywikibot import pagegenerators +import pywikibot # noqa +from pywikibot import pagegenerators # noqa from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot @@ -127,104 +125,3 @@ class DiscussionParserBot( reddisc=red_page.page.title() ) + "containing no redfam, parsed!", "WARNING" ) - - -def apply_conf_cat_generators( genFactory ): - """ - Builds generators for categories which are read from jogobot.config - - Parameters: - @param genFactory: The GeneratorFactory to which the builded generators - should be added. 
- @type genFactory: pagegenerators.GeneratorFactory - """ - # Create Generators for configured Categories - for category in jogobot.config["redundances"]["redpage_cats"]: - cgen = genFactory.getCategoryGen( - category, gen_func=pagegenerators.CategorizedPageGenerator) - - # If there is one, append to genFactory - if cgen: - genFactory.gens.append(cgen) - - -def main(*args): - """ - Process command line arguments and invoke bot. - - If args is an empty list, sys.argv is used. - - @param args: command line arguments - @type args: list of unicode - """ - - # Process global arguments to determine desired site - local_args = pywikibot.handle_args(args) - - # Get the jogobot-task_slug (basename of current file without ending) - task_slug = os.path.basename(__file__)[:-len(".py")] - - # Before run, we need to check wether we are currently active or not - try: - # Will throw Exception if disabled/blocked - # jogobot.is_active( task_slug ) - pass - - except jogobot.jogobot.Blocked: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), - "CRITICAL" ) - - except jogobot.jogobot.Disabled: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{red} %s (%s)" % (value, type ), - "ERROR" ) - - # Bot/Task is active - else: - - # This factory is responsible for processing command line arguments - # that are also used by other scripts and that determine on which pages - # to work on. - genFactory = pagegenerators.GeneratorFactory() - # The generator gives the pages that should be worked upon. 
- gen = None - - # If always is True, bot won't ask for confirmation of edit (automode) - # always = False - - # If force_reload is True, bot will always parse Countrylist regardless - # if parsing is needed or not - # force_reload = False - - # Parse command line arguments - for arg in local_args: - if arg.startswith("-always"): - # always = True - pass - else: - genFactory.handleArg(arg) - - if not gen: - - # Check wether there are generators waiting for factoring, if not - # use configured categories - if not genFactory.gens: - apply_conf_cat_generators( genFactory ) - - # Create combined Generator (Union of all Generators) - gen = genFactory.getCombinedGenerator() - - if gen: - # Log beginning of parsing - jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) ) - - # The preloading generator is responsible for downloading multiple - # pages from the wiki simultaneously. - gen = pagegenerators.PreloadingGenerator(gen) - DiscussionParserBot( gen ).run() - else: - pywikibot.showHelp() - -if( __name__ == "__main__" ): - main() diff --git a/red.py b/red.py index f9b2059..bee76b8 100644 --- a/red.py +++ b/red.py @@ -22,111 +22,17 @@ # # """ -Script to parse all reddisc pages in configured categories +Wrapper script to invoke all redundances bot tasks """ import os import sys -import re import pywikibot from pywikibot import pagegenerators -from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot - -from lib import redpage -from lib import redfam - - -class DiscussionParserBot( - # CurrentPageBot, # via next two sets 'current_page' on each treat() - ExistingPageBot, # CurrentPageBot only treats existing pages - NoRedirectPageBot ): # class which only treats non-redirects - """ - Botclass witch initialises the parsing process of Redundancy Discussions - """ - - # RegEx to filter wrong pages - onlyinclude_re = re.compile( - jogobot.config["redundances"]["reddiscs_onlyinclude_re"] ) - - def __init__( self, generator ): - """ - Constructor - 
- Parameters: - @param generator: The page generator that determines on which pages - to work. - @type generator: generator. - """ - super( DiscussionParserBot, self ).__init__(generator=generator) - - def run( self ): - """ - Controls the overal parsing process, using super class for page switch - - Needed to do things before/after treating pages is done - """ - try: - - super( DiscussionParserBot, self ).run() - - except: - raise - - else: - - # If successfully parsed all pages in cat, flush db write cache - redpage.RedPage.flush_db_cache() - - def treat_page( self ): - """ - Handles work on current page - """ - - # Short circuit excluded pages - if self.current_page.title() in ( - jogobot.config["redundances"]["redpage_exclude"] ): - - return - - # Exclude pages which does not match pattern - if not type(self).onlyinclude_re.search( self.current_page.title() ): - - return - - # Initiate RedPage object - red_page = redpage.RedPage( self.current_page ) - - # Check whether parsing is needed - if red_page.is_parsing_needed(): - - # Count families for failure analysis - fam_counter = 0 - - # Iterate over returned generator with redfam sections - for fam in red_page.parse(): - - # Run RedFamParser on section text - redfam.RedFamParser.parser( fam, red_page.page, - red_page.is_archive() ) - - fam_counter += 1 - - else: - # If successfully parsed whole page, flush - # db write cache - if( fam_counter ): - redfam.RedFamParser.flush_db_cache() - jogobot.output( "Page [[{reddisc}]] parsed".format( - reddisc=red_page.page.title() ) ) - else: - jogobot.output( - "\03{red}" + "Page [[{reddisc}]], ".format( - reddisc=red_page.page.title() ) + - "containing no redfam, parsed!", - "WARNING" ) +from bots.reddiscparser import DiscussionParserBot def apply_conf_cat_generators( genFactory ): From 1679e2ad6a8b10bd0d319abbab6ad4653615586e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 13:36:14 +0200 Subject: [PATCH 03/10] 
Prepare environment for starting subtasks Before init and run bot we need to provide a environment for it, like parsed args Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- red.py | 65 ++++++++++++++++++++++------------------------------------ 1 file changed, 25 insertions(+), 40 deletions(-) diff --git a/red.py b/red.py index bee76b8..dd14625 100644 --- a/red.py +++ b/red.py @@ -32,26 +32,6 @@ import pywikibot from pywikibot import pagegenerators import jogobot -from bots.reddiscparser import DiscussionParserBot - - -def apply_conf_cat_generators( genFactory ): - """ - Builds generators for categories which are read from jogobot.config - - Parameters: - @param genFactory: The GeneratorFactory to which the builded generators - should be added. - @type genFactory: pagegenerators.GeneratorFactory - """ - # Create Generators for configured Categories - for category in jogobot.config["redundances"]["redpage_cats"]: - cgen = genFactory.getCategoryGen( - category, gen_func=pagegenerators.CategorizedPageGenerator) - - # If there is one, append to genFactory - if cgen: - genFactory.gens.append(cgen) def main(*args): @@ -68,7 +48,7 @@ def main(*args): local_args = pywikibot.handle_args(args) # Get the jogobot-task_slug (basename of current file without ending) - task_slug = os.path.basename(__file__)[:-len(".py")] + task_slug = os.path.basename(__file__)[:-len(".py")] # noqa (temp) # Before run, we need to check wether we are currently active or not try: @@ -93,8 +73,6 @@ def main(*args): # that are also used by other scripts and that determine on which pages # to work on. genFactory = pagegenerators.GeneratorFactory() - # The generator gives the pages that should be worked upon. 
- gen = None # If always is True, bot won't ask for confirmation of edit (automode) # always = False @@ -103,34 +81,41 @@ def main(*args): # if parsing is needed or not # force_reload = False + # Subtask selects the specific bot to run + # Default is reddiscparser + subtask = None + + # kwargs are passed to selected bot as **kwargs + kwargs = dict() # noqa (temp) + # Parse command line arguments for arg in local_args: + + # Split args + arg, sep, value = arg.partition(':') + if arg.startswith("-always"): # always = True pass + elif arg.startswith("-task"): + subtask = value else: genFactory.handleArg(arg) - if not gen: + # After parsing args we can select bot to run + if not subtask or subtask == "discparser": + # Default case: discparser + subtask = "discparser" - # Check wether there are generators waiting for factoring, if not - # use configured categories - if not genFactory.gens: - apply_conf_cat_generators( genFactory ) + # Import related bot + from bots.reddiscparser import DiscussionParserBot as Bot # noqa (temp) - # Create combined Generator (Union of all Generators) - gen = genFactory.getCombinedGenerator() - - if gen: - # Log beginning of parsing - jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) ) - - # The preloading generator is responsible for downloading multiple - # pages from the wiki simultaneously. - gen = pagegenerators.PreloadingGenerator(gen) - DiscussionParserBot( gen ).run() + # else: - pywikibot.showHelp() + jogobot.output( ( + "\03{{red}} Given subtask \"{subtask} \"" + + "is not existing!" ).format( subtask=subtask ), "ERROR" ) + if( __name__ == "__main__" ): main() From 156f117b18ebd997a7e08454ab21455ca6491e98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 13:49:19 +0200 Subject: [PATCH 04/10] Add Bot initiation with exception handling Bot initiation needs to catch errors by Bot to enforce at least a basic logging. 
And also to be sure Init was successfull before starting bot. Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- red.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/red.py b/red.py index dd14625..7a26f24 100644 --- a/red.py +++ b/red.py @@ -48,7 +48,7 @@ def main(*args): local_args = pywikibot.handle_args(args) # Get the jogobot-task_slug (basename of current file without ending) - task_slug = os.path.basename(__file__)[:-len(".py")] # noqa (temp) + task_slug = os.path.basename(__file__)[:-len(".py")] # Before run, we need to check wether we are currently active or not try: @@ -86,7 +86,7 @@ def main(*args): subtask = None # kwargs are passed to selected bot as **kwargs - kwargs = dict() # noqa (temp) + kwargs = dict() # Parse command line arguments for arg in local_args: @@ -108,7 +108,7 @@ def main(*args): subtask = "discparser" # Import related bot - from bots.reddiscparser import DiscussionParserBot as Bot # noqa (temp) + from bots.reddiscparser import DiscussionParserBot as Bot # else: @@ -116,6 +116,25 @@ def main(*args): "\03{{red}} Given subtask \"{subtask} \"" + "is not existing!" ).format( subtask=subtask ), "ERROR" ) + # Bot gets prepared genFactory as first param and possible kwargs dict + # It has to threw an exception if something does not work properly + try: + # Init bot with genFactory and **kwargs + bot = Bot( genFactory, **kwargs ) # noqa (temp) + + except: + # Catch Errors while initiation + jogobot.output( ( + "\03{{red}} Error while trying to init " + + "subtask \"{task_slug}-{subtask} \"!" ). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + raise + else: + # Init successfull + jogobot.output( ( + "{task_slug}-{subtask} init successfull" ). 
+ format(task_slug=task_slug, subtask=subtask) ) + if( __name__ == "__main__" ): main() From 460d2db18396939c13f7fbb9ca1627fbf4cb02a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 14:00:29 +0200 Subject: [PATCH 05/10] Add Bot run with exception handling Errors, especially caused by missing run-method, need to be catched to provide information in Logfile. And also to get information wether bot run was successfull Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- red.py | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/red.py b/red.py index 7a26f24..3d398f4 100644 --- a/red.py +++ b/red.py @@ -34,7 +34,7 @@ from pywikibot import pagegenerators import jogobot -def main(*args): +def main(*args): # noqa (temp) """ Process command line arguments and invoke bot. @@ -120,7 +120,7 @@ def main(*args): # It has to threw an exception if something does not work properly try: # Init bot with genFactory and **kwargs - bot = Bot( genFactory, **kwargs ) # noqa (temp) + bot = Bot( genFactory, **kwargs ) except: # Catch Errors while initiation @@ -132,7 +132,44 @@ def main(*args): else: # Init successfull jogobot.output( ( - "{task_slug}-{subtask} init successfull" ). + "Subtask \"{task_slug}-{subtask}\" was" + + "initiated successfully" ). 
+ format(task_slug=task_slug, subtask=subtask) ) + + # Fire up Bot + # Bot must have implemented a run()-method + # It has to threw an exception if something does not work properly + try: + # Call run method on Bot + bot.run() + + # Special event on AttributeError to catch missing run()-method + except AttributeError: + (type, value, traceback) = sys.exc_info() + + # Catch missing run()-method + if "has no attribute 'run'" in value: + jogobot.output( ( + "\03{{red}} Error while trying to run " + + "subtask \"{task_slug}-{subtask} \": +" + "Run-method is missing! "). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + + # Pass through other AttributeError + else: + raise + + except: + jogobot.output( ( + "\03{{red}} Error while trying to run " + + "subtask \"{task_slug}-{subtask} \"!" ). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + raise + + else: + # Run successfull + jogobot.output( ( + "Subtask \"{task_slug}-{subtask}\" was finished successfully"). format(task_slug=task_slug, subtask=subtask) ) From 3540cc2a7d68e4c15d7e96bf84c11a21fd59e723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 15:18:17 +0200 Subject: [PATCH 06/10] Move functional sections to functions in main() To make main() function less complicated functional sections are moved to dedicated functions Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- red.py | 317 +++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 220 insertions(+), 97 deletions(-) diff --git a/red.py b/red.py index 3d398f4..d9bda88 100644 --- a/red.py +++ b/red.py @@ -34,23 +34,17 @@ from pywikibot import pagegenerators import jogobot -def main(*args): # noqa (temp) +def active(task_slug): """ - Process command line arguments and invoke bot. + Checks up if bot with given task_slug is active via jogobot.framework - If args is an empty list, sys.argv is used. 
+ @param task_slug Task slug to check + @type task_slug str - @param args: command line arguments - @type args: list of unicode + @return True if active, otherwise False + @rtype bool """ - # Process global arguments to determine desired site - local_args = pywikibot.handle_args(args) - - # Get the jogobot-task_slug (basename of current file without ending) - task_slug = os.path.basename(__file__)[:-len(".py")] - - # Before run, we need to check wether we are currently active or not try: # Will throw Exception if disabled/blocked # jogobot.is_active( task_slug ) @@ -60,117 +54,246 @@ def main(*args): # noqa (temp) (type, value, traceback) = sys.exc_info() jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), "CRITICAL" ) + return False except jogobot.jogobot.Disabled: (type, value, traceback) = sys.exc_info() jogobot.output( "\03{red} %s (%s)" % (value, type ), "ERROR" ) + return False # Bot/Task is active else: + return True - # This factory is responsible for processing command line arguments - # that are also used by other scripts and that determine on which pages - # to work on. 
- genFactory = pagegenerators.GeneratorFactory() - - # If always is True, bot won't ask for confirmation of edit (automode) - # always = False - # If force_reload is True, bot will always parse Countrylist regardless - # if parsing is needed or not - # force_reload = False +def parse_local_args( local_args ): + """ + Parses local cmd args which are not parsed by pywikibot + + @param local_args Local args returned by pywikibot.handle_args(args) + @type iterable + + @returns The following tuple + @return 1 Slug of given subtask (Arg "-task") + @rtype str + @return 2 GenFactory with parsed pagegenerator args + @rtype pagegenerators.GeneratorFactory + @return 3 Additional args for subtasks + @rtype dict + @rtype tuple + """ - # Subtask selects the specific bot to run - # Default is reddiscparser - subtask = None + # This factory is responsible for processing command line arguments + # that are also used by other scripts and that determine on which pages + # to work on. + genFactory = pagegenerators.GeneratorFactory() - # kwargs are passed to selected bot as **kwargs - kwargs = dict() + # If always is True, bot won't ask for confirmation of edit (automode) + # always = False - # Parse command line arguments - for arg in local_args: + # If force_reload is True, bot will always parse Countrylist regardless + # if parsing is needed or not + # force_reload = False - # Split args - arg, sep, value = arg.partition(':') + # Subtask selects the specific bot to run + # Default is reddiscparser + subtask = None - if arg.startswith("-always"): - # always = True - pass - elif arg.startswith("-task"): - subtask = value - else: - genFactory.handleArg(arg) + # kwargs are passed to selected bot as **kwargs + kwargs = dict() - # After parsing args we can select bot to run - if not subtask or subtask == "discparser": - # Default case: discparser - subtask = "discparser" + # Parse command line arguments + for arg in local_args: - # Import related bot - from bots.reddiscparser import 
DiscussionParserBot as Bot + # Split args + arg, sep, value = arg.partition(':') - # + if arg.startswith("-always"): + # always = True + pass + elif arg.startswith("-task"): + subtask = value else: - jogobot.output( ( - "\03{{red}} Given subtask \"{subtask} \"" + - "is not existing!" ).format( subtask=subtask ), "ERROR" ) + genFactory.handleArg(arg) - # Bot gets prepared genFactory as first param and possible kwargs dict - # It has to threw an exception if something does not work properly - try: - # Init bot with genFactory and **kwargs - bot = Bot( genFactory, **kwargs ) + # Return Tuple + return ( subtask, genFactory, kwargs ) - except: - # Catch Errors while initiation - jogobot.output( ( - "\03{{red}} Error while trying to init " + - "subtask \"{task_slug}-{subtask} \"!" ). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - raise - else: - # Init successfull - jogobot.output( ( - "Subtask \"{task_slug}-{subtask}\" was" + - "initiated successfully" ). - format(task_slug=task_slug, subtask=subtask) ) - - # Fire up Bot - # Bot must have implemented a run()-method - # It has to threw an exception if something does not work properly - try: - # Call run method on Bot - bot.run() - - # Special event on AttributeError to catch missing run()-method - except AttributeError: - (type, value, traceback) = sys.exc_info() - - # Catch missing run()-method - if "has no attribute 'run'" in value: - jogobot.output( ( - "\03{{red}} Error while trying to run " + - "subtask \"{task_slug}-{subtask} \": +" - "Run-method is missing! "). 
- format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - - # Pass through other AttributeError - else: - raise - - except: + +def prepare_bot( task_slug, subtask, genFactory, subtask_args ): + """ + Handles importing subtask Bot class and prepares specific args + + Throws exception if bot not exists + + @param task_slug Task slug, needed for logging + @type task_slug str + @param subtask Slug of given subtask + @type subtask str + @param genFactory GenFactory with parsed pagegenerator args + @type genFactory pagegenerators.GeneratorFactory + @param subtask_args Additional args for subtasks + @type subtask_args dict\ + + @returns The following tuple + @return 1 Subtask slug (replaced None for default) + @rtype str + @return 2 Botclass of given subtask (Arg "-task") + @rtype Class + @return 3 GenFactory with parsed pagegenerator args + @rtype pagegenerators.GeneratorFactory + @return 4 Additional args for subtasks + @rtype dict + @rtype tuple + """ + # kwargs are passed to selected bot as **kwargs + kwargs = dict() + + if not subtask or subtask == "discparser": + # Default case: discparser + subtask = "discparser" + + # Import related bot + from bots.reddiscparser import DiscussionParserBot as Bot + + # Subtask error + else: + jogobot.output( ( + "\03{{red}} Given subtask \"{subtask} \"" + + "is not existing!" ).format( subtask=subtask ), "ERROR" ) + raise Exception + + return ( subtask, Bot, genFactory, kwargs ) + + +def init_bot( task_slug, subtask, Bot, genFactory, **kwargs ): + """ + Initiates Bot-Object with Class given in Bot and passes params genFactory + and kwargs to it + + Passes through exception generated by Bot.__init__() after logging. 
+ + @param task_slug Task slug, needed for logging + @type task_slug str + @param subtask Slug of given subtask + @type subtask str + @param Bot Bot class to build bot-object from + @type Class + @param genFactory GenFactory with parsed pagegenerator args + @type genFactory pagegenerators.GeneratorFactory + @param **kwargs Additional args for Bot() + @type **kwargs dict + + @returns bot-object + @type type(Bot()) + """ + # Bot gets prepared genFactory as first param and possible kwargs dict + # It has to threw an exception if something does not work properly + try: + # Init bot with genFactory and **kwargs + bot = Bot( genFactory, **kwargs ) + + except: + # Catch Errors while initiation + jogobot.output( ( + "\03{{red}} Error while trying to init " + + "subtask \"{task_slug}-{subtask}\"!" ). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + raise + else: + # Init successfull + jogobot.output( ( + "Subtask \"{task_slug}-{subtask}\" was " + + "initiated successfully" ). + format(task_slug=task_slug, subtask=subtask) ) + return bot + + +def run_bot( task_slug, subtask, bot ): + """ + Calls the run()-method of bot-object + + Passes through exceptions generated by Bot.__init__() after logging. + Catches Errors caused by missing run(0-method. + + @param task_slug Task slug, needed for logging + @type task_slug str + @param subtask Slug of given subtask + @type subtask str + @param bot Bot object to call run()-method on + @type object with method run + """ + + # Fire up Bot + # Bot must have implemented a run()-method + # It has to threw an exception if something does not work properly + try: + # Call run method on Bot + bot.run() + + # Special event on AttributeError to catch missing run()-method + except AttributeError: + (type, value, traceback) = sys.exc_info() + + # Catch missing run()-method + if "has no attribute 'run'" in value: jogobot.output( ( "\03{{red}} Error while trying to run " + - "subtask \"{task_slug}-{subtask} \"!" ). 
+ "subtask \"{task_slug}-{subtask} \": +" + "Run-method is missing! "). format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - raise + # Pass through other AttributeError else: - # Run successfull - jogobot.output( ( - "Subtask \"{task_slug}-{subtask}\" was finished successfully"). - format(task_slug=task_slug, subtask=subtask) ) + raise + + except: + jogobot.output( ( + "\03{{red}} Error while trying to run " + + "subtask \"{task_slug}-{subtask} \"!" ). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + raise + + else: + # Run successfull + jogobot.output( ( + "Subtask \"{task_slug}-{subtask}\" was finished successfully"). + format(task_slug=task_slug, subtask=subtask) ) + + +def main(*args): + """ + Process command line arguments and invoke bot. + + If args is an empty list, sys.argv is used. + + @param args: command line arguments + @type args: list of unicode + """ + + # Process global arguments to determine desired site + local_args = pywikibot.handle_args(args) + + # Get the jogobot-task_slug (basename of current file without ending) + task_slug = os.path.basename(__file__)[:-len(".py")] + + # Before run, we need to check wether we are currently active or not + if not active( task_slug ): + return + + # Parse local Args to get information about subtask + ( subtask, genFactory, subtask_args ) = parse_local_args( local_args ) + + # select subtask and prepare args + ( subtask, Bot, genFactory, kwargs ) = prepare_bot( + task_slug, subtask, genFactory, subtask_args ) + + # Init Bot + bot = init_bot( task_slug, subtask, Bot, genFactory, **kwargs) + + # Run bot + run_bot( task_slug, subtask, bot ) if( __name__ == "__main__" ): From 0ceb2e6e836dfd19a225227b521dc1e99bb9f54d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 16:58:20 +0200 Subject: [PATCH 07/10] Add methods to build gen to DiscussionParser With the new wrapper script the Bot gets a GenFactory and has to build a generator out of it 
on its own Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=83 FS#83] --- bots/reddiscparser.py | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 7f66a2f..2a47642 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -58,7 +58,47 @@ class DiscussionParserBot( to work. @type generator: generator. """ - super( DiscussionParserBot, self ).__init__(generator=generator) + + def build_generator(self): + """ + Builds generator to work on, based on self.genFactory + """ + # Check wether there are generators waiting for factoring, if not + # use configured categories + if not self.genFactory.gens: + self.apply_conf_cat_generators() + + # Create combined Generator (Union of all Generators) + gen = self.genFactory.getCombinedGenerator() + + if gen: + # The preloading generator is responsible for downloading multiple + # pages from the wiki simultaneously. + self.gen = pagegenerators.PreloadingGenerator(gen) + + else: + pywikibot.showHelp() + + def apply_conf_cat_generators( self ): + """ + Builds generators for categories which are read from jogobot.config + + Parameters: + @param genFactory: The GeneratorFactory to which the builded + generators should be added. 
+ @type genFactory pagegenerators.GeneratorFactory + """ + # Create Generators for configured Categories + for category in jogobot.config["redundances"]["redpage_cats"]: + gen = self.genFactory.getCategoryGen( + category, gen_func=pagegenerators.CategorizedPageGenerator) + + # If there is one, append to genFactory + if gen: + self.genFactory.gens.append(gen) + + # Reset gen for next iteration + gen = None def run( self ): """ From 2be0a8903de6600939999f6dcb3da813ef584be5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 17:02:51 +0200 Subject: [PATCH 08/10] Adjust constructor for wrapper-script The new wrapper-script calls a standardized API We need to conform to that Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=83 FS#83] --- bots/reddiscparser.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 2a47642..818eb05 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -49,16 +49,27 @@ class DiscussionParserBot( onlyinclude_re = re.compile( jogobot.config["redundances"]["reddiscs_onlyinclude_re"] ) - def __init__( self, genFactory, **kwargs ): + def __init__( self, genFactory, **kwargs ): """ Constructor Parameters: - @param generator: The page generator that determines on which pages - to work. - @type generator: generator. 
+ @param genFactory GenFactory with parsed pagegenerator args to + build generator + @type genFactory pagegenerators.GeneratorFactory + @param **kwargs Additional args + @type iterable """ + # Copy needed args + self.genFactory = genFactory + + # Build generator with genFactory + self.build_generator() + + # Run super class init with builded generator + super( DiscussionParserBot, self ).__init__(generator=self.gen) + def build_generator(self): """ Builds generator to work on, based on self.genFactory From d0fa15d0edd12c5e17f2e915d08f13b1f712b928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 18:27:11 +0200 Subject: [PATCH 09/10] Update jogobot module to get standard Start-API [FS#84] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] --- jogobot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jogobot b/jogobot index 2173f29..28d03f3 160000 --- a/jogobot +++ b/jogobot @@ -1 +1 @@ -Subproject commit 2173f2984f1de6950728a15709bf93db5188731d +Subproject commit 28d03f35b848a33ad45d3f5f8f3f82e8c45534ec From 604b7bd8b726fb56f2ae6fb4b6d3871a6518eedc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 18:51:42 +0200 Subject: [PATCH 10/10] Now use Bot-Start API from jogobot framework API was moved to jogobot to share with other tasks Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] --- red.py | 196 +++------------------------------------------------------ 1 file changed, 7 insertions(+), 189 deletions(-) diff --git a/red.py b/red.py index d9bda88..733def2 100644 --- a/red.py +++ b/red.py @@ -26,101 +26,12 @@ Wrapper script to invoke all redundances bot 
tasks """ import os -import sys import pywikibot -from pywikibot import pagegenerators import jogobot -def active(task_slug): - """ - Checks up if bot with given task_slug is active via jogobot.framework - - @param task_slug Task slug to check - @type task_slug str - - @return True if active, otherwise False - @rtype bool - """ - - try: - # Will throw Exception if disabled/blocked - # jogobot.is_active( task_slug ) - pass - - except jogobot.jogobot.Blocked: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), - "CRITICAL" ) - return False - - except jogobot.jogobot.Disabled: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{red} %s (%s)" % (value, type ), - "ERROR" ) - return False - - # Bot/Task is active - else: - return True - - -def parse_local_args( local_args ): - """ - Parses local cmd args which are not parsed by pywikibot - - @param local_args Local args returned by pywikibot.handle_args(args) - @type iterable - - @returns The following tuple - @return 1 Slug of given subtask (Arg "-task") - @rtype str - @return 2 GenFactory with parsed pagegenerator args - @rtype pagegenerators.GeneratorFactory - @return 3 Additional args for subtasks - @rtype dict - @rtype tuple - """ - - # This factory is responsible for processing command line arguments - # that are also used by other scripts and that determine on which pages - # to work on. 
- genFactory = pagegenerators.GeneratorFactory() - - # If always is True, bot won't ask for confirmation of edit (automode) - # always = False - - # If force_reload is True, bot will always parse Countrylist regardless - # if parsing is needed or not - # force_reload = False - - # Subtask selects the specific bot to run - # Default is reddiscparser - subtask = None - - # kwargs are passed to selected bot as **kwargs - kwargs = dict() - - # Parse command line arguments - for arg in local_args: - - # Split args - arg, sep, value = arg.partition(':') - - if arg.startswith("-always"): - # always = True - pass - elif arg.startswith("-task"): - subtask = value - else: - genFactory.handleArg(arg) - - # Return Tuple - return ( subtask, genFactory, kwargs ) - - def prepare_bot( task_slug, subtask, genFactory, subtask_args ): """ Handles importing subtask Bot class and prepares specific args @@ -167,101 +78,6 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ): return ( subtask, Bot, genFactory, kwargs ) -def init_bot( task_slug, subtask, Bot, genFactory, **kwargs ): - """ - Initiates Bot-Object with Class given in Bot and passes params genFactory - and kwargs to it - - Passes through exception generated by Bot.__init__() after logging. 
- - @param task_slug Task slug, needed for logging - @type task_slug str - @param subtask Slug of given subtask - @type subtask str - @param Bot Bot class to build bot-object from - @type Class - @param genFactory GenFactory with parsed pagegenerator args - @type genFactory pagegenerators.GeneratorFactory - @param **kwargs Additional args for Bot() - @type **kwargs dict - - @returns bot-object - @type type(Bot()) - """ - # Bot gets prepared genFactory as first param and possible kwargs dict - # It has to threw an exception if something does not work properly - try: - # Init bot with genFactory and **kwargs - bot = Bot( genFactory, **kwargs ) - - except: - # Catch Errors while initiation - jogobot.output( ( - "\03{{red}} Error while trying to init " + - "subtask \"{task_slug}-{subtask}\"!" ). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - raise - else: - # Init successfull - jogobot.output( ( - "Subtask \"{task_slug}-{subtask}\" was " + - "initiated successfully" ). - format(task_slug=task_slug, subtask=subtask) ) - return bot - - -def run_bot( task_slug, subtask, bot ): - """ - Calls the run()-method of bot-object - - Passes through exceptions generated by Bot.__init__() after logging. - Catches Errors caused by missing run(0-method. 
- - @param task_slug Task slug, needed for logging - @type task_slug str - @param subtask Slug of given subtask - @type subtask str - @param bot Bot object to call run()-method on - @type object with method run - """ - - # Fire up Bot - # Bot must have implemented a run()-method - # It has to threw an exception if something does not work properly - try: - # Call run method on Bot - bot.run() - - # Special event on AttributeError to catch missing run()-method - except AttributeError: - (type, value, traceback) = sys.exc_info() - - # Catch missing run()-method - if "has no attribute 'run'" in value: - jogobot.output( ( - "\03{{red}} Error while trying to run " + - "subtask \"{task_slug}-{subtask} \": +" - "Run-method is missing! "). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - - # Pass through other AttributeError - else: - raise - - except: - jogobot.output( ( - "\03{{red}} Error while trying to run " + - "subtask \"{task_slug}-{subtask} \"!" ). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - raise - - else: - # Run successfull - jogobot.output( ( - "Subtask \"{task_slug}-{subtask}\" was finished successfully"). - format(task_slug=task_slug, subtask=subtask) ) - - def main(*args): """ Process command line arguments and invoke bot. 
@@ -278,22 +94,24 @@ def main(*args): # Get the jogobot-task_slug (basename of current file without ending) task_slug = os.path.basename(__file__)[:-len(".py")] + # Disabled until [FS#86] is done # Before run, we need to check wether we are currently active or not - if not active( task_slug ): - return + # if not jogobot.bot.active( task_slug ): + # return # Parse local Args to get information about subtask - ( subtask, genFactory, subtask_args ) = parse_local_args( local_args ) + ( subtask, genFactory, subtask_args ) = jogobot.bot.parse_local_args( + local_args ) # select subtask and prepare args ( subtask, Bot, genFactory, kwargs ) = prepare_bot( task_slug, subtask, genFactory, subtask_args ) # Init Bot - bot = init_bot( task_slug, subtask, Bot, genFactory, **kwargs) + bot = jogobot.bot.init_bot( task_slug, subtask, Bot, genFactory, **kwargs) # Run bot - run_bot( task_slug, subtask, bot ) + jogobot.bot.run_bot( task_slug, subtask, bot ) if( __name__ == "__main__" ):