From a8605bcee64a12fc2787492d978622710c4132a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 23 Aug 2016 21:50:22 +0200 Subject: [PATCH 01/26] Mv pages-parser.py to reddiscparser.py New, more meaningfull naming conventions, from redpage to reddisc (page) Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72] --- parse-pages.py => reddiscparser.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename parse-pages.py => reddiscparser.py (100%) diff --git a/parse-pages.py b/reddiscparser.py similarity index 100% rename from parse-pages.py rename to reddiscparser.py From 6cb92c1da7cac0ecfa5875968c11aae9e8252aaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 23 Aug 2016 21:53:44 +0200 Subject: [PATCH 02/26] Rewrite parse control using pywikibot.bot classes To use the default pywikibot.classes making life easier at some point Beeing standardconform with pywikibot in handling args Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72] --- reddiscparser.py | 190 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 140 insertions(+), 50 deletions(-) diff --git a/reddiscparser.py b/reddiscparser.py index 4545aef..2d7164f 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -22,11 +22,15 @@ # # """ -Script to parse all redpages in configured categories +Script to parse all reddisc pages in configured categories """ +import os +import sys + import pywikibot from pywikibot import pagegenerators +from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot @@ -34,74 +38,160 @@ import redpage import redfam -def get_cat_pages( cat ): +class DiscussionParserBot( + # CurrentPageBot, # via next two sets 'current_page' on each treat() + ExistingPageBot, # CurrentPageBot only treats existing pages + NoRedirectPageBot ): # class which only treats non-redirects """ - Generates a iteratable generator-object with 
all pages listet in given - category - - @param cat Category to request - @type cat str - - @returns generator Iteratable object with pages of given category + Botclass witch initialises the parsing process of Redundancy Discussions """ - # Get site to work on from pywikibot config - site = pywikibot.Site() + def __init__( self, generator ): + """ + Constructor - # Retrieve the content of given category - category = pywikibot.Category( site, cat ) + Parameters: + @param generator: The page generator that determines on which pages + to work. + @type generator: generator. + """ + super( DiscussionParserBot, self ).__init__(generator=generator) - # Build an iteratable generator object with page objects for given category - generator = pagegenerators.CategorizedPageGenerator( category ) + def run( self ): + """ + Controls the overal parsing process, using super class for page switch - return generator + Needed to do things before/after treating pages is done + """ + try: + super( DiscussionParserBot, self ).run() -def main(*args): - """ - Handles process - """ + except: + raise - try: - jogobot.output( "BEGINN – parser-pages.py" ) + else: - # Iterate over configured categories - for cat in ( jogobot.config["redundances"]["redpage_cats"] ): + # If successfully parsed all pages in cat, flush db write cache + redpage.RedPage.flush_db_cache() - # Iterate over pages in current cat - for page in get_cat_pages( cat ): + def treat_page( self ): + """ + Handles work on current page + """ - # For pages configured to exclude, go on with next page - if page.title() in ( - jogobot.config["redundances"]["redpage_exclude"] ): + # Short circuit excluded pages + if self.current_page.title() in ( + jogobot.config["redundances"]["redpage_exclude"] ): - continue + return - # Initiate RedPage object - red_page = redpage.RedPage( page ) + # Initiate RedPage object + red_page = redpage.RedPage( self.current_page ) - # Check whether parsing is needed - if red_page.is_parsing_needed(): + # 
Check whether parsing is needed + if red_page.is_parsing_needed(): - # Iterate over returned generator with redfam sections - for fam in red_page.parse(): + # Iterate over returned generator with redfam sections + for fam in red_page.parse(): - # Run RedFamParser on section text - redfam.RedFamParser.parser( fam, red_page.page._pageid, - red_page.is_archive() ) - else: - # If successfully parsed whole page, flush - # db write cache - redfam.RedFamParser.flush_db_cache() - jogobot.output( "Page '%s' parsed" % - red_page.page.title() ) + # Run RedFamParser on section text + redfam.RedFamParser.parser( fam, red_page.page._pageid, + red_page.is_archive() ) else: - # If successfully parsed all pages in cat, flush db write cache - redpage.RedPage.flush_db_cache() + # If successfully parsed whole page, flush + # db write cache + redfam.RedFamParser.flush_db_cache() + jogobot.output( "Page [[{redisc}]] parsed".format( + reddisc=red_page.page.title() ) ) - finally: - jogobot.output( "END – parser-pages.py" ) - pywikibot.stopme() + +def main(*args): # noqa + """ + Process command line arguments and invoke bot. + + If args is an empty list, sys.argv is used. 
+ + @param args: command line arguments + @type args: list of unicode + """ + + # Process global arguments to determine desired site + local_args = pywikibot.handle_args(args) + + # Get the jogobot-task_slug (basename of current file without ending) + task_slug = os.path.basename(__file__)[:-len(".py")] + + # Before run, we need to check wether we are currently active or not + try: + # Will throw Exception if disabled/blocked + # jogobot.is_active( task_slug ) + pass + + except jogobot.jogobot.Blocked: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), + "CRITICAL" ) + + except jogobot.jogobot.Disabled: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{red} %s (%s)" % (value, type ), + "ERROR" ) + + # Bot/Task is active + else: + + # This factory is responsible for processing command line arguments + # that are also used by other scripts and that determine on which pages + # to work on. + genFactory = pagegenerators.GeneratorFactory() + # The generator gives the pages that should be worked upon. 
+ gen = None + + # If always is True, bot won't ask for confirmation of edit (automode) + # always = False + + # If force_reload is True, bot will always parse Countrylist regardless + # if parsing is needed or not + # force_reload = False + + # Parse command line arguments + for arg in local_args: + if arg.startswith("-always"): + # always = True + pass + else: + genFactory.handleArg(arg) + + if not gen: + + # Check wether there are generators waiting for factoring, if not + # use configured categories + if not genFactory.gens: + + # Create Generators for configured Categories + for category in jogobot.config["redundances"]["redpage_cats"]: + cgen = genFactory.getCategoryGen( + category, + gen_func=pagegenerators.CategorizedPageGenerator) + + # If there is one, append to genFactory + if cgen: + genFactory.gens.append(cgen) + + # Create combined Generator (Union of all Generators) + gen = genFactory.getCombinedGenerator() + + if gen: + # Log beginning of parsing + jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) ) + + # The preloading generator is responsible for downloading multiple + # pages from the wiki simultaneously. 
+ gen = pagegenerators.PreloadingGenerator(gen) + DiscussionParserBot( gen ).run() + else: + pywikibot.showHelp() if( __name__ == "__main__" ): main() From 17bfb32dede157bf33272c1a025b357729850561 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 11:13:12 +0200 Subject: [PATCH 03/26] Building generators of config cats in sep Function Since the main()-Function was too complex the logic to build generators out of categories provided in jogobot.conf was moved in a separate function [https://fs.golderweb.de/index.php?do=details&task_id=73 FS#73] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72] --- reddiscparser.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/reddiscparser.py b/reddiscparser.py index 2d7164f..cd9cf29 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -106,7 +106,26 @@ class DiscussionParserBot( reddisc=red_page.page.title() ) ) -def main(*args): # noqa +def apply_conf_cat_generators( genFactory ): + """ + Builds generators for categories which are read from jogobot.config + + Parameters: + @param genFactory: The GeneratorFactory to which the builded generators + should be added. + @type genFactory: pagegenerators.GeneratorFactory + """ + # Create Generators for configured Categories + for category in jogobot.config["redundances"]["redpage_cats"]: + cgen = genFactory.getCategoryGen( + category, gen_func=pagegenerators.CategorizedPageGenerator) + + # If there is one, append to genFactory + if cgen: + genFactory.gens.append(cgen) + + +def main(*args): """ Process command line arguments and invoke bot. 
@@ -168,16 +187,7 @@ def main(*args): # noqa # Check wether there are generators waiting for factoring, if not # use configured categories if not genFactory.gens: - - # Create Generators for configured Categories - for category in jogobot.config["redundances"]["redpage_cats"]: - cgen = genFactory.getCategoryGen( - category, - gen_func=pagegenerators.CategorizedPageGenerator) - - # If there is one, append to genFactory - if cgen: - genFactory.gens.append(cgen) + apply_conf_cat_generators( genFactory ) # Create combined Generator (Union of all Generators) gen = genFactory.getCombinedGenerator() From 2f878ee901051c5d58eb22f640d882dae99eaa92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 11:20:28 +0200 Subject: [PATCH 04/26] Correct filename in header Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72] --- reddiscparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reddiscparser.py b/reddiscparser.py index cd9cf29..6525ac9 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # -# parse-pages.py +# reddiscparser.py # # Copyright 2016 GOLDERWEB – Jonathan Golder # From dcc485151392a9c05d75e6b845d5c7d3fd1044a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 15:27:42 +0200 Subject: [PATCH 05/26] Check reddisc page titles against regex To prevent parsing Pages which have been categorized in configured cats wrong or are given via cmd params Parsing them results in unexpected behaviour Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] --- reddiscparser.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/reddiscparser.py b/reddiscparser.py index 6525ac9..00329e4 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -27,6 +27,7 @@ Script to parse all reddisc pages in configured categories 
import os import sys +import re import pywikibot from pywikibot import pagegenerators @@ -46,6 +47,10 @@ class DiscussionParserBot( Botclass witch initialises the parsing process of Redundancy Discussions """ + # RegEx to filter wrong pages + onlyinclude_re = re.compile( + jogobot.config["redundances"]["reddiscs_onlyinclude_re"] ) + def __init__( self, generator ): """ Constructor @@ -86,6 +91,11 @@ class DiscussionParserBot( return + # Exclude pages which does not match pattern + if not type(self).onlyinclude_re.search( self.current_page.title() ): + + return + # Initiate RedPage object red_page = redpage.RedPage( self.current_page ) @@ -102,7 +112,7 @@ class DiscussionParserBot( # If successfully parsed whole page, flush # db write cache redfam.RedFamParser.flush_db_cache() - jogobot.output( "Page [[{redisc}]] parsed".format( + jogobot.output( "Page [[{reddisc}]] parsed".format( reddisc=red_page.page.title() ) ) From ee8ebbc8bc088d41ba15801f7d42ac3f29bbbf1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 15:41:13 +0200 Subject: [PATCH 06/26] Make sure only flush db if there are redfams To prevent from doing unnecessary stuff and trying to use not existing db connection Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] --- reddiscparser.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/reddiscparser.py b/reddiscparser.py index 00329e4..962eb5a 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -102,18 +102,31 @@ class DiscussionParserBot( # Check whether parsing is needed if red_page.is_parsing_needed(): + # Count families for failure analysis + fam_counter = 0 + # Iterate over returned generator with redfam sections for fam in red_page.parse(): # Run RedFamParser on section text redfam.RedFamParser.parser( fam, red_page.page._pageid, red_page.is_archive() 
) + + fam_counter += 1 + else: # If successfully parsed whole page, flush # db write cache - redfam.RedFamParser.flush_db_cache() - jogobot.output( "Page [[{reddisc}]] parsed".format( - reddisc=red_page.page.title() ) ) + if( fam_counter ): + redfam.RedFamParser.flush_db_cache() + jogobot.output( "Page [[{reddisc}]] parsed".format( + reddisc=red_page.page.title() ) ) + else: + jogobot.output( + "\03{red} Page [[{reddisc}]], ".format( + reddisc=red_page.page.title() ) + + "containing no redfam, parsed!", + "WARNING" ) def apply_conf_cat_generators( genFactory ): From bd2d221c488d80cf992bd4d141d2db27db1b8ce4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 15:48:30 +0200 Subject: [PATCH 07/26] Prevent flush from creating cursor without con MysqlRed.flush() tried to create a cursor in any case. If there was no connection (because the subclasses haven't been instantiated an oursql Error occured. Instead, check before if there is a connection and otherwise raise an Error Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] --- mysqlred.py | 17 +++++++++++++++++ reddiscparser.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/mysqlred.py b/mysqlred.py index 055b995..77eae35 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -92,6 +92,9 @@ class MysqlRed: """ Run cached querys """ + if not cls.connection: + raise MysqlRedConnectionError( "No connection exists!" ) + cursor = cls.connection.cursor() # Execute insert query @@ -307,3 +310,17 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' break for row in res: yield row + + +class MysqlRedError(Exception): + """ + Basic Exception class for this module + """ + pass + + +class MysqlRedConnectionError(MysqlRedError): + """ + Raised if there are Errors with Mysql-Connections + """ + pass diff --git a/reddiscparser.py b/reddiscparser.py index 962eb5a..3a6f43b 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -123,7 +123,7 @@ class DiscussionParserBot( reddisc=red_page.page.title() ) ) else: jogobot.output( - "\03{red} Page [[{reddisc}]], ".format( + "\03{red}" + "Page [[{reddisc}]], ".format( reddisc=red_page.page.title() ) + "containing no redfam, parsed!", "WARNING" ) From 0bb0b2d95756a0ea8c334054a9cf10514583adfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 16:51:23 +0200 Subject: [PATCH 08/26] Make sure var beginning is always defined To prevent unbound Errors caused by using undeclared variable beginning if the redfam-section does not contain any timestamp Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=76 FS#76] --- redfam.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/redfam.py b/redfam.py index 3dec12f..7b03131 100644 --- a/redfam.py +++ b/redfam.py @@ -401,6 +401,10 @@ class RedFamParser( RedFam ): else: ending = None + # Missing dates (Task: FS#76) + else: + beginning = None + ending = None return (beginning, ending) From 95be31385982180a3fd352d54f908b385eec30aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 16:53:45 +0200 Subject: [PATCH 09/26] Pass reddisc pywikibot.page object to redfam To access page information like page title (eg. 
to get dates from it) of the reddisc page Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=76 FS#76] --- reddiscparser.py | 2 +- redfam.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/reddiscparser.py b/reddiscparser.py index 3a6f43b..43417f3 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -109,7 +109,7 @@ class DiscussionParserBot( for fam in red_page.parse(): # Run RedFamParser on section text - redfam.RedFamParser.parser( fam, red_page.page._pageid, + redfam.RedFamParser.parser( fam, red_page.page, red_page.is_archive() ) fam_counter += 1 diff --git a/redfam.py b/redfam.py index 7b03131..26b3c76 100644 --- a/redfam.py +++ b/redfam.py @@ -137,14 +137,14 @@ class RedFamParser( RedFam ): wurde gewünscht von:" __done_notice2 = "{{Erledigt|" - def __init__( self, heading, red_page_id, red_page_archive, + def __init__( self, heading, red_page, red_page_archive, beginning, ending=None ): """ Creates a RedFam object based on data collected while parsing red_pages combined with possibly former known data from db @param red_fam_heading str Wikitext heading of section - @param red_page_id int MediaWiki page_id + @param red_page page Pywikibot.page object @param red_page_archive bool Is red_page an archive @param beginning datetime Timestamp of beginning str as strptime parseable string @@ -153,7 +153,7 @@ class RedFamParser( RedFam ): """ # Set object attributes: - self._red_page_id = red_page_id + self._red_page_id = red_page._pageid self._red_page_archive = red_page_archive self._fam_hash = None @@ -341,7 +341,7 @@ class RedFamParser( RedFam ): return False @classmethod - def parser( cls, text, pageid, isarchive=False ): + def parser( cls, text, page, isarchive=False ): """ Handles parsing of redfam section @@ -360,7 +360,7 @@ class RedFamParser( RedFam ): (beginning, ending) = RedFamParser.extract_dates( text, isarchive ) # Create the RedFam object - RedFamParser( heading, pageid, isarchive, beginning, ending ) 
+ RedFamParser( heading, page, isarchive, beginning, ending ) @classmethod def extract_dates( cls, text, isarchive=False ): From ab430e00857f380d2738e25a4e276d22eb08146e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 16:56:54 +0200 Subject: [PATCH 10/26] Use month of reddisc as beginning if missing Construct a fictive but sensfull beginning if we cant detect one Needed since beginning is mandatory Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=76 FS#76] --- redfam.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/redfam.py b/redfam.py index 26b3c76..a78b150 100644 --- a/redfam.py +++ b/redfam.py @@ -359,6 +359,19 @@ class RedFamParser( RedFam ): # Extract beginnig and maybe ending (beginning, ending) = RedFamParser.extract_dates( text, isarchive ) + # Missing beginning (Task: FS#76) + # Use first day of month of reddisc + if not beginning: + match = re.search( + jogobot.config["redundances"]["reddiscs_onlyinclude_re"], + page.title() ) + + if match: + beginning = datetime.strptime( + "01. {month} {year}".format( + month=match.group(1), year=match.group(2)), + "%d. 
%B %Y" ) + # Create the RedFam object RedFamParser( heading, page, isarchive, beginning, ending ) From 1e4c8646bf890081bf04c757d205e38740d5bf83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 19:57:25 +0200 Subject: [PATCH 11/26] Reparse redfam-heading with mwparser See related ticked for detailed failure explanation Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=77 FS#77] --- redfam.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/redfam.py b/redfam.py index a78b150..41e6367 100644 --- a/redfam.py +++ b/redfam.py @@ -210,13 +210,14 @@ class RedFamParser( RedFam ): @type heading wikicode or mwparser-parseable """ - # Parse heading with mwparse if needed - if not isinstance( heading, mwparser.wikicode.Wikicode ): - heading = mwparser.parse( heading ) - # Save heading as string self._heading = str( heading ) + # Parse string heading with mwparse again everytime + # In some cases the given wikicode is broken due to syntax errors + # (Task FS#77) + heading = mwparser.parse( self._heading ) + # Save destinations of wikilinks in headings self._articlesList = [ str( link.title ) for link in heading.ifilter_wikilinks() ] From ac54aea69832baa92d4bcb3cac86f7adf6b1991d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 20:02:48 +0200 Subject: [PATCH 12/26] Use callback to detect redfam.section Detecting redfam-Sections via RegExp caused some false positives due to wrong formated things in wikisyntax. 
See Task Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=78 FS#78] --- redfam.py | 16 ++++++++++++++++ redpage.py | 5 +++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/redfam.py b/redfam.py index 41e6367..b58b94a 100644 --- a/redfam.py +++ b/redfam.py @@ -341,6 +341,22 @@ class RedFamParser( RedFam ): else: return False + @classmethod + def is_section_redfam_cb( cls, heading ): + """ + Used as callback for wikicode.get_sections in redpage.parse to + select sections which are redfams + """ + # Because of strange behavior in some cases, parse heading again + # (Task FS#77) + heading = mwparser.parse( str( heading ) ) + + # Make sure we have min. two wikilinks in heading to assume a redfam + if len( heading.filter_wikilinks() ) >= 2: + return True + else: + return False + @classmethod def parser( cls, text, page, isarchive=False ): """ diff --git a/redpage.py b/redpage.py index 2b93ae8..6bb6cc4 100644 --- a/redpage.py +++ b/redpage.py @@ -28,9 +28,10 @@ Provides a class for handling redundance discussion pages and archives import pywikibot # noqa import mwparserfromhell as mwparser -import jogobot +import jogobot # noqa from mysqlred import MysqlRedPage +from redfam import RedFamParser class RedPage: @@ -116,7 +117,7 @@ class RedPage: # include_lead = if true include first section (intro) # include_heading = if true include heading fams = self.wikicode.get_sections( - matches=jogobot.config["redundances"]["section_heading_regex"], + matches=RedFamParser.is_section_redfam_cb, include_lead=False, include_headings=True ) # Iterate over RedFam From e28acf88d1e81908107081127f0b54cf943c3b50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 22:41:41 +0200 Subject: [PATCH 13/26] Introduce new directory structure To clarify which is a bot and which are helper scripts Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=74 FS#74] --- mysqlred.py => lib/mysqlred.py | 0 
redfam.py => lib/redfam.py | 2 +- redpage.py => lib/redpage.py | 4 ++-- reddiscparser.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) rename mysqlred.py => lib/mysqlred.py (100%) rename redfam.py => lib/redfam.py (99%) rename redpage.py => lib/redpage.py (98%) diff --git a/mysqlred.py b/lib/mysqlred.py similarity index 100% rename from mysqlred.py rename to lib/mysqlred.py diff --git a/redfam.py b/lib/redfam.py similarity index 99% rename from redfam.py rename to lib/redfam.py index b58b94a..a0f566f 100644 --- a/redfam.py +++ b/lib/redfam.py @@ -35,7 +35,7 @@ import pywikibot # noqa from pywikibot.tools import deprecated # noqa import jogobot -from mysqlred import MysqlRedFam +from lib.mysqlred import MysqlRedFam class RedFam: diff --git a/redpage.py b/lib/redpage.py similarity index 98% rename from redpage.py rename to lib/redpage.py index 6bb6cc4..176f6bc 100644 --- a/redpage.py +++ b/lib/redpage.py @@ -30,8 +30,8 @@ import mwparserfromhell as mwparser import jogobot # noqa -from mysqlred import MysqlRedPage -from redfam import RedFamParser +from lib.mysqlred import MysqlRedPage +from lib.redfam import RedFamParser class RedPage: diff --git a/reddiscparser.py b/reddiscparser.py index 43417f3..f9b2059 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -35,8 +35,8 @@ from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot -import redpage -import redfam +from lib import redpage +from lib import redfam class DiscussionParserBot( From 77d1de44731b1aa51649a3e0a4a0550488dea63c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 23:53:10 +0200 Subject: [PATCH 14/26] Add a tablename prefix depending on Site To be able to run the bot on different wikis the db tables should be named pywikibot.Site dependend and changed automatically Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=79 FS#79] --- lib/mysqlred.py | 39 +++++++++++++++++++++++++-------------- 1 file 
changed, 25 insertions(+), 14 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 77eae35..9eb7f4b 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -33,6 +33,7 @@ except ImportError: import atexit +import pywikibot from pywikibot import config import jogobot @@ -53,6 +54,7 @@ class MysqlRed: db_username = config.db_username db_password = config.db_password db_name = config.db_username + jogobot.config['db_suffix'] + db_table_prefix = pywikibot.Site().family.dbName(pywikibot.Site().code) # Class variables for storing cached querys _cached_update_data = [] @@ -136,12 +138,14 @@ class MysqlRedPage( MysqlRed ): # Class variables for storing cached querys _cached_update_data = [] - _update_query = 'UPDATE `red_pages` \ -SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' + _update_query = 'UPDATE `{pre}_red_pages` \ +SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'.format( + pre=MysqlRed.db_table_prefix) _cached_insert_data = {} - _insert_query = 'INSERT INTO `red_pages` \ -( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' + _insert_query = 'INSERT INTO `{pre}_red_pages` \ +( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );'.format( + pre=MysqlRed.db_table_prefix) def __init__( self, page_id ): """ @@ -169,8 +173,10 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? 
WHERE `page_id` = ?;' cursor = type( self ).connection.cursor(mysqldb.DictCursor) - cursor.execute( 'SELECT * FROM `red_pages` WHERE `page_id` = ?;', - ( self.__page_id, ) ) + cursor.execute( + 'SELECT * FROM `{pre}_red_pages` WHERE `page_id` = ?;'.format( + pre=MysqlRed.db_table_prefix), ( self.__page_id, ) ) + res = cursor.fetchone() if res: @@ -221,15 +227,17 @@ class MysqlRedFam( MysqlRed ): # Class variables for storing cached querys _cached_update_data = [] - _update_query = 'UPDATE `red_families` \ + _update_query = 'UPDATE `{pre}_red_families` \ SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ -`status`= ? WHERE `fam_hash` = ?;' +`status`= ? WHERE `fam_hash` = ?;'.format( + pre=MysqlRed.db_table_prefix) _cached_insert_data = {} - _insert_query = 'INSERT INTO `red_families` \ + _insert_query = 'INSERT INTO `{pre}_red_families` \ ( fam_hash, red_page_id, beginning, ending, status, heading, \ article0, article1, article2, article3, article4, article5, article6, \ -article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' +article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'.format( + pre=MysqlRed.db_table_prefix) def __init__( self ): """ @@ -252,8 +260,10 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - cursor.execute( 'SELECT * FROM `red_families` WHERE `fam_hash` = ?;', - ( fam_hash, ) ) + cursor.execute( + 'SELECT * FROM `{pre}_red_families` WHERE `fam_hash` = ?;'.format( + pre=MysqlRed.db_table_prefix), ( fam_hash, ) ) + self.data = cursor.fetchone() def add_fam( self, articlesList, heading, red_page_id, @@ -301,8 +311,9 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - cursor.execute( 'SELECT * FROM `red_families` WHERE `status` = ?;', - ( status, ) ) + cursor.execute( + 'SELECT * FROM `{pre}_red_families` WHERE `status` = ?;'.format( + pre=type( self ).db_table_prefix), ( status, ) ) while True: res = cursor.fetchmany( 1000 ) From 71b99b5f5837e43af1cc57f1890bfbf6d4d0e382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 25 Aug 2016 13:06:32 +0200 Subject: [PATCH 15/26] Delay definition of db_table_prefix db_table_prefix should be defined at init of MysqlRed and not at import to have cmdline args already parsed Otherwise it uses default family Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=79 FS#79] --- lib/mysqlred.py | 53 ++++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 9eb7f4b..499816f 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -54,7 +54,7 @@ class MysqlRed: db_username = config.db_username db_password = config.db_password db_name = config.db_username + jogobot.config['db_suffix'] - db_table_prefix = pywikibot.Site().family.dbName(pywikibot.Site().code) + db_table_prefix = False # Class variables for storing cached querys _cached_update_data = [] @@ -69,6 +69,14 @@ class MysqlRed: @returns mysql-stream MySQL Connection """ + # Needs to be generated after Parsing of Args (not at import time) + if not type(self).db_table_prefix: + type(self).db_table_prefix = \ + pywikibot.Site().family.dbName(pywikibot.Site().code) + + # Now we can setup prepared queries + self._prepare_queries() + # Connect to mysqldb only once if not type( self ).connection: @@ -89,6 +97,15 @@ class MysqlRed: type( self ).connection.close() + def _prepare_queries( self ): + """ + Used to replace placeholders in prepared queries + """ + type(self)._update_query = type(self)._update_query.format( + 
prefix=type(self).db_table_prefix) + type(self)._insert_query = type(self)._insert_query.format( + prefix=type(self).db_table_prefix) + @classmethod def flush( cls ): """ @@ -137,15 +154,14 @@ class MysqlRedPage( MysqlRed ): """ # Class variables for storing cached querys + # '{prefix}' will be replaced during super().__init__() _cached_update_data = [] - _update_query = 'UPDATE `{pre}_red_pages` \ -SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'.format( - pre=MysqlRed.db_table_prefix) + _update_query = 'UPDATE `{prefix}_red_pages` \ +SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' _cached_insert_data = {} - _insert_query = 'INSERT INTO `{pre}_red_pages` \ -( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );'.format( - pre=MysqlRed.db_table_prefix) + _insert_query = 'INSERT INTO `{prefix}_red_pages` \ +( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' def __init__( self, page_id ): """ @@ -174,8 +190,8 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'.format( cursor = type( self ).connection.cursor(mysqldb.DictCursor) cursor.execute( - 'SELECT * FROM `{pre}_red_pages` WHERE `page_id` = ?;'.format( - pre=MysqlRed.db_table_prefix), ( self.__page_id, ) ) + 'SELECT * FROM `{prefix}_red_pages` WHERE `page_id` = ?;'.format( + prefix=type(self).db_table_prefix), ( self.__page_id, ) ) res = cursor.fetchone() @@ -227,17 +243,14 @@ class MysqlRedFam( MysqlRed ): # Class variables for storing cached querys _cached_update_data = [] - _update_query = 'UPDATE `{pre}_red_families` \ + _update_query = 'UPDATE `{prefix}_red_families` \ SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ -`status`= ? WHERE `fam_hash` = ?;'.format( - pre=MysqlRed.db_table_prefix) - +`status`= ? 
WHERE `fam_hash` = ?;' _cached_insert_data = {} - _insert_query = 'INSERT INTO `{pre}_red_families` \ + _insert_query = 'INSERT INTO `{prefix}_red_families` \ ( fam_hash, red_page_id, beginning, ending, status, heading, \ article0, article1, article2, article3, article4, article5, article6, \ -article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'.format( - pre=MysqlRed.db_table_prefix) +article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' def __init__( self ): """ @@ -261,8 +274,8 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'.format( cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor.execute( - 'SELECT * FROM `{pre}_red_families` WHERE `fam_hash` = ?;'.format( - pre=MysqlRed.db_table_prefix), ( fam_hash, ) ) + 'SELECT * FROM `{prefix}_red_families` WHERE `fam_hash` = ?;'. + format( prefix=type(self).db_table_prefix), ( fam_hash, ) ) self.data = cursor.fetchone() @@ -312,8 +325,8 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);'.format( cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor.execute( - 'SELECT * FROM `{pre}_red_families` WHERE `status` = ?;'.format( - pre=type( self ).db_table_prefix), ( status, ) ) + 'SELECT * FROM `{prefix}_red_families` WHERE `status` = ?;'.format( + prefix=type( self ).db_table_prefix), ( status, ) ) while True: res = cursor.fetchmany( 1000 ) From 78eda105622c2692e2dd0e1c825119d2b14a2e94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 25 Aug 2016 22:41:13 +0200 Subject: [PATCH 16/26] Remove deprecated methods Deprecated functions which are not used anymore can be removed to make code clearer and improve maintainability Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=80 FS#80] --- lib/redfam.py | 58 --------------------------------------------------- 1 file changed, 58 deletions(-) diff --git a/lib/redfam.py b/lib/redfam.py index a0f566f..30dd22d 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -325,22 +325,6 @@ class RedFamParser( RedFam ): self._beginning, self._ending, self._status ) - @classmethod - @deprecated - def is_sectionheading( cls, line ): - """ - Checks wether given line is a red_fam section heading - - @param str line String to check - - @returns bool Returns True if it is a section heading - """ - - if cls.__sectionhead_pat.search( str(line) ): - return True - else: - return False - @classmethod def is_section_redfam_cb( cls, heading ): """ @@ -438,48 +422,6 @@ class RedFamParser( RedFam ): return (beginning, ending) - @classmethod - @deprecated( 'extract_dates' ) - def is_beginning( cls, line ): - """ - Returns the first timestamp found in line, otherwise None - - @param str line String to search in - - @returns str Timestamp, otherwise None - """ - - return cls.extract_dates( line )[0] - - @classmethod - @deprecated( 'extract_dates' ) - def is_ending( cls, line, isarchive=False ): - """ - Returns the timestamp of done notice ( if one ), otherwise 
None - - @param line String to search in - @type line str - @param isarchive If true skip searching done notice (on archivepages) - @type isarchive bool - - @returns Timestamp, otherwise None - @returntype str - """ - - return cls.extract_dates( line )[1] - - @classmethod - @deprecated( 'extract_dates' ) - def is_ending2( cls, line ): - """ - Returns the last timestamp found in line, otherwise None - @param str line String to search in - - @returns str Timestamp, otherwise None - """ - - return cls.extract_dates( line, True )[1] - class RedFamWorker( RedFam ): """ From 177a8f920f9396a6480efab60fc7c084e0234308 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 10:55:22 +0200 Subject: [PATCH 17/26] Prepare new structure to use subtasks To have only one entry point for the bot we want to have a single file (red.py) which is calling the specific task class from bots dir with a standardized call Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- reddiscparser.py => bots/reddiscparser.py | 0 red.py | 230 ++++++++++++++++++++++ 2 files changed, 230 insertions(+) rename reddiscparser.py => bots/reddiscparser.py (100%) create mode 100644 red.py diff --git a/reddiscparser.py b/bots/reddiscparser.py similarity index 100% rename from reddiscparser.py rename to bots/reddiscparser.py diff --git a/red.py b/red.py new file mode 100644 index 0000000..f9b2059 --- /dev/null +++ b/red.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# reddiscparser.py +# +# Copyright 2016 GOLDERWEB – Jonathan Golder +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. +# +# +""" +Script to parse all reddisc pages in configured categories +""" + +import os +import sys +import re + +import pywikibot +from pywikibot import pagegenerators +from pywikibot.bot import ExistingPageBot, NoRedirectPageBot + +import jogobot + +from lib import redpage +from lib import redfam + + +class DiscussionParserBot( + # CurrentPageBot, # via next two sets 'current_page' on each treat() + ExistingPageBot, # CurrentPageBot only treats existing pages + NoRedirectPageBot ): # class which only treats non-redirects + """ + Botclass witch initialises the parsing process of Redundancy Discussions + """ + + # RegEx to filter wrong pages + onlyinclude_re = re.compile( + jogobot.config["redundances"]["reddiscs_onlyinclude_re"] ) + + def __init__( self, generator ): + """ + Constructor + + Parameters: + @param generator: The page generator that determines on which pages + to work. + @type generator: generator. 
+ """ + super( DiscussionParserBot, self ).__init__(generator=generator) + + def run( self ): + """ + Controls the overal parsing process, using super class for page switch + + Needed to do things before/after treating pages is done + """ + try: + + super( DiscussionParserBot, self ).run() + + except: + raise + + else: + + # If successfully parsed all pages in cat, flush db write cache + redpage.RedPage.flush_db_cache() + + def treat_page( self ): + """ + Handles work on current page + """ + + # Short circuit excluded pages + if self.current_page.title() in ( + jogobot.config["redundances"]["redpage_exclude"] ): + + return + + # Exclude pages which does not match pattern + if not type(self).onlyinclude_re.search( self.current_page.title() ): + + return + + # Initiate RedPage object + red_page = redpage.RedPage( self.current_page ) + + # Check whether parsing is needed + if red_page.is_parsing_needed(): + + # Count families for failure analysis + fam_counter = 0 + + # Iterate over returned generator with redfam sections + for fam in red_page.parse(): + + # Run RedFamParser on section text + redfam.RedFamParser.parser( fam, red_page.page, + red_page.is_archive() ) + + fam_counter += 1 + + else: + # If successfully parsed whole page, flush + # db write cache + if( fam_counter ): + redfam.RedFamParser.flush_db_cache() + jogobot.output( "Page [[{reddisc}]] parsed".format( + reddisc=red_page.page.title() ) ) + else: + jogobot.output( + "\03{red}" + "Page [[{reddisc}]], ".format( + reddisc=red_page.page.title() ) + + "containing no redfam, parsed!", + "WARNING" ) + + +def apply_conf_cat_generators( genFactory ): + """ + Builds generators for categories which are read from jogobot.config + + Parameters: + @param genFactory: The GeneratorFactory to which the builded generators + should be added. 
+ @type genFactory: pagegenerators.GeneratorFactory + """ + # Create Generators for configured Categories + for category in jogobot.config["redundances"]["redpage_cats"]: + cgen = genFactory.getCategoryGen( + category, gen_func=pagegenerators.CategorizedPageGenerator) + + # If there is one, append to genFactory + if cgen: + genFactory.gens.append(cgen) + + +def main(*args): + """ + Process command line arguments and invoke bot. + + If args is an empty list, sys.argv is used. + + @param args: command line arguments + @type args: list of unicode + """ + + # Process global arguments to determine desired site + local_args = pywikibot.handle_args(args) + + # Get the jogobot-task_slug (basename of current file without ending) + task_slug = os.path.basename(__file__)[:-len(".py")] + + # Before run, we need to check wether we are currently active or not + try: + # Will throw Exception if disabled/blocked + # jogobot.is_active( task_slug ) + pass + + except jogobot.jogobot.Blocked: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), + "CRITICAL" ) + + except jogobot.jogobot.Disabled: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{red} %s (%s)" % (value, type ), + "ERROR" ) + + # Bot/Task is active + else: + + # This factory is responsible for processing command line arguments + # that are also used by other scripts and that determine on which pages + # to work on. + genFactory = pagegenerators.GeneratorFactory() + # The generator gives the pages that should be worked upon. 
+ gen = None + + # If always is True, bot won't ask for confirmation of edit (automode) + # always = False + + # If force_reload is True, bot will always parse Countrylist regardless + # if parsing is needed or not + # force_reload = False + + # Parse command line arguments + for arg in local_args: + if arg.startswith("-always"): + # always = True + pass + else: + genFactory.handleArg(arg) + + if not gen: + + # Check wether there are generators waiting for factoring, if not + # use configured categories + if not genFactory.gens: + apply_conf_cat_generators( genFactory ) + + # Create combined Generator (Union of all Generators) + gen = genFactory.getCombinedGenerator() + + if gen: + # Log beginning of parsing + jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) ) + + # The preloading generator is responsible for downloading multiple + # pages from the wiki simultaneously. + gen = pagegenerators.PreloadingGenerator(gen) + DiscussionParserBot( gen ).run() + else: + pywikibot.showHelp() + +if( __name__ == "__main__" ): + main() From b88efb6bdde64ea9d1dc736da224c990464eb863 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 12:17:12 +0200 Subject: [PATCH 18/26] Reflect structure changes in Code Since bot class is moved to separate dir/file we need to do some changes to rebuild functionality Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- bots/__init__.py | 2 + bots/reddiscparser.py | 109 ++---------------------------------------------------- red.py | 98 +------------------------------------ 3 files changed, 7 insertions(+), 202 deletions(-) create mode 100644 bots/__init__.py diff --git a/bots/__init__.py b/bots/__init__.py new file mode 100644 index 0000000..9327388 --- /dev/null +++ b/bots/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index f9b2059..7f66a2f 100644 --- 
a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -22,15 +22,13 @@ # # """ -Script to parse all reddisc pages in configured categories +Bot to parse all reddisc pages in given Generator or configured categories """ -import os -import sys import re -import pywikibot -from pywikibot import pagegenerators +import pywikibot # noqa +from pywikibot import pagegenerators # noqa from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot @@ -127,104 +125,3 @@ class DiscussionParserBot( reddisc=red_page.page.title() ) + "containing no redfam, parsed!", "WARNING" ) - - -def apply_conf_cat_generators( genFactory ): - """ - Builds generators for categories which are read from jogobot.config - - Parameters: - @param genFactory: The GeneratorFactory to which the builded generators - should be added. - @type genFactory: pagegenerators.GeneratorFactory - """ - # Create Generators for configured Categories - for category in jogobot.config["redundances"]["redpage_cats"]: - cgen = genFactory.getCategoryGen( - category, gen_func=pagegenerators.CategorizedPageGenerator) - - # If there is one, append to genFactory - if cgen: - genFactory.gens.append(cgen) - - -def main(*args): - """ - Process command line arguments and invoke bot. - - If args is an empty list, sys.argv is used. 
- - @param args: command line arguments - @type args: list of unicode - """ - - # Process global arguments to determine desired site - local_args = pywikibot.handle_args(args) - - # Get the jogobot-task_slug (basename of current file without ending) - task_slug = os.path.basename(__file__)[:-len(".py")] - - # Before run, we need to check wether we are currently active or not - try: - # Will throw Exception if disabled/blocked - # jogobot.is_active( task_slug ) - pass - - except jogobot.jogobot.Blocked: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), - "CRITICAL" ) - - except jogobot.jogobot.Disabled: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{red} %s (%s)" % (value, type ), - "ERROR" ) - - # Bot/Task is active - else: - - # This factory is responsible for processing command line arguments - # that are also used by other scripts and that determine on which pages - # to work on. - genFactory = pagegenerators.GeneratorFactory() - # The generator gives the pages that should be worked upon. - gen = None - - # If always is True, bot won't ask for confirmation of edit (automode) - # always = False - - # If force_reload is True, bot will always parse Countrylist regardless - # if parsing is needed or not - # force_reload = False - - # Parse command line arguments - for arg in local_args: - if arg.startswith("-always"): - # always = True - pass - else: - genFactory.handleArg(arg) - - if not gen: - - # Check wether there are generators waiting for factoring, if not - # use configured categories - if not genFactory.gens: - apply_conf_cat_generators( genFactory ) - - # Create combined Generator (Union of all Generators) - gen = genFactory.getCombinedGenerator() - - if gen: - # Log beginning of parsing - jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) ) - - # The preloading generator is responsible for downloading multiple - # pages from the wiki simultaneously. 
- gen = pagegenerators.PreloadingGenerator(gen) - DiscussionParserBot( gen ).run() - else: - pywikibot.showHelp() - -if( __name__ == "__main__" ): - main() diff --git a/red.py b/red.py index f9b2059..bee76b8 100644 --- a/red.py +++ b/red.py @@ -22,111 +22,17 @@ # # """ -Script to parse all reddisc pages in configured categories +Wrapper script to invoke all redundances bot tasks """ import os import sys -import re import pywikibot from pywikibot import pagegenerators -from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot - -from lib import redpage -from lib import redfam - - -class DiscussionParserBot( - # CurrentPageBot, # via next two sets 'current_page' on each treat() - ExistingPageBot, # CurrentPageBot only treats existing pages - NoRedirectPageBot ): # class which only treats non-redirects - """ - Botclass witch initialises the parsing process of Redundancy Discussions - """ - - # RegEx to filter wrong pages - onlyinclude_re = re.compile( - jogobot.config["redundances"]["reddiscs_onlyinclude_re"] ) - - def __init__( self, generator ): - """ - Constructor - - Parameters: - @param generator: The page generator that determines on which pages - to work. - @type generator: generator. 
- """ - super( DiscussionParserBot, self ).__init__(generator=generator) - - def run( self ): - """ - Controls the overal parsing process, using super class for page switch - - Needed to do things before/after treating pages is done - """ - try: - - super( DiscussionParserBot, self ).run() - - except: - raise - - else: - - # If successfully parsed all pages in cat, flush db write cache - redpage.RedPage.flush_db_cache() - - def treat_page( self ): - """ - Handles work on current page - """ - - # Short circuit excluded pages - if self.current_page.title() in ( - jogobot.config["redundances"]["redpage_exclude"] ): - - return - - # Exclude pages which does not match pattern - if not type(self).onlyinclude_re.search( self.current_page.title() ): - - return - - # Initiate RedPage object - red_page = redpage.RedPage( self.current_page ) - - # Check whether parsing is needed - if red_page.is_parsing_needed(): - - # Count families for failure analysis - fam_counter = 0 - - # Iterate over returned generator with redfam sections - for fam in red_page.parse(): - - # Run RedFamParser on section text - redfam.RedFamParser.parser( fam, red_page.page, - red_page.is_archive() ) - - fam_counter += 1 - - else: - # If successfully parsed whole page, flush - # db write cache - if( fam_counter ): - redfam.RedFamParser.flush_db_cache() - jogobot.output( "Page [[{reddisc}]] parsed".format( - reddisc=red_page.page.title() ) ) - else: - jogobot.output( - "\03{red}" + "Page [[{reddisc}]], ".format( - reddisc=red_page.page.title() ) + - "containing no redfam, parsed!", - "WARNING" ) +from bots.reddiscparser import DiscussionParserBot def apply_conf_cat_generators( genFactory ): From 1679e2ad6a8b10bd0d319abbab6ad4653615586e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 13:36:14 +0200 Subject: [PATCH 19/26] Prepare environment for starting subtasks Before init and run bot we need to provide a environment for it, like parsed args 
Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- red.py | 65 ++++++++++++++++++++++------------------------------------ 1 file changed, 25 insertions(+), 40 deletions(-) diff --git a/red.py b/red.py index bee76b8..dd14625 100644 --- a/red.py +++ b/red.py @@ -32,26 +32,6 @@ import pywikibot from pywikibot import pagegenerators import jogobot -from bots.reddiscparser import DiscussionParserBot - - -def apply_conf_cat_generators( genFactory ): - """ - Builds generators for categories which are read from jogobot.config - - Parameters: - @param genFactory: The GeneratorFactory to which the builded generators - should be added. - @type genFactory: pagegenerators.GeneratorFactory - """ - # Create Generators for configured Categories - for category in jogobot.config["redundances"]["redpage_cats"]: - cgen = genFactory.getCategoryGen( - category, gen_func=pagegenerators.CategorizedPageGenerator) - - # If there is one, append to genFactory - if cgen: - genFactory.gens.append(cgen) def main(*args): @@ -68,7 +48,7 @@ def main(*args): local_args = pywikibot.handle_args(args) # Get the jogobot-task_slug (basename of current file without ending) - task_slug = os.path.basename(__file__)[:-len(".py")] + task_slug = os.path.basename(__file__)[:-len(".py")] # noqa (temp) # Before run, we need to check wether we are currently active or not try: @@ -93,8 +73,6 @@ def main(*args): # that are also used by other scripts and that determine on which pages # to work on. genFactory = pagegenerators.GeneratorFactory() - # The generator gives the pages that should be worked upon. 
- gen = None # If always is True, bot won't ask for confirmation of edit (automode) # always = False @@ -103,34 +81,41 @@ def main(*args): # if parsing is needed or not # force_reload = False + # Subtask selects the specific bot to run + # Default is reddiscparser + subtask = None + + # kwargs are passed to selected bot as **kwargs + kwargs = dict() # noqa (temp) + # Parse command line arguments for arg in local_args: + + # Split args + arg, sep, value = arg.partition(':') + if arg.startswith("-always"): # always = True pass + elif arg.startswith("-task"): + subtask = value else: genFactory.handleArg(arg) - if not gen: + # After parsing args we can select bot to run + if not subtask or subtask == "discparser": + # Default case: discparser + subtask = "discparser" - # Check wether there are generators waiting for factoring, if not - # use configured categories - if not genFactory.gens: - apply_conf_cat_generators( genFactory ) + # Import related bot + from bots.reddiscparser import DiscussionParserBot as Bot # noqa (temp) - # Create combined Generator (Union of all Generators) - gen = genFactory.getCombinedGenerator() - - if gen: - # Log beginning of parsing - jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) ) - - # The preloading generator is responsible for downloading multiple - # pages from the wiki simultaneously. - gen = pagegenerators.PreloadingGenerator(gen) - DiscussionParserBot( gen ).run() + # else: - pywikibot.showHelp() + jogobot.output( ( + "\03{{red}} Given subtask \"{subtask} \"" + + "is not existing!" ).format( subtask=subtask ), "ERROR" ) + if( __name__ == "__main__" ): main() From 156f117b18ebd997a7e08454ab21455ca6491e98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 13:49:19 +0200 Subject: [PATCH 20/26] Add Bot initiation with exception handling Bot initiation needs to catch errors by Bot to enforce at least a basic logging. 
And also to be sure Init was successful before starting bot. Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- red.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/red.py b/red.py index dd14625..7a26f24 100644 --- a/red.py +++ b/red.py @@ -48,7 +48,7 @@ def main(*args): local_args = pywikibot.handle_args(args) # Get the jogobot-task_slug (basename of current file without ending) - task_slug = os.path.basename(__file__)[:-len(".py")] # noqa (temp) + task_slug = os.path.basename(__file__)[:-len(".py")] # Before run, we need to check wether we are currently active or not try: @@ -86,7 +86,7 @@ def main(*args): subtask = None # kwargs are passed to selected bot as **kwargs - kwargs = dict() # noqa (temp) + kwargs = dict() # Parse command line arguments for arg in local_args: @@ -108,7 +108,7 @@ def main(*args): subtask = "discparser" # Import related bot - from bots.reddiscparser import DiscussionParserBot as Bot # noqa (temp) + from bots.reddiscparser import DiscussionParserBot as Bot # else: @@ -116,6 +116,25 @@ def main(*args): "\03{{red}} Given subtask \"{subtask} \"" + "is not existing!" ).format( subtask=subtask ), "ERROR" ) + # Bot gets prepared genFactory as first param and possible kwargs dict + # It has to threw an exception if something does not work properly + try: + # Init bot with genFactory and **kwargs + bot = Bot( genFactory, **kwargs ) # noqa (temp) + + except: + # Catch Errors while initiation + jogobot.output( ( + "\03{{red}} Error while trying to init " + + "subtask \"{task_slug}-{subtask} \"!" ). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + raise + else: + # Init successfull + jogobot.output( ( + "{task_slug}-{subtask} init successfull" ). 
+ format(task_slug=task_slug, subtask=subtask) ) + if( __name__ == "__main__" ): main() From 460d2db18396939c13f7fbb9ca1627fbf4cb02a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 14:00:29 +0200 Subject: [PATCH 21/26] Add Bot run with exception handling Errors, especially caused by missing run-method, need to be caught to provide information in Logfile. And also to get information whether bot run was successful Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- red.py | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/red.py b/red.py index 7a26f24..3d398f4 100644 --- a/red.py +++ b/red.py @@ -34,7 +34,7 @@ from pywikibot import pagegenerators import jogobot -def main(*args): +def main(*args): # noqa (temp) """ Process command line arguments and invoke bot. @@ -120,7 +120,7 @@ def main(*args): # It has to threw an exception if something does not work properly try: # Init bot with genFactory and **kwargs - bot = Bot( genFactory, **kwargs ) # noqa (temp) + bot = Bot( genFactory, **kwargs ) except: # Catch Errors while initiation @@ -132,7 +132,44 @@ def main(*args): else: # Init successfull jogobot.output( ( - "{task_slug}-{subtask} init successfull" ). + "Subtask \"{task_slug}-{subtask}\" was" + + "initiated successfully" ). 
+ format(task_slug=task_slug, subtask=subtask) ) + + # Fire up Bot + # Bot must have implemented a run()-method + # It has to threw an exception if something does not work properly + try: + # Call run method on Bot + bot.run() + + # Special event on AttributeError to catch missing run()-method + except AttributeError: + (type, value, traceback) = sys.exc_info() + + # Catch missing run()-method + if "has no attribute 'run'" in value: + jogobot.output( ( + "\03{{red}} Error while trying to run " + + "subtask \"{task_slug}-{subtask} \": +" + "Run-method is missing! "). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + + # Pass through other AttributeError + else: + raise + + except: + jogobot.output( ( + "\03{{red}} Error while trying to run " + + "subtask \"{task_slug}-{subtask} \"!" ). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + raise + + else: + # Run successfull + jogobot.output( ( + "Subtask \"{task_slug}-{subtask}\" was finished successfully"). format(task_slug=task_slug, subtask=subtask) ) From 3540cc2a7d68e4c15d7e96bf84c11a21fd59e723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 15:18:17 +0200 Subject: [PATCH 22/26] Move functional sections to functions in main() To make main() function less complicated functional sections are moved to dedicated functions Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- red.py | 357 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 240 insertions(+), 117 deletions(-) diff --git a/red.py b/red.py index 3d398f4..d9bda88 100644 --- a/red.py +++ b/red.py @@ -34,7 +34,235 @@ from pywikibot import pagegenerators import jogobot -def main(*args): # noqa (temp) +def active(task_slug): + """ + Checks up if bot with given task_slug is active via jogobot.framework + + @param task_slug Task slug to check + @type task_slug str + + @return True if active, otherwise False + @rtype bool + """ + + 
try: + # Will throw Exception if disabled/blocked + # jogobot.is_active( task_slug ) + pass + + except jogobot.jogobot.Blocked: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), + "CRITICAL" ) + return False + + except jogobot.jogobot.Disabled: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{red} %s (%s)" % (value, type ), + "ERROR" ) + return False + + # Bot/Task is active + else: + return True + + +def parse_local_args( local_args ): + """ + Parses local cmd args which are not parsed by pywikibot + + @param local_args Local args returned by pywikibot.handle_args(args) + @type iterable + + @returns The following tuple + @return 1 Slug of given subtask (Arg "-task") + @rtype str + @return 2 GenFactory with parsed pagegenerator args + @rtype pagegenerators.GeneratorFactory + @return 3 Additional args for subtasks + @rtype dict + @rtype tuple + """ + + # This factory is responsible for processing command line arguments + # that are also used by other scripts and that determine on which pages + # to work on. 
+ genFactory = pagegenerators.GeneratorFactory() + + # If always is True, bot won't ask for confirmation of edit (automode) + # always = False + + # If force_reload is True, bot will always parse Countrylist regardless + # if parsing is needed or not + # force_reload = False + + # Subtask selects the specific bot to run + # Default is reddiscparser + subtask = None + + # kwargs are passed to selected bot as **kwargs + kwargs = dict() + + # Parse command line arguments + for arg in local_args: + + # Split args + arg, sep, value = arg.partition(':') + + if arg.startswith("-always"): + # always = True + pass + elif arg.startswith("-task"): + subtask = value + else: + genFactory.handleArg(arg) + + # Return Tuple + return ( subtask, genFactory, kwargs ) + + +def prepare_bot( task_slug, subtask, genFactory, subtask_args ): + """ + Handles importing subtask Bot class and prepares specific args + + Throws exception if bot not exists + + @param task_slug Task slug, needed for logging + @type task_slug str + @param subtask Slug of given subtask + @type subtask str + @param genFactory GenFactory with parsed pagegenerator args + @type genFactory pagegenerators.GeneratorFactory + @param subtask_args Additional args for subtasks + @type subtask_args dict\ + + @returns The following tuple + @return 1 Subtask slug (replaced None for default) + @rtype str + @return 2 Botclass of given subtask (Arg "-task") + @rtype Class + @return 3 GenFactory with parsed pagegenerator args + @rtype pagegenerators.GeneratorFactory + @return 4 Additional args for subtasks + @rtype dict + @rtype tuple + """ + # kwargs are passed to selected bot as **kwargs + kwargs = dict() + + if not subtask or subtask == "discparser": + # Default case: discparser + subtask = "discparser" + + # Import related bot + from bots.reddiscparser import DiscussionParserBot as Bot + + # Subtask error + else: + jogobot.output( ( + "\03{{red}} Given subtask \"{subtask} \"" + + "is not existing!" 
).format( subtask=subtask ), "ERROR" ) + raise Exception + + return ( subtask, Bot, genFactory, kwargs ) + + +def init_bot( task_slug, subtask, Bot, genFactory, **kwargs ): + """ + Initiates Bot-Object with Class given in Bot and passes params genFactory + and kwargs to it + + Passes through exception generated by Bot.__init__() after logging. + + @param task_slug Task slug, needed for logging + @type task_slug str + @param subtask Slug of given subtask + @type subtask str + @param Bot Bot class to build bot-object from + @type Class + @param genFactory GenFactory with parsed pagegenerator args + @type genFactory pagegenerators.GeneratorFactory + @param **kwargs Additional args for Bot() + @type **kwargs dict + + @returns bot-object + @type type(Bot()) + """ + # Bot gets prepared genFactory as first param and possible kwargs dict + # It has to threw an exception if something does not work properly + try: + # Init bot with genFactory and **kwargs + bot = Bot( genFactory, **kwargs ) + + except: + # Catch Errors while initiation + jogobot.output( ( + "\03{{red}} Error while trying to init " + + "subtask \"{task_slug}-{subtask}\"!" ). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + raise + else: + # Init successfull + jogobot.output( ( + "Subtask \"{task_slug}-{subtask}\" was " + + "initiated successfully" ). + format(task_slug=task_slug, subtask=subtask) ) + return bot + + +def run_bot( task_slug, subtask, bot ): + """ + Calls the run()-method of bot-object + + Passes through exceptions generated by Bot.__init__() after logging. + Catches Errors caused by missing run(0-method. 
+ + @param task_slug Task slug, needed for logging + @type task_slug str + @param subtask Slug of given subtask + @type subtask str + @param bot Bot object to call run()-method on + @type object with method run + """ + + # Fire up Bot + # Bot must have implemented a run()-method + # It has to threw an exception if something does not work properly + try: + # Call run method on Bot + bot.run() + + # Special event on AttributeError to catch missing run()-method + except AttributeError: + (type, value, traceback) = sys.exc_info() + + # Catch missing run()-method + if "has no attribute 'run'" in value: + jogobot.output( ( + "\03{{red}} Error while trying to run " + + "subtask \"{task_slug}-{subtask} \": +" + "Run-method is missing! "). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + + # Pass through other AttributeError + else: + raise + + except: + jogobot.output( ( + "\03{{red}} Error while trying to run " + + "subtask \"{task_slug}-{subtask} \"!" ). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + raise + + else: + # Run successfull + jogobot.output( ( + "Subtask \"{task_slug}-{subtask}\" was finished successfully"). + format(task_slug=task_slug, subtask=subtask) ) + + +def main(*args): """ Process command line arguments and invoke bot. 
@@ -51,126 +279,21 @@ def main(*args): # noqa (temp) task_slug = os.path.basename(__file__)[:-len(".py")] # Before run, we need to check wether we are currently active or not - try: - # Will throw Exception if disabled/blocked - # jogobot.is_active( task_slug ) - pass + if not active( task_slug ): + return - except jogobot.jogobot.Blocked: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), - "CRITICAL" ) + # Parse local Args to get information about subtask + ( subtask, genFactory, subtask_args ) = parse_local_args( local_args ) - except jogobot.jogobot.Disabled: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{red} %s (%s)" % (value, type ), - "ERROR" ) + # select subtask and prepare args + ( subtask, Bot, genFactory, kwargs ) = prepare_bot( + task_slug, subtask, genFactory, subtask_args ) - # Bot/Task is active - else: + # Init Bot + bot = init_bot( task_slug, subtask, Bot, genFactory, **kwargs) - # This factory is responsible for processing command line arguments - # that are also used by other scripts and that determine on which pages - # to work on. 
- genFactory = pagegenerators.GeneratorFactory() - - # If always is True, bot won't ask for confirmation of edit (automode) - # always = False - - # If force_reload is True, bot will always parse Countrylist regardless - # if parsing is needed or not - # force_reload = False - - # Subtask selects the specific bot to run - # Default is reddiscparser - subtask = None - - # kwargs are passed to selected bot as **kwargs - kwargs = dict() - - # Parse command line arguments - for arg in local_args: - - # Split args - arg, sep, value = arg.partition(':') - - if arg.startswith("-always"): - # always = True - pass - elif arg.startswith("-task"): - subtask = value - else: - genFactory.handleArg(arg) - - # After parsing args we can select bot to run - if not subtask or subtask == "discparser": - # Default case: discparser - subtask = "discparser" - - # Import related bot - from bots.reddiscparser import DiscussionParserBot as Bot - - # - else: - jogobot.output( ( - "\03{{red}} Given subtask \"{subtask} \"" + - "is not existing!" ).format( subtask=subtask ), "ERROR" ) - - # Bot gets prepared genFactory as first param and possible kwargs dict - # It has to threw an exception if something does not work properly - try: - # Init bot with genFactory and **kwargs - bot = Bot( genFactory, **kwargs ) - - except: - # Catch Errors while initiation - jogobot.output( ( - "\03{{red}} Error while trying to init " + - "subtask \"{task_slug}-{subtask} \"!" ). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - raise - else: - # Init successfull - jogobot.output( ( - "Subtask \"{task_slug}-{subtask}\" was" + - "initiated successfully" ). 
- format(task_slug=task_slug, subtask=subtask) ) - - # Fire up Bot - # Bot must have implemented a run()-method - # It has to threw an exception if something does not work properly - try: - # Call run method on Bot - bot.run() - - # Special event on AttributeError to catch missing run()-method - except AttributeError: - (type, value, traceback) = sys.exc_info() - - # Catch missing run()-method - if "has no attribute 'run'" in value: - jogobot.output( ( - "\03{{red}} Error while trying to run " + - "subtask \"{task_slug}-{subtask} \": +" - "Run-method is missing! "). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - - # Pass through other AttributeError - else: - raise - - except: - jogobot.output( ( - "\03{{red}} Error while trying to run " + - "subtask \"{task_slug}-{subtask} \"!" ). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - raise - - else: - # Run successfull - jogobot.output( ( - "Subtask \"{task_slug}-{subtask}\" was finished successfully"). - format(task_slug=task_slug, subtask=subtask) ) + # Run bot + run_bot( task_slug, subtask, bot ) if( __name__ == "__main__" ): From 0ceb2e6e836dfd19a225227b521dc1e99bb9f54d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 16:58:20 +0200 Subject: [PATCH 23/26] Add methods to build gen to DiscussionParser With the new wrapper script the Bot gets a GenFactory and has to build a generator out of it by its own Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=83 FS#83] --- bots/reddiscparser.py | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 7f66a2f..2a47642 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -58,7 +58,47 @@ class DiscussionParserBot( to work. @type generator: generator. 
""" - super( DiscussionParserBot, self ).__init__(generator=generator) + + def build_generator(self): + """ + Builds generator to work on, based on self.genFactory + """ + # Check wether there are generators waiting for factoring, if not + # use configured categories + if not self.genFactory.gens: + self.apply_conf_cat_generators() + + # Create combined Generator (Union of all Generators) + gen = self.genFactory.getCombinedGenerator() + + if gen: + # The preloading generator is responsible for downloading multiple + # pages from the wiki simultaneously. + self.gen = pagegenerators.PreloadingGenerator(gen) + + else: + pywikibot.showHelp() + + def apply_conf_cat_generators( self ): + """ + Builds generators for categories which are read from jogobot.config + + Parameters: + @param genFactory: The GeneratorFactory to which the builded + generators should be added. + @type genFactory: pagegenerators.GeneratorFactory + """ + # Create Generators for configured Categories + for category in jogobot.config["redundances"]["redpage_cats"]: + gen = self.genFactory.getCategoryGen( + category, gen_func=pagegenerators.CategorizedPageGenerator) + + # If there is one, append to genFactory + if gen: + self.genFactory.gens.append(gen) + + # Reset gen for next iteration + gen = None def run( self ): """ From 2be0a8903de6600939999f6dcb3da813ef584be5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 17:02:51 +0200 Subject: [PATCH 24/26] Adjust constructor for wrapper-script The new wrapper-script calls a standardized API We need to be conform with that Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=83 FS#83] --- bots/reddiscparser.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 2a47642..818eb05 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -49,16 +49,27 @@ class DiscussionParserBot( onlyinclude_re 
= re.compile( jogobot.config["redundances"]["reddiscs_onlyinclude_re"] ) - def __init__( self, generator ): + def __init__( self, genFactory, **kwargs ): """ Constructor Parameters: - @param generator: The page generator that determines on which pages - to work. - @type generator: generator. + @param genFactory GenFactory with parsed pagegenerator args to + build generator + @type genFactory pagegenerators.GeneratorFactory + @param **kwargs Additional args + @type iterable """ + # Copy needed args + self.genFactory = genFactory + + # Build generator with genFactory + self.build_generator() + + # Run super class init with builded generator + super( DiscussionParserBot, self ).__init__(generator=self.gen) + def build_generator(self): """ Builds generator to work on, based on self.genFactory From d0fa15d0edd12c5e17f2e915d08f13b1f712b928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 18:27:11 +0200 Subject: [PATCH 25/26] Update jogobot module to get standart Start-API [FS#84] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] --- jogobot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jogobot b/jogobot index 2173f29..28d03f3 160000 --- a/jogobot +++ b/jogobot @@ -1 +1 @@ -Subproject commit 2173f2984f1de6950728a15709bf93db5188731d +Subproject commit 28d03f35b848a33ad45d3f5f8f3f82e8c45534ec From 604b7bd8b726fb56f2ae6fb4b6d3871a6518eedc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 18:51:42 +0200 Subject: [PATCH 26/26] Now use Bot-Start API from jogobot framework API was moved to jogobot to share with other tasks Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] Related Task: 
[https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] --- red.py | 196 +++------------------------------------------------------ 1 file changed, 7 insertions(+), 189 deletions(-) diff --git a/red.py b/red.py index d9bda88..733def2 100644 --- a/red.py +++ b/red.py @@ -26,101 +26,12 @@ Wrapper script to invoke all redundances bot tasks """ import os -import sys import pywikibot -from pywikibot import pagegenerators import jogobot -def active(task_slug): - """ - Checks up if bot with given task_slug is active via jogobot.framework - - @param task_slug Task slug to check - @type task_slug str - - @return True if active, otherwise False - @rtype bool - """ - - try: - # Will throw Exception if disabled/blocked - # jogobot.is_active( task_slug ) - pass - - except jogobot.jogobot.Blocked: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), - "CRITICAL" ) - return False - - except jogobot.jogobot.Disabled: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{red} %s (%s)" % (value, type ), - "ERROR" ) - return False - - # Bot/Task is active - else: - return True - - -def parse_local_args( local_args ): - """ - Parses local cmd args which are not parsed by pywikibot - - @param local_args Local args returned by pywikibot.handle_args(args) - @type iterable - - @returns The following tuple - @return 1 Slug of given subtask (Arg "-task") - @rtype str - @return 2 GenFactory with parsed pagegenerator args - @rtype pagegenerators.GeneratorFactory - @return 3 Additional args for subtasks - @rtype dict - @rtype tuple - """ - - # This factory is responsible for processing command line arguments - # that are also used by other scripts and that determine on which pages - # to work on. 
- genFactory = pagegenerators.GeneratorFactory() - - # If always is True, bot won't ask for confirmation of edit (automode) - # always = False - - # If force_reload is True, bot will always parse Countrylist regardless - # if parsing is needed or not - # force_reload = False - - # Subtask selects the specific bot to run - # Default is reddiscparser - subtask = None - - # kwargs are passed to selected bot as **kwargs - kwargs = dict() - - # Parse command line arguments - for arg in local_args: - - # Split args - arg, sep, value = arg.partition(':') - - if arg.startswith("-always"): - # always = True - pass - elif arg.startswith("-task"): - subtask = value - else: - genFactory.handleArg(arg) - - # Return Tuple - return ( subtask, genFactory, kwargs ) - - def prepare_bot( task_slug, subtask, genFactory, subtask_args ): """ Handles importing subtask Bot class and prepares specific args @@ -167,101 +78,6 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ): return ( subtask, Bot, genFactory, kwargs ) -def init_bot( task_slug, subtask, Bot, genFactory, **kwargs ): - """ - Initiates Bot-Object with Class given in Bot and passes params genFactory - and kwargs to it - - Passes through exception generated by Bot.__init__() after logging. 
- - @param task_slug Task slug, needed for logging - @type task_slug str - @param subtask Slug of given subtask - @type subtask str - @param Bot Bot class to build bot-object from - @type Class - @param genFactory GenFactory with parsed pagegenerator args - @type genFactory pagegenerators.GeneratorFactory - @param **kwargs Additional args for Bot() - @type **kwargs dict - - @returns bot-object - @type type(Bot()) - """ - # Bot gets prepared genFactory as first param and possible kwargs dict - # It has to threw an exception if something does not work properly - try: - # Init bot with genFactory and **kwargs - bot = Bot( genFactory, **kwargs ) - - except: - # Catch Errors while initiation - jogobot.output( ( - "\03{{red}} Error while trying to init " + - "subtask \"{task_slug}-{subtask}\"!" ). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - raise - else: - # Init successfull - jogobot.output( ( - "Subtask \"{task_slug}-{subtask}\" was " + - "initiated successfully" ). - format(task_slug=task_slug, subtask=subtask) ) - return bot - - -def run_bot( task_slug, subtask, bot ): - """ - Calls the run()-method of bot-object - - Passes through exceptions generated by Bot.__init__() after logging. - Catches Errors caused by missing run(0-method. 
- - @param task_slug Task slug, needed for logging - @type task_slug str - @param subtask Slug of given subtask - @type subtask str - @param bot Bot object to call run()-method on - @type object with method run - """ - - # Fire up Bot - # Bot must have implemented a run()-method - # It has to threw an exception if something does not work properly - try: - # Call run method on Bot - bot.run() - - # Special event on AttributeError to catch missing run()-method - except AttributeError: - (type, value, traceback) = sys.exc_info() - - # Catch missing run()-method - if "has no attribute 'run'" in value: - jogobot.output( ( - "\03{{red}} Error while trying to run " + - "subtask \"{task_slug}-{subtask} \": +" - "Run-method is missing! "). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - - # Pass through other AttributeError - else: - raise - - except: - jogobot.output( ( - "\03{{red}} Error while trying to run " + - "subtask \"{task_slug}-{subtask} \"!" ). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - raise - - else: - # Run successfull - jogobot.output( ( - "Subtask \"{task_slug}-{subtask}\" was finished successfully"). - format(task_slug=task_slug, subtask=subtask) ) - - def main(*args): """ Process command line arguments and invoke bot. 
@@ -278,22 +94,24 @@ def main(*args): # Get the jogobot-task_slug (basename of current file without ending) task_slug = os.path.basename(__file__)[:-len(".py")] + # Disabled until [FS#86] is done # Before run, we need to check wether we are currently active or not - if not active( task_slug ): - return + # if not jogobot.bot.active( task_slug ): + # return # Parse local Args to get information about subtask - ( subtask, genFactory, subtask_args ) = parse_local_args( local_args ) + ( subtask, genFactory, subtask_args ) = jogobot.bot.parse_local_args( + local_args ) # select subtask and prepare args ( subtask, Bot, genFactory, kwargs ) = prepare_bot( task_slug, subtask, genFactory, subtask_args ) # Init Bot - bot = init_bot( task_slug, subtask, Bot, genFactory, **kwargs) + bot = jogobot.bot.init_bot( task_slug, subtask, Bot, genFactory, **kwargs) # Run bot - run_bot( task_slug, subtask, bot ) + jogobot.bot.run_bot( task_slug, subtask, bot ) if( __name__ == "__main__" ):