From dcc485151392a9c05d75e6b845d5c7d3fd1044a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 15:27:42 +0200 Subject: [PATCH 1/3] Check reddisc page titles against regex To prevent parsing pages which have been wrongly categorized in configured cats or are given via cmd params. Parsing them results in unexpected behaviour. Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] --- reddiscparser.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/reddiscparser.py b/reddiscparser.py index 6525ac9..00329e4 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -27,6 +27,7 @@ Script to parse all reddisc pages in configured categories import os import sys +import re import pywikibot from pywikibot import pagegenerators @@ -46,6 +47,10 @@ class DiscussionParserBot( Botclass witch initialises the parsing process of Redundancy Discussions """ + # RegEx to filter wrong pages + onlyinclude_re = re.compile( + jogobot.config["redundances"]["reddiscs_onlyinclude_re"] ) + def __init__( self, generator ): """ Constructor @@ -86,6 +91,11 @@ class DiscussionParserBot( return + # Exclude pages which does not match pattern + if not type(self).onlyinclude_re.search( self.current_page.title() ): + + return + # Initiate RedPage object red_page = redpage.RedPage( self.current_page ) @@ -102,7 +112,7 @@ class DiscussionParserBot( # If successfully parsed whole page, flush # db write cache redfam.RedFamParser.flush_db_cache() - jogobot.output( "Page [[{redisc}]] parsed".format( + jogobot.output( "Page [[{reddisc}]] parsed".format( reddisc=red_page.page.title() ) ) From ee8ebbc8bc088d41ba15801f7d42ac3f29bbbf1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 15:41:13 +0200 Subject: [PATCH 2/3] Make sure only flush db if there are redfams To prevent doing unnecessary work and trying to use a non-existent db connection
Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] --- reddiscparser.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/reddiscparser.py b/reddiscparser.py index 00329e4..962eb5a 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -102,18 +102,31 @@ class DiscussionParserBot( # Check whether parsing is needed if red_page.is_parsing_needed(): + # Count families for failure analysis + fam_counter = 0 + # Iterate over returned generator with redfam sections for fam in red_page.parse(): # Run RedFamParser on section text redfam.RedFamParser.parser( fam, red_page.page._pageid, red_page.is_archive() ) + + fam_counter += 1 + else: # If successfully parsed whole page, flush # db write cache - redfam.RedFamParser.flush_db_cache() - jogobot.output( "Page [[{reddisc}]] parsed".format( - reddisc=red_page.page.title() ) ) + if( fam_counter ): + redfam.RedFamParser.flush_db_cache() + jogobot.output( "Page [[{reddisc}]] parsed".format( + reddisc=red_page.page.title() ) ) + else: + jogobot.output( + "\03{red} Page [[{reddisc}]], ".format( + reddisc=red_page.page.title() ) + + "containing no redfam, parsed!", + "WARNING" ) def apply_conf_cat_generators( genFactory ): From bd2d221c488d80cf992bd4d141d2db27db1b8ce4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 15:48:30 +0200 Subject: [PATCH 3/3] Prevent flush from creating cursor without con MysqlRed.flush() tried to create a cursor in any case. If there was no connection (because the subclasses haven't been instantiated), an oursql Error occurred.
Instead, check before if there is a connection and otherwise raise an Error Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] --- mysqlred.py | 17 +++++++++++++++++ reddiscparser.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/mysqlred.py b/mysqlred.py index 055b995..77eae35 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -92,6 +92,9 @@ class MysqlRed: """ Run cached querys """ + if not cls.connection: + raise MysqlRedConnectionError( "No connection exists!" ) + cursor = cls.connection.cursor() # Execute insert query @@ -307,3 +310,17 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' break for row in res: yield row + + +class MysqlRedError(Exception): + """ + Basic Exception class for this module + """ + pass + + +class MysqlRedConnectionError(MysqlRedError): + """ + Raised if there are Errors with Mysql-Connections + """ + pass diff --git a/reddiscparser.py b/reddiscparser.py index 962eb5a..3a6f43b 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -123,7 +123,7 @@ class DiscussionParserBot( reddisc=red_page.page.title() ) ) else: jogobot.output( - "\03{red} Page [[{reddisc}]], ".format( + "\03{red}" + "Page [[{reddisc}]], ".format( reddisc=red_page.page.title() ) + "containing no redfam, parsed!", "WARNING" )