From efa919ff2739ae8093a42bc160628bfdab07d7db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 16:39:32 +0200 Subject: [PATCH 01/26] Add new bot with basic structure We need a bot to work on pages which are subjects of redfams and on the belonging talk page Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 bots/markpages.py diff --git a/bots/markpages.py b/bots/markpages.py new file mode 100644 index 0000000..6bdcb5a --- /dev/null +++ b/bots/markpages.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# markpages.py +# +# Copyright 2016 GOLDERWEB – Jonathan Golder +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. 
+# +# +""" +Bot to mark pages which were/are subjects of redundance discussions +with templates +""" + +from pywikibot.bot import CurrentPageBot + + +class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() + """ + Bot class to mark pages which were/are subjects of redundance discussions + with templates + """ + + def __init__( self, genFactory, **kwargs ): + """ + Constructor + + Parameters: + @param genFactory GenFactory with parsed pagegenerator args to + build generator + @type genFactory pagegenerators.GeneratorFactory + @param **kwargs Additional args + @type iterable + """ + + # Copy needed args + self.genFactory = genFactory + + # Build generator with genFactory + self.build_generator() + + # Run super class init with builded generator + super( MarkPagesBot, self ).__init__(generator=self.gen) + + def build_generator( self ): + """ + Builds generator + """ + self.gen = self.genFactory.getCombinedGenerator() + + def treat_page( self ): + """ + Handles work on current page + """ + + # Here is the place where to do what ever you want + print( self.current_page.title() ) From ecc78bef96494a05945a8ed87b7f153d09639163 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 18:01:02 +0200 Subject: [PATCH 02/26] Import needed modules and add redfams-generator We will need a couple of modules to build the needed generator Also we will need a generator with redfams to work with Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 6bdcb5a..ca9d586 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -26,8 +26,15 @@ Bot to mark pages which were/are subjects of redundance discussions with templates """ +from datetime import datetime + +from pywikibot import pagegenerators from pywikibot.bot import 
CurrentPageBot +import jogobot + +from lib.redfam import RedFamWorker + class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() """ @@ -47,8 +54,12 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() @type iterable """ - # Copy needed args - self.genFactory = genFactory + # Init attribute + self.__redfams = None # Will hold a generator with our redfams + + # We do not use predefined genFactory as there is no sensefull case to + # give a generator via cmd-line for this right now + self.genFactory = pagegenerators.GeneratorFactory() # Build generator with genFactory self.build_generator() @@ -56,6 +67,21 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Run super class init with builded generator super( MarkPagesBot, self ).__init__(generator=self.gen) + @property + def redfams(self): + """ + Holds redfams generator to work on in this bot + """ + # Create generator if not present + if not self.__redfams: + end_after = datetime.strptime( + jogobot.config["red.markpages"]["mark_done_after"], + "%Y-%m-%d" ) + self.__redfams = RedFamWorker.gen_by_status_and_ending( + 2, end_after) + + return self.__redfams + def build_generator( self ): """ Builds generator From da4f9b5d6bc3eaad44ee5373bf45b8fb7e410703 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 18:09:04 +0200 Subject: [PATCH 03/26] Add wrapper-generator to redfam.article_generator We need a wrapper around redfam.article_generator to pass it to pagegenerators.PageWithTalkPageGenerator and to add a reference to related redfam to each pywikibot.page-object before yielding it Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/bots/markpages.py b/bots/markpages.py index ca9d586..d20951f 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -88,6 +88,26 @@ class 
MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() """ self.gen = self.genFactory.getCombinedGenerator() + def redfam_talkpages_generator( self ): + """ + Wrappers the redfam.article_generator and + passes it to pagegenerators.PageWithTalkPageGenerator(). + Then it iterates over the generator and adds a reference to the + related redfam to each talkpage-object. + """ + + for redfam in self.redfams: + + # We need the talkpage (and only this) of each existing page + for talkpage in pagegenerators.PageWithTalkPageGenerator( + redfam.article_generator( filter_existing=True ), + return_talk_only=True ): + + # Add reference to redfam to talkpages + talkpage.redfam = redfam + + yield talkpage + def treat_page( self ): """ Handles work on current page From c4d8a95672e83d0b1b46551505a5238196eefef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 18:13:27 +0200 Subject: [PATCH 04/26] Implement build_generator-method Build_generator will add the redfam_talkpages_generator to the genFactory, build a generator of the genFactory and sets self.gen which is used as generator for run() Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index d20951f..b08776c 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -84,9 +84,14 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() def build_generator( self ): """ - Builds generator + Builds generator to pass to super class """ - self.gen = self.genFactory.getCombinedGenerator() + # Add Talkpages to work on to generatorFactory + self.genFactory.gens.append( self.redfam_talkpages_generator() ) + + # Set generator to pass to super class + self.gen = pagegenerators.PreloadingGenerator( + self.genFactory.getCombinedGenerator() ) def redfam_talkpages_generator( self ): """ From 
9beca7f6c905a6ea87f632d00c5b2734570f854a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 20:53:31 +0200 Subject: [PATCH 05/26] Implement method to add notice to disk page Adds the generated notice to the talkpage and starts the saving of the page Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 53 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index b08776c..7fae7c8 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -31,6 +31,8 @@ from datetime import datetime from pywikibot import pagegenerators from pywikibot.bot import CurrentPageBot +import mwparserfromhell as mwparser + import jogobot from lib.redfam import RedFamWorker @@ -116,7 +118,54 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() def treat_page( self ): """ Handles work on current page + + We get a reference to related redfam in current_page.redfam """ - # Here is the place where to do what ever you want - print( self.current_page.title() ) + # First we need to have the current text of page + # and parse it as wikicode + self.current_wikicode = mwparser.parse( self.current_page.text ) + + # Add notice + self.add_disc_notice_template() + + # Convert wikicode back to string to save + self.new_text = str( self.current_wikicode ) + + # Save + self.put_current( self.new_text ) + + def add_disc_notice_template( self ): + """ + Will take self.current_wikicode and adds disc notice template after the + last template in leading section or as first element if there is no + other template in leading section + """ + # The notice to add + notice = self.current_page.redfam.generate_disc_notice_template() + + # Find the right place to insert notice template + # Therfore we need the first section 
(if there is one) + leadsec = self.current_wikicode.get_sections( + flat=False, include_lead=True )[0] + + # There is none on empty pages, so we need to check + if leadsec: + # Get the last template in leadsec + ltemplate = leadsec.filter_templates()[-1] + + # If there is one, add notice after this + if ltemplate: + self.current_wikicode.insert_after(ltemplate, notice ) + + # To have it in its own line we need to add a linbreak before + self.current_wikicode.insert_before(notice, "\n" ) + + # If there is no template, add before first element on page + else: + self.current_wikicode.insert( 0, notice ) + + # If there is no leadsec (and therefore no template in it, we will add + # before the first element + else: + self.current_wikicode.insert( 0, notice ) From 2b93e4cf16c176ce13da3a78cd9afd873c14b79b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 21:39:54 +0200 Subject: [PATCH 06/26] Check if notice is present before add To prevent duplications we need to check whether notice is already present on talkpage Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 7fae7c8..9be668a 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -142,7 +142,12 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() other template in leading section """ # The notice to add - notice = self.current_page.redfam.generate_disc_notice_template() + self.disc_notice = \ + self.current_page.redfam.generate_disc_notice_template() + + # Check if it is already present in wikicode + if self.disc_notice_present(): + return False # Find the right place to insert notice template # Therfore we need the first section (if there is one) @@ -156,16 +161,43 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on 
each treat() # If there is one, add notice after this if ltemplate: - self.current_wikicode.insert_after(ltemplate, notice ) + self.current_wikicode.insert_after(ltemplate, self.disc_notice) # To have it in its own line we need to add a linbreak before - self.current_wikicode.insert_before(notice, "\n" ) + self.current_wikicode.insert_before(self.disc_notice, "\n" ) # If there is no template, add before first element on page else: - self.current_wikicode.insert( 0, notice ) + self.current_wikicode.insert( 0, self.disc_notice ) # If there is no leadsec (and therefore no template in it, we will add # before the first element else: - self.current_wikicode.insert( 0, notice ) + self.current_wikicode.insert( 0, self.disc_notice ) + + # Notice was added + return True + + def disc_notice_present(self): + """ + Checks if disc notice which shall be added is already present. + """ + # Iterate over Templates with same name (if any) to search equal + # Link to decide if they are the same + for present_notice in self.current_wikicode.ifilter_templates( + matches=self.disc_notice.name ): + + # Get reddisc page.title of notice to add + add_notice_link_tile = self.disc_notice.get( + "Diskussion").partition("#")[0] + # Get reddisc page.title of possible present notice + present_notice_link_tile = present_notice.get( + "Diskussion").partition("#")[0] + + # If those are equal, notice is already present + if add_notice_link_tile == present_notice_link_tile: + return True + + # If nothing is found, loop will run till its end + else: + return False From 59d4d23c83a9863ca0cb798c59e21002ff51e3f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 11:33:54 +0200 Subject: [PATCH 07/26] Set edit summary for each edit Each edit of bot needs an edit summary Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=92 FS#92] --- bots/markpages.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/bots/markpages.py b/bots/markpages.py index 9be668a..754e1bf 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -132,8 +132,12 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Convert wikicode back to string to save self.new_text = str( self.current_wikicode ) + # Define edit summary + summary = jogobot.config["red.markpages"]["mark_done_summary"].format( + reddisc=self.current_page.redfam.get_disc_link() ) + # Save - self.put_current( self.new_text ) + self.put_current( self.new_text, summary=summary ) def add_disc_notice_template( self ): """ From 20b811bc2a67bd40b9bca6aa59dc948d0ff2b9e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 11:48:07 +0200 Subject: [PATCH 08/26] Make sure edit summary starts with bot Due to bot policy all edit summaries of bot edits have to start with "Bot:" Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=92 FS#92] --- bots/markpages.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bots/markpages.py b/bots/markpages.py index 754e1bf..e47f4d7 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -134,7 +134,11 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Define edit summary summary = jogobot.config["red.markpages"]["mark_done_summary"].format( - reddisc=self.current_page.redfam.get_disc_link() ) + reddisc=self.current_page.redfam.get_disc_link() ).strip() + + # Make sure summary starts with "Bot:" + if not summary[:len("Bot:")] == "Bot:": + summary = "Bot: " + summary.strip() # Save self.put_current( self.new_text, summary=summary ) From 8c56125a7b56617369d11cecb0c69359d323da7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 12:07:11 +0200 Subject: [PATCH 09/26] Update talkpage notice template Exact date is not necessary and end could be omitted if of the same month Related Task: 
[https://fs.golderweb.de/index.php?do=details&task_id=93 FS#93] --- lib/redfam.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/redfam.py b/lib/redfam.py index 9889908..f0b36fd 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -554,12 +554,13 @@ class RedFamWorker( RedFam ): param_cnt += 1 # Add begin - template.add( "Beginn", self._mysql.data[ 'beginning' ].strftime( - "%d. %B %Y").lstrip("0"), True ) + begin = self._mysql.data[ 'beginning' ].strftime( "%B %Y" ) + template.add( "Beginn", begin, True ) - # Add end - template.add( "Ende", self._mysql.data[ 'ending' ].strftime( - "%d. %B %Y").lstrip("0"), True ) + # Add end (if not same as begin) + end = self._mysql.data[ 'ending' ].strftime( "%B %Y" ) + if not end == begin: + template.add( "Ende", end, True ) # Add link to related reddisc template.add( "Diskussion", self.get_disc_link(), True ) From 6149dcdb8b097fb9dccef7ffc930e10641cd8548 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 14:28:28 +0200 Subject: [PATCH 10/26] Apply changes to data structure See related ticket Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=94 FS#94] --- bots/markpages.py | 2 +- lib/mysqlred.py | 98 ++++++++++++++--------------- lib/redfam.py | 156 ++++++++++++++++++++++++++++++---------------- lib/redpage.py | 77 ++++++++++++++++++++--- 4 files changed, 222 insertions(+), 111 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index e47f4d7..244ba14 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -80,7 +80,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() jogobot.config["red.markpages"]["mark_done_after"], "%Y-%m-%d" ) self.__redfams = RedFamWorker.gen_by_status_and_ending( - 2, end_after) + "archived", end_after) return self.__redfams diff --git a/lib/mysqlred.py b/lib/mysqlred.py index f57ae2b..79360a8 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -156,21 
+156,21 @@ class MysqlRedPage( MysqlRed ): # Class variables for storing cached querys # '{prefix}' will be replaced during super().__init__() _cached_update_data = [] - _update_query = 'UPDATE `{prefix}_red_pages` \ -SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' + _update_query = 'UPDATE `{prefix}_redpages` \ +SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;' _cached_insert_data = {} - _insert_query = 'INSERT INTO `{prefix}_red_pages` \ -( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' + _insert_query = 'INSERT INTO `{prefix}_redpages` \ +( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? );' - def __init__( self, page_id ): + def __init__( self, pageid ): """ Creates a new instance, runs __init__ of parent class """ super().__init__( ) - self.__page_id = int( page_id ) + self.__pageid = int( pageid ) self.data = self.get_page() @@ -185,7 +185,7 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' """ Retrieves a red page row from MySQL-Database for given page_id - @param int page_id MediaWiki page_id for page to retrieve + @param int pageid MediaWiki page_id for page to retrieve @returns tuple Tuple with data for given page_id bool FALSE if none found @@ -194,8 +194,8 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' cursor = type( self ).connection.cursor(mysqldb.DictCursor) cursor.execute( - 'SELECT * FROM `{prefix}_red_pages` WHERE `page_id` = ?;'.format( - prefix=type(self).db_table_prefix), ( self.__page_id, ) ) + 'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format( + prefix=type(self).db_table_prefix), ( self.__pageid, ) ) res = cursor.fetchone() @@ -204,40 +204,40 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? 
WHERE `page_id` = ?;' else: return False - def add_page( self, page_title, rev_id, status=0 ): + def add_page( self, pagetitle, revid, status=0 ): """ - Inserts a red page row in MySQL-Database for given page_id + Inserts a red page row in MySQL-Database for given pageid - @param int rev_id MediaWiki current rev_id - @param str page_title MediaWiki new page_title + @param int revid MediaWiki current revid + @param str pagetitle MediaWiki new pagetitle @param int status Page parsing status """ - insert_data = { self.__page_id: ( self.__page_id, page_title, - rev_id, status ) } + insert_data = { self.__pageid: ( self.__pageid, pagetitle, + revid, status ) } type( self )._cached_insert_data.update( insert_data ) # Manualy construct self.data dict - self.data = { 'page_id': self.__page_id, 'rev_id': rev_id, - 'page_title': page_title, 'status': status } + self.data = { 'pageid': self.__pageid, 'revid': revid, + 'pagetitle': pagetitle, 'status': status } - def update_page( self, rev_id=None, page_title=None, status=0 ): + def update_page( self, revid=None, pagetitle=None, status=0 ): """ Updates the red page row in MySQL-Database for given page_id - @param int rev_id MediaWiki current rev_id - @param str page_title MediaWiki new page_title + @param int revid MediaWiki current rev_id + @param str pagetitle MediaWiki new page_title @param int status Page parsing status """ - if not page_title: - page_title = self.data[ 'page_title' ] - if not rev_id: - rev_id = self.data[ 'rev_id' ] + if not pagetitle: + pagetitle = self.data[ 'pagetitle' ] + if not revid: + revid = self.data[ 'revid' ] - type( self )._cached_update_data.append( ( page_title, rev_id, - status, self.__page_id ) ) + type( self )._cached_update_data.append( ( pagetitle, revid, + status, self.__pageid ) ) class MysqlRedFam( MysqlRed ): @@ -247,22 +247,22 @@ class MysqlRedFam( MysqlRed ): # Class variables for storing cached querys _cached_update_data = [] - _update_query = 'UPDATE `{prefix}_red_families` \ 
-SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ -`status`= ? WHERE `fam_hash` = ?;' + _update_query = 'UPDATE `{prefix}_redfams` \ +SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ +`status`= ? WHERE `famhash` = ?;' _cached_insert_data = {} - _insert_query = 'INSERT INTO `{prefix}_red_families` \ -( fam_hash, red_page_id, beginning, ending, status, heading, \ + _insert_query = 'INSERT INTO `{prefix}_redfams` \ +( famhash, redpageid, beginning, ending, status, heading, \ article0, article1, article2, article3, article4, article5, article6, \ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' - def __init__( self, fam_hash=None ): + def __init__( self, famhash=None ): """ Creates a new instance, runs __init__ of parent class """ - self.__fam_hash = fam_hash + self.__famhash = famhash super().__init__( ) @@ -273,27 +273,27 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' """ pass - def get_fam( self, fam_hash ): + def get_fam( self, famhash ): """ Retrieves a red family row from MySQL-Database for given fam_hash @returns dict Dictionairy with data for given fam hash False if none found """ - self.__fam_hash = fam_hash + self.__famhash = famhash cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor.execute( - 'SELECT * FROM `{prefix}_red_families` WHERE `fam_hash` = ?;'. - format( prefix=type(self).db_table_prefix), ( fam_hash, ) ) + 'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'. + format( prefix=type(self).db_table_prefix), ( famhash, ) ) self.data = cursor.fetchone() - def add_fam( self, articlesList, heading, red_page_id, + def add_fam( self, articlesList, heading, redpageid, beginning, ending=None, status=0 ): - data = [ self.__fam_hash, red_page_id, beginning, ending, + data = [ self.__famhash, redpageid, beginning, ending, status, heading ] for article in articlesList: @@ -304,29 +304,29 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' data = tuple( data ) - insert_data = { self.__fam_hash: data } + insert_data = { self.__famhash: data } type( self )._cached_insert_data.update( insert_data ) # Manualy construct self.data dict - data_keys = ( 'fam_hash', 'red_page_id', 'beginning', 'ending', + data_keys = ( 'fam_hash', 'redpageid', 'beginning', 'ending', 'status', 'heading', 'article0', 'article1', 'article2', 'article3', 'article4', 'article5', 'article6', 'article7' ) self.data = dict( zip( data_keys, data ) ) - def update_fam( self, red_page_id, heading, beginning, ending, status ): + def update_fam( self, redpageid, heading, beginning, ending, status ): """ Updates the red fam row in MySQL-Database for given fam_hash - @param int red_page_id MediaWiki page_id + @param int redpageid MediaWiki page_id @param datetime beginning Timestamp of beginning qparam datetime ending Timestamp of ending of @param int status red_fam status """ - type( self )._cached_update_data.append( ( red_page_id, heading, + type( self )._cached_update_data.append( ( redpageid, heading, beginning, ending, status, - self.__fam_hash ) ) + self.__famhash ) ) def get_by_status( self, status ): """ @@ -336,7 +336,7 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor.execute( - 'SELECT * FROM `{prefix}_red_families` WHERE `status` = ?;'.format( + 'SELECT * FROM `{prefix}_redfams` WHERE `status` = ?;'.format( prefix=type( self ).db_table_prefix), ( status, ) ) while True: @@ -355,11 +355,11 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' cursor.execute( ( 'SELECT * ' + - 'FROM `{prefix}_red_families` `F` ' + - 'INNER JOIN `{prefix}_red_pages` `P` ' + + 'FROM `{prefix}_redfams` `F` ' + + 'INNER JOIN `{prefix}_redpages` `P` ' + 'ON `F`.`status` = ? ' + 'AND `F`.`ending` >= ? 
' - 'AND `F`.`red_page_id` = `P`.`page_id`;').format( + 'AND `F`.`redpageid` = `P`.`pageid`;').format( prefix=type( self ).db_table_prefix), ( status, ending ) ) while True: diff --git a/lib/redfam.py b/lib/redfam.py index f0b36fd..798d501 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -43,8 +43,8 @@ class RedFam: Basic class for RedFams, containing the basic data structure """ - def __init__( self, articlesList, beginning, ending=None, red_page_id=None, - status=0, fam_hash=None, heading=None ): + def __init__( self, articlesList, beginning, ending=None, redpageid=None, + status=None, famhash=None, heading=None ): """ Generates a new RedFam object @@ -52,7 +52,7 @@ class RedFam: @param beginning datetime Beginning date @param ending datetime Ending date @param red_page_id int MW pageid of containing RedPage - @param status int Status of RedFam + @param status str Status of RedFam @param fam_hash str SHA1 hash of articlesList @param heading str Original heading of RedFam (Link) """ @@ -61,20 +61,20 @@ class RedFam: self.site = pywikibot.Site() # Database interface - self._mysql = MysqlRedFam( fam_hash ) + self._mysql = MysqlRedFam( famhash ) # Initial attribute values self._articlesList = articlesList self._beginning = beginning self._ending = ending - self._red_page_id = red_page_id - self._status = status - self._fam_hash = fam_hash + self._redpageid = redpageid + self._status = self._parse_status(status) + self._famhash = famhash self._heading = heading # Calculates the sha1 hash over self._articlesList to # rediscover known redundance families - self.calc_fam_hash() + self.calc_famhash() def __repr__( self ): """ @@ -88,14 +88,14 @@ class RedFam: ", heading=" + repr( self._heading ) + \ ", beginning=" + repr( self._beginning ) + \ ", ending=" + repr( self._ending ) + \ - ", red_page_id=" + repr( self._red_page_id ) + \ + ", red_page_id=" + repr( self._redpageid ) + \ ", status=" + repr( self._status ) + \ - ", fam_hash=" + repr( self._fam_hash ) + \ + ", 
fam_hash=" + repr( self._famhash ) + \ " )" return __repr - def calc_fam_hash( self ): + def calc_famhash( self ): """ Calculates the SHA-1 hash for the articlesList of redundance family. Since we don't need security SHA-1 is just fine. @@ -106,35 +106,35 @@ class RedFam: h = hashlib.sha1() h.update( str( self._articlesList[:8] ).encode('utf-8') ) - if self._fam_hash and h.hexdigest() != self._fam_hash: - raise RedFamHashError( self._fam_hash, h.hexdigest() ) + if self._famhash and h.hexdigest() != self._famhash: + raise RedFamHashError( self._famhash, h.hexdigest() ) - elif self._fam_hash: + elif self._famhash: return else: - self._fam_hash = h.hexdigest() + self._famhash = h.hexdigest() def changed( self ): """ Checks wether anything has changed and maybe triggers db update """ - # On archived red_fams do not delete possibly existing ending - if( not self._ending and self._status > 1 and + # On archived redfams do not delete possibly existing ending + if( not self._ending and "archived" in self._status and self._mysql.data[ 'ending' ] ): self._ending = self._mysql.data[ 'ending' ] # Since status change means something has changed, update database - if( self._status != self._mysql.data[ 'status' ] or + if( self._raw_status != self._mysql.data[ 'status' ] or self._beginning != self._mysql.data[ 'beginning' ] or self._ending != self._mysql.data[ 'ending' ] or - self._red_page_id != self._mysql.data[ 'red_page_id' ] or + self._red_page_id != self._mysql.data[ 'redpageid' ] or self._heading != self._mysql.data[ 'heading' ]): - self._mysql.update_fam( self._red_page_id, self._heading, + self._mysql.update_fam( self._redpageid, self._heading, self._beginning, self._ending, - self._status ) + self._raw_status() ) @classmethod def flush_db_cache( cls ): @@ -143,6 +143,61 @@ class RedFam: """ MysqlRedFam.flush() + def add_status(self, status): + """ + Adds a status specified by status, to status set + + @param status Statusstring to add + @type status str + """ + 
self._status.add(status) + + def remove_status(self, status, weak=True): + """ + Removes a status, specified by status from set. If weak is set to + False it will throw a KeyError when trying to remove a status not set. + + @param status Statusstring to add + @type status str + @param weak Change behavior on missing status + @type bool + """ + if weak: + self._status.discard(status) + else: + self._status.remove(status) + + def has_status(self, status): + """ + Returns True, if redfam has given status + + @param status Statusstring to check + @type status str + @returns True if status is present else False + """ + if status in self._status: + return True + else: + return False + + def _parse_status(self, raw_status ): + """ + Sets status based on comma separated list + + @param raw_status Commaseparated string of stati (from DB) + @type raw_status str + """ + self._status = set( raw_status.strip().split(",")) + + def _raw_status( self ): + """ + Returns status as commaseparated string (to save in DB) + + @returns Raw status string + @rtype str + """ + return ",".join( self._status ) + class RedFamParser( RedFam ): """ @@ -165,15 +220,15 @@ class RedFamParser( RedFam ): wurde gewünscht von:" __done_notice2 = "{{Erledigt|" - def __init__( self, heading, red_page, red_page_archive, + def __init__( self, heading, redpage, redpagearchive, beginning, ending=None ): """ Creates a RedFam object based on data collected while parsing red_pages combined with possibly former known data from db - @param red_fam_heading str Wikitext heading of section - @param red_page page Pywikibot.page object - @param red_page_archive bool Is red_page an archive + @param redfam_heading str Wikitext heading of section + @param redpage page Pywikibot.page object + @param redpagearchive bool Is red_page an archive @param beginning datetime Timestamp of beginning str as strptime parseable string @param ending datetime Timestamp of ending @@ -181,9 +236,9 @@ class RedFamParser( RedFam ): """ # Set 
object attributes: - self._red_page_id = red_page._pageid - self._red_page_archive = red_page_archive - self._fam_hash = None + self._redpageid = redpage._pageid + self._redpagearchive = redpagearchive + self._famhash = None # Method self.add_beginning sets self._beginning directly self.add_beginning( beginning ) @@ -195,7 +250,7 @@ class RedFamParser( RedFam ): # If no ending was provided set to None self._ending = None - self._status = None + self._status = set() # Parse the provided heading of redundance section # to set self._articlesList @@ -204,7 +259,7 @@ class RedFamParser( RedFam ): # Calculates the sha1 hash over self._articlesList to # rediscover known redundance families - self.calc_fam_hash() + self.calc_famhash() # Open database connection, ask for data if existing, # otherwise create entry @@ -223,11 +278,11 @@ class RedFamParser( RedFam ): # We need a connection to our mysqldb self._mysql = MysqlRedFam( ) - self._mysql.get_fam( self._fam_hash ) + self._mysql.get_fam( self._famhash ) if not self._mysql.data: self._mysql.add_fam( self._articlesList, self._heading, - self._red_page_id, self._beginning, + self._redpageid, self._beginning, self._ending ) def heading_parser( self, heading ): @@ -253,7 +308,7 @@ class RedFamParser( RedFam ): # Catch sections with more then 8 articles, print error if len( self._articlesList ) > 8: # For repression in output we need to know the fam hash - self.calc_fam_hash() + self.calc_famhash() jogobot.output( ( "\03{{lightred}}" + @@ -317,21 +372,18 @@ class RedFamParser( RedFam ): - 3 and greater status was set by worker script, do not change it """ - # Do not change stati set by worker script etc. 
- if not self._mysql.data['status'] > 2: - - # No ending, discussion is running: - # Sometimes archived discussions also have no detectable ending - if not self._ending and not self._red_page_archive: - self._status = 0 - else: - if not self._red_page_archive: - self._status = 1 - else: - self._status = 2 + # No ending, discussion is running: + # Sometimes archived discussions also have no detectable ending + if not self._ending and not self._redpagearchive: + self.add_status("open") else: - - self._status = self._mysql.data[ 'status' ] + self.remove_status("open") + if not self._redpagearchive: + self.add_status("done") + else: + self.remove_status("done") + self.remove_status("open") + self.add_status("archived") @classmethod def is_section_redfam_cb( cls, heading ): @@ -444,15 +496,15 @@ class RedFamWorker( RedFam ): articlesList.append( mysql_data[ key ] ) super().__init__( articlesList, mysql_data[ 'beginning' ], - mysql_data[ 'ending' ], mysql_data[ 'red_page_id' ], - mysql_data[ 'status' ], mysql_data[ 'fam_hash' ], + mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], + mysql_data[ 'status' ], mysql_data[ 'famhash' ], mysql_data[ 'heading' ] ) self._mysql.data = mysql_data # Get related RedPage-Information - self.redpageid = mysql_data[ 'page_id' ] - self.redpagetitle = mysql_data[ 'page_title' ] + self.redpageid = mysql_data[ 'pageid' ] + self.redpagetitle = mysql_data[ 'pagetitle' ] # Make sure locale is set to 'de_DE.UTF-8' to prevent problems # with wrong month abreviations in strptime @@ -499,7 +551,7 @@ class RedFamWorker( RedFam ): Sets status to 3 when worked on """ - self._status = 3 + pass def get_disc_link( self ): """ diff --git a/lib/redpage.py b/lib/redpage.py index ebedaba..b4361b9 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -49,6 +49,8 @@ class RedPage: @type pageid int """ + self._status = set() + # Safe the pywikibot page object self.page = page self.pageid = pageid @@ -71,7 +73,7 @@ class RedPage: elif self.pageid: self.__mysql = 
MysqlRedPage( self.pageid ) self.page = pywikibot.Page( pywikibot.Site(), - self.__mysql.data['page_title'] ) + self.__mysql.data['pagetitle'] ) self.page.exists() else: raise ValueError( "Page NOR pagid provided!" ) @@ -84,9 +86,9 @@ class RedPage: Check wether the page was changed since last run """ - if( self.__mysql.data != { 'page_id': self.page._pageid, - 'rev_id': self.page._revid, - 'page_title': self.page.title(), + if( self.__mysql.data != { 'pageid': self.page._pageid, + 'revid': self.page._revid, + 'pagetitle': self.page.title(), 'status': self.__mysql.data[ 'status' ] } ): self._changed = True else: @@ -110,7 +112,7 @@ class RedPage: Decides wether current RedPage needs to be parsed or not """ - if( self._changed or self.__mysql.data[ 'status' ] == 0 ): + if( self._changed or self.__mysql.data[ 'status' ] == "" ): return True else: return False @@ -146,14 +148,16 @@ class RedPage: Updates the page meta data in mysql db """ if( self._parsed or not self._changed ): - status = 1 + self.add_status( "open" ) if( self.is_archive() ): - status = 2 + self.remove_status( "open" ) + self.add_status( "archived" ) else: - status = 0 + self._status = set() - self.__mysql.update_page( self.page._revid, self.page.title(), status ) + self.__mysql.update_page( self.page._revid, self.page.title(), + self._raw_status() ) @classmethod def flush_db_cache( cls ): @@ -161,3 +165,58 @@ class RedPage: Calls flush method of Mysql Interface class """ MysqlRedPage.flush() + + def add_status(self, status): + """ + Adds a status specified by status, to status set + + @param status Statusstring to add + @type status str + """ + self._status.add(status) + + def remove_status(self, status, weak=True): + """ + Removes a status, specified by status from set. If weak is set to + False it will throw a KeyError when trying to remove a status not set. 
+ + @param status Statusstring to add + @type status str + @param weak Change behavior on missing status + @type bool + """ + if weak: + self._status.discard(status) + else: + self._status.remove(status) + + def has_status(self, status): + """ + Returns True, if redfam has given status + + @param status Statusstring to check + @type status str + @returns True if status is present else False + """ + if status in self._status: + return True + else: + return False + + def _parse_status(self, raw_status ): + """ + Sets status based on comma separated list + + @param raw_status Commaseparated string of stati (from DB) + @type raw_status str + """ + self._status = set( raw_status.strip().split(",")) + + def _raw_status( self ): + """ + Returns status as commaseparated string (to save in DB) + + @returns Raw status string + @rtype str + """ + return ",".join( self._status ) From e13320820ce635c1844dd9a8cdc66f5fd2db4311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 17:45:18 +0200 Subject: [PATCH 11/26] Add API to manage status per article To be able to track changes to articles to update redfam status Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=89 FS#89] --- lib/mysqlred.py | 9 ++-- lib/redfam.py | 125 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 129 insertions(+), 5 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 79360a8..0bb843c 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -336,8 +336,8 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor.execute( - 'SELECT * FROM `{prefix}_redfams` WHERE `status` = ?;'.format( - prefix=type( self ).db_table_prefix), ( status, ) ) + 'SELECT * FROM `{prefix}_redfams` WHERE `status` = LIKE %?%;'. 
+ format( prefix=type( self ).db_table_prefix), ( status, ) ) while True: res = cursor.fetchmany( 1000 ) @@ -358,9 +358,10 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' 'FROM `{prefix}_redfams` `F` ' + 'INNER JOIN `{prefix}_redpages` `P` ' + 'ON `F`.`status` = ? ' + - 'AND `F`.`ending` >= ? ' + 'AND `F`.`ending` >= ? ' + 'AND `F`.`redpageid` = `P`.`pageid`;').format( - prefix=type( self ).db_table_prefix), ( status, ending ) ) + prefix=type( self ).db_table_prefix), + ( status, ending ) ) while True: res = cursor.fetchmany( 1000 ) diff --git a/lib/redfam.py b/lib/redfam.py index 798d501..d5312ca 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -198,6 +198,116 @@ class RedFam: """ return ",".join( self._status ) + def article_add_status(self, status, index=None, title=None ): + """ + Adds a status specified by status, to article (identified by title + or index in articlesList) status set + + @param status Statusstring to add + @type status str + @param index Add to article with index in articlesList + @type index int + @param title Add to article with title in articlesList + @type title str + """ + if title and not index: + index = self._articlesList.index( title ) + + if isinstance( index, int ) and index < len(self._articlesList): + self._article_status[index].add(status) + else: + raise IndexError( "No index given or wrong format!") + + def article_remove_status(self, status, index=None, title=None, weak=True): + """ + Removes a status specified by status, from article (identified by title + or index in articlesList) status set + If weak is set to False it will throw a KeyError when trying to + remove a status not set. 
+ + @param status Statusstring to add + @type status str + @param index Remove from article with index in articlesList + @type index int + @param title Remove from article with title in articlesList + @type title str + @param weak Change behavior on missing status + @type bool + """ + if title and not index: + index = self._articlesList.index( title ) + + if isinstance( index, int ) and index < len(self._articlesList): + if weak: + self._article_status[index].discard(status) + else: + self._article_status[index].remove(status) + else: + raise IndexError( "No index given or wrong format!") + + def article_has_status(self, status, index=None, title=None ): + """ + Adds a status specified by status, to articles (identified by title + or index in articlesList) status set + + @param status Statusstring to add + @type status str + @param index Check article with index in articlesList + @type index int + @param title Check article with title in articlesList + @type title str + """ + if title and not index: + index = self._articlesList.index( title ) + + if isinstance( index, int ) and index < len(self._articlesList): + if status in self._article_status[index]: + return True + else: + return False + else: + raise IndexError( "No index given or wrong format!") + + def _article_parse_status(self, raw_status, index=None, title=None ): + """ + Sets status based on comma separated list to articles (identified by + title or index in articlesList) status set + + @param status Statusstring to set + @type status str + @param index Add to article with index in articlesList + @type index int + @param title Add to article with title in articlesList + @type title str + """ + if title and not index: + index = self._articlesList.index( title ) + + if isinstance( index, int ) and index < len(self._articlesList): + self._article_status[index] = set( raw_status.strip().split(",")) + else: + raise IndexError( "No index given or wrong format!") + + def _article_raw_status( self, index=None, 
title=None ): + """ + Returns status as commaseparated string (to save in DB) of article + (identified by title or index in articlesList) status set + + @param index Get from article with index in articlesList + @type index int + @param title Get from article with title in articlesList + @type title str + @returns Raw status string + @rtype str + """ + if title and not index: + index = self._articlesList.index( title ) + + if isinstance( index, int ) and index < len(self._articlesList): + return ",".join( self._article_status[index] ) + else: + raise IndexError( "No index given or wrong format!") + class RedFamParser( RedFam ): """ @@ -491,10 +601,14 @@ class RedFamWorker( RedFam ): def __init__( self, mysql_data ): articlesList = [] + for key in sorted( mysql_data.keys() ): - if 'article' in key and mysql_data[ key ]: + if 'article' in key and 'status' not in key and mysql_data[ key ]: articlesList.append( mysql_data[ key ] ) + # Preset article status list with empty sets for existing articles + self._article_status = [set() for x in range(0, len(articlesList))] + super().__init__( articlesList, mysql_data[ 'beginning' ], mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], mysql_data[ 'status' ], mysql_data[ 'famhash' ], @@ -502,6 +616,15 @@ class RedFamWorker( RedFam ): self._mysql.data = mysql_data + # Set up article status + index = 0 + for article in self._articlesList: + raw_status = mysql_data[ "article" + str(index) + "_status" ] + if not raw_status: + raw_status = str() + self._article_parse_status( raw_status, index ) + index += 1 + # Get related RedPage-Information self.redpageid = mysql_data[ 'pageid' ] self.redpagetitle = mysql_data[ 'pagetitle' ] From 870ed4bf25cbf6688657f3cb8bd6f70b0ab96afe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 17:47:02 +0200 Subject: [PATCH 12/26] Update redfam.article_generator use article status To be able to filter articles by status of that article Related 
Task: [https://fs.golderweb.de/index.php?do=details&task_id=89 FS#89] --- bots/markpages.py | 4 +++- lib/redfam.py | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 244ba14..aa9597c 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -107,7 +107,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # We need the talkpage (and only this) of each existing page for talkpage in pagegenerators.PageWithTalkPageGenerator( - redfam.article_generator( filter_existing=True ), + redfam.article_generator( + filter_existing=True, + exclude_article_status=["marked"] ), return_talk_only=True ): # Add reference to redfam to talkpages diff --git a/lib/redfam.py b/lib/redfam.py index d5312ca..d82ffbb 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -633,7 +633,9 @@ class RedFamWorker( RedFam ): # with wrong month abreviations in strptime locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') - def article_generator(self, filter_existing=None, filter_redirects=None ): + def article_generator(self, filter_existing=None, filter_redirects=None, + exclude_article_status=[], + onlyinclude_article_status=[] ): """ Yields pywikibot pageobjects for articles belonging to this redfams in a generator @@ -647,11 +649,22 @@ class RedFamWorker( RedFam ): set to False to get only redirectpages, unset/None results in not filtering @type filter_redirects bool/None + """ # Iterate over articles in redfam for article in self._articlesList: page = pywikibot.Page(pywikibot.Link(article), self.site) + # Exclude by article status + for status in exclude_article_status: + if self.article_has_status( status, title=article ): + continue + + # Only include by article status + for status in onlyinclude_article_status: + if not self.article_has_status( status, title=article ): + continue + # Filter non existing Pages if requested with filter_existing=True if filter_existing and not page.exists(): continue From 
d55c81c97b6545e59a36a49c5695b216491c2a16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 18:05:51 +0200 Subject: [PATCH 13/26] Set article status when worked on talkpage To detect whole redfam status after run over all articles Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=89 FS#89] --- bots/markpages.py | 56 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index aa9597c..7548294 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -129,7 +129,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() self.current_wikicode = mwparser.parse( self.current_page.text ) # Add notice - self.add_disc_notice_template() + # Returns True if added + # None if already present + add_ret = self.add_disc_notice_template() # Convert wikicode back to string to save self.new_text = str( self.current_wikicode ) @@ -142,8 +144,24 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() if not summary[:len("Bot:")] == "Bot:": summary = "Bot: " + summary.strip() - # Save - self.put_current( self.new_text, summary=summary ) + # will return True if saved + # False if not saved because of errors + # None if change was not accepted by user + save_ret = self.put_current( self.new_text ) + + # Status + if add_ret is None or add_ret and save_ret: + self.current_page.redfam.article_add_status( + "marked", + title=self.current_page.title(withNamespace=False)) + elif save_ret is None: + self.current_page.redfam.article_add_status( + "note_rej", + title=self.current_page.title(withNamespace=False)) + else: + self.current_page.redfam.article_add_status( + "sav_err", + title=self.current_page.title(withNamespace=False)) def add_disc_notice_template( self ): """ @@ -157,7 +175,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Check if it is 
already present in wikicode if self.disc_notice_present(): - return False + return # Find the right place to insert notice template # Therfore we need the first section (if there is one) @@ -211,3 +229,33 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # If nothing is found, loop will run till its end else: return False + + # We need to overrite this since orginal from pywikibot.bot.CurrentPageBot + # does not return result of self._save_page + def put_current(self, new_text, ignore_save_related_errors=None, + ignore_server_errors=None, **kwargs): + """ + Call L{Bot.userPut} but use the current page. + + It compares the new_text to the current page text. + + @param new_text: The new text + @type new_text: basestring + @param ignore_save_related_errors: Ignore save related errors and + automatically print a message. If None uses this instances default. + @type ignore_save_related_errors: bool or None + @param ignore_server_errors: Ignore server errors and automatically + print a message. If None uses this instances default. + @type ignore_server_errors: bool or None + @param kwargs: Additional parameters directly given to L{Bot.userPut}. 
+ @type kwargs: dict + """ + if ignore_save_related_errors is None: + ignore_save_related_errors = self.ignore_save_related_errors + if ignore_server_errors is None: + ignore_server_errors = self.ignore_server_errors + return self.userPut( + self.current_page, self.current_page.text, new_text, + ignore_save_related_errors=ignore_save_related_errors, + ignore_server_errors=ignore_server_errors, + **kwargs) From 65fb2ecb287f8060513977dd95fd3a81361cb9ae Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 5 Nov 2016 19:27:56 +0100 Subject: [PATCH 14/26] Generate Fam status based on article status Some article states should be reflected in the RedFam status Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=89 FS#89] --- lib/redfam.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/redfam.py b/lib/redfam.py index d82ffbb..6e8b3d5 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -68,6 +68,7 @@ class RedFam: self._beginning = beginning self._ending = ending self._redpageid = redpageid + self._status = set() self._status = self._parse_status(status) self._famhash = famhash self._heading = heading @@ -686,8 +687,24 @@ class RedFamWorker( RedFam ): """ Sets status to 3 when worked on """ + for article in self._articlesList: + if self.article_has_status( "note_rej", title=article ): + self.add_status( "note_rej" ) + if self.article_has_status( "sav_err", title=article ): + self.add_status( "sav_err" ) - pass + if not self.has_status( "sav_err" ) and \ + not self.has_status( "note_rej" ): + self.add_status( "marked" ) + + self._mysql.data[ 'status' ] = self._raw_status() + index = 0 + for article in self._articlesList: + self._mysql.data[ "article" + str(index) + 'status' ] = \ + self._article_raw_status( index=index ) + index += 1 + + print( repr(self) ) def get_disc_link( self ): """ From 0ebf307bb80daa662a6f20add5fcb853d7de36f1 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 5 Nov 2016 19:32:02 
+0100 Subject: [PATCH 15/26] Add markpages as subtask Markpages is a subtask of our Red-Bot Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=89 FS#89] # The following line will be added automatically # Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 24 +++++++++++++++++++++--- jogobot | 2 +- lib/mysqlred.py | 2 +- red.py | 4 ++++ 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 7548294..b7b45c0 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -69,6 +69,24 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Run super class init with builded generator super( MarkPagesBot, self ).__init__(generator=self.gen) + def run(self): + """ + Controls the overal parsing process, using super class for page switch + + Needed to do things before/after treating pages is done + """ + try: + + super( MarkPagesBot, self ).run() + + except: + raise + + else: + # Do status redfam status updates + for redfam in self.redfams: + redfam.update_status() + @property def redfams(self): """ @@ -79,8 +97,8 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() end_after = datetime.strptime( jogobot.config["red.markpages"]["mark_done_after"], "%Y-%m-%d" ) - self.__redfams = RedFamWorker.gen_by_status_and_ending( - "archived", end_after) + self.__redfams = list( RedFamWorker.gen_by_status_and_ending( + "archived", end_after) ) return self.__redfams @@ -147,7 +165,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # will return True if saved # False if not saved because of errors # None if change was not accepted by user - save_ret = self.put_current( self.new_text ) + save_ret = self.put_current( self.new_text, summary=summary ) # Status if add_ret is None or add_ret and save_ret: diff --git a/jogobot b/jogobot index 28d03f3..49ada29 160000 --- a/jogobot +++ b/jogobot @@ -1 +1 
@@ -Subproject commit 28d03f35b848a33ad45d3f5f8f3f82e8c45534ec +Subproject commit 49ada2993e345600523c161c5e2516ec65625684 diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 0bb843c..9e2e01b 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -308,7 +308,7 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' type( self )._cached_insert_data.update( insert_data ) # Manualy construct self.data dict - data_keys = ( 'fam_hash', 'redpageid', 'beginning', 'ending', + data_keys = ( 'famhash', 'redpageid', 'beginning', 'ending', 'status', 'heading', 'article0', 'article1', 'article2', 'article3', 'article4', 'article5', 'article6', 'article7' ) diff --git a/red.py b/red.py index 733def2..81388d6 100644 --- a/red.py +++ b/red.py @@ -68,6 +68,10 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ): # Import related bot from bots.reddiscparser import DiscussionParserBot as Bot + elif subtask == "markpages": + # Import related bot + from bots.markpages import MarkPagesBot as Bot + # Subtask error else: jogobot.output( ( From 6e973369cd868d80862c7efc03fe3cb525573ccb Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 26 Nov 2016 22:26:55 +0100 Subject: [PATCH 16/26] sqlalchemy working for parser Needs some testing, presumably contains some bugs --- bots/reddiscparser.py | 30 +- lib/mysqlred.py | 795 +++++++++++++++++++++++++++--------------- lib/redfam.py | 360 +++++++++++-------- lib/redpage.py | 188 ++++++---- 4 files changed, 857 insertions(+), 516 deletions(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 818eb05..c789d86 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -33,8 +33,8 @@ from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot -from lib import redpage -from lib import redfam +from lib.redpage import RedPage +from lib.redfam import RedFamParser class DiscussionParserBot( @@ -127,7 +127,7 @@ class DiscussionParserBot( else: # If successfully parsed all pages in cat, 
flush db write cache - redpage.RedPage.flush_db_cache() + RedPage.flush_db_cache() def treat_page( self ): """ @@ -146,20 +146,23 @@ class DiscussionParserBot( return # Initiate RedPage object - red_page = redpage.RedPage( self.current_page ) + redpage = RedPage.session.query(RedPage).filter(RedPage.pageid == self.current_page.pageid ).one_or_none() - # Check whether parsing is needed - if red_page.is_parsing_needed(): + if redpage: + redpage.update( self.current_page ) + else: + redpage = RedPage( self.current_page ) + #~ # Check whether parsing is needed + if redpage.is_parsing_needed(): # Count families for failure analysis fam_counter = 0 # Iterate over returned generator with redfam sections - for fam in red_page.parse(): - + for fam in redpage.parse(): # Run RedFamParser on section text - redfam.RedFamParser.parser( fam, red_page.page, - red_page.is_archive() ) + RedFamParser.parser( fam, redpage, + redpage.is_archive() ) fam_counter += 1 @@ -167,12 +170,13 @@ class DiscussionParserBot( # If successfully parsed whole page, flush # db write cache if( fam_counter ): - redfam.RedFamParser.flush_db_cache() + + RedFamParser.flush_db_cache() jogobot.output( "Page [[{reddisc}]] parsed".format( - reddisc=red_page.page.title() ) ) + reddisc=redpage.page.title() ) ) else: jogobot.output( "\03{red}" + "Page [[{reddisc}]], ".format( - reddisc=red_page.page.title() ) + + reddisc=redpage.page.title() ) + "containing no redfam, parsed!", "WARNING" ) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 9e2e01b..8257822 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -39,336 +39,553 @@ from pywikibot import config import jogobot -class MysqlRed: - """ - Basic interface class, containing opening of connection +from sqlalchemy import create_engine +from sqlalchemy.engine.url import URL +url = URL( "mysql+oursql", + username=config.db_username, + password=config.db_password, + host=config.db_hostname, + port=config.db_port, + database=config.db_username + 
jogobot.config['db_suffix'] ) +engine = create_engine(url, echo=True) - Specific querys should be defined in descendant classes per data type + +from sqlalchemy.ext.declarative import ( + declarative_base, declared_attr, has_inherited_table ) +Base = declarative_base() + +from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey + +from sqlalchemy.orm import sessionmaker, relationship, composite +from sqlalchemy.ext.mutable import MutableComposite, MutableSet +from sqlalchemy.orm.collections import attribute_mapped_collection +import sqlalchemy.types as types + + +Session = sessionmaker(bind=engine) +session = Session() + +family = "dewpbeta" + +class Mysql(object): + session = session + @declared_attr + def _tableprefix(cls): + return family + "_" + @declared_attr + def _tablesuffix(cls): + return "s" + @declared_attr + def __tablename__(cls): + if has_inherited_table(cls): + return None + prefix = family + "_" + name = cls.__name__[len("Mysql"):].lower() + suffix = "s" + return cls._tableprefix + name + cls._tablesuffix + def changedp(self): + return self in self.session.dirty + +class ColumnList( list, MutableComposite ): + """ + Combines multiple Colums into a list like object """ - # Save mysqldb-connection as class attribute to use only one - # in descendant classes - connection = False - db_hostname = config.db_hostname - db_port = config.db_port - db_username = config.db_username - db_password = config.db_password - db_name = config.db_username + jogobot.config['db_suffix'] - db_table_prefix = False - - # Class variables for storing cached querys - _cached_update_data = [] - _update_query = '' - _cached_insert_data = {} - _insert_query = '' - - def __init__( self ): + def __init__( self, *columns ): """ - Opens a connection to MySQL-DB - - @returns mysql-stream MySQL Connection + Wrapper to the list constructor deciding whether we have initialization + with individual params per article or with an iterable. 
""" - - # Needs to be generated after Parsing of Args (not at import time) - if not type(self).db_table_prefix: - type(self).db_table_prefix = \ - pywikibot.Site().family.dbName(pywikibot.Site().code) - - # Now we can setup prepared queries - self._prepare_queries() - - # Connect to mysqldb only once - if not type( self ).connection: - - type( self ).connection = mysqldb.connect( - host=type( self ).db_hostname, - port=type( self ).db_port, - user=type( self ).db_username, - passwd=type( self ).db_password, - db=type( self ).db_name ) - - # Register callback for warnig if exit with cached db write querys - atexit.register( type(self).warn_if_not_flushed ) - - def __del__( self ): - """ - Before deleting class, close connection to MySQL-DB - """ - - type( self ).connection.close() - - def _prepare_queries( self ): - """ - Used to replace placeholders in prepared queries - """ - type(self)._update_query = type(self)._update_query.format( - prefix=type(self).db_table_prefix) - type(self)._insert_query = type(self)._insert_query.format( - prefix=type(self).db_table_prefix) - - @classmethod - def flush( cls ): - """ - Run cached querys - """ - if not cls.connection: - raise MysqlRedConnectionError( "No connection exists!" 
) - - cursor = cls.connection.cursor() - - # Execute insert query - if cls._cached_insert_data: - # Since cls._cached_insert_data is a dict, we need to have a custom - # Generator to iterate over it - cursor.executemany( cls._insert_query, - ( cls._cached_insert_data[ key ] - for key in cls._cached_insert_data ) ) - # Reset after writing - cls._cached_insert_data = {} - - # Execute update query - # Use executemany since update could not be reduced to one query - if cls._cached_update_data: - cursor.executemany( cls._update_query, cls._cached_update_data ) - # Reset after writing - cls._cached_update_data = [] - - # Commit db changes - if cls._cached_insert_data or cls._cached_update_data: - cls.connection.commit() - - @classmethod - def warn_if_not_flushed(cls): - """ - Outputs a warning if there are db write querys cached and not flushed - before exiting programm! - """ - if cls._cached_update_data or cls._cached_insert_data: - jogobot.output( "Cached Database write querys not flushed!!! " + - "Data loss is possible!", "WARNING" ) - - -class MysqlRedPage( MysqlRed ): - """ - MySQL-db Interface for handling querys for RedPages - """ - - # Class variables for storing cached querys - # '{prefix}' will be replaced during super().__init__() - _cached_update_data = [] - _update_query = 'UPDATE `{prefix}_redpages` \ -SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;' - - _cached_insert_data = {} - _insert_query = 'INSERT INTO `{prefix}_redpages` \ -( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? 
);' - - def __init__( self, pageid ): - """ - Creates a new instance, runs __init__ of parent class - """ - - super().__init__( ) - - self.__pageid = int( pageid ) - - self.data = self.get_page() - - def __del__( self ): - """ - Needed to prevent descendant classes of MYSQL_RED from deleting - connection to db - """ - pass - - def get_page( self ): - """ - Retrieves a red page row from MySQL-Database for given page_id - - @param int pageid MediaWiki page_id for page to retrieve - - @returns tuple Tuple with data for given page_id - bool FALSE if none found - """ - - cursor = type( self ).connection.cursor(mysqldb.DictCursor) - - cursor.execute( - 'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format( - prefix=type(self).db_table_prefix), ( self.__pageid, ) ) - - res = cursor.fetchone() - - if res: - return res + # Individual params per article (from db), first one is a str + if isinstance( columns[0], str ) or \ + isinstance( columns[0], MutableSet ) or columns[0] is None: + super().__init__( columns ) + # Iterable articles list else: - return False + super().__init__( columns[0] ) - def add_page( self, pagetitle, revid, status=0 ): + def __setitem__(self, key, value): """ - Inserts a red page row in MySQL-Database for given pageid - - @param int revid MediaWiki current revid - @param str pagetitle MediaWiki new pagetitle - @param int status Page parsing status + The MutableComposite class needs to be noticed about changes in our + component. So we tweak the setitem process. 
""" - insert_data = { self.__pageid: ( self.__pageid, pagetitle, - revid, status ) } + # set the item + super().__setitem__( key, value) - type( self )._cached_insert_data.update( insert_data ) + # alert all parents to the change + self.changed() - # Manualy construct self.data dict - self.data = { 'pageid': self.__pageid, 'revid': revid, - 'pagetitle': pagetitle, 'status': status } - - def update_page( self, revid=None, pagetitle=None, status=0 ): + def __composite_values__(self): """ - Updates the red page row in MySQL-Database for given page_id - - @param int revid MediaWiki current rev_id - @param str pagetitle MediaWiki new page_title - @param int status Page parsing status + The Composite method needs to have this method to get the items for db. """ + return self - if not pagetitle: - pagetitle = self.data[ 'pagetitle' ] - if not revid: - revid = self.data[ 'revid' ] +class Status( types.TypeDecorator ): - type( self )._cached_update_data.append( ( pagetitle, revid, - status, self.__pageid ) ) + impl = types.String - -class MysqlRedFam( MysqlRed ): - """ - MySQL-db Interface for handling querys for RedFams - """ - - # Class variables for storing cached querys - _cached_update_data = [] - _update_query = 'UPDATE `{prefix}_redfams` \ -SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ -`status`= ? WHERE `famhash` = ?;' - - _cached_insert_data = {} - _insert_query = 'INSERT INTO `{prefix}_redfams` \ -( famhash, redpageid, beginning, ending, status, heading, \ -article0, article1, article2, article3, article4, article5, article6, \ -article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' - - def __init__( self, famhash=None ): + def process_bind_param(self, value, dialect): """ - Creates a new instance, runs __init__ of parent class + Returns status as commaseparated string (to save in DB) + + @returns Raw status string + @rtype str """ + if isinstance(value, MutableSet): + return ",".join( value ) + elif isinstance(value, String ) or value is None: + return value + else: + raise ProgrammingError - self.__famhash = famhash - super().__init__( ) - - def __del__( self ): + def process_result_value(self, value, dialect): """ - Needed to prevent descendant classes of MYSQL_RED from deleting - connection to db + Sets status based on comma separated list + + @param raw_status Commaseparated string of stati (from DB) + @type raw_status str """ - pass + if value: + return MutableSet( value.strip().split(",")) + else: + return MutableSet([]) - def get_fam( self, famhash ): + def copy(self, **kw): + return Status(self.impl.length) + + + +class MysqlRedFam( Mysql, Base ): + + famhash = Column( String(64), primary_key=True, unique=True ) + + __article0 = Column('article0', String(255), nullable=False ) + __article1 = Column('article1', String(255), nullable=False ) + __article2 = Column('article2', String(255), nullable=True ) + __article3 = Column('article3', String(255), nullable=True ) + __article4 = Column('article4', String(255), nullable=True ) + __article5 = Column('article5', String(255), nullable=True ) + __article6 = Column('article6', String(255), nullable=True ) + __article7 = Column('article7', String(255), nullable=True ) + __articlesList = composite( + ColumnList, __article0, __article1, __article2, __article3, + __article4, __article5, __article6, __article7 ) + + heading = Column( Text, nullable=False ) + redpageid = Column( + Integer, ForeignKey( "dewpbeta_redpages.pageid" ), nullable=False ) + beginning = Column( DateTime, nullable=False ) + ending = Column( DateTime, nullable=True ) + __status = Column( 'status', 
MutableSet.as_mutable(Status(255)), nullable=True ) + + __article0_status = Column( + 'article0_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article1_status = Column( + 'article1_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article2_status = Column( + 'article2_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article3_status = Column( + 'article3_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article4_status = Column( + 'article4_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article5_status = Column( + 'article5_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article6_status = Column( + 'article6_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article7_status = Column( + 'article7_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __articlesStatus = composite( + ColumnList, __article0_status, __article1_status, __article2_status, + __article3_status, __article4_status, __article5_status, + __article6_status, __article7_status ) + + redpage = relationship( "RedPage", back_populates="redfams" ) + + @property + def articlesList(self): """ - Retrieves a red family row from MySQL-Database for given fam_hash - - @returns dict Dictionairy with data for given fam hash - False if none found + List of articles belonging to the redfam """ - self.__famhash = famhash + return self.__articlesList - cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + @articlesList.setter + def articlesList(self, articlesList): + # Make sure to always have full length for complete overwrites + while( len(articlesList) < 8 ): + articlesList.append(None) + self.__articlesList = ColumnList(articlesList) - cursor.execute( - 'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'. 
- format( prefix=type(self).db_table_prefix), ( famhash, ) ) - - self.data = cursor.fetchone() - - def add_fam( self, articlesList, heading, redpageid, - beginning, ending=None, status=0 ): - - data = [ self.__famhash, redpageid, beginning, ending, - status, heading ] - - for article in articlesList: - data.append( str( article ) ) - - while len( data ) < 14: - data.append( None ) - - data = tuple( data ) - - insert_data = { self.__famhash: data } - type( self )._cached_insert_data.update( insert_data ) - - # Manualy construct self.data dict - data_keys = ( 'famhash', 'redpageid', 'beginning', 'ending', - 'status', 'heading', 'article0', 'article1', 'article2', - 'article3', 'article4', 'article5', 'article6', - 'article7' ) - self.data = dict( zip( data_keys, data ) ) - - def update_fam( self, redpageid, heading, beginning, ending, status ): + @property + def status( self ): """ - Updates the red fam row in MySQL-Database for given fam_hash - - @param int redpageid MediaWiki page_id - @param datetime beginning Timestamp of beginning - qparam datetime ending Timestamp of ending of - @param int status red_fam status + Current fam status """ + return self.__status - type( self )._cached_update_data.append( ( redpageid, heading, - beginning, ending, status, - self.__famhash ) ) + @status.setter + def status( self, status ): + if status: + self.__status = MutableSet( status ) + else: + self.__status = MutableSet() - def get_by_status( self, status ): + @property + def articlesStatus(self): """ - Generator witch fetches redFams with given status from DB + List of status strings/sets for the articles of the redfam """ + return self.__articlesStatus - cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + @articlesStatus.setter + def articlesStatus(self, articlesStatus): + self.__articlesStatus = ColumnList(articlesStatus) - cursor.execute( - 'SELECT * FROM `{prefix}_redfams` WHERE `status` = LIKE %?%;'. 
- format( prefix=type( self ).db_table_prefix), ( status, ) ) +class MysqlRedPage( Mysql, Base ): + pageid = Column( Integer, unique=True, primary_key=True ) + revid = Column( Integer, unique=True, nullable=False ) + pagetitle = Column( String(255), nullable=False ) + status = Column( MutableSet.as_mutable(Status(255)), nullable=True ) - while True: - res = cursor.fetchmany( 1000 ) - if not res: - break - for row in res: - yield row + redfams = relationship( + "MysqlRedFam", order_by=MysqlRedFam.famhash, back_populates="redpage", + collection_class=attribute_mapped_collection("famhash")) - def get_by_status_and_ending( self, status, ending ): - """ - Generator witch fetches redFams with given status from DB - """ - cursor = type( self ).connection.cursor( mysqldb.DictCursor ) +Base.metadata.create_all(engine) - cursor.execute( ( - 'SELECT * ' + - 'FROM `{prefix}_redfams` `F` ' + - 'INNER JOIN `{prefix}_redpages` `P` ' + - 'ON `F`.`status` = ? ' + - 'AND `F`.`ending` >= ? ' + - 'AND `F`.`redpageid` = `P`.`pageid`;').format( - prefix=type( self ).db_table_prefix), - ( status, ending ) ) +#~ class MysqlRed: + #~ """ + #~ Basic interface class, containing opening of connection - while True: - res = cursor.fetchmany( 1000 ) - if not res: - break - for row in res: - yield row + #~ Specific querys should be defined in descendant classes per data type + #~ """ + + #~ # Save mysqldb-connection as class attribute to use only one + #~ # in descendant classes + #~ connection = False + #~ db_hostname = config.db_hostname + #~ db_port = config.db_port + #~ db_username = config.db_username + #~ db_password = config.db_password + #~ db_name = config.db_username + jogobot.config['db_suffix'] + #~ db_table_prefix = False + + #~ # Class variables for storing cached querys + #~ _cached_update_data = [] + #~ _update_query = '' + #~ _cached_insert_data = {} + #~ _insert_query = '' + + #~ def __init__( self ): + #~ """ + #~ Opens a connection to MySQL-DB + + #~ @returns mysql-stream 
MySQL Connection + #~ """ + + #~ # Needs to be generated after Parsing of Args (not at import time) + #~ if not type(self).db_table_prefix: + #~ type(self).db_table_prefix = \ + #~ pywikibot.Site().family.dbName(pywikibot.Site().code) + + #~ # Now we can setup prepared queries + #~ self._prepare_queries() + + #~ # Connect to mysqldb only once + #~ if not type( self ).connection: + + #~ type( self ).connection = mysqldb.connect( + #~ host=type( self ).db_hostname, + #~ port=type( self ).db_port, + #~ user=type( self ).db_username, + #~ passwd=type( self ).db_password, + #~ db=type( self ).db_name ) + + #~ # Register callback for warnig if exit with cached db write querys + #~ atexit.register( type(self).warn_if_not_flushed ) + + #~ def __del__( self ): + #~ """ + #~ Before deleting class, close connection to MySQL-DB + #~ """ + + #~ type( self ).connection.close() + + #~ def _prepare_queries( self ): + #~ """ + #~ Used to replace placeholders in prepared queries + #~ """ + #~ type(self)._update_query = type(self)._update_query.format( + #~ prefix=type(self).db_table_prefix) + #~ type(self)._insert_query = type(self)._insert_query.format( + #~ prefix=type(self).db_table_prefix) + + #~ @classmethod + #~ def flush( cls ): + #~ """ + #~ Run cached querys + #~ """ + #~ if not cls.connection: + #~ raise MysqlRedConnectionError( "No connection exists!" 
) + + #~ cursor = cls.connection.cursor() + + #~ # Execute insert query + #~ if cls._cached_insert_data: + #~ # Since cls._cached_insert_data is a dict, we need to have a custom + #~ # Generator to iterate over it + #~ cursor.executemany( cls._insert_query, + #~ ( cls._cached_insert_data[ key ] + #~ for key in cls._cached_insert_data ) ) + #~ # Reset after writing + #~ cls._cached_insert_data = {} + + #~ # Execute update query + #~ # Use executemany since update could not be reduced to one query + #~ if cls._cached_update_data: + #~ cursor.executemany( cls._update_query, cls._cached_update_data ) + #~ # Reset after writing + #~ cls._cached_update_data = [] + + #~ # Commit db changes + #~ if cls._cached_insert_data or cls._cached_update_data: + #~ cls.connection.commit() + + #~ @classmethod + #~ def warn_if_not_flushed(cls): + #~ """ + #~ Outputs a warning if there are db write querys cached and not flushed + #~ before exiting programm! + #~ """ + #~ if cls._cached_update_data or cls._cached_insert_data: + #~ jogobot.output( "Cached Database write querys not flushed!!! " + + #~ "Data loss is possible!", "WARNING" ) + + +#~ class MysqlRedPage( MysqlRed ): + #~ """ + #~ MySQL-db Interface for handling querys for RedPages + #~ """ + + #~ # Class variables for storing cached querys + #~ # '{prefix}' will be replaced during super().__init__() + #~ _cached_update_data = [] + #~ _update_query = 'UPDATE `{prefix}_redpages` \ +#~ SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;' + + #~ _cached_insert_data = {} + #~ _insert_query = 'INSERT INTO `{prefix}_redpages` \ +#~ ( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? 
);' + + #~ def __init__( self, pageid ): + #~ """ + #~ Creates a new instance, runs __init__ of parent class + #~ """ + + #~ super().__init__( ) + + #~ self.__pageid = int( pageid ) + + #~ self.data = self.get_page() + + #~ def __del__( self ): + #~ """ + #~ Needed to prevent descendant classes of MYSQL_RED from deleting + #~ connection to db + #~ """ + #~ pass + + #~ def get_page( self ): + #~ """ + #~ Retrieves a red page row from MySQL-Database for given page_id + + #~ @param int pageid MediaWiki page_id for page to retrieve + + #~ @returns tuple Tuple with data for given page_id + #~ bool FALSE if none found + #~ """ + + #~ cursor = type( self ).connection.cursor(mysqldb.DictCursor) + + #~ cursor.execute( + #~ 'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format( + #~ prefix=type(self).db_table_prefix), ( self.__pageid, ) ) + + #~ res = cursor.fetchone() + + #~ if res: + #~ return res + #~ else: + #~ return False + + #~ def add_page( self, pagetitle, revid, status=0 ): + #~ """ + #~ Inserts a red page row in MySQL-Database for given pageid + + #~ @param int revid MediaWiki current revid + #~ @param str pagetitle MediaWiki new pagetitle + #~ @param int status Page parsing status + #~ """ + + #~ insert_data = { self.__pageid: ( self.__pageid, pagetitle, + #~ revid, status ) } + + #~ type( self )._cached_insert_data.update( insert_data ) + + #~ # Manualy construct self.data dict + #~ self.data = { 'pageid': self.__pageid, 'revid': revid, + #~ 'pagetitle': pagetitle, 'status': status } + + #~ def update_page( self, revid=None, pagetitle=None, status=0 ): + #~ """ + #~ Updates the red page row in MySQL-Database for given page_id + + #~ @param int revid MediaWiki current rev_id + #~ @param str pagetitle MediaWiki new page_title + #~ @param int status Page parsing status + #~ """ + + #~ if not pagetitle: + #~ pagetitle = self.data[ 'pagetitle' ] + #~ if not revid: + #~ revid = self.data[ 'revid' ] + + #~ type( self )._cached_update_data.append( ( pagetitle, 
revid, + #~ status, self.__pageid ) ) + + +#~ class MysqlRedFam( MysqlRed ): + #~ """ + #~ MySQL-db Interface for handling querys for RedFams + #~ """ + + #~ # Class variables for storing cached querys + #~ _cached_update_data = [] + #~ _update_query = 'UPDATE `{prefix}_redfams` \ +#~ SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ +#~ `status`= ? WHERE `famhash` = ?;' + + #~ _cached_insert_data = {} + #~ _insert_query = 'INSERT INTO `{prefix}_redfams` \ +#~ ( famhash, redpageid, beginning, ending, status, heading, \ +#~ article0, article1, article2, article3, article4, article5, article6, \ +#~ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' + + #~ def __init__( self, famhash=None ): + #~ """ + #~ Creates a new instance, runs __init__ of parent class + #~ """ + + #~ self.__famhash = famhash + + #~ super().__init__( ) + + #~ def __del__( self ): + #~ """ + #~ Needed to prevent descendant classes of MYSQL_RED from deleting + #~ connection to db + #~ """ + #~ pass + + #~ def get_fam( self, famhash ): + #~ """ + #~ Retrieves a red family row from MySQL-Database for given fam_hash + + #~ @returns dict Dictionairy with data for given fam hash + #~ False if none found + #~ """ + #~ self.__famhash = famhash + + #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + + #~ cursor.execute( + #~ 'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'. 
+ #~ format( prefix=type(self).db_table_prefix), ( famhash, ) ) + + #~ self.data = cursor.fetchone() + + #~ def add_fam( self, articlesList, heading, redpageid, + #~ beginning, ending=None, status=0 ): + + #~ data = [ self.__famhash, redpageid, beginning, ending, + #~ status, heading ] + + #~ for article in articlesList: + #~ data.append( str( article ) ) + + #~ while len( data ) < 14: + #~ data.append( None ) + + #~ data = tuple( data ) + + #~ insert_data = { self.__famhash: data } + #~ type( self )._cached_insert_data.update( insert_data ) + + #~ # Manualy construct self.data dict + #~ data_keys = ( 'famhash', 'redpageid', 'beginning', 'ending', + #~ 'status', 'heading', 'article0', 'article1', 'article2', + #~ 'article3', 'article4', 'article5', 'article6', + #~ 'article7' ) + #~ self.data = dict( zip( data_keys, data ) ) + + #~ def update_fam( self, redpageid, heading, beginning, ending, status ): + #~ """ + #~ Updates the red fam row in MySQL-Database for given fam_hash + + #~ @param int redpageid MediaWiki page_id + #~ @param datetime beginning Timestamp of beginning + #~ qparam datetime ending Timestamp of ending of + #~ @param int status red_fam status + #~ """ + + #~ type( self )._cached_update_data.append( ( redpageid, heading, + #~ beginning, ending, status, + #~ self.__famhash ) ) + + #~ def get_by_status( self, status ): + #~ """ + #~ Generator witch fetches redFams with given status from DB + #~ """ + + #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + + #~ cursor.execute( + #~ 'SELECT * FROM `{prefix}_redfams` WHERE `status` = LIKE %?%;'. 
+ #~ format( prefix=type( self ).db_table_prefix), ( status, ) ) + + #~ while True: + #~ res = cursor.fetchmany( 1000 ) + #~ if not res: + #~ break + #~ for row in res: + #~ yield row + + #~ def get_by_status_and_ending( self, status, ending ): + #~ """ + #~ Generator witch fetches redFams with given status from DB + #~ """ + + #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + + #~ cursor.execute( ( + #~ 'SELECT * ' + + #~ 'FROM `{prefix}_redfams` `F` ' + + #~ 'INNER JOIN `{prefix}_redpages` `P` ' + + #~ 'ON `F`.`status` = ? ' + + #~ 'AND `F`.`ending` >= ? ' + + #~ 'AND `F`.`redpageid` = `P`.`pageid`;').format( + #~ prefix=type( self ).db_table_prefix), + #~ ( status, ending ) ) + + #~ while True: + #~ res = cursor.fetchmany( 1000 ) + #~ if not res: + #~ break + #~ for row in res: + #~ yield row class MysqlRedError(Exception): diff --git a/lib/redfam.py b/lib/redfam.py index 6e8b3d5..526f902 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -3,7 +3,7 @@ # # redfam.py # -# Copyright 2015 GOLDERWEB – Jonathan Golder +# Copyright 2017 GOLDERWEB – Jonathan Golder # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -35,16 +35,17 @@ import pywikibot # noqa from pywikibot.tools import deprecated # noqa import jogobot -from lib.mysqlred import MysqlRedFam +#~ from lib.mysqlred import Column, Integer, String, Text, DateTime, ForeignKey, ColumnList, Status +from lib.mysqlred import MysqlRedFam, MutableSet, ColumnList #, Mysql, Base, relationship, composite, -class RedFam: +class RedFam( MysqlRedFam ): """ Basic class for RedFams, containing the basic data structure """ def __init__( self, articlesList, beginning, ending=None, redpageid=None, - status=None, famhash=None, heading=None ): + status=MutableSet(), famhash=None, heading=None ): """ Generates a new RedFam object @@ -61,21 +62,32 @@ class RedFam: self.site = pywikibot.Site() # Database interface - 
self._mysql = MysqlRedFam( famhash ) + #self._mysql = MysqlRedFam( famhash ) # Initial attribute values - self._articlesList = articlesList - self._beginning = beginning - self._ending = ending - self._redpageid = redpageid - self._status = set() - self._status = self._parse_status(status) - self._famhash = famhash - self._heading = heading + #~ self.articlesList = articlesList + #~ self.beginning = beginning + #~ self.ending = ending + #~ self.redpageid = redpageid +#~ # self._status = set() +#~ # self._status = self._parse_status(status) + #~ self.famhash = famhash + #~ self.heading = heading + #self.status = status - # Calculates the sha1 hash over self._articlesList to - # rediscover known redundance families - self.calc_famhash() + #articlesStatus = ColumnList([ MutableSet() for x in range(0,8) ]) + + #~ # Calculates the sha1 hash over self._articlesList to + #~ # rediscover known redundance families + #~ self.calc_famhash() + + #~ if not status: + #~ status = MutableSet() + + super().__init__( articlesList=articlesList, beginning=beginning, ending=ending, redpageid=redpageid, + famhash=famhash, heading=heading, status=status, articlesStatus=None ) + + #super().__init__() def __repr__( self ): """ @@ -85,64 +97,75 @@ class RedFam: """ __repr = "RedFam( " + \ - "articlesList=" + repr( self._articlesList ) + \ - ", heading=" + repr( self._heading ) + \ - ", beginning=" + repr( self._beginning ) + \ - ", ending=" + repr( self._ending ) + \ - ", red_page_id=" + repr( self._redpageid ) + \ - ", status=" + repr( self._status ) + \ - ", fam_hash=" + repr( self._famhash ) + \ + "articlesList=" + repr( self.articlesList ) + \ + ", heading=" + repr( self.heading ) + \ + ", beginning=" + repr( self.beginning ) + \ + ", ending=" + repr( self.ending ) + \ + ", red_page_id=" + repr( self.redpageid ) + \ + ", status=" + repr( self.status ) + \ + ", fam_hash=" + repr( self.famhash ) + \ " )" return __repr - def calc_famhash( self ): + @classmethod + def calc_famhash(cls, 
articlesList ): + + h = hashlib.sha1() + # Since articlesList attr of RedFam will have always 8 Members we + # need to fill up smaller lists (longers will be cropped below). + while len( articlesList) < 8: + articlesList.append(None) + + h.update( str( articlesList[:8] ).encode('utf-8') ) + + return h.hexdigest() + + def c_famhash( self ): """ Calculates the SHA-1 hash for the articlesList of redundance family. Since we don't need security SHA-1 is just fine. @returns str String with the hexadecimal hash digest """ + print( type( self ) ) - h = hashlib.sha1() - h.update( str( self._articlesList[:8] ).encode('utf-8') ) - - if self._famhash and h.hexdigest() != self._famhash: - raise RedFamHashError( self._famhash, h.hexdigest() ) - - elif self._famhash: + if self.famhash and type(self).calc_famhash(self.articlesList) != self.famhash: + raise RedFamHashError( self.famhash, h.hexdigest() ) + elif self.famhash: return else: - self._famhash = h.hexdigest() + self.famhash = type(self).calc_famhash(self.articlesList) - def changed( self ): - """ - Checks wether anything has changed and maybe triggers db update - """ + #~ def changed( self ): + #~ """ + #~ Checks wether anything has changed and maybe triggers db update + #~ """ - # On archived redfams do not delete possibly existing ending - if( not self._ending and "archived" in self._status and - self._mysql.data[ 'ending' ] ): + #~ # On archived redfams do not delete possibly existing ending + #~ if( not self.ending and "archived" in self._status and + #~ self._mysql.data[ 'ending' ] ): - self._ending = self._mysql.data[ 'ending' ] + #~ self._ending = self._mysql.data[ 'ending' ] - # Since status change means something has changed, update database - if( self._raw_status != self._mysql.data[ 'status' ] or - self._beginning != self._mysql.data[ 'beginning' ] or - self._ending != self._mysql.data[ 'ending' ] or - self._red_page_id != self._mysql.data[ 'redpageid' ] or - self._heading != self._mysql.data[ 'heading' ]): + #~ 
# Since status change means something has changed, update database + #~ if( self._raw_status != self._mysql.data[ 'status' ] or + #~ self._beginning != self._mysql.data[ 'beginning' ] or + #~ self._ending != self._mysql.data[ 'ending' ] or + #~ self._red_page_id != self._mysql.data[ 'redpageid' ] or + #~ self._heading != self._mysql.data[ 'heading' ]): - self._mysql.update_fam( self._redpageid, self._heading, - self._beginning, self._ending, - self._raw_status() ) + #~ self._mysql.update_fam( self._redpageid, self._heading, + #~ self._beginning, self._ending, + #~ self._raw_status() ) @classmethod def flush_db_cache( cls ): """ Calls flush method of Mysql Interface class """ - MysqlRedFam.flush() + cls.session.commit() + #~ MysqlRedFam.flush() def add_status(self, status): """ @@ -151,7 +174,7 @@ class RedFam: @param status Statusstring to add @type status str """ - self._status.add(status) + self.status.add(status) def remove_status(self, status, weak=True): """ @@ -164,9 +187,9 @@ class RedFam: @type bool """ if weak: - self._status.discard(status) + self.status.discard(status) else: - self._status.remove(status) + self.status.remove(status) def has_status(self, status): """ @@ -176,28 +199,28 @@ class RedFam: @type status str @returns True if status is present else False """ - if status in self._status: + if status in self.status: return True else: return False - def _parse_status(self, raw_status ): - """ - Sets status based on comma separated list + #~ def _parse_status(self, raw_status ): + #~ """ + #~ Sets status based on comma separated list - @param raw_status Commaseparated string of stati (from DB) - @type raw_status str - """ - self._status = set( raw_status.strip().split(",")) + #~ @param raw_status Commaseparated string of stati (from DB) + #~ @type raw_status str + #~ """ + #~ self._status = set( raw_status.strip().split(",")) - def _raw_status( self ): - """ - Returns status as commaseparated string (to save in DB) + #~ def _raw_status( self ): + #~ 
""" + #~ Returns status as commaseparated string (to save in DB) - @returns Raw status string - @rtype str - """ - return ",".join( self._status ) + #~ @returns Raw status string + #~ @rtype str + #~ """ + #~ return ",".join( self._status ) def article_add_status(self, status, index=None, title=None ): """ @@ -331,7 +354,7 @@ class RedFamParser( RedFam ): wurde gewünscht von:" __done_notice2 = "{{Erledigt|" - def __init__( self, heading, redpage, redpagearchive, + def __init__( self, articlesList, heading, redpage, redpagearchive, beginning, ending=None ): """ Creates a RedFam object based on data collected while parsing red_pages @@ -346,57 +369,111 @@ class RedFamParser( RedFam ): str strptime parseable string """ - # Set object attributes: - self._redpageid = redpage._pageid - self._redpagearchive = redpagearchive - self._famhash = None - - # Method self.add_beginning sets self._beginning directly - self.add_beginning( beginning ) - - # Method self.add_ending sets self._ending directly - if( ending ): - self.add_ending( ending ) - else: - # If no ending was provided set to None - self._ending = None - - self._status = set() - # Parse the provided heading of redundance section # to set self._articlesList - self.heading_parser( heading ) + #~ self.heading = str(heading) + #~ self.articlesList = articlesList + + #~ # Catch sections with more then 8 articles, print error + #~ if len( self.articlesList ) > 8: + #~ # For repression in output we need to know the fam hash + #~ self.calc_famhash() + + #~ jogobot.output( + #~ ( "\03{{lightred}}" + + #~ "Maximum number of articles in red_fam exceeded, " + + #~ "maximum number is 8, {number:d} were given \n {repress}" + #~ ).format( datetime=datetime.now().strftime( + #~ "%Y-%m-%d %H:%M:%S" ), number=len( self._articlesList ), + #~ repress=repr( self ) ), + #~ "WARNING" ) + + #~ # Only save the first 8 articles +#~ # self.articlesList = self.articlesList[:8] # Calculates the sha1 hash over self._articlesList to # rediscover 
known redundance families + famhash = type(self).calc_famhash(articlesList) - self.calc_famhash() + #~ obj = self.session.query(RedFamParser).filter(RedFamParser.famhash == self.famhash ).one_or_none() + #~ if obj: + #~ self = obj - # Open database connection, ask for data if existing, - # otherwise create entry - self.__handle_db() + + # Set object attributes: + #~ self.redpageid = redpage._pageid + self._redpagearchive = redpagearchive +# self.famhash = None + + # Method self.add_beginning sets self._beginning directly + #~ self.add_beginning( beginning ) + + #~ # Method self.add_ending sets self._ending directly + #~ if( ending ): + #~ self.add_ending( ending ) + #~ else: + #~ # If no ending was provided set to None + #~ self.ending = None + + #~ self.status = MutableSet() + + beginning = self.__datetime(beginning) + if ending: + ending = self.__datetime(ending) + + + super().__init__( articlesList, beginning, ending=ending, redpageid=redpage._pageid, + famhash=famhash, heading=heading ) # Check status changes - self.status() + self.check_status() + + self.session.add(self) + # Open database connection, ask for data if existing, + # otherwise create entry +# self.__handle_db() + + # Triggers db update if anything changed - self.changed() +# self.changed() - def __handle_db( self ): - """ - Handles opening of db connection - """ - # We need a connection to our mysqldb - self._mysql = MysqlRedFam( ) - self._mysql.get_fam( self._famhash ) - if not self._mysql.data: - self._mysql.add_fam( self._articlesList, self._heading, - self._redpageid, self._beginning, - self._ending ) + #~ def __handle_db( self ): + #~ """ + #~ Handles opening of db connection + #~ """ - def heading_parser( self, heading ): + #~ # We need a connection to our mysqldb + #~ self._mysql = MysqlRedFam( ) + #~ self._mysql.get_fam( self._famhash ) + + #~ if not self._mysql.data: + #~ self._mysql.add_fam( self._articlesList, self._heading, + #~ self._redpageid, self._beginning, + #~ self._ending ) + 
+ def update( self, articlesList, heading, redpage, redpagearchive, + beginning, ending=None): + + self.articlesList = articlesList; + self.heading = heading; + self.redpage = redpage; + self.redpageid = redpage.pageid; + + self.add_beginning( beginning ) + + if( ending ): + self.add_ending( ending ) + + self._redpagearchive = redpagearchive + + # Check status changes + self.check_status() + + @classmethod + def heading_parser( cls, heading ): """ Parses given red_fam_heading string and saves articles list @@ -404,34 +481,16 @@ class RedFamParser( RedFam ): @type heading wikicode or mwparser-parseable """ - # Save heading as string - self._heading = str( heading ) - # Parse string heading with mwparse again everytime # In some cases the given wikicode is broken due to syntax errors # (Task FS#77) - heading = mwparser.parse( self._heading ) + heading = mwparser.parse( str( heading ) ) # Save destinations of wikilinks in headings - self._articlesList = [ str( link.title ) for link + return [ str( link.title ) for link in heading.ifilter_wikilinks() ] - # Catch sections with more then 8 articles, print error - if len( self._articlesList ) > 8: - # For repression in output we need to know the fam hash - self.calc_famhash() - jogobot.output( - ( "\03{{lightred}}" + - "Maximum number of articles in red_fam exceeded, " + - "maximum number is 8, {number:d} were given \n {repress}" - ).format( datetime=datetime.now().strftime( - "%Y-%m-%d %H:%M:%S" ), number=len( self._articlesList ), - repress=repr( self ) ), - "WARNING" ) - - # Only save the first 8 articles - self._articlesList = self._articlesList[:8] def add_beginning( self, beginning ): """ @@ -440,7 +499,7 @@ class RedFamParser( RedFam ): @param datetime datetime Beginning date """ - self._beginning = self.__datetime( beginning ) + self.beginning = self.__datetime( beginning ) def add_ending( self, ending ): """ @@ -449,7 +508,7 @@ class RedFamParser( RedFam ): @param datetime datetime Ending date """ - self._ending 
= self.__datetime( ending ) + self.ending = self.__datetime( ending ) def __datetime( self, timestamp ): """ @@ -473,7 +532,7 @@ class RedFamParser( RedFam ): type( self ).__timestamp_format ) return result - def status( self ): + def check_status( self ): """ Handles detection of correct status There are three possible stati: @@ -485,7 +544,7 @@ class RedFamParser( RedFam ): # No ending, discussion is running: # Sometimes archived discussions also have no detectable ending - if not self._ending and not self._redpagearchive: + if not self.ending and not self._redpagearchive: self.add_status("open") else: self.remove_status("open") @@ -513,7 +572,7 @@ class RedFamParser( RedFam ): return False @classmethod - def parser( cls, text, page, isarchive=False ): + def parser( cls, text, redpage, isarchive=False ): """ Handles parsing of redfam section @@ -536,16 +595,33 @@ class RedFamParser( RedFam ): if not beginning: match = re.search( jogobot.config["redundances"]["reddiscs_onlyinclude_re"], - page.title() ) + redpage.page.title() ) if match: beginning = datetime.strptime( "01. {month} {year}".format( month=match.group(1), year=match.group(2)), "%d. 
%B %Y" ) + articlesList = RedFamParser.heading_parser( heading ) + famhash = RedFamParser.calc_famhash( articlesList ) - # Create the RedFam object - RedFamParser( heading, page, isarchive, beginning, ending ) + # Check for existing objects in DB first in current redpage + redfam = redpage.redfams.get(famhash) + + with RedFamParser.session.no_autoflush: + if not redfam: + # Otherwise in db table + redfam = RedFamParser.session.query(RedFamParser).filter( + RedFamParser.famhash == famhash ).one_or_none() + + if redfam: + # Existing redfams need to be updated + redfam.update( articlesList, str(heading), redpage, isarchive, beginning, ending ) + + else: + # Create the RedFam object + redfam = RedFamParser( articlesList, str(heading).strip(), redpage.page, isarchive, beginning, ending ) + return redfam @classmethod def extract_dates( cls, text, isarchive=False ): @@ -615,16 +691,16 @@ class RedFamWorker( RedFam ): mysql_data[ 'status' ], mysql_data[ 'famhash' ], mysql_data[ 'heading' ] ) - self._mysql.data = mysql_data +# #~ self._mysql.data = mysql_data - # Set up article status - index = 0 - for article in self._articlesList: - raw_status = mysql_data[ "article" + str(index) + "_status" ] - if not raw_status: - raw_status = str() - self._article_parse_status( raw_status, index ) - index += 1 + #~ # Set up article status + #~ index = 0 + #~ for article in self.articlesList: + #~ raw_status = mysql_data[ "article" + str(index) + "_status" ] + #~ if not raw_status: + #~ raw_status = str() + #~ self._article_parse_status( raw_status, index ) + #~ index += 1 # Get related RedPage-Information self.redpageid = mysql_data[ 'pageid' ] diff --git a/lib/redpage.py b/lib/redpage.py index b4361b9..558cd8c 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -30,15 +30,23 @@ import mwparserfromhell as mwparser import jogobot # noqa -from lib.mysqlred import MysqlRedPage -from lib.redfam import RedFamParser +#~ from lib.mysqlred import Column, Integer, String, Text, DateTime, 
ForeignKey, ColumnList, Status +from lib.mysqlred import MysqlRedPage, relationship, MutableSet #MysqlRedFam, Base, composite, +from lib.redfam import RedFam, RedFamParser +from sqlalchemy.orm.collections import attribute_mapped_collection -class RedPage: +class RedPage( MysqlRedPage ): """ Class for handling redundance discussion pages and archives """ + #TODO POLYMORPHISM? of BASEClass + redfams = relationship( + "RedFamParser", order_by=RedFamParser.famhash, + back_populates="redpage", + collection_class=attribute_mapped_collection( "famhash" ) ) + def __init__( self, page=None, pageid=None, archive=False ): """ Generate a new RedPage object based on the given pywikibot page object @@ -49,57 +57,91 @@ class RedPage: @type pageid int """ - self._status = set() - # Safe the pywikibot page object - self.page = page - self.pageid = pageid - self._archive = archive + if page: + self._page = page + pageid = self._page.pageid - self.__handle_db( ) - self.is_page_changed() + super().__init__( + pageid=pageid, + revid=self.page._revid, + pagetitle=self.page.title(), + status=MutableSet() ) #TODO EMPTY MutableSet() necessary? + #~ self._status = set() - self._parsed = None + if archive: + self.status.add("archived") - def __handle_db( self ): - """ - Handles opening of db connection - """ + #~ self._archive = archive - # We need a connection to our mysqldb - if self.page: - self.__mysql = MysqlRedPage( self.page._pageid ) - self.pageid = self.page._pageid - elif self.pageid: - self.__mysql = MysqlRedPage( self.pageid ) - self.page = pywikibot.Page( pywikibot.Site(), - self.__mysql.data['pagetitle'] ) - self.page.exists() - else: - raise ValueError( "Page NOR pagid provided!" 
) + #~ self.pageid = pageid + #~ self.revid = self.page._revid + #~ self.p + #~ self.status = MutableSet() - if not self.__mysql.data: - self.__mysql.add_page( self.page.title(), self.page._revid ) +# self.__handle_db( ) + #~ self.is_page_changed() + + #~ self._parsed = None + + self.session.add(self) + + #~ def __handle_db( self ): + #~ """ + #~ Handles opening of db connection + #~ """ + + #~ # We need a connection to our mysqldb + #~ if self.page: + #~ self.__mysql = MysqlRedPage( self.page._pageid ) + #~ self.pageid = self.page._pageid + #~ elif self.pageid: + #~ self.__mysql = MysqlRedPage( self.pageid ) + #~ self.page = pywikibot.Page( pywikibot.Site(), + #~ self.pagetitle ) + #~ self.page.exists() + #~ else: + #~ raise ValueError( "Page NOR pagid provided!" ) + + #~ if not self.__mysql.data: + #~ self.__mysql.add_page( self.page.title(), self.page._revid ) + + def update( self, page ): + + self._page = page + self.revid = page._revid + self.pagetitle = page.title() + + @property + def page(self): + if not hasattr(self,"_page"): + self._page = pywikibot.Page( pywikibot.Site(), self.pagetitle ) + + return self._page + + @property + def archive(self): + return self.has_status("archived") def is_page_changed( self ): """ Check wether the page was changed since last run """ - - if( self.__mysql.data != { 'pageid': self.page._pageid, - 'revid': self.page._revid, - 'pagetitle': self.page.title(), - 'status': self.__mysql.data[ 'status' ] } ): - self._changed = True - else: - self._changed = False + self._changed = self.changedp() + #~ if( self.__mysql.data != { 'pageid': self.page._pageid, + #~ 'revid': self.page._revid, + #~ 'pagetitle': self.page.title(), + #~ 'status': self.__mysql.data[ 'status' ] } ): + #~ self._changed = True + #~ else: + #~ self._changed = False def is_archive( self ): """ Detects wether current page is an archive of discussions """ - if( self._archive or ( u"/Archiv" in self.page.title() ) or + if( self.archive or ( u"/Archiv" in 
self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ) ): @@ -111,8 +153,7 @@ class RedPage: """ Decides wether current RedPage needs to be parsed or not """ - - if( self._changed or self.__mysql.data[ 'status' ] == "" ): + if( self.changedp() or not self.has_status("parsed") ): return True else: return False @@ -140,31 +181,34 @@ class RedPage: yield fam else: + self.status.add("parsed") self._parsed = True - self.__update_db() + #~ self.__update_db() - def __update_db( self ): - """ - Updates the page meta data in mysql db - """ - if( self._parsed or not self._changed ): - self.add_status( "open" ) + #~ def __update_db( self ): + #~ """ + #~ Updates the page meta data in mysql db + #~ """ + #~ if( self._parsed or not self._changed ): + #~ self.add_status( "open" ) - if( self.is_archive() ): - self.remove_status( "open" ) - self.add_status( "archived" ) - else: - self._status = set() + #~ if( self.is_archive() ): + #~ self.remove_status( "open" ) + #~ self.add_status( "archived" ) + #~ else: + #~ pass + #~ self._status = set() - self.__mysql.update_page( self.page._revid, self.page.title(), - self._raw_status() ) + #~ self.__mysql.update_page( self.page._revid, self.page.title(), + #~ self._raw_status() ) @classmethod def flush_db_cache( cls ): """ Calls flush method of Mysql Interface class """ - MysqlRedPage.flush() + cls.session.commit() + #~ MysqlRedPage.flush() def add_status(self, status): """ @@ -173,7 +217,7 @@ class RedPage: @param status Statusstring to add @type status str """ - self._status.add(status) + self.status.add(status) def remove_status(self, status, weak=True): """ @@ -186,9 +230,9 @@ class RedPage: @type bool """ if weak: - self._status.discard(status) + self.status.discard(status) else: - self._status.remove(status) + self.status.remove(status) def has_status(self, status): """ @@ -198,25 +242,25 @@ class RedPage: @type status str @returns True if status is present else False """ - if status in 
self._status: + if status in self.status: return True else: return False - def _parse_status(self, raw_status ): - """ - Sets status based on comma separated list + #~ def _parse_status(self, raw_status ): + #~ """ + #~ Sets status based on comma separated list - @param raw_status Commaseparated string of stati (from DB) - @type raw_status str - """ - self._status = set( raw_status.strip().split(",")) + #~ @param raw_status Commaseparated string of stati (from DB) + #~ @type raw_status str + #~ """ + #~ self._status = set( raw_status.strip().split(",")) - def _raw_status( self ): - """ - Returns status as commaseparated string (to save in DB) + #~ def _raw_status( self ): + #~ """ + #~ Returns status as commaseparated string (to save in DB) - @returns Raw status string - @rtype str - """ - return ",".join( self._status ) + #~ @returns Raw status string + #~ @rtype str + #~ """ + #~ return ",".join( self._status ) From 467f829af2f8a24222a5da3f2823ad53b2de3166 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 7 Mar 2017 10:54:10 +0100 Subject: [PATCH 17/26] Some cleanups Remove old commented out code from manual mysql solution --- bots/reddiscparser.py | 2 +- lib/mysqlred.py | 336 +----------------------------------------- lib/redfam.py | 180 +--------------------- lib/redpage.py | 97 +----------- 4 files changed, 12 insertions(+), 603 deletions(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index c789d86..336cd9f 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -153,7 +153,7 @@ class DiscussionParserBot( else: redpage = RedPage( self.current_page ) - #~ # Check whether parsing is needed + # Check whether parsing is needed if redpage.is_parsing_needed(): # Count families for failure analysis fam_counter = 0 diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 8257822..3710219 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -67,6 +67,7 @@ session = Session() family = "dewpbeta" + class Mysql(object): session = session 
@declared_attr @@ -122,6 +123,7 @@ class ColumnList( list, MutableComposite ): """ return self + class Status( types.TypeDecorator ): impl = types.String @@ -157,7 +159,6 @@ class Status( types.TypeDecorator ): return Status(self.impl.length) - class MysqlRedFam( Mysql, Base ): famhash = Column( String(64), primary_key=True, unique=True ) @@ -243,6 +244,7 @@ class MysqlRedFam( Mysql, Base ): def articlesStatus(self, articlesStatus): self.__articlesStatus = ColumnList(articlesStatus) + class MysqlRedPage( Mysql, Base ): pageid = Column( Integer, unique=True, primary_key=True ) revid = Column( Integer, unique=True, nullable=False ) @@ -254,339 +256,9 @@ class MysqlRedPage( Mysql, Base ): collection_class=attribute_mapped_collection("famhash")) + Base.metadata.create_all(engine) -#~ class MysqlRed: - #~ """ - #~ Basic interface class, containing opening of connection - - #~ Specific querys should be defined in descendant classes per data type - #~ """ - - #~ # Save mysqldb-connection as class attribute to use only one - #~ # in descendant classes - #~ connection = False - #~ db_hostname = config.db_hostname - #~ db_port = config.db_port - #~ db_username = config.db_username - #~ db_password = config.db_password - #~ db_name = config.db_username + jogobot.config['db_suffix'] - #~ db_table_prefix = False - - #~ # Class variables for storing cached querys - #~ _cached_update_data = [] - #~ _update_query = '' - #~ _cached_insert_data = {} - #~ _insert_query = '' - - #~ def __init__( self ): - #~ """ - #~ Opens a connection to MySQL-DB - - #~ @returns mysql-stream MySQL Connection - #~ """ - - #~ # Needs to be generated after Parsing of Args (not at import time) - #~ if not type(self).db_table_prefix: - #~ type(self).db_table_prefix = \ - #~ pywikibot.Site().family.dbName(pywikibot.Site().code) - - #~ # Now we can setup prepared queries - #~ self._prepare_queries() - - #~ # Connect to mysqldb only once - #~ if not type( self ).connection: - - #~ type( self ).connection = 
mysqldb.connect( - #~ host=type( self ).db_hostname, - #~ port=type( self ).db_port, - #~ user=type( self ).db_username, - #~ passwd=type( self ).db_password, - #~ db=type( self ).db_name ) - - #~ # Register callback for warnig if exit with cached db write querys - #~ atexit.register( type(self).warn_if_not_flushed ) - - #~ def __del__( self ): - #~ """ - #~ Before deleting class, close connection to MySQL-DB - #~ """ - - #~ type( self ).connection.close() - - #~ def _prepare_queries( self ): - #~ """ - #~ Used to replace placeholders in prepared queries - #~ """ - #~ type(self)._update_query = type(self)._update_query.format( - #~ prefix=type(self).db_table_prefix) - #~ type(self)._insert_query = type(self)._insert_query.format( - #~ prefix=type(self).db_table_prefix) - - #~ @classmethod - #~ def flush( cls ): - #~ """ - #~ Run cached querys - #~ """ - #~ if not cls.connection: - #~ raise MysqlRedConnectionError( "No connection exists!" ) - - #~ cursor = cls.connection.cursor() - - #~ # Execute insert query - #~ if cls._cached_insert_data: - #~ # Since cls._cached_insert_data is a dict, we need to have a custom - #~ # Generator to iterate over it - #~ cursor.executemany( cls._insert_query, - #~ ( cls._cached_insert_data[ key ] - #~ for key in cls._cached_insert_data ) ) - #~ # Reset after writing - #~ cls._cached_insert_data = {} - - #~ # Execute update query - #~ # Use executemany since update could not be reduced to one query - #~ if cls._cached_update_data: - #~ cursor.executemany( cls._update_query, cls._cached_update_data ) - #~ # Reset after writing - #~ cls._cached_update_data = [] - - #~ # Commit db changes - #~ if cls._cached_insert_data or cls._cached_update_data: - #~ cls.connection.commit() - - #~ @classmethod - #~ def warn_if_not_flushed(cls): - #~ """ - #~ Outputs a warning if there are db write querys cached and not flushed - #~ before exiting programm! 
- #~ """ - #~ if cls._cached_update_data or cls._cached_insert_data: - #~ jogobot.output( "Cached Database write querys not flushed!!! " + - #~ "Data loss is possible!", "WARNING" ) - - -#~ class MysqlRedPage( MysqlRed ): - #~ """ - #~ MySQL-db Interface for handling querys for RedPages - #~ """ - - #~ # Class variables for storing cached querys - #~ # '{prefix}' will be replaced during super().__init__() - #~ _cached_update_data = [] - #~ _update_query = 'UPDATE `{prefix}_redpages` \ -#~ SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;' - - #~ _cached_insert_data = {} - #~ _insert_query = 'INSERT INTO `{prefix}_redpages` \ -#~ ( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? );' - - #~ def __init__( self, pageid ): - #~ """ - #~ Creates a new instance, runs __init__ of parent class - #~ """ - - #~ super().__init__( ) - - #~ self.__pageid = int( pageid ) - - #~ self.data = self.get_page() - - #~ def __del__( self ): - #~ """ - #~ Needed to prevent descendant classes of MYSQL_RED from deleting - #~ connection to db - #~ """ - #~ pass - - #~ def get_page( self ): - #~ """ - #~ Retrieves a red page row from MySQL-Database for given page_id - - #~ @param int pageid MediaWiki page_id for page to retrieve - - #~ @returns tuple Tuple with data for given page_id - #~ bool FALSE if none found - #~ """ - - #~ cursor = type( self ).connection.cursor(mysqldb.DictCursor) - - #~ cursor.execute( - #~ 'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format( - #~ prefix=type(self).db_table_prefix), ( self.__pageid, ) ) - - #~ res = cursor.fetchone() - - #~ if res: - #~ return res - #~ else: - #~ return False - - #~ def add_page( self, pagetitle, revid, status=0 ): - #~ """ - #~ Inserts a red page row in MySQL-Database for given pageid - - #~ @param int revid MediaWiki current revid - #~ @param str pagetitle MediaWiki new pagetitle - #~ @param int status Page parsing status - #~ """ - - #~ insert_data = { self.__pageid: ( self.__pageid, pagetitle, - 
#~ revid, status ) } - - #~ type( self )._cached_insert_data.update( insert_data ) - - #~ # Manualy construct self.data dict - #~ self.data = { 'pageid': self.__pageid, 'revid': revid, - #~ 'pagetitle': pagetitle, 'status': status } - - #~ def update_page( self, revid=None, pagetitle=None, status=0 ): - #~ """ - #~ Updates the red page row in MySQL-Database for given page_id - - #~ @param int revid MediaWiki current rev_id - #~ @param str pagetitle MediaWiki new page_title - #~ @param int status Page parsing status - #~ """ - - #~ if not pagetitle: - #~ pagetitle = self.data[ 'pagetitle' ] - #~ if not revid: - #~ revid = self.data[ 'revid' ] - - #~ type( self )._cached_update_data.append( ( pagetitle, revid, - #~ status, self.__pageid ) ) - - -#~ class MysqlRedFam( MysqlRed ): - #~ """ - #~ MySQL-db Interface for handling querys for RedFams - #~ """ - - #~ # Class variables for storing cached querys - #~ _cached_update_data = [] - #~ _update_query = 'UPDATE `{prefix}_redfams` \ -#~ SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ -#~ `status`= ? WHERE `famhash` = ?;' - - #~ _cached_insert_data = {} - #~ _insert_query = 'INSERT INTO `{prefix}_redfams` \ -#~ ( famhash, redpageid, beginning, ending, status, heading, \ -#~ article0, article1, article2, article3, article4, article5, article6, \ -#~ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' - - #~ def __init__( self, famhash=None ): - #~ """ - #~ Creates a new instance, runs __init__ of parent class - #~ """ - - #~ self.__famhash = famhash - - #~ super().__init__( ) - - #~ def __del__( self ): - #~ """ - #~ Needed to prevent descendant classes of MYSQL_RED from deleting - #~ connection to db - #~ """ - #~ pass - - #~ def get_fam( self, famhash ): - #~ """ - #~ Retrieves a red family row from MySQL-Database for given fam_hash - - #~ @returns dict Dictionairy with data for given fam hash - #~ False if none found - #~ """ - #~ self.__famhash = famhash - - #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - - #~ cursor.execute( - #~ 'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'. - #~ format( prefix=type(self).db_table_prefix), ( famhash, ) ) - - #~ self.data = cursor.fetchone() - - #~ def add_fam( self, articlesList, heading, redpageid, - #~ beginning, ending=None, status=0 ): - - #~ data = [ self.__famhash, redpageid, beginning, ending, - #~ status, heading ] - - #~ for article in articlesList: - #~ data.append( str( article ) ) - - #~ while len( data ) < 14: - #~ data.append( None ) - - #~ data = tuple( data ) - - #~ insert_data = { self.__famhash: data } - #~ type( self )._cached_insert_data.update( insert_data ) - - #~ # Manualy construct self.data dict - #~ data_keys = ( 'famhash', 'redpageid', 'beginning', 'ending', - #~ 'status', 'heading', 'article0', 'article1', 'article2', - #~ 'article3', 'article4', 'article5', 'article6', - #~ 'article7' ) - #~ self.data = dict( zip( data_keys, data ) ) - - #~ def update_fam( self, redpageid, heading, beginning, ending, status ): - #~ """ - #~ Updates the red fam row in MySQL-Database for given fam_hash - - #~ @param int redpageid MediaWiki page_id - #~ @param datetime beginning Timestamp of beginning - #~ qparam datetime ending Timestamp of ending of - #~ @param int status red_fam status - #~ """ - - #~ type( self )._cached_update_data.append( ( redpageid, heading, - #~ 
beginning, ending, status, - #~ self.__famhash ) ) - - #~ def get_by_status( self, status ): - #~ """ - #~ Generator witch fetches redFams with given status from DB - #~ """ - - #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - - #~ cursor.execute( - #~ 'SELECT * FROM `{prefix}_redfams` WHERE `status` = LIKE %?%;'. - #~ format( prefix=type( self ).db_table_prefix), ( status, ) ) - - #~ while True: - #~ res = cursor.fetchmany( 1000 ) - #~ if not res: - #~ break - #~ for row in res: - #~ yield row - - #~ def get_by_status_and_ending( self, status, ending ): - #~ """ - #~ Generator witch fetches redFams with given status from DB - #~ """ - - #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - - #~ cursor.execute( ( - #~ 'SELECT * ' + - #~ 'FROM `{prefix}_redfams` `F` ' + - #~ 'INNER JOIN `{prefix}_redpages` `P` ' + - #~ 'ON `F`.`status` = ? ' + - #~ 'AND `F`.`ending` >= ? ' + - #~ 'AND `F`.`redpageid` = `P`.`pageid`;').format( - #~ prefix=type( self ).db_table_prefix), - #~ ( status, ending ) ) - - #~ while True: - #~ res = cursor.fetchmany( 1000 ) - #~ if not res: - #~ break - #~ for row in res: - #~ yield row - class MysqlRedError(Exception): """ diff --git a/lib/redfam.py b/lib/redfam.py index 526f902..d4f00be 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -35,8 +35,7 @@ import pywikibot # noqa from pywikibot.tools import deprecated # noqa import jogobot -#~ from lib.mysqlred import Column, Integer, String, Text, DateTime, ForeignKey, ColumnList, Status -from lib.mysqlred import MysqlRedFam, MutableSet, ColumnList #, Mysql, Base, relationship, composite, +from lib.mysqlred import MysqlRedFam class RedFam( MysqlRedFam ): @@ -45,7 +44,7 @@ class RedFam( MysqlRedFam ): """ def __init__( self, articlesList, beginning, ending=None, redpageid=None, - status=MutableSet(), famhash=None, heading=None ): + status=None, famhash=None, heading=None ): """ Generates a new RedFam object @@ -61,34 +60,9 @@ class RedFam( MysqlRedFam ): # Having 
pywikibot.Site() is a good idea most of the time self.site = pywikibot.Site() - # Database interface - #self._mysql = MysqlRedFam( famhash ) - - # Initial attribute values - #~ self.articlesList = articlesList - #~ self.beginning = beginning - #~ self.ending = ending - #~ self.redpageid = redpageid -#~ # self._status = set() -#~ # self._status = self._parse_status(status) - #~ self.famhash = famhash - #~ self.heading = heading - #self.status = status - - #articlesStatus = ColumnList([ MutableSet() for x in range(0,8) ]) - - #~ # Calculates the sha1 hash over self._articlesList to - #~ # rediscover known redundance families - #~ self.calc_famhash() - - #~ if not status: - #~ status = MutableSet() - super().__init__( articlesList=articlesList, beginning=beginning, ending=ending, redpageid=redpageid, famhash=famhash, heading=heading, status=status, articlesStatus=None ) - #super().__init__() - def __repr__( self ): """ Returns repression str of RedFam object @@ -137,35 +111,12 @@ class RedFam( MysqlRedFam ): else: self.famhash = type(self).calc_famhash(self.articlesList) - #~ def changed( self ): - #~ """ - #~ Checks wether anything has changed and maybe triggers db update - #~ """ - - #~ # On archived redfams do not delete possibly existing ending - #~ if( not self.ending and "archived" in self._status and - #~ self._mysql.data[ 'ending' ] ): - - #~ self._ending = self._mysql.data[ 'ending' ] - - #~ # Since status change means something has changed, update database - #~ if( self._raw_status != self._mysql.data[ 'status' ] or - #~ self._beginning != self._mysql.data[ 'beginning' ] or - #~ self._ending != self._mysql.data[ 'ending' ] or - #~ self._red_page_id != self._mysql.data[ 'redpageid' ] or - #~ self._heading != self._mysql.data[ 'heading' ]): - - #~ self._mysql.update_fam( self._redpageid, self._heading, - #~ self._beginning, self._ending, - #~ self._raw_status() ) - @classmethod def flush_db_cache( cls ): """ Calls flush method of Mysql Interface class """ 
cls.session.commit() - #~ MysqlRedFam.flush() def add_status(self, status): """ @@ -204,24 +155,6 @@ class RedFam( MysqlRedFam ): else: return False - #~ def _parse_status(self, raw_status ): - #~ """ - #~ Sets status based on comma separated list - - #~ @param raw_status Commaseparated string of stati (from DB) - #~ @type raw_status str - #~ """ - #~ self._status = set( raw_status.strip().split(",")) - - #~ def _raw_status( self ): - #~ """ - #~ Returns status as commaseparated string (to save in DB) - - #~ @returns Raw status string - #~ @rtype str - #~ """ - #~ return ",".join( self._status ) - def article_add_status(self, status, index=None, title=None ): """ Adds a status specified by status, to article (identified by title @@ -292,46 +225,6 @@ class RedFam( MysqlRedFam ): else: raise IndexError( "No index given or wrong format!") - def _article_parse_status(self, raw_status, index=None, title=None ): - """ - Sets status based on comma separated list to articles (identified by - title or index in articlesList) status set - - @param status Statusstring to set - @type status str - @param index Add to article with index in articlesList - @type index int - @param title Add to article with title in articlesList - @type title str - """ - if title and not index: - index = self._articlesList.index( title ) - - if isinstance( index, int ) and index < len(self._articlesList): - self._article_status[index] = set( raw_status.strip().split(",")) - else: - raise IndexError( "No index given or wrong format!") - - def _article_raw_status( self, index=None, title=None ): - """ - Returns status as commaseparated string (to save in DB) of article - (identified by title or index in articlesList) status set - - @param index Get from article with index in articlesList - @type index int - @param title Get from article with title in articlesList - @type title str - @returns Raw status string - @rtype str - """ - if title and not index: - index = self._articlesList.index( title ) - - 
if isinstance( index, int ) and index < len(self._articlesList): - return ",".join( self._article_status[index] ) - else: - raise IndexError( "No index given or wrong format!") - class RedFamParser( RedFam ): """ @@ -369,54 +262,14 @@ class RedFamParser( RedFam ): str strptime parseable string """ - # Parse the provided heading of redundance section - # to set self._articlesList - #~ self.heading = str(heading) - #~ self.articlesList = articlesList - - #~ # Catch sections with more then 8 articles, print error - #~ if len( self.articlesList ) > 8: - #~ # For repression in output we need to know the fam hash - #~ self.calc_famhash() - - #~ jogobot.output( - #~ ( "\03{{lightred}}" + - #~ "Maximum number of articles in red_fam exceeded, " + - #~ "maximum number is 8, {number:d} were given \n {repress}" - #~ ).format( datetime=datetime.now().strftime( - #~ "%Y-%m-%d %H:%M:%S" ), number=len( self._articlesList ), - #~ repress=repr( self ) ), - #~ "WARNING" ) - - #~ # Only save the first 8 articles -#~ # self.articlesList = self.articlesList[:8] - # Calculates the sha1 hash over self._articlesList to # rediscover known redundance families famhash = type(self).calc_famhash(articlesList) - #~ obj = self.session.query(RedFamParser).filter(RedFamParser.famhash == self.famhash ).one_or_none() - #~ if obj: - #~ self = obj - - # Set object attributes: - #~ self.redpageid = redpage._pageid self._redpagearchive = redpagearchive -# self.famhash = None - - # Method self.add_beginning sets self._beginning directly - #~ self.add_beginning( beginning ) - - #~ # Method self.add_ending sets self._ending directly - #~ if( ending ): - #~ self.add_ending( ending ) - #~ else: - #~ # If no ending was provided set to None - #~ self.ending = None - - #~ self.status = MutableSet() + # Parse Timestamps beginning = self.__datetime(beginning) if ending: ending = self.__datetime(ending) @@ -429,31 +282,8 @@ class RedFamParser( RedFam ): self.check_status() self.session.add(self) - # Open database 
connection, ask for data if existing, - # otherwise create entry -# self.__handle_db() - - # Triggers db update if anything changed -# self.changed() - - - - #~ def __handle_db( self ): - #~ """ - #~ Handles opening of db connection - #~ """ - - #~ # We need a connection to our mysqldb - #~ self._mysql = MysqlRedFam( ) - #~ self._mysql.get_fam( self._famhash ) - - #~ if not self._mysql.data: - #~ self._mysql.add_fam( self._articlesList, self._heading, - #~ self._redpageid, self._beginning, - #~ self._ending ) - def update( self, articlesList, heading, redpage, redpagearchive, beginning, ending=None): @@ -490,8 +320,6 @@ class RedFamParser( RedFam ): return [ str( link.title ) for link in heading.ifilter_wikilinks() ] - - def add_beginning( self, beginning ): """ Adds the beginning date of a redundance diskussion to the object @@ -780,8 +608,6 @@ class RedFamWorker( RedFam ): self._article_raw_status( index=index ) index += 1 - print( repr(self) ) - def get_disc_link( self ): """ Constructs and returns the link to Redundancy discussion diff --git a/lib/redpage.py b/lib/redpage.py index 558cd8c..fa1c695 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -30,8 +30,7 @@ import mwparserfromhell as mwparser import jogobot # noqa -#~ from lib.mysqlred import Column, Integer, String, Text, DateTime, ForeignKey, ColumnList, Status -from lib.mysqlred import MysqlRedPage, relationship, MutableSet #MysqlRedFam, Base, composite, +from lib.mysqlred import MysqlRedPage, relationship from lib.redfam import RedFam, RedFamParser from sqlalchemy.orm.collections import attribute_mapped_collection @@ -60,7 +59,6 @@ class RedPage( MysqlRedPage ): # Safe the pywikibot page object if page: self._page = page - pageid = self._page.pageid super().__init__( pageid=pageid, @@ -69,48 +67,15 @@ class RedPage( MysqlRedPage ): status=MutableSet() ) #TODO EMPTY MutableSet() necessary? 
#~ self._status = set() - if archive: - self.status.add("archived") - - #~ self._archive = archive - - #~ self.pageid = pageid - #~ self.revid = self.page._revid - #~ self.p - #~ self.status = MutableSet() - -# self.__handle_db( ) - #~ self.is_page_changed() - - #~ self._parsed = None + self.is_archive() self.session.add(self) - #~ def __handle_db( self ): - #~ """ - #~ Handles opening of db connection - #~ """ - - #~ # We need a connection to our mysqldb - #~ if self.page: - #~ self.__mysql = MysqlRedPage( self.page._pageid ) - #~ self.pageid = self.page._pageid - #~ elif self.pageid: - #~ self.__mysql = MysqlRedPage( self.pageid ) - #~ self.page = pywikibot.Page( pywikibot.Site(), - #~ self.pagetitle ) - #~ self.page.exists() - #~ else: - #~ raise ValueError( "Page NOR pagid provided!" ) - - #~ if not self.__mysql.data: - #~ self.__mysql.add_page( self.page.title(), self.page._revid ) - def update( self, page ): - self._page = page self.revid = page._revid self.pagetitle = page.title() + self.is_archive() @property def page(self): @@ -123,24 +88,10 @@ class RedPage( MysqlRedPage ): def archive(self): return self.has_status("archived") - def is_page_changed( self ): - """ - Check wether the page was changed since last run - """ - self._changed = self.changedp() - #~ if( self.__mysql.data != { 'pageid': self.page._pageid, - #~ 'revid': self.page._revid, - #~ 'pagetitle': self.page.title(), - #~ 'status': self.__mysql.data[ 'status' ] } ): - #~ self._changed = True - #~ else: - #~ self._changed = False - def is_archive( self ): """ Detects wether current page is an archive of discussions """ - if( self.archive or ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ) ): @@ -153,10 +104,7 @@ class RedPage( MysqlRedPage ): """ Decides wether current RedPage needs to be parsed or not """ - if( self.changedp() or not self.has_status("parsed") ): - return True - else: - return False + return self.changedp() or not 
self.has_status("parsed") def parse( self ): """ @@ -183,24 +131,6 @@ class RedPage( MysqlRedPage ): else: self.status.add("parsed") self._parsed = True - #~ self.__update_db() - - #~ def __update_db( self ): - #~ """ - #~ Updates the page meta data in mysql db - #~ """ - #~ if( self._parsed or not self._changed ): - #~ self.add_status( "open" ) - - #~ if( self.is_archive() ): - #~ self.remove_status( "open" ) - #~ self.add_status( "archived" ) - #~ else: - #~ pass - #~ self._status = set() - - #~ self.__mysql.update_page( self.page._revid, self.page.title(), - #~ self._raw_status() ) @classmethod def flush_db_cache( cls ): @@ -208,7 +138,6 @@ class RedPage( MysqlRedPage ): Calls flush method of Mysql Interface class """ cls.session.commit() - #~ MysqlRedPage.flush() def add_status(self, status): """ @@ -246,21 +175,3 @@ class RedPage( MysqlRedPage ): return True else: return False - - #~ def _parse_status(self, raw_status ): - #~ """ - #~ Sets status based on comma separated list - - #~ @param raw_status Commaseparated string of stati (from DB) - #~ @type raw_status str - #~ """ - #~ self._status = set( raw_status.strip().split(",")) - - #~ def _raw_status( self ): - #~ """ - #~ Returns status as commaseparated string (to save in DB) - - #~ @returns Raw status string - #~ @rtype str - #~ """ - #~ return ",".join( self._status ) From bf8e47f916ee632e5c4f56a6d1b1e2f69a84bb35 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 7 Mar 2017 10:55:44 +0100 Subject: [PATCH 18/26] Improve new status API Make sure state changes are only detected as such by sqlalchemy if they are real changes --- lib/mysqlred.py | 63 +++++++++++++++++++++++++++++++++++++++++++++++-- lib/redpage.py | 14 +++++------ 2 files changed, 68 insertions(+), 9 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 3710219..46fa811 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -85,7 +85,54 @@ class Mysql(object): suffix = "s" return cls._tableprefix + name + cls._tablesuffix 
def changedp(self): - return self in self.session.dirty + return self.session.is_modified(self) + + +class MutableSet(MutableSet): + """ + Extended version of the mutable set for our states + """ + + def has(self, item): + """ + Check if item is in set + + @param item Item to check + """ + return item in self + + def add(self, item): + """ + Extended add method, which only result in changed object if there is + really an item added. + + @param item Item to add + """ + if not item in self: + super().add(item) + + def discard(self, item): + """ + Wrapper for extended remove below + + @param item Item to discard + """ + self.remove(item) + + def remove(self, item, weak=True ): + """ + Extended remove method, which only results in changed object if there + is really an item removed. Additionally, combine remove and discard! + + @param item Item to remove/discard + @param weak Set to false to use remove, else discard behavior + """ + if item in self: + if weak: + super().discard(item) + else: + super().remove(item) + class ColumnList( list, MutableComposite ): """ @@ -249,13 +296,25 @@ class MysqlRedPage( Mysql, Base ): pageid = Column( Integer, unique=True, primary_key=True ) revid = Column( Integer, unique=True, nullable=False ) pagetitle = Column( String(255), nullable=False ) - status = Column( MutableSet.as_mutable(Status(255)), nullable=True ) + __status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) redfams = relationship( "MysqlRedFam", order_by=MysqlRedFam.famhash, back_populates="redpage", collection_class=attribute_mapped_collection("famhash")) + @property + def status( self ): + """ + Current fam status + """ + return self.__status + @status.setter + def status( self, status ): + if status: + self.__status = MutableSet( status ) + else: + self.__status = MutableSet() Base.metadata.create_all(engine) diff --git a/lib/redpage.py b/lib/redpage.py index fa1c695..cba4268 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -61,11 +61,11 @@ 
class RedPage( MysqlRedPage ): self._page = page super().__init__( - pageid=pageid, - revid=self.page._revid, - pagetitle=self.page.title(), - status=MutableSet() ) #TODO EMPTY MutableSet() necessary? - #~ self._status = set() + pageid=self._page.pageid, + revid=self._page._revid, + pagetitle=self._page.title(), + status=None + ) self.is_archive() @@ -95,9 +95,9 @@ class RedPage( MysqlRedPage ): if( self.archive or ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ) ): - - return True + self.status.add("archive") else: + self.status.discard("archive") return False def is_parsing_needed( self ): From 89b50e3312a59827fdb454335324c8735cac95c2 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 7 Mar 2017 12:06:11 +0100 Subject: [PATCH 19/26] Remove old status API Now we use the methods of status object directly --- bots/reddiscparser.py | 3 +- lib/redfam.py | 95 +++++++++++-------------------------------- lib/redpage.py | 45 ++------------------ 3 files changed, 29 insertions(+), 114 deletions(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 336cd9f..2e203ba 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -161,8 +161,7 @@ class DiscussionParserBot( # Iterate over returned generator with redfam sections for fam in redpage.parse(): # Run RedFamParser on section text - RedFamParser.parser( fam, redpage, - redpage.is_archive() ) + RedFamParser.parser( fam, redpage, redpage.archive ) fam_counter += 1 diff --git a/lib/redfam.py b/lib/redfam.py index d4f00be..763bfcc 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -84,6 +84,12 @@ class RedFam( MysqlRedFam ): @classmethod def calc_famhash(cls, articlesList ): + """ + Calculates the SHA-1 hash for the articlesList of redundance family. + Since we don't need security SHA-1 is just fine. 
+ + @returns str String with the hexadecimal hash digest + """ h = hashlib.sha1() # Since articlesList attr of RedFam will have always 8 Members we @@ -95,22 +101,6 @@ class RedFam( MysqlRedFam ): return h.hexdigest() - def c_famhash( self ): - """ - Calculates the SHA-1 hash for the articlesList of redundance family. - Since we don't need security SHA-1 is just fine. - - @returns str String with the hexadecimal hash digest - """ - print( type( self ) ) - - if self.famhash and type(self).calc_famhash(self.articlesList) != self.famhash: - raise RedFamHashError( self.famhash, h.hexdigest() ) - elif self.famhash: - return - else: - self.famhash = type(self).calc_famhash(self.articlesList) - @classmethod def flush_db_cache( cls ): """ @@ -118,43 +108,6 @@ class RedFam( MysqlRedFam ): """ cls.session.commit() - def add_status(self, status): - """ - Adds a status specified by status, to status set - - @param status Statusstring to add - @type status str - """ - self.status.add(status) - - def remove_status(self, status, weak=True): - """ - Removes a status, specified by status from set. If weak is set to - False it will throw a KeyError when trying to remove a status not set. 
- - @param status Statusstring to add - @type status str - @param weak Change behavior on missing status - @type bool - """ - if weak: - self.status.discard(status) - else: - self.status.remove(status) - - def has_status(self, status): - """ - Returns True, if redfam has given status - - @param status Statusstring to check - @type status str - @returns True if status is present else False - """ - if status in self.status: - return True - else: - return False - def article_add_status(self, status, index=None, title=None ): """ Adds a status specified by status, to article (identified by title @@ -267,7 +220,7 @@ class RedFamParser( RedFam ): famhash = type(self).calc_famhash(articlesList) # Set object attributes: - self._redpagearchive = redpagearchive + self.redpage = redpage # Parse Timestamps beginning = self.__datetime(beginning) @@ -275,7 +228,7 @@ class RedFamParser( RedFam ): ending = self.__datetime(ending) - super().__init__( articlesList, beginning, ending=ending, redpageid=redpage._pageid, + super().__init__( articlesList, beginning, ending=ending, redpageid=redpage.page._pageid, famhash=famhash, heading=heading ) # Check status changes @@ -294,7 +247,7 @@ class RedFamParser( RedFam ): self.add_beginning( beginning ) - if( ending ): + if ending: self.add_ending( ending ) self._redpagearchive = redpagearchive @@ -372,16 +325,16 @@ class RedFamParser( RedFam ): # No ending, discussion is running: # Sometimes archived discussions also have no detectable ending - if not self.ending and not self._redpagearchive: - self.add_status("open") + if not self.ending and not self.redpage.archive: + self.status.add("open") else: - self.remove_status("open") - if not self._redpagearchive: - self.add_status("done") + self.status.remove("open") + if not self.redpage.archive: + self.status.add("done") else: - self.remove_status("done") - self.remove_status("open") - self.add_status("archived") + self.status.remove("done") + self.status.remove("open") + 
self.status.add("archived") @classmethod def is_section_redfam_cb( cls, heading ): @@ -413,7 +366,7 @@ class RedFamParser( RedFam ): text = mwparser.parse( text ) # Extract heading text - heading = next( text.ifilter_headings() ).title + heading = next( text.ifilter_headings() ).title.strip() # Extract beginnig and maybe ending (beginning, ending) = RedFamParser.extract_dates( text, isarchive ) @@ -448,7 +401,7 @@ class RedFamParser( RedFam ): else: # Create the RedFam object - redfam = RedFamParser( articlesList, str(heading).strip(), redpage.page, isarchive, beginning, ending ) + redfam = RedFamParser( articlesList, str(heading), redpage, isarchive, beginning, ending ) return redfam @classmethod @@ -593,13 +546,13 @@ class RedFamWorker( RedFam ): """ for article in self._articlesList: if self.article_has_status( "note_rej", title=article ): - self.add_status( "note_rej" ) + self.status.add( "note_rej" ) if self.article_has_status( "sav_err", title=article ): - self.add_status( "sav_err" ) + self.status.add( "sav_err" ) - if not self.has_status( "sav_err" ) and \ - not self.has_status( "note_rej" ): - self.add_status( "marked" ) + if not self.status.has( "sav_err" ) and \ + not self.status.has( "note_rej" ): + self.status.add( "marked" ) self._mysql.data[ 'status' ] = self._raw_status() index = 0 diff --git a/lib/redpage.py b/lib/redpage.py index cba4268..f9f0aa8 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -86,25 +86,25 @@ class RedPage( MysqlRedPage ): @property def archive(self): - return self.has_status("archived") + self.is_archive() + return self.status.has("archive") def is_archive( self ): """ Detects wether current page is an archive of discussions """ - if( self.archive or ( u"/Archiv" in self.page.title() ) or + if( ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ) ): self.status.add("archive") else: self.status.discard("archive") - return False def is_parsing_needed( self ): """ 
Decides wether current RedPage needs to be parsed or not """ - return self.changedp() or not self.has_status("parsed") + return self.changedp() or not self.status.has("parsed") def parse( self ): """ @@ -138,40 +138,3 @@ class RedPage( MysqlRedPage ): Calls flush method of Mysql Interface class """ cls.session.commit() - - def add_status(self, status): - """ - Adds a status specified by status, to status set - - @param status Statusstring to add - @type status str - """ - self.status.add(status) - - def remove_status(self, status, weak=True): - """ - Removes a status, specified by status from set. If weak is set to - False it will throw a KeyError when trying to remove a status not set. - - @param status Statusstring to add - @type status str - @param weak Change behavior on missing status - @type bool - """ - if weak: - self.status.discard(status) - else: - self.status.remove(status) - - def has_status(self, status): - """ - Returns True, if redfam has given status - - @param status Statusstring to check - @type status str - @returns True if status is present else False - """ - if status in self.status: - return True - else: - return False From 43e31c108a408af39434572129208b63e4c178c9 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 7 Mar 2017 14:51:55 +0100 Subject: [PATCH 20/26] Working RedFamWorker query Modify RedfamWorker class to work with new DB API --- lib/mysqlred.py | 10 +++--- lib/redfam.py | 90 ++++++++++++++++++++++++++----------------------- 2 files changed, 53 insertions(+), 47 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 46fa811..1f92026 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -227,7 +227,7 @@ class MysqlRedFam( Mysql, Base ): Integer, ForeignKey( "dewpbeta_redpages.pageid" ), nullable=False ) beginning = Column( DateTime, nullable=False ) ending = Column( DateTime, nullable=True ) - __status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) + _status = Column( 'status', 
MutableSet.as_mutable(Status(255)), nullable=True ) __article0_status = Column( 'article0_status', MutableSet.as_mutable(Status(64)), nullable=True ) @@ -250,7 +250,7 @@ class MysqlRedFam( Mysql, Base ): __article3_status, __article4_status, __article5_status, __article6_status, __article7_status ) - redpage = relationship( "RedPage", back_populates="redfams" ) + redpage = relationship( "MysqlRedPage", back_populates="redfams" ) @property def articlesList(self): @@ -271,14 +271,14 @@ class MysqlRedFam( Mysql, Base ): """ Current fam status """ - return self.__status + return self._status @status.setter def status( self, status ): if status: - self.__status = MutableSet( status ) + self._status = MutableSet( status ) else: - self.__status = MutableSet() + self._status = MutableSet() @property def articlesStatus(self): diff --git a/lib/redfam.py b/lib/redfam.py index 763bfcc..69b68c7 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -121,10 +121,10 @@ class RedFam( MysqlRedFam ): @type title str """ if title and not index: - index = self._articlesList.index( title ) + index = self.articlesList.index( title ) - if isinstance( index, int ) and index < len(self._articlesList): - self._article_status[index].add(status) + if isinstance( index, int ) and index < len(self.articlesList): + self.articlesStatus[index].add(status) else: raise IndexError( "No index given or wrong format!") @@ -145,13 +145,13 @@ class RedFam( MysqlRedFam ): @type bool """ if title and not index: - index = self._articlesList.index( title ) + index = self.articlesList.index( title ) - if isinstance( index, int ) and index < len(self._articlesList): + if isinstance( index, int ) and index < len(self.articlesList): if weak: - self._article_status[index].discard(status) + self.articlesStatus[index].discard(status) else: - self._article_status[index].remove(status) + self.articlesStatus[index].remove(status) else: raise IndexError( "No index given or wrong format!") @@ -168,10 +168,10 @@ class RedFam( 
MysqlRedFam ): @type title str """ if title and not index: - index = self._articlesList.index( title ) + index = self.articlesList.index( title ) - if isinstance( index, int ) and index < len(self._articlesList): - if status in self._article_status[index]: + if isinstance( index, int ) and index < len(self.articlesList): + if status in self.articlesStatus[index]: return True else: return False @@ -458,19 +458,20 @@ class RedFamWorker( RedFam ): """ def __init__( self, mysql_data ): - articlesList = [] + #~ articlesList = [] - for key in sorted( mysql_data.keys() ): - if 'article' in key and 'status' not in key and mysql_data[ key ]: - articlesList.append( mysql_data[ key ] ) + #~ for key in sorted( mysql_data.keys() ): + #~ if 'article' in key and 'status' not in key and mysql_data[ key ]: + #~ articlesList.append( mysql_data[ key ] ) - # Preset article status list with empty sets for existing articles - self._article_status = [set() for x in range(0, len(articlesList))] + #~ # Preset article status list with empty sets for existing articles + #~ self._article_status = [set() for x in range(0, len(articlesList))] - super().__init__( articlesList, mysql_data[ 'beginning' ], - mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], - mysql_data[ 'status' ], mysql_data[ 'famhash' ], - mysql_data[ 'heading' ] ) + #~ super().__init__( articlesList, mysql_data[ 'beginning' ], + #~ mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], + #~ mysql_data[ 'status' ], mysql_data[ 'famhash' ], + #~ mysql_data[ 'heading' ] ) + super().__init__() # #~ self._mysql.data = mysql_data @@ -510,8 +511,12 @@ class RedFamWorker( RedFam ): """ # Iterate over articles in redfam - for article in self._articlesList: - page = pywikibot.Page(pywikibot.Link(article), self.site) + for article in self.articlesList: + # Not all list elements contain articles + if not article: + break + + page = pywikibot.Page(pywikibot.Link(article), pywikibot.Site()) # Exclude by article status for status in 
exclude_article_status: @@ -544,7 +549,10 @@ class RedFamWorker( RedFam ): """ Sets status to 3 when worked on """ - for article in self._articlesList: + for article in self.articlesList: + if not article: + break + if self.article_has_status( "note_rej", title=article ): self.status.add( "note_rej" ) if self.article_has_status( "sav_err", title=article ): @@ -554,13 +562,6 @@ class RedFamWorker( RedFam ): not self.status.has( "note_rej" ): self.status.add( "marked" ) - self._mysql.data[ 'status' ] = self._raw_status() - index = 0 - for article in self._articlesList: - self._mysql.data[ "article" + str(index) + 'status' ] = \ - self._article_raw_status( index=index ) - index += 1 - def get_disc_link( self ): """ Constructs and returns the link to Redundancy discussion @@ -570,7 +571,7 @@ class RedFamWorker( RedFam ): """ # We need to Replace Links with their linktext - anchor_code = mwparser.parse( self._mysql.data[ 'heading' ].strip() ) + anchor_code = mwparser.parse( self.heading.strip() ) for link in anchor_code.ifilter_wikilinks(): if link.text: text = link.text @@ -583,7 +584,7 @@ class RedFamWorker( RedFam ): anchor_code.replace( " ", "_" ) # We try it with out any more parsing as mw will do while parsing page - return ( self.redpagetitle + "#" + + return ( self.redpage.pagetitle + "#" + str(anchor_code).strip() ) def generate_disc_notice_template( self ): @@ -603,7 +604,9 @@ class RedFamWorker( RedFam ): param_cnt = 3 # Iterate over articles in redfam - for article in self._articlesList: + for article in self.articlesList: + if not article: + break # Make sure to only use 8 articles (max. 
param 10) if param_cnt > 10: break @@ -614,11 +617,11 @@ class RedFamWorker( RedFam ): param_cnt += 1 # Add begin - begin = self._mysql.data[ 'beginning' ].strftime( "%B %Y" ) + begin = self.beginning.strftime( "%B %Y" ) template.add( "Beginn", begin, True ) # Add end (if not same as begin) - end = self._mysql.data[ 'ending' ].strftime( "%B %Y" ) + end = self.ending.strftime( "%B %Y" ) if not end == begin: template.add( "Ende", end, True ) @@ -650,13 +653,16 @@ class RedFamWorker( RedFam ): Yield red_fams stored in db by given status which have an ending after given one """ - mysql = MysqlRedFam() - for fam in mysql.get_by_status_and_ending( status, ending ): - try: - yield cls( fam ) - except RedFamHashError: - print(fam) - raise + from sqlalchemy import text + + for redfam in RedFamWorker.session.query(RedFamWorker).filter( + #~ RedFamWorker._status.like('archived'), + #RedFamWorker._status.like("%{0:s}%".format(status)), + text("status LIKE '%archived%'"), + RedFamWorker.ending >= ending + ): + + yield redfam class RedFamError( Exception ): From 844fee52aec378bd9b16e649fd8e437ee93d939e Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Wed, 8 Mar 2017 00:01:36 +0100 Subject: [PATCH 21/26] Make markpages using new DB/Class structure Update markpages and RedFamWorker-Code to use the new sqlalchemy based DB ORM Interface --- bots/markpages.py | 5 ++++- lib/redfam.py | 31 ++----------------------------- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index b7b45c0..664f5d4 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -87,6 +87,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() for redfam in self.redfams: redfam.update_status() + RedFamWorker.flush_db_cache() + + @property def redfams(self): """ @@ -168,7 +171,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() save_ret = self.put_current( self.new_text, summary=summary ) # Status - if 
add_ret is None or add_ret and save_ret: + if add_ret is None or ( add_ret and save_ret ): self.current_page.redfam.article_add_status( "marked", title=self.current_page.title(withNamespace=False)) diff --git a/lib/redfam.py b/lib/redfam.py index 69b68c7..8dae7ec 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -78,6 +78,7 @@ class RedFam( MysqlRedFam ): ", red_page_id=" + repr( self.redpageid ) + \ ", status=" + repr( self.status ) + \ ", fam_hash=" + repr( self.famhash ) + \ + ", articlesStatus=" + repr( self.articlesStatus ) + \ " )" return __repr @@ -456,38 +457,10 @@ class RedFamWorker( RedFam ): Handles working with redundance families stored in database where discussion is finished """ - def __init__( self, mysql_data ): + def __init__( self ): - #~ articlesList = [] - - #~ for key in sorted( mysql_data.keys() ): - #~ if 'article' in key and 'status' not in key and mysql_data[ key ]: - #~ articlesList.append( mysql_data[ key ] ) - - #~ # Preset article status list with empty sets for existing articles - #~ self._article_status = [set() for x in range(0, len(articlesList))] - - #~ super().__init__( articlesList, mysql_data[ 'beginning' ], - #~ mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], - #~ mysql_data[ 'status' ], mysql_data[ 'famhash' ], - #~ mysql_data[ 'heading' ] ) super().__init__() -# #~ self._mysql.data = mysql_data - - #~ # Set up article status - #~ index = 0 - #~ for article in self.articlesList: - #~ raw_status = mysql_data[ "article" + str(index) + "_status" ] - #~ if not raw_status: - #~ raw_status = str() - #~ self._article_parse_status( raw_status, index ) - #~ index += 1 - - # Get related RedPage-Information - self.redpageid = mysql_data[ 'pageid' ] - self.redpagetitle = mysql_data[ 'pagetitle' ] - # Make sure locale is set to 'de_DE.UTF-8' to prevent problems # with wrong month abreviations in strptime locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') From 9ba7d2e51755733cda9357d30832dfc40216af20 Mon Sep 17 00:00:00 2001 From: Jonathan 
Golder Date: Wed, 8 Mar 2017 00:04:15 +0100 Subject: [PATCH 22/26] Change redfam generator filters Change and clear up the filters in redfam generator to keep track of article status and use positive conditionals --- lib/redfam.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/lib/redfam.py b/lib/redfam.py index 8dae7ec..8be9cf3 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -491,6 +491,28 @@ class RedFamWorker( RedFam ): page = pywikibot.Page(pywikibot.Link(article), pywikibot.Site()) + # Filter existing pages if requested with filter_existing=False + if page.exists(): + self.article_remove_status( "deleted", title=article ) + if filter_existing is False: + continue + # Filter non existing Pages if requested with filter_existing=True + else: + self.article_add_status( "deleted", title=article ) + if filter_existing: + continue + + # Filter redirects if requested with filter_redirects=True + if page.isRedirectPage(): + self.article_add_status( "redirect", title=article ) + if filter_redirects: + continue + # Filter noredirects if requested with filter_redirects=False + else: + self.article_remove_status("redirect", title=article ) + if filter_redirects is False: + continue + # Exclude by article status for status in exclude_article_status: if self.article_has_status( status, title=article ): @@ -501,20 +523,6 @@ class RedFamWorker( RedFam ): if not self.article_has_status( status, title=article ): continue - # Filter non existing Pages if requested with filter_existing=True - if filter_existing and not page.exists(): - continue - # Filter existing pages if requested with filter_existing=False - elif filter_existing is False and page.exists(): - continue - - # Filter redirects if requested with filter_redirects=True - if filter_redirects and page.isRedirectPage(): - continue - # Filter noredirects if requested with filter_redirects=False - elif filter_redirects is False and not page.isRedirectPage(): - 
continue - # Yield filtered pages yield page From e16925197cb1a71e63e2d2604caa73abe274f2e5 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Wed, 8 Mar 2017 18:38:15 +0100 Subject: [PATCH 23/26] Fix pep8.. compliance To be concordant with the coding styles fix pep8 compliance --- bots/markpages.py | 1 - bots/reddiscparser.py | 3 ++- lib/mysqlred.py | 60 +++++++++++++++++++++---------------------- lib/redfam.py | 58 ++++++++++++++++++++++++----------------- lib/redpage.py | 8 +++--- 5 files changed, 70 insertions(+), 60 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 664f5d4..0fbaded 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -89,7 +89,6 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() RedFamWorker.flush_db_cache() - @property def redfams(self): """ diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 2e203ba..9179841 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -146,7 +146,8 @@ class DiscussionParserBot( return # Initiate RedPage object - redpage = RedPage.session.query(RedPage).filter(RedPage.pageid == self.current_page.pageid ).one_or_none() + redpage = RedPage.session.query(RedPage).filter( + RedPage.pageid == self.current_page.pageid ).one_or_none() if redpage: redpage.update( self.current_page ) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 1f92026..232dc7c 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -25,22 +25,27 @@ Provides interface classes for communication of redundances bot with mysql-db """ -# Prefere using oursql then MySQLdb -try: - import oursql as mysqldb -except ImportError: - import MySQLdb as mysqldb +import atexit # noqa -import atexit - -import pywikibot +import pywikibot # noqa from pywikibot import config import jogobot - -from sqlalchemy import create_engine +from sqlalchemy import ( + create_engine, Column, Integer, String, Text, DateTime, ForeignKey ) +from sqlalchemy import text # noqa from sqlalchemy.engine.url import 
URL +from sqlalchemy.ext.declarative import ( + declarative_base, declared_attr, has_inherited_table ) +from sqlalchemy.ext.mutable import MutableComposite, MutableSet +from sqlalchemy.orm import sessionmaker, relationship, composite +from sqlalchemy.orm.collections import attribute_mapped_collection +import sqlalchemy.types as types + + +Base = declarative_base() + url = URL( "mysql+oursql", username=config.db_username, password=config.db_password, @@ -50,18 +55,6 @@ url = URL( "mysql+oursql", engine = create_engine(url, echo=True) -from sqlalchemy.ext.declarative import ( - declarative_base, declared_attr, has_inherited_table ) -Base = declarative_base() - -from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey - -from sqlalchemy.orm import sessionmaker, relationship, composite -from sqlalchemy.ext.mutable import MutableComposite, MutableSet -from sqlalchemy.orm.collections import attribute_mapped_collection -import sqlalchemy.types as types - - Session = sessionmaker(bind=engine) session = Session() @@ -70,20 +63,22 @@ family = "dewpbeta" class Mysql(object): session = session + @declared_attr def _tableprefix(cls): return family + "_" + @declared_attr def _tablesuffix(cls): return "s" + @declared_attr def __tablename__(cls): if has_inherited_table(cls): return None - prefix = family + "_" name = cls.__name__[len("Mysql"):].lower() - suffix = "s" return cls._tableprefix + name + cls._tablesuffix + def changedp(self): return self.session.is_modified(self) @@ -108,7 +103,7 @@ class MutableSet(MutableSet): @param item Item to add """ - if not item in self: + if item not in self: super().add(item) def discard(self, item): @@ -187,8 +182,11 @@ class Status( types.TypeDecorator ): elif isinstance(value, String ) or value is None: return value else: - raise ProgrammingError - + raise TypeError( + "Value should be an instance of one of {0:s},".format( + str( [type(MutableSet()), type(String()), type(None)] ) ) + + "given value was an instance of 
{1:s}".format( + str(type(value))) ) def process_result_value(self, value, dialect): """ @@ -226,8 +224,9 @@ class MysqlRedFam( Mysql, Base ): redpageid = Column( Integer, ForeignKey( "dewpbeta_redpages.pageid" ), nullable=False ) beginning = Column( DateTime, nullable=False ) - ending = Column( DateTime, nullable=True ) - _status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) + ending = Column( DateTime, nullable=True ) + _status = Column( 'status', MutableSet.as_mutable(Status(255)), + nullable=True ) __article0_status = Column( 'article0_status', MutableSet.as_mutable(Status(64)), nullable=True ) @@ -296,7 +295,8 @@ class MysqlRedPage( Mysql, Base ): pageid = Column( Integer, unique=True, primary_key=True ) revid = Column( Integer, unique=True, nullable=False ) pagetitle = Column( String(255), nullable=False ) - __status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) + __status = Column( 'status', MutableSet.as_mutable(Status(255)), + nullable=True ) redfams = relationship( "MysqlRedFam", order_by=MysqlRedFam.famhash, back_populates="redpage", diff --git a/lib/redfam.py b/lib/redfam.py index 8be9cf3..5c31364 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -35,7 +35,7 @@ import pywikibot # noqa from pywikibot.tools import deprecated # noqa import jogobot -from lib.mysqlred import MysqlRedFam +from lib.mysqlred import MysqlRedFam, text class RedFam( MysqlRedFam ): @@ -60,8 +60,16 @@ class RedFam( MysqlRedFam ): # Having pywikibot.Site() is a good idea most of the time self.site = pywikibot.Site() - super().__init__( articlesList=articlesList, beginning=beginning, ending=ending, redpageid=redpageid, - famhash=famhash, heading=heading, status=status, articlesStatus=None ) + super().__init__( + articlesList=articlesList, + beginning=beginning, + ending=ending, + redpageid=redpageid, + famhash=famhash, + heading=heading, + status=status, + articlesStatus=None + ) def __repr__( self ): """ @@ -228,23 +236,25 @@ class 
RedFamParser( RedFam ): if ending: ending = self.__datetime(ending) - - super().__init__( articlesList, beginning, ending=ending, redpageid=redpage.page._pageid, - famhash=famhash, heading=heading ) + super().__init__( articlesList, + beginning, + ending=ending, + redpageid=redpage.page._pageid, + famhash=famhash, + heading=heading ) # Check status changes self.check_status() self.session.add(self) - def update( self, articlesList, heading, redpage, redpagearchive, - beginning, ending=None): + beginning, ending=None ): - self.articlesList = articlesList; - self.heading = heading; - self.redpage = redpage; - self.redpageid = redpage.pageid; + self.articlesList = articlesList + self.heading = heading + self.redpage = redpage + self.redpageid = redpage.pageid self.add_beginning( beginning ) @@ -271,8 +281,7 @@ class RedFamParser( RedFam ): heading = mwparser.parse( str( heading ) ) # Save destinations of wikilinks in headings - return [ str( link.title ) for link - in heading.ifilter_wikilinks() ] + return [ str( link.title ) for link in heading.ifilter_wikilinks() ] def add_beginning( self, beginning ): """ @@ -398,11 +407,13 @@ class RedFamParser( RedFam ): if redfam: # Existing redfams need to be updated - redfam.update( articlesList, str(heading), redpage, isarchive, beginning, ending ) + redfam.update( articlesList, str(heading), redpage, isarchive, + beginning, ending ) else: # Create the RedFam object - redfam = RedFamParser( articlesList, str(heading), redpage, isarchive, beginning, ending ) + redfam = RedFamParser( articlesList, str(heading), + redpage, isarchive, beginning, ending ) return redfam @classmethod @@ -465,7 +476,8 @@ class RedFamWorker( RedFam ): # with wrong month abreviations in strptime locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') - def article_generator(self, filter_existing=None, filter_redirects=None, + def article_generator(self, # noqa + filter_existing=None, filter_redirects=None, exclude_article_status=[], 
onlyinclude_article_status=[] ): """ @@ -602,7 +614,7 @@ class RedFamWorker( RedFam ): template.add( "Beginn", begin, True ) # Add end (if not same as begin) - end = self.ending.strftime( "%B %Y" ) + end = self.ending.strftime( "%B %Y" ) if not end == begin: template.add( "Ende", end, True ) @@ -634,14 +646,12 @@ class RedFamWorker( RedFam ): Yield red_fams stored in db by given status which have an ending after given one """ - from sqlalchemy import text - for redfam in RedFamWorker.session.query(RedFamWorker).filter( - #~ RedFamWorker._status.like('archived'), - #RedFamWorker._status.like("%{0:s}%".format(status)), + # NOT WORKING WITH OBJECT NOTATION + # RedFamWorker._status.like('archived'), + # RedFamWorker._status.like("%{0:s}%".format(status)), text("status LIKE '%archived%'"), - RedFamWorker.ending >= ending - ): + RedFamWorker.ending >= ending ): yield redfam diff --git a/lib/redpage.py b/lib/redpage.py index f9f0aa8..3678111 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -31,8 +31,8 @@ import mwparserfromhell as mwparser import jogobot # noqa from lib.mysqlred import MysqlRedPage, relationship -from lib.redfam import RedFam, RedFamParser from sqlalchemy.orm.collections import attribute_mapped_collection +from lib.redfam import RedFamParser class RedPage( MysqlRedPage ): @@ -40,7 +40,7 @@ class RedPage( MysqlRedPage ): Class for handling redundance discussion pages and archives """ - #TODO POLYMORPHISM? of BASEClass + # TODO POLYMORPHISM? 
of BASEClass redfams = relationship( "RedFamParser", order_by=RedFamParser.famhash, back_populates="redpage", @@ -65,7 +65,7 @@ class RedPage( MysqlRedPage ): revid=self._page._revid, pagetitle=self._page.title(), status=None - ) + ) self.is_archive() @@ -79,7 +79,7 @@ class RedPage( MysqlRedPage ): @property def page(self): - if not hasattr(self,"_page"): + if not hasattr(self, "_page"): self._page = pywikibot.Page( pywikibot.Site(), self.pagetitle ) return self._page From 3fe47e666f9db09be9b1eab4a11620e8ea71ea65 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Wed, 8 Mar 2017 18:41:02 +0100 Subject: [PATCH 24/26] Fix polymorphism problem with relationships Since we are using subclasses of the ORM mapped classes, disable typechecks for ORM relations --- lib/mysqlred.py | 8 +++++--- lib/redpage.py | 9 +-------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 232dc7c..4f6101e 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -249,7 +249,8 @@ class MysqlRedFam( Mysql, Base ): __article3_status, __article4_status, __article5_status, __article6_status, __article7_status ) - redpage = relationship( "MysqlRedPage", back_populates="redfams" ) + redpage = relationship( "MysqlRedPage", enable_typechecks=False, + back_populates="redfams" ) @property def articlesList(self): @@ -299,8 +300,9 @@ class MysqlRedPage( Mysql, Base ): nullable=True ) redfams = relationship( - "MysqlRedFam", order_by=MysqlRedFam.famhash, back_populates="redpage", - collection_class=attribute_mapped_collection("famhash")) + "MysqlRedFam", enable_typechecks=False, + back_populates="redpage", order_by=MysqlRedFam.famhash, + collection_class=attribute_mapped_collection("famhash") ) @property def status( self ): diff --git a/lib/redpage.py b/lib/redpage.py index 3678111..69f02b8 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -30,8 +30,7 @@ import mwparserfromhell as mwparser import jogobot # noqa -from lib.mysqlred import MysqlRedPage, 
relationship -from sqlalchemy.orm.collections import attribute_mapped_collection +from lib.mysqlred import MysqlRedPage from lib.redfam import RedFamParser @@ -40,12 +39,6 @@ class RedPage( MysqlRedPage ): Class for handling redundance discussion pages and archives """ - # TODO POLYMORPHISM? of BASEClass - redfams = relationship( - "RedFamParser", order_by=RedFamParser.famhash, - back_populates="redpage", - collection_class=attribute_mapped_collection( "famhash" ) ) - def __init__( self, page=None, pageid=None, archive=False ): """ Generate a new RedPage object based on the given pywikibot page object From 281f1c49a8f13349084107389128ad2714e8b089 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Thu, 9 Mar 2017 00:00:17 +0100 Subject: [PATCH 25/26] mysqlred: Set family via pywikibot Get family/language part of table names from PyWikiBot Site --- lib/mysqlred.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 4f6101e..1760fda 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -58,7 +58,7 @@ engine = create_engine(url, echo=True) Session = sessionmaker(bind=engine) session = Session() -family = "dewpbeta" +family = pywikibot.Site().family.dbName(pywikibot.Site().code) class Mysql(object): @@ -222,7 +222,7 @@ class MysqlRedFam( Mysql, Base ): heading = Column( Text, nullable=False ) redpageid = Column( - Integer, ForeignKey( "dewpbeta_redpages.pageid" ), nullable=False ) + Integer, ForeignKey( family + "_redpages.pageid" ), nullable=False ) beginning = Column( DateTime, nullable=False ) ending = Column( DateTime, nullable=True ) _status = Column( 'status', MutableSet.as_mutable(Status(255)), From 4aaacf144314cbc5c83eed566e38fc428525fdd4 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Thu, 9 Mar 2017 10:13:56 +0100 Subject: [PATCH 26/26] Add redfams to redpage-obj after parsing To have redfams available for updates immediately after parsing. Duplicate redfams will then be treated as updates.
Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=108 FS#108] --- lib/redfam.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/redfam.py b/lib/redfam.py index 5c31364..ca10e87 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -414,7 +414,9 @@ class RedFamParser( RedFam ): # Create the RedFam object redfam = RedFamParser( articlesList, str(heading), redpage, isarchive, beginning, ending ) - return redfam + + # Add redfam to redpage object + redpage.redfams.set( redfam ) @classmethod def extract_dates( cls, text, isarchive=False ):