diff --git a/bots/markpages.py b/bots/markpages.py index e47f4d7..244ba14 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -80,7 +80,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() jogobot.config["red.markpages"]["mark_done_after"], "%Y-%m-%d" ) self.__redfams = RedFamWorker.gen_by_status_and_ending( - 2, end_after) + "archived", end_after) return self.__redfams diff --git a/lib/mysqlred.py b/lib/mysqlred.py index f57ae2b..79360a8 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -156,21 +156,21 @@ class MysqlRedPage( MysqlRed ): # Class variables for storing cached querys # '{prefix}' will be replaced during super().__init__() _cached_update_data = [] - _update_query = 'UPDATE `{prefix}_red_pages` \ -SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' + _update_query = 'UPDATE `{prefix}_redpages` \ +SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;' _cached_insert_data = {} - _insert_query = 'INSERT INTO `{prefix}_red_pages` \ -( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' + _insert_query = 'INSERT INTO `{prefix}_redpages` \ +( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? );' - def __init__( self, page_id ): + def __init__( self, pageid ): """ Creates a new instance, runs __init__ of parent class """ super().__init__( ) - self.__page_id = int( page_id ) + self.__pageid = int( pageid ) self.data = self.get_page() @@ -185,7 +185,7 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' """ Retrieves a red page row from MySQL-Database for given page_id - @param int page_id MediaWiki page_id for page to retrieve + @param int pageid MediaWiki page_id for page to retrieve @returns tuple Tuple with data for given page_id bool FALSE if none found @@ -194,8 +194,8 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? 
WHERE `page_id` = ?;' cursor = type( self ).connection.cursor(mysqldb.DictCursor) cursor.execute( - 'SELECT * FROM `{prefix}_red_pages` WHERE `page_id` = ?;'.format( - prefix=type(self).db_table_prefix), ( self.__page_id, ) ) + 'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format( + prefix=type(self).db_table_prefix), ( self.__pageid, ) ) res = cursor.fetchone() @@ -204,40 +204,40 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' else: return False - def add_page( self, page_title, rev_id, status=0 ): + def add_page( self, pagetitle, revid, status=0 ): """ - Inserts a red page row in MySQL-Database for given page_id + Inserts a red page row in MySQL-Database for given pageid - @param int rev_id MediaWiki current rev_id - @param str page_title MediaWiki new page_title + @param int revid MediaWiki current revid + @param str pagetitle MediaWiki new pagetitle @param int status Page parsing status """ - insert_data = { self.__page_id: ( self.__page_id, page_title, - rev_id, status ) } + insert_data = { self.__pageid: ( self.__pageid, pagetitle, + revid, status ) } type( self )._cached_insert_data.update( insert_data ) # Manualy construct self.data dict - self.data = { 'page_id': self.__page_id, 'rev_id': rev_id, - 'page_title': page_title, 'status': status } + self.data = { 'pageid': self.__pageid, 'revid': revid, + 'pagetitle': pagetitle, 'status': status } - def update_page( self, rev_id=None, page_title=None, status=0 ): + def update_page( self, revid=None, pagetitle=None, status=0 ): """ Updates the red page row in MySQL-Database for given page_id - @param int rev_id MediaWiki current rev_id - @param str page_title MediaWiki new page_title + @param int revid MediaWiki current rev_id + @param str pagetitle MediaWiki new page_title @param int status Page parsing status """ - if not page_title: - page_title = self.data[ 'page_title' ] - if not rev_id: - rev_id = self.data[ 'rev_id' ] + if not pagetitle: + pagetitle = self.data[ 
'pagetitle' ] + if not revid: + revid = self.data[ 'revid' ] - type( self )._cached_update_data.append( ( page_title, rev_id, - status, self.__page_id ) ) + type( self )._cached_update_data.append( ( pagetitle, revid, + status, self.__pageid ) ) class MysqlRedFam( MysqlRed ): @@ -247,22 +247,22 @@ class MysqlRedFam( MysqlRed ): # Class variables for storing cached querys _cached_update_data = [] - _update_query = 'UPDATE `{prefix}_red_families` \ -SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ -`status`= ? WHERE `fam_hash` = ?;' + _update_query = 'UPDATE `{prefix}_redfams` \ +SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ +`status`= ? WHERE `famhash` = ?;' _cached_insert_data = {} - _insert_query = 'INSERT INTO `{prefix}_red_families` \ -( fam_hash, red_page_id, beginning, ending, status, heading, \ + _insert_query = 'INSERT INTO `{prefix}_redfams` \ +( famhash, redpageid, beginning, ending, status, heading, \ article0, article1, article2, article3, article4, article5, article6, \ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' - def __init__( self, fam_hash=None ): + def __init__( self, famhash=None ): """ Creates a new instance, runs __init__ of parent class """ - self.__fam_hash = fam_hash + self.__famhash = famhash super().__init__( ) @@ -273,27 +273,27 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' """ pass - def get_fam( self, fam_hash ): + def get_fam( self, famhash ): """ Retrieves a red family row from MySQL-Database for given fam_hash @returns dict Dictionairy with data for given fam hash False if none found """ - self.__fam_hash = fam_hash + self.__famhash = famhash cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor.execute( - 'SELECT * FROM `{prefix}_red_families` WHERE `fam_hash` = ?;'. - format( prefix=type(self).db_table_prefix), ( fam_hash, ) ) + 'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'. 
+ format( prefix=type(self).db_table_prefix), ( famhash, ) ) self.data = cursor.fetchone() - def add_fam( self, articlesList, heading, red_page_id, + def add_fam( self, articlesList, heading, redpageid, beginning, ending=None, status=0 ): - data = [ self.__fam_hash, red_page_id, beginning, ending, + data = [ self.__famhash, redpageid, beginning, ending, status, heading ] for article in articlesList: @@ -304,29 +304,29 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' data = tuple( data ) - insert_data = { self.__fam_hash: data } + insert_data = { self.__famhash: data } type( self )._cached_insert_data.update( insert_data ) # Manualy construct self.data dict - data_keys = ( 'fam_hash', 'red_page_id', 'beginning', 'ending', + data_keys = ( 'famhash', 'redpageid', 'beginning', 'ending', 'status', 'heading', 'article0', 'article1', 'article2', 'article3', 'article4', 'article5', 'article6', 'article7' ) self.data = dict( zip( data_keys, data ) ) - def update_fam( self, red_page_id, heading, beginning, ending, status ): + def update_fam( self, redpageid, heading, beginning, ending, status ): """ Updates the red fam row in MySQL-Database for given fam_hash - @param int red_page_id MediaWiki page_id + @param int redpageid MediaWiki page_id @param datetime beginning Timestamp of beginning qparam datetime ending Timestamp of ending of @param int status red_fam status """ - type( self )._cached_update_data.append( ( red_page_id, heading, + type( self )._cached_update_data.append( ( redpageid, heading, beginning, ending, status, - self.__fam_hash ) ) + self.__famhash ) ) def get_by_status( self, status ): """ @@ -336,7 +336,7 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor.execute( - 'SELECT * FROM `{prefix}_red_families` WHERE `status` = ?;'.format( + 'SELECT * FROM `{prefix}_redfams` WHERE `status` = ?;'.format( prefix=type( self ).db_table_prefix), ( status, ) ) while True: @@ -355,11 +355,11 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' cursor.execute( ( 'SELECT * ' + - 'FROM `{prefix}_red_families` `F` ' + - 'INNER JOIN `{prefix}_red_pages` `P` ' + + 'FROM `{prefix}_redfams` `F` ' + + 'INNER JOIN `{prefix}_redpages` `P` ' + 'ON `F`.`status` = ? ' + 'AND `F`.`ending` >= ? ' - 'AND `F`.`red_page_id` = `P`.`page_id`;').format( + 'AND `F`.`redpageid` = `P`.`pageid`;').format( prefix=type( self ).db_table_prefix), ( status, ending ) ) while True: diff --git a/lib/redfam.py b/lib/redfam.py index f0b36fd..798d501 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -43,8 +43,8 @@ class RedFam: Basic class for RedFams, containing the basic data structure """ - def __init__( self, articlesList, beginning, ending=None, red_page_id=None, - status=0, fam_hash=None, heading=None ): + def __init__( self, articlesList, beginning, ending=None, redpageid=None, + status=None, famhash=None, heading=None ): """ Generates a new RedFam object @@ -52,7 +52,7 @@ class RedFam: @param beginning datetime Beginning date @param ending datetime Ending date @param red_page_id int MW pageid of containing RedPage - @param status int Status of RedFam + @param status str Status of RedFam @param fam_hash str SHA1 hash of articlesList @param heading str Original heading of RedFam (Link) """ @@ -61,20 +61,20 @@ class RedFam: self.site = pywikibot.Site() # Database interface - self._mysql = MysqlRedFam( fam_hash ) + self._mysql = MysqlRedFam( famhash ) # Initial attribute values self._articlesList = articlesList self._beginning = beginning self._ending = ending - self._red_page_id = red_page_id - self._status = status - self._fam_hash = fam_hash + self._redpageid = redpageid + 
self._parse_status(status) + self._famhash = famhash self._heading = heading # Calculates the sha1 hash over self._articlesList to # rediscover known redundance families - self.calc_fam_hash() + self.calc_famhash() def __repr__( self ): """ @@ -88,14 +88,14 @@ class RedFam: ", heading=" + repr( self._heading ) + \ ", beginning=" + repr( self._beginning ) + \ ", ending=" + repr( self._ending ) + \ - ", red_page_id=" + repr( self._red_page_id ) + \ + ", red_page_id=" + repr( self._redpageid ) + \ ", status=" + repr( self._status ) + \ - ", fam_hash=" + repr( self._fam_hash ) + \ + ", fam_hash=" + repr( self._famhash ) + \ " )" return __repr - def calc_fam_hash( self ): + def calc_famhash( self ): """ Calculates the SHA-1 hash for the articlesList of redundance family. Since we don't need security SHA-1 is just fine. @@ -106,35 +106,35 @@ class RedFam: h = hashlib.sha1() h.update( str( self._articlesList[:8] ).encode('utf-8') ) - if self._fam_hash and h.hexdigest() != self._fam_hash: - raise RedFamHashError( self._fam_hash, h.hexdigest() ) + if self._famhash and h.hexdigest() != self._famhash: + raise RedFamHashError( self._famhash, h.hexdigest() ) - elif self._fam_hash: + elif self._famhash: return else: - self._fam_hash = h.hexdigest() + self._famhash = h.hexdigest() def changed( self ): """ Checks wether anything has changed and maybe triggers db update """ - # On archived red_fams do not delete possibly existing ending - if( not self._ending and self._status > 1 and + # On archived redfams do not delete possibly existing ending + if( not self._ending and "archived" in self._status and self._mysql.data[ 'ending' ] ): self._ending = self._mysql.data[ 'ending' ] # Since status change means something has changed, update database - if( self._status != self._mysql.data[ 'status' ] or + if( self._raw_status() != self._mysql.data[ 'status' ] or self._beginning != self._mysql.data[ 'beginning' ] or self._ending != self._mysql.data[ 'ending' ] or - 
self._red_page_id != self._mysql.data[ 'red_page_id' ] or + self._redpageid != self._mysql.data[ 'redpageid' ] or self._heading != self._mysql.data[ 'heading' ]): - self._mysql.update_fam( self._red_page_id, self._heading, + self._mysql.update_fam( self._redpageid, self._heading, self._beginning, self._ending, - self._status ) + self._raw_status() ) @classmethod def flush_db_cache( cls ): @@ -143,6 +143,61 @@ class RedFam: """ MysqlRedFam.flush() + def add_status(self, status): + """ + Adds a status specified by status, to status set + + @param status Statusstring to add + @type status str + """ + self._status.add(status) + + def remove_status(self, status, weak=True): + """ + Removes a status, specified by status from set. If weak is set to + False it will throw a KeyError when trying to remove a status not set. + + @param status Statusstring to remove + @type status str + @param weak Change behavior on missing status + @type weak bool + """ + if weak: + self._status.discard(status) + else: + self._status.remove(status) + + def has_status(self, status): + """ + Returns True, if redfam has given status + + @param status Statusstring to check + @type status str + @returns True if status is present else False + """ + if status in self._status: + return True + else: + return False + + def _parse_status(self, raw_status ): + """ + Sets status set from comma separated list (None or "" gives empty set) + + @param raw_status Commaseparated string of stati (from DB) + @type raw_status str + """ + self._status = set( filter( None, ( raw_status or "" ).strip().split( "," ) ) ) + + def _raw_status( self ): + """ + Returns status as sorted, commaseparated string (to save in DB) + + @returns Raw status string + @rtype str + """ + return ",".join( sorted( self._status ) ) + class RedFamParser( RedFam ): """ @@ -165,15 +220,15 @@ class RedFamParser( RedFam ): wurde gewünscht von:" __done_notice2 = "{{Erledigt|" - def __init__( self, heading, red_page, red_page_archive, + def 
__init__( self, heading, redpage, redpagearchive, beginning, ending=None ): """ 
Creates a RedFam object based on data collected while parsing red_pages combined with possibly former known data from db - @param red_fam_heading str Wikitext heading of section - @param red_page page Pywikibot.page object - @param red_page_archive bool Is red_page an archive + @param redfam_heading str Wikitext heading of section + @param redpage page Pywikibot.page object + @param redpagearchive bool Is red_page an archive @param beginning datetime Timestamp of beginning str as strptime parseable string @param ending datetime Timestamp of ending @@ -181,9 +236,9 @@ class RedFamParser( RedFam ): """ # Set object attributes: - self._red_page_id = red_page._pageid - self._red_page_archive = red_page_archive - self._fam_hash = None + self._redpageid = redpage._pageid + self._redpagearchive = redpagearchive + self._famhash = None # Method self.add_beginning sets self._beginning directly self.add_beginning( beginning ) @@ -195,7 +250,7 @@ class RedFamParser( RedFam ): # If no ending was provided set to None self._ending = None - self._status = None + self._status = set() # Parse the provided heading of redundance section # to set self._articlesList @@ -204,7 +259,7 @@ class RedFamParser( RedFam ): # Calculates the sha1 hash over self._articlesList to # rediscover known redundance families - self.calc_fam_hash() + self.calc_famhash() # Open database connection, ask for data if existing, # otherwise create entry @@ -223,11 +278,11 @@ class RedFamParser( RedFam ): # We need a connection to our mysqldb self._mysql = MysqlRedFam( ) - self._mysql.get_fam( self._fam_hash ) + self._mysql.get_fam( self._famhash ) if not self._mysql.data: self._mysql.add_fam( self._articlesList, self._heading, - self._red_page_id, self._beginning, + self._redpageid, self._beginning, self._ending ) def heading_parser( self, heading ): @@ -253,7 +308,7 @@ class RedFamParser( RedFam ): # Catch sections with more then 8 articles, print error if len( self._articlesList ) > 8: # For repression in 
output we need to know the fam hash - self.calc_fam_hash() + self.calc_famhash() jogobot.output( ( "\03{{lightred}}" + @@ -317,21 +372,18 @@ class RedFamParser( RedFam ): - 3 and greater status was set by worker script, do not change it """ - # Do not change stati set by worker script etc. - if not self._mysql.data['status'] > 2: - - # No ending, discussion is running: - # Sometimes archived discussions also have no detectable ending - if not self._ending and not self._red_page_archive: - self._status = 0 - else: - if not self._red_page_archive: - self._status = 1 - else: - self._status = 2 + # No ending, discussion is running: + # Sometimes archived discussions also have no detectable ending + if not self._ending and not self._redpagearchive: + self.add_status("open") else: - - self._status = self._mysql.data[ 'status' ] + self.remove_status("open") + if not self._redpagearchive: + self.add_status("done") + else: + self.remove_status("done") + self.remove_status("open") + self.add_status("archived") @classmethod def is_section_redfam_cb( cls, heading ): @@ -444,15 +496,15 @@ class RedFamWorker( RedFam ): articlesList.append( mysql_data[ key ] ) super().__init__( articlesList, mysql_data[ 'beginning' ], - mysql_data[ 'ending' ], mysql_data[ 'red_page_id' ], - mysql_data[ 'status' ], mysql_data[ 'fam_hash' ], + mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], + mysql_data[ 'status' ], mysql_data[ 'famhash' ], mysql_data[ 'heading' ] ) self._mysql.data = mysql_data # Get related RedPage-Information - self.redpageid = mysql_data[ 'page_id' ] - self.redpagetitle = mysql_data[ 'page_title' ] + self.redpageid = mysql_data[ 'pageid' ] + self.redpagetitle = mysql_data[ 'pagetitle' ] # Make sure locale is set to 'de_DE.UTF-8' to prevent problems # with wrong month abreviations in strptime @@ -499,7 +551,7 @@ class RedFamWorker( RedFam ): Sets status to 3 when worked on """ - self._status = 3 + pass def get_disc_link( self ): """ diff --git a/lib/redpage.py 
b/lib/redpage.py index ebedaba..b4361b9 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -49,6 +49,8 @@ class RedPage: @type pageid int """ + self._status = set() + # Safe the pywikibot page object self.page = page self.pageid = pageid @@ -71,7 +73,7 @@ class RedPage: elif self.pageid: self.__mysql = MysqlRedPage( self.pageid ) self.page = pywikibot.Page( pywikibot.Site(), - self.__mysql.data['page_title'] ) + self.__mysql.data['pagetitle'] ) self.page.exists() else: raise ValueError( "Page NOR pagid provided!" ) @@ -84,9 +86,9 @@ class RedPage: Check wether the page was changed since last run """ - if( self.__mysql.data != { 'page_id': self.page._pageid, - 'rev_id': self.page._revid, - 'page_title': self.page.title(), + if( self.__mysql.data != { 'pageid': self.page._pageid, + 'revid': self.page._revid, + 'pagetitle': self.page.title(), 'status': self.__mysql.data[ 'status' ] } ): self._changed = True else: @@ -110,7 +112,7 @@ class RedPage: Decides wether current RedPage needs to be parsed or not """ - if( self._changed or self.__mysql.data[ 'status' ] == 0 ): + if( self._changed or self.__mysql.data[ 'status' ] == "" ): return True else: return False @@ -146,14 +148,16 @@ class RedPage: Updates the page meta data in mysql db """ if( self._parsed or not self._changed ): - status = 1 + self.add_status( "open" ) if( self.is_archive() ): - status = 2 + self.remove_status( "open" ) + self.add_status( "archived" ) else: - status = 0 + self._status = set() - self.__mysql.update_page( self.page._revid, self.page.title(), status ) + self.__mysql.update_page( self.page._revid, self.page.title(), + self._raw_status() ) @classmethod def flush_db_cache( cls ): @@ -161,3 +165,58 @@ class RedPage: Calls flush method of Mysql Interface class """ MysqlRedPage.flush() + + def add_status(self, status): + """ + Adds a status specified by status, to status set + + @param status Statusstring to add + @type status str + """ + self._status.add(status) + + def remove_status(self, 
status, weak=True): + """ + Removes a status, specified by status from set. If weak is set to + False it will throw a KeyError when trying to remove a status not set. + + @param status Statusstring to remove + @type status str + @param weak Change behavior on missing status + @type weak bool + """ + if weak: + self._status.discard(status) + else: + self._status.remove(status) + + def has_status(self, status): + """ + Returns True, if redpage has given status + + @param status Statusstring to check + @type status str + @returns True if status is present else False + """ + if status in self._status: + return True + else: + return False + + def _parse_status(self, raw_status ): + """ + Sets status set from comma separated list (None or "" gives empty set) + + @param raw_status Commaseparated string of stati (from DB) + @type raw_status str + """ + self._status = set( filter( None, ( raw_status or "" ).strip().split( "," ) ) ) + + def _raw_status( self ): + """ + Returns status as sorted, commaseparated string (to save in DB) + + @returns Raw status string + @rtype str + """ + return ",".join( sorted( self._status ) )