From 6e973369cd868d80862c7efc03fe3cb525573ccb Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 26 Nov 2016 22:26:55 +0100 Subject: [PATCH 01/11] sqlalchemy working for parser Needs some testing, presumably contains some bugs --- bots/reddiscparser.py | 30 +- lib/mysqlred.py | 791 +++++++++++++++++++++++++++--------------- lib/redfam.py | 352 +++++++++++-------- lib/redpage.py | 188 ++++++---- 4 files changed, 851 insertions(+), 510 deletions(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 818eb05..c789d86 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -33,8 +33,8 @@ from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot -from lib import redpage -from lib import redfam +from lib.redpage import RedPage +from lib.redfam import RedFamParser class DiscussionParserBot( @@ -127,7 +127,7 @@ class DiscussionParserBot( else: # If successfully parsed all pages in cat, flush db write cache - redpage.RedPage.flush_db_cache() + RedPage.flush_db_cache() def treat_page( self ): """ @@ -146,20 +146,23 @@ class DiscussionParserBot( return # Initiate RedPage object - red_page = redpage.RedPage( self.current_page ) + redpage = RedPage.session.query(RedPage).filter(RedPage.pageid == self.current_page.pageid ).one_or_none() - # Check whether parsing is needed - if red_page.is_parsing_needed(): + if redpage: + redpage.update( self.current_page ) + else: + redpage = RedPage( self.current_page ) + #~ # Check whether parsing is needed + if redpage.is_parsing_needed(): # Count families for failure analysis fam_counter = 0 # Iterate over returned generator with redfam sections - for fam in red_page.parse(): - + for fam in redpage.parse(): # Run RedFamParser on section text - redfam.RedFamParser.parser( fam, red_page.page, - red_page.is_archive() ) + RedFamParser.parser( fam, redpage, + redpage.is_archive() ) fam_counter += 1 @@ -167,12 +170,13 @@ class DiscussionParserBot( # If successfully parsed whole page, flush # db write cache if( fam_counter ): - redfam.RedFamParser.flush_db_cache() + + RedFamParser.flush_db_cache() jogobot.output( "Page [[{reddisc}]] parsed".format( - reddisc=red_page.page.title() ) ) + reddisc=redpage.page.title() ) ) else: jogobot.output( "\03{red}" + "Page [[{reddisc}]], ".format( - reddisc=red_page.page.title() ) + + reddisc=redpage.page.title() ) + "containing no redfam, parsed!", "WARNING" ) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 9e2e01b..8257822 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -39,336 +39,553 @@ from pywikibot import config import jogobot -class MysqlRed: +from sqlalchemy import create_engine +from sqlalchemy.engine.url import URL +url = URL( "mysql+oursql", + username=config.db_username, + password=config.db_password, + host=config.db_hostname, + port=config.db_port, + database=config.db_username + jogobot.config['db_suffix'] ) +engine = create_engine(url, echo=True) + + +from sqlalchemy.ext.declarative import ( + declarative_base, declared_attr, has_inherited_table ) +Base = declarative_base() + +from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey + +from sqlalchemy.orm import sessionmaker, relationship, composite +from sqlalchemy.ext.mutable import MutableComposite, MutableSet +from sqlalchemy.orm.collections import attribute_mapped_collection +import sqlalchemy.types as types + + +Session = sessionmaker(bind=engine) +session = Session() + +family = "dewpbeta" + +class Mysql(object): + session = session + @declared_attr + def _tableprefix(cls): + return family + "_" + @declared_attr + def _tablesuffix(cls): + return "s" + @declared_attr + def __tablename__(cls): + if has_inherited_table(cls): + return None + prefix = family + "_" + name = cls.__name__[len("Mysql"):].lower() + suffix = "s" + return cls._tableprefix + name + cls._tablesuffix + def changedp(self): + return self in self.session.dirty + +class ColumnList( list, MutableComposite ): """ - Basic interface class, containing opening of connection - - Specific querys should be defined in descendant classes per data type + Combines multiple Colums into a list like object """ - # Save mysqldb-connection as class attribute to use only one - # in descendant classes - connection = False - db_hostname = config.db_hostname - db_port = config.db_port - db_username = config.db_username - db_password = config.db_password - db_name = config.db_username + jogobot.config['db_suffix'] - db_table_prefix = False - - # Class variables for storing cached querys - _cached_update_data = [] - _update_query = '' - _cached_insert_data = {} - _insert_query = '' - - def __init__( self ): + def __init__( self, *columns ): """ - Opens a connection to MySQL-DB - - @returns mysql-stream MySQL Connection + Wrapper to the list constructor deciding whether we have initialization + with individual params per article or with an iterable. """ + # Individual params per article (from db), first one is a str + if isinstance( columns[0], str ) or \ + isinstance( columns[0], MutableSet ) or columns[0] is None: + super().__init__( columns ) + # Iterable articles list + else: + super().__init__( columns[0] ) - # Needs to be generated after Parsing of Args (not at import time) - if not type(self).db_table_prefix: - type(self).db_table_prefix = \ - pywikibot.Site().family.dbName(pywikibot.Site().code) - - # Now we can setup prepared queries - self._prepare_queries() - - # Connect to mysqldb only once - if not type( self ).connection: - - type( self ).connection = mysqldb.connect( - host=type( self ).db_hostname, - port=type( self ).db_port, - user=type( self ).db_username, - passwd=type( self ).db_password, - db=type( self ).db_name ) - - # Register callback for warnig if exit with cached db write querys - atexit.register( type(self).warn_if_not_flushed ) - - def __del__( self ): + def __setitem__(self, key, value): """ - Before deleting class, close connection to MySQL-DB + The MutableComposite class needs to be noticed about changes in our + component. So we tweak the setitem process. """ - type( self ).connection.close() + # set the item + super().__setitem__( key, value) - def _prepare_queries( self ): - """ - Used to replace placeholders in prepared queries - """ - type(self)._update_query = type(self)._update_query.format( - prefix=type(self).db_table_prefix) - type(self)._insert_query = type(self)._insert_query.format( - prefix=type(self).db_table_prefix) + # alert all parents to the change + self.changed() - @classmethod - def flush( cls ): - """ - Run cached querys + def __composite_values__(self): """ - if not cls.connection: - raise MysqlRedConnectionError( "No connection exists!" ) - - cursor = cls.connection.cursor() - - # Execute insert query - if cls._cached_insert_data: - # Since cls._cached_insert_data is a dict, we need to have a custom - # Generator to iterate over it - cursor.executemany( cls._insert_query, - ( cls._cached_insert_data[ key ] - for key in cls._cached_insert_data ) ) - # Reset after writing - cls._cached_insert_data = {} - - # Execute update query - # Use executemany since update could not be reduced to one query - if cls._cached_update_data: - cursor.executemany( cls._update_query, cls._cached_update_data ) - # Reset after writing - cls._cached_update_data = [] - - # Commit db changes - if cls._cached_insert_data or cls._cached_update_data: - cls.connection.commit() - - @classmethod - def warn_if_not_flushed(cls): + The Composite method needs to have this method to get the items for db. """ - Outputs a warning if there are db write querys cached and not flushed - before exiting programm! - """ - if cls._cached_update_data or cls._cached_insert_data: - jogobot.output( "Cached Database write querys not flushed!!! " + - "Data loss is possible!", "WARNING" ) - + return self -class MysqlRedPage( MysqlRed ): - """ - MySQL-db Interface for handling querys for RedPages - """ +class Status( types.TypeDecorator ): - # Class variables for storing cached querys - # '{prefix}' will be replaced during super().__init__() - _cached_update_data = [] - _update_query = 'UPDATE `{prefix}_redpages` \ -SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;' + impl = types.String - _cached_insert_data = {} - _insert_query = 'INSERT INTO `{prefix}_redpages` \ -( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? );' - - def __init__( self, pageid ): - """ - Creates a new instance, runs __init__ of parent class + def process_bind_param(self, value, dialect): """ + Returns status as commaseparated string (to save in DB) - super().__init__( ) - - self.__pageid = int( pageid ) - - self.data = self.get_page() - - def __del__( self ): - """ - Needed to prevent descendant classes of MYSQL_RED from deleting - connection to db - """ - pass - - def get_page( self ): - """ - Retrieves a red page row from MySQL-Database for given page_id - - @param int pageid MediaWiki page_id for page to retrieve - - @returns tuple Tuple with data for given page_id - bool FALSE if none found + @returns Raw status string + @rtype str """ - - cursor = type( self ).connection.cursor(mysqldb.DictCursor) - - cursor.execute( - 'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format( - prefix=type(self).db_table_prefix), ( self.__pageid, ) ) - - res = cursor.fetchone() - - if res: - return res + if isinstance(value, MutableSet): + return ",".join( value ) + elif isinstance(value, String ) or value is None: + return value else: - return False + raise ProgrammingError - def add_page( self, pagetitle, revid, status=0 ): - """ - Inserts a red page row in MySQL-Database for given pageid - @param int revid MediaWiki current revid - @param str pagetitle MediaWiki new pagetitle - @param int status Page parsing status + def process_result_value(self, value, dialect): """ + Sets status based on comma separated list - insert_data = { self.__pageid: ( self.__pageid, pagetitle, - revid, status ) } - - type( self )._cached_insert_data.update( insert_data ) - - # Manualy construct self.data dict - self.data = { 'pageid': self.__pageid, 'revid': revid, - 'pagetitle': pagetitle, 'status': status } - - def update_page( self, revid=None, pagetitle=None, status=0 ): - """ - Updates the red page row in MySQL-Database for given page_id - - @param int revid MediaWiki current rev_id - @param str pagetitle MediaWiki new page_title - @param int status Page parsing status - """ - - if not pagetitle: - pagetitle = self.data[ 'pagetitle' ] - if not revid: - revid = self.data[ 'revid' ] - - type( self )._cached_update_data.append( ( pagetitle, revid, - status, self.__pageid ) ) - - -class MysqlRedFam( MysqlRed ): - """ - MySQL-db Interface for handling querys for RedFams - """ - - # Class variables for storing cached querys - _cached_update_data = [] - _update_query = 'UPDATE `{prefix}_redfams` \ -SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ -`status`= ? WHERE `famhash` = ?;' - - _cached_insert_data = {} - _insert_query = 'INSERT INTO `{prefix}_redfams` \ -( famhash, redpageid, beginning, ending, status, heading, \ -article0, article1, article2, article3, article4, article5, article6, \ -article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' - - def __init__( self, famhash=None ): - """ - Creates a new instance, runs __init__ of parent class - """ - - self.__famhash = famhash - - super().__init__( ) - - def __del__( self ): - """ - Needed to prevent descendant classes of MYSQL_RED from deleting - connection to db - """ - pass - - def get_fam( self, famhash ): - """ - Retrieves a red family row from MySQL-Database for given fam_hash - - @returns dict Dictionairy with data for given fam hash - False if none found + @param raw_status Commaseparated string of stati (from DB) + @type raw_status str """ - self.__famhash = famhash - - cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + if value: + return MutableSet( value.strip().split(",")) + else: + return MutableSet([]) + + def copy(self, **kw): + return Status(self.impl.length) + + + +class MysqlRedFam( Mysql, Base ): + + famhash = Column( String(64), primary_key=True, unique=True ) + + __article0 = Column('article0', String(255), nullable=False ) + __article1 = Column('article1', String(255), nullable=False ) + __article2 = Column('article2', String(255), nullable=True ) + __article3 = Column('article3', String(255), nullable=True ) + __article4 = Column('article4', String(255), nullable=True ) + __article5 = Column('article5', String(255), nullable=True ) + __article6 = Column('article6', String(255), nullable=True ) + __article7 = Column('article7', String(255), nullable=True ) + __articlesList = composite( + ColumnList, __article0, __article1, __article2, __article3, + __article4, __article5, __article6, __article7 ) + + heading = Column( Text, nullable=False ) + redpageid = Column( + Integer, ForeignKey( "dewpbeta_redpages.pageid" ), nullable=False ) + beginning = Column( DateTime, nullable=False ) + ending = Column( DateTime, nullable=True ) + __status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) + + __article0_status = Column( + 'article0_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article1_status = Column( + 'article1_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article2_status = Column( + 'article2_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article3_status = Column( + 'article3_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article4_status = Column( + 'article4_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article5_status = Column( + 'article5_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article6_status = Column( + 'article6_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article7_status = Column( + 'article7_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __articlesStatus = composite( + ColumnList, __article0_status, __article1_status, __article2_status, + __article3_status, __article4_status, __article5_status, + __article6_status, __article7_status ) + + redpage = relationship( "RedPage", back_populates="redfams" ) + + @property + def articlesList(self): + """ + List of articles belonging to the redfam + """ + return self.__articlesList + + @articlesList.setter + def articlesList(self, articlesList): + # Make sure to always have full length for complete overwrites + while( len(articlesList) < 8 ): + articlesList.append(None) + self.__articlesList = ColumnList(articlesList) + + @property + def status( self ): + """ + Current fam status + """ + return self.__status + + @status.setter + def status( self, status ): + if status: + self.__status = MutableSet( status ) + else: + self.__status = MutableSet() + + @property + def articlesStatus(self): + """ + List of status strings/sets for the articles of the redfam + """ + return self.__articlesStatus + + @articlesStatus.setter + def articlesStatus(self, articlesStatus): + self.__articlesStatus = ColumnList(articlesStatus) + +class MysqlRedPage( Mysql, Base ): + pageid = Column( Integer, unique=True, primary_key=True ) + revid = Column( Integer, unique=True, nullable=False ) + pagetitle = Column( String(255), nullable=False ) + status = Column( MutableSet.as_mutable(Status(255)), nullable=True ) + + redfams = relationship( + "MysqlRedFam", order_by=MysqlRedFam.famhash, back_populates="redpage", + collection_class=attribute_mapped_collection("famhash")) + + +Base.metadata.create_all(engine) + +#~ class MysqlRed: + #~ """ + #~ Basic interface class, containing opening of connection + + #~ Specific querys should be defined in descendant classes per data type + #~ """ + + #~ # Save mysqldb-connection as class attribute to use only one + #~ # in descendant classes + #~ connection = False + #~ db_hostname = config.db_hostname + #~ db_port = config.db_port + #~ db_username = config.db_username + #~ db_password = config.db_password + #~ db_name = config.db_username + jogobot.config['db_suffix'] + #~ db_table_prefix = False + + #~ # Class variables for storing cached querys + #~ _cached_update_data = [] + #~ _update_query = '' + #~ _cached_insert_data = {} + #~ _insert_query = '' + + #~ def __init__( self ): + #~ """ + #~ Opens a connection to MySQL-DB + + #~ @returns mysql-stream MySQL Connection + #~ """ + + #~ # Needs to be generated after Parsing of Args (not at import time) + #~ if not type(self).db_table_prefix: + #~ type(self).db_table_prefix = \ + #~ pywikibot.Site().family.dbName(pywikibot.Site().code) + + #~ # Now we can setup prepared queries + #~ self._prepare_queries() + + #~ # Connect to mysqldb only once + #~ if not type( self ).connection: + + #~ type( self ).connection = mysqldb.connect( + #~ host=type( self ).db_hostname, + #~ port=type( self ).db_port, + #~ user=type( self ).db_username, + #~ passwd=type( self ).db_password, + #~ db=type( self ).db_name ) + + #~ # Register callback for warnig if exit with cached db write querys + #~ atexit.register( type(self).warn_if_not_flushed ) + + #~ def __del__( self ): + #~ """ + #~ Before deleting class, close connection to MySQL-DB + #~ """ + + #~ type( self ).connection.close() + + #~ def _prepare_queries( self ): + #~ """ + #~ Used to replace placeholders in prepared queries + #~ """ + #~ type(self)._update_query = type(self)._update_query.format( + #~ prefix=type(self).db_table_prefix) + #~ type(self)._insert_query = type(self)._insert_query.format( + #~ prefix=type(self).db_table_prefix) + + #~ @classmethod + #~ def flush( cls ): + #~ """ + #~ Run cached querys + #~ """ + #~ if not cls.connection: + #~ raise MysqlRedConnectionError( "No connection exists!" ) + + #~ cursor = cls.connection.cursor() + + #~ # Execute insert query + #~ if cls._cached_insert_data: + #~ # Since cls._cached_insert_data is a dict, we need to have a custom + #~ # Generator to iterate over it + #~ cursor.executemany( cls._insert_query, + #~ ( cls._cached_insert_data[ key ] + #~ for key in cls._cached_insert_data ) ) + #~ # Reset after writing + #~ cls._cached_insert_data = {} + + #~ # Execute update query + #~ # Use executemany since update could not be reduced to one query + #~ if cls._cached_update_data: + #~ cursor.executemany( cls._update_query, cls._cached_update_data ) + #~ # Reset after writing + #~ cls._cached_update_data = [] + + #~ # Commit db changes + #~ if cls._cached_insert_data or cls._cached_update_data: + #~ cls.connection.commit() + + #~ @classmethod + #~ def warn_if_not_flushed(cls): + #~ """ + #~ Outputs a warning if there are db write querys cached and not flushed + #~ before exiting programm! + #~ """ + #~ if cls._cached_update_data or cls._cached_insert_data: + #~ jogobot.output( "Cached Database write querys not flushed!!! " + + #~ "Data loss is possible!", "WARNING" ) + + +#~ class MysqlRedPage( MysqlRed ): + #~ """ + #~ MySQL-db Interface for handling querys for RedPages + #~ """ + + #~ # Class variables for storing cached querys + #~ # '{prefix}' will be replaced during super().__init__() + #~ _cached_update_data = [] + #~ _update_query = 'UPDATE `{prefix}_redpages` \ +#~ SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;' + + #~ _cached_insert_data = {} + #~ _insert_query = 'INSERT INTO `{prefix}_redpages` \ +#~ ( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? );' + + #~ def __init__( self, pageid ): + #~ """ + #~ Creates a new instance, runs __init__ of parent class + #~ """ + + #~ super().__init__( ) + + #~ self.__pageid = int( pageid ) + + #~ self.data = self.get_page() + + #~ def __del__( self ): + #~ """ + #~ Needed to prevent descendant classes of MYSQL_RED from deleting + #~ connection to db + #~ """ + #~ pass + + #~ def get_page( self ): + #~ """ + #~ Retrieves a red page row from MySQL-Database for given page_id + + #~ @param int pageid MediaWiki page_id for page to retrieve + + #~ @returns tuple Tuple with data for given page_id + #~ bool FALSE if none found + #~ """ + + #~ cursor = type( self ).connection.cursor(mysqldb.DictCursor) + + #~ cursor.execute( + #~ 'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format( + #~ prefix=type(self).db_table_prefix), ( self.__pageid, ) ) + + #~ res = cursor.fetchone() + + #~ if res: + #~ return res + #~ else: + #~ return False - cursor.execute( - 'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'. - format( prefix=type(self).db_table_prefix), ( famhash, ) ) + #~ def add_page( self, pagetitle, revid, status=0 ): + #~ """ + #~ Inserts a red page row in MySQL-Database for given pageid - self.data = cursor.fetchone() + #~ @param int revid MediaWiki current revid + #~ @param str pagetitle MediaWiki new pagetitle + #~ @param int status Page parsing status + #~ """ - def add_fam( self, articlesList, heading, redpageid, - beginning, ending=None, status=0 ): + #~ insert_data = { self.__pageid: ( self.__pageid, pagetitle, + #~ revid, status ) } - data = [ self.__famhash, redpageid, beginning, ending, - status, heading ] + #~ type( self )._cached_insert_data.update( insert_data ) - for article in articlesList: - data.append( str( article ) ) + #~ # Manualy construct self.data dict + #~ self.data = { 'pageid': self.__pageid, 'revid': revid, + #~ 'pagetitle': pagetitle, 'status': status } - while len( data ) < 14: - data.append( None ) + #~ def update_page( self, revid=None, pagetitle=None, status=0 ): + #~ """ + #~ Updates the red page row in MySQL-Database for given page_id - data = tuple( data ) + #~ @param int revid MediaWiki current rev_id + #~ @param str pagetitle MediaWiki new page_title + #~ @param int status Page parsing status + #~ """ - insert_data = { self.__famhash: data } - type( self )._cached_insert_data.update( insert_data ) + #~ if not pagetitle: + #~ pagetitle = self.data[ 'pagetitle' ] + #~ if not revid: + #~ revid = self.data[ 'revid' ] - # Manualy construct self.data dict - data_keys = ( 'famhash', 'redpageid', 'beginning', 'ending', - 'status', 'heading', 'article0', 'article1', 'article2', - 'article3', 'article4', 'article5', 'article6', - 'article7' ) - self.data = dict( zip( data_keys, data ) ) + #~ type( self )._cached_update_data.append( ( pagetitle, revid, + #~ status, self.__pageid ) ) - def update_fam( self, redpageid, heading, beginning, ending, status ): - """ - Updates the red fam row in MySQL-Database for given fam_hash - @param int redpageid MediaWiki page_id - @param datetime beginning Timestamp of beginning - qparam datetime ending Timestamp of ending of - @param int status red_fam status - """ +#~ class MysqlRedFam( MysqlRed ): + #~ """ + #~ MySQL-db Interface for handling querys for RedFams + #~ """ - type( self )._cached_update_data.append( ( redpageid, heading, - beginning, ending, status, - self.__famhash ) ) + #~ # Class variables for storing cached querys + #~ _cached_update_data = [] + #~ _update_query = 'UPDATE `{prefix}_redfams` \ +#~ SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ +#~ `status`= ? WHERE `famhash` = ?;' - def get_by_status( self, status ): - """ - Generator witch fetches redFams with given status from DB - """ + #~ _cached_insert_data = {} + #~ _insert_query = 'INSERT INTO `{prefix}_redfams` \ +#~ ( famhash, redpageid, beginning, ending, status, heading, \ +#~ article0, article1, article2, article3, article4, article5, article6, \ +#~ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' - cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + #~ def __init__( self, famhash=None ): + #~ """ + #~ Creates a new instance, runs __init__ of parent class + #~ """ - cursor.execute( - 'SELECT * FROM `{prefix}_redfams` WHERE `status` = LIKE %?%;'. - format( prefix=type( self ).db_table_prefix), ( status, ) ) + #~ self.__famhash = famhash - while True: - res = cursor.fetchmany( 1000 ) - if not res: - break - for row in res: - yield row + #~ super().__init__( ) - def get_by_status_and_ending( self, status, ending ): - """ - Generator witch fetches redFams with given status from DB - """ + #~ def __del__( self ): + #~ """ + #~ Needed to prevent descendant classes of MYSQL_RED from deleting + #~ connection to db + #~ """ + #~ pass + + #~ def get_fam( self, famhash ): + #~ """ + #~ Retrieves a red family row from MySQL-Database for given fam_hash + + #~ @returns dict Dictionairy with data for given fam hash + #~ False if none found + #~ """ + #~ self.__famhash = famhash + + #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - - cursor.execute( ( - 'SELECT * ' + - 'FROM `{prefix}_redfams` `F` ' + - 'INNER JOIN `{prefix}_redpages` `P` ' + - 'ON `F`.`status` = ? ' + - 'AND `F`.`ending` >= ? ' + - 'AND `F`.`redpageid` = `P`.`pageid`;').format( - prefix=type( self ).db_table_prefix), - ( status, ending ) ) - - while True: - res = cursor.fetchmany( 1000 ) - if not res: - break - for row in res: - yield row + #~ cursor.execute( + #~ 'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'. + #~ format( prefix=type(self).db_table_prefix), ( famhash, ) ) + + #~ self.data = cursor.fetchone() + + #~ def add_fam( self, articlesList, heading, redpageid, + #~ beginning, ending=None, status=0 ): + + #~ data = [ self.__famhash, redpageid, beginning, ending, + #~ status, heading ] + + #~ for article in articlesList: + #~ data.append( str( article ) ) + + #~ while len( data ) < 14: + #~ data.append( None ) + + #~ data = tuple( data ) + + #~ insert_data = { self.__famhash: data } + #~ type( self )._cached_insert_data.update( insert_data ) + + #~ # Manualy construct self.data dict + #~ data_keys = ( 'famhash', 'redpageid', 'beginning', 'ending', + #~ 'status', 'heading', 'article0', 'article1', 'article2', + #~ 'article3', 'article4', 'article5', 'article6', + #~ 'article7' ) + #~ self.data = dict( zip( data_keys, data ) ) + + #~ def update_fam( self, redpageid, heading, beginning, ending, status ): + #~ """ + #~ Updates the red fam row in MySQL-Database for given fam_hash + + #~ @param int redpageid MediaWiki page_id + #~ @param datetime beginning Timestamp of beginning + #~ qparam datetime ending Timestamp of ending of + #~ @param int status red_fam status + #~ """ + + #~ type( self )._cached_update_data.append( ( redpageid, heading, + #~ beginning, ending, status, + #~ self.__famhash ) ) + + #~ def get_by_status( self, status ): + #~ """ + #~ Generator witch fetches redFams with given status from DB + #~ """ + + #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + + #~ cursor.execute( + #~ 'SELECT * FROM `{prefix}_redfams` WHERE `status` = LIKE %?%;'. + #~ format( prefix=type( self ).db_table_prefix), ( status, ) ) + + #~ while True: + #~ res = cursor.fetchmany( 1000 ) + #~ if not res: + #~ break + #~ for row in res: + #~ yield row + + #~ def get_by_status_and_ending( self, status, ending ): + #~ """ + #~ Generator witch fetches redFams with given status from DB + #~ """ + + #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + + #~ cursor.execute( ( + #~ 'SELECT * ' + + #~ 'FROM `{prefix}_redfams` `F` ' + + #~ 'INNER JOIN `{prefix}_redpages` `P` ' + + #~ 'ON `F`.`status` = ? ' + + #~ 'AND `F`.`ending` >= ? ' + + #~ 'AND `F`.`redpageid` = `P`.`pageid`;').format( + #~ prefix=type( self ).db_table_prefix), + #~ ( status, ending ) ) + + #~ while True: + #~ res = cursor.fetchmany( 1000 ) + #~ if not res: + #~ break + #~ for row in res: + #~ yield row class MysqlRedError(Exception): diff --git a/lib/redfam.py b/lib/redfam.py index 6e8b3d5..526f902 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -3,7 +3,7 @@ # # redfam.py # -# Copyright 2015 GOLDERWEB – Jonathan Golder +# Copyright 2017 GOLDERWEB – Jonathan Golder # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -35,16 +35,17 @@ import pywikibot # noqa from pywikibot.tools import deprecated # noqa import jogobot -from lib.mysqlred import MysqlRedFam +#~ from lib.mysqlred import Column, Integer, String, Text, DateTime, ForeignKey, ColumnList, Status +from lib.mysqlred import MysqlRedFam, MutableSet, ColumnList #, Mysql, Base, relationship, composite, -class RedFam: +class RedFam( MysqlRedFam ): """ Basic class for RedFams, containing the basic data structure """ def __init__( self, articlesList, beginning, ending=None, redpageid=None, - status=None, famhash=None, heading=None ): + status=MutableSet(), famhash=None, heading=None ): """ Generates a new RedFam object @@ -61,21 +62,32 @@ class RedFam: self.site = pywikibot.Site() # Database interface - self._mysql = MysqlRedFam( famhash ) + #self._mysql = MysqlRedFam( famhash ) # Initial attribute values - self._articlesList = articlesList - self._beginning = beginning - self._ending = ending - self._redpageid = redpageid - self._status = set() - self._status = self._parse_status(status) - self._famhash = famhash - self._heading = heading + #~ self.articlesList = articlesList + #~ self.beginning = beginning + #~ self.ending = ending + #~ self.redpageid = redpageid +#~ # self._status = set() +#~ # self._status = self._parse_status(status) + #~ self.famhash = famhash + #~ self.heading = heading + #self.status = status - # Calculates the sha1 hash over self._articlesList to - # rediscover known redundance families - self.calc_famhash() + #articlesStatus = ColumnList([ MutableSet() for x in range(0,8) ]) + + #~ # Calculates the sha1 hash over self._articlesList to + #~ # rediscover known redundance families + #~ self.calc_famhash() + + #~ if not status: + #~ status = MutableSet() + + super().__init__( articlesList=articlesList, beginning=beginning, ending=ending, redpageid=redpageid, + famhash=famhash, heading=heading, status=status, articlesStatus=None ) + + #super().__init__() def __repr__( self ): """ @@ -85,64 +97,75 @@ class RedFam: """ __repr = "RedFam( " + \ - "articlesList=" + repr( self._articlesList ) + \ - ", heading=" + repr( self._heading ) + \ - ", beginning=" + repr( self._beginning ) + \ - ", ending=" + repr( self._ending ) + \ - ", red_page_id=" + repr( self._redpageid ) + \ - ", status=" + repr( self._status ) + \ - ", fam_hash=" + repr( self._famhash ) + \ + "articlesList=" + repr( self.articlesList ) + \ + ", heading=" + repr( self.heading ) + \ + ", beginning=" + repr( self.beginning ) + \ + ", ending=" + repr( self.ending ) + \ + ", red_page_id=" + repr( self.redpageid ) + \ + ", status=" + repr( self.status ) + \ + ", fam_hash=" + repr( self.famhash ) + \ " )" return __repr - def calc_famhash( self ): + @classmethod + def calc_famhash(cls, articlesList ): + + h = hashlib.sha1() + # Since articlesList attr of RedFam will have always 8 Members we + # need to fill up smaller lists (longers will be cropped below). + while len( articlesList) < 8: + articlesList.append(None) + + h.update( str( articlesList[:8] ).encode('utf-8') ) + + return h.hexdigest() + + def c_famhash( self ): """ Calculates the SHA-1 hash for the articlesList of redundance family. Since we don't need security SHA-1 is just fine. @returns str String with the hexadecimal hash digest """ + print( type( self ) ) - h = hashlib.sha1() - h.update( str( self._articlesList[:8] ).encode('utf-8') ) - - if self._famhash and h.hexdigest() != self._famhash: - raise RedFamHashError( self._famhash, h.hexdigest() ) - - elif self._famhash: + if self.famhash and type(self).calc_famhash(self.articlesList) != self.famhash: + raise RedFamHashError( self.famhash, h.hexdigest() ) + elif self.famhash: return else: - self._famhash = h.hexdigest() + self.famhash = type(self).calc_famhash(self.articlesList) - def changed( self ): - """ - Checks wether anything has changed and maybe triggers db update - """ + #~ def changed( self ): + #~ """ + #~ Checks wether anything has changed and maybe triggers db update + #~ """ - # On archived redfams do not delete possibly existing ending - if( not self._ending and "archived" in self._status and - self._mysql.data[ 'ending' ] ): + #~ # On archived redfams do not delete possibly existing ending + #~ if( not self.ending and "archived" in self._status and + #~ self._mysql.data[ 'ending' ] ): - self._ending = self._mysql.data[ 'ending' ] + #~ self._ending = self._mysql.data[ 'ending' ] - # Since status change means something has changed, update database - if( self._raw_status != self._mysql.data[ 'status' ] or - self._beginning != self._mysql.data[ 'beginning' ] or - self._ending != self._mysql.data[ 'ending' ] or - self._red_page_id != self._mysql.data[ 'redpageid' ] or - self._heading != self._mysql.data[ 'heading' ]): + #~ # Since status change means something has changed, update database + #~ if( self._raw_status != self._mysql.data[ 'status' ] or + #~ self._beginning != self._mysql.data[ 'beginning' ] or + #~ self._ending != self._mysql.data[ 'ending' ] or + #~ self._red_page_id != self._mysql.data[ 'redpageid' ] or + #~ self._heading != self._mysql.data[ 'heading' ]): - self._mysql.update_fam( self._redpageid, self._heading, - self._beginning, self._ending, - self._raw_status() ) + #~ self._mysql.update_fam( self._redpageid, self._heading, + #~ self._beginning, self._ending, + #~ self._raw_status() ) @classmethod def flush_db_cache( cls ): """ Calls flush method of Mysql Interface class """ - MysqlRedFam.flush() + cls.session.commit() + #~ MysqlRedFam.flush() def add_status(self, status): """ @@ -151,7 +174,7 @@ class RedFam: @param status Statusstring to add @type status str """ - self._status.add(status) + self.status.add(status) def remove_status(self, status, weak=True): """ @@ -164,9 +187,9 @@ class RedFam: @type bool """ if weak: - self._status.discard(status) + self.status.discard(status) else: - self._status.remove(status) + self.status.remove(status) def has_status(self, status): """ @@ -176,28 +199,28 @@ class RedFam: @type status str @returns True if status is present else False """ - if status in self._status: + if status in self.status: return True else: return False - def _parse_status(self, raw_status ): - """ - Sets status based on comma separated list + #~ def _parse_status(self, raw_status ): + #~ """ + #~ Sets status based on comma separated list - @param raw_status Commaseparated string of stati (from DB) - @type raw_status str - """ - self._status = set( raw_status.strip().split(",")) + #~ @param raw_status Commaseparated string of stati (from DB) + #~ @type raw_status str + #~ """ + #~ self._status = set( raw_status.strip().split(",")) - def _raw_status( self ): - """ - Returns status as commaseparated string (to save in DB) + #~ def _raw_status( self ): + #~ """ + #~ Returns status as commaseparated string (to save in DB) - @returns Raw status string - @rtype str - """ - return ",".join( self._status ) + #~ @returns Raw status string + #~ @rtype str + #~ """ + #~ return ",".join( self._status ) def article_add_status(self, status, index=None, title=None ): """ @@ -331,7 +354,7 @@ class RedFamParser( RedFam ): wurde gewünscht von:" __done_notice2 = "{{Erledigt|" - def __init__( self, heading, redpage, redpagearchive, + def __init__( self, articlesList, heading, redpage, redpagearchive, beginning, ending=None ): """ Creates a RedFam object based on data collected while parsing red_pages @@ -346,57 +369,111 @@ class RedFamParser( RedFam ): str strptime parseable string """ + # Parse the provided heading of redundance section + # to set self._articlesList + #~ self.heading = str(heading) + #~ self.articlesList = articlesList + + #~ # Catch sections with more then 8 articles, print error + #~ if len( self.articlesList ) > 8: + #~ # For repression in output we need to know the fam hash + #~ self.calc_famhash() + + #~ jogobot.output( + #~ ( "\03{{lightred}}" + + #~ "Maximum number of articles in red_fam exceeded, " + + #~ "maximum number is 8, {number:d} were given \n {repress}" + #~ ).format( datetime=datetime.now().strftime( + #~ "%Y-%m-%d %H:%M:%S" ), number=len( self._articlesList ), + #~ repress=repr( self ) ), + #~ "WARNING" ) + + #~ # Only save the first 8 articles +#~ # self.articlesList = self.articlesList[:8] + + # Calculates the sha1 hash over self._articlesList to + # rediscover known redundance families + famhash = type(self).calc_famhash(articlesList) + + #~ obj = self.session.query(RedFamParser).filter(RedFamParser.famhash == self.famhash ).one_or_none() + #~ if obj: + #~ self = obj + + # Set object attributes: - self._redpageid = redpage._pageid + #~ self.redpageid = redpage._pageid self._redpagearchive = redpagearchive - self._famhash = None +# self.famhash = None # Method self.add_beginning sets self._beginning directly - self.add_beginning( beginning ) + #~ self.add_beginning( beginning ) - # Method self.add_ending sets self._ending directly - if( ending ): - self.add_ending( ending ) - else: - # If no ending was provided set to None - self._ending = None + #~ # Method self.add_ending sets self._ending directly + #~ if( ending ): + #~ self.add_ending( ending ) + #~ else: + #~ # If no ending was provided set to None + #~ self.ending = None - self._status = set() + #~ self.status = MutableSet() - # Parse the provided heading of redundance section - # to set self._articlesList - self.heading_parser( heading ) + beginning = self.__datetime(beginning) + if ending: + ending = self.__datetime(ending) - # Calculates the sha1 hash over self._articlesList to - # rediscover known redundance families - self.calc_famhash() + super().__init__( articlesList, beginning, ending=ending, redpageid=redpage._pageid, + famhash=famhash, heading=heading ) + # Check status changes + self.check_status() + + self.session.add(self) # Open database connection, ask for data if existing, # otherwise create entry - self.__handle_db() +# self.__handle_db() + - # Check status changes - self.status() # Triggers db update if anything changed - self.changed() +# self.changed() - def __handle_db( self ): - """ - Handles opening of db connection - """ - # We need a connection to our mysqldb - self._mysql = MysqlRedFam( ) - self._mysql.get_fam( self._famhash ) - if not self._mysql.data: - self._mysql.add_fam( self._articlesList, self._heading, - self._redpageid, self._beginning, - self._ending ) + #~ def __handle_db( self ): + #~ """ + #~ Handles opening of db connection + #~ """ + + #~ # We need a connection to our mysqldb + #~ self._mysql = MysqlRedFam( ) + #~ self._mysql.get_fam( self._famhash ) + + #~ if not self._mysql.data: + #~ self._mysql.add_fam( self._articlesList, self._heading, + #~ self._redpageid, self._beginning, + #~ self._ending ) + + def update( self, articlesList, heading, redpage, redpagearchive, + beginning, ending=None): + + self.articlesList = articlesList; + self.heading = heading; + self.redpage = redpage; + self.redpageid = redpage.pageid; + + self.add_beginning( beginning ) + + if( ending ): + self.add_ending( ending ) + + self._redpagearchive = redpagearchive - def heading_parser( self, heading ): + # Check status changes + self.check_status() + + @classmethod + def heading_parser( cls, heading ): """ Parses given red_fam_heading string and saves articles list @@ -404,34 +481,16 @@ class RedFamParser( RedFam ): @type heading wikicode or mwparser-parseable """ - # Save heading as string - self._heading = str( heading ) - # Parse string heading with mwparse again everytime # In some cases the given wikicode is broken due to syntax errors # (Task FS#77) - heading = mwparser.parse( self._heading ) + heading = mwparser.parse( str( heading ) ) # Save destinations of wikilinks in headings - self._articlesList = [ str( link.title ) for link + return [ str( link.title ) for link in heading.ifilter_wikilinks() ] - # Catch sections with more then 8 articles, print error - if len( self._articlesList ) > 8: - # For repression in output we need to know the fam hash - self.calc_famhash() - jogobot.output( - ( "\03{{lightred}}" + - "Maximum number of articles in red_fam exceeded, " + - "maximum number is 8, {number:d} were given \n {repress}" - ).format( datetime=datetime.now().strftime( - "%Y-%m-%d %H:%M:%S" ), number=len( self._articlesList ), - repress=repr( self ) ), - "WARNING" ) - - # Only save the first 8 articles - self._articlesList = self._articlesList[:8] def add_beginning( self, beginning ): """ @@ -440,7 +499,7 @@ class RedFamParser( RedFam ): @param datetime datetime Beginning date """ - self._beginning = self.__datetime( beginning ) + self.beginning = self.__datetime( beginning ) def add_ending( self, ending ): """ @@ -449,7 +508,7 @@ class RedFamParser( RedFam ): @param datetime datetime Ending date """ - self._ending = self.__datetime( ending ) + self.ending = self.__datetime( ending ) def __datetime( self, timestamp ): """ @@ -473,7 +532,7 @@ class RedFamParser( RedFam ): type( self ).__timestamp_format ) return result - def status( self ): + def check_status( self ): """ Handles detection of correct status There are three possible stati: @@ -485,7 +544,7 @@ class RedFamParser( RedFam ): # No ending, discussion is running: # Sometimes archived discussions also have no detectable ending - if not self._ending and not self._redpagearchive: + if not self.ending and not self._redpagearchive: self.add_status("open") else: self.remove_status("open") @@ -513,7 +572,7 @@ class RedFamParser( RedFam ): return False @classmethod - def parser( cls, text, page, isarchive=False ): + def parser( cls, text, redpage, isarchive=False ): """ Handles parsing of redfam section @@ -536,16 +595,33 @@ class RedFamParser( RedFam ): if not beginning: match = re.search( jogobot.config["redundances"]["reddiscs_onlyinclude_re"], - page.title() ) + redpage.page.title() ) if match: beginning = datetime.strptime( "01. {month} {year}".format( month=match.group(1), year=match.group(2)), "%d. %B %Y" ) + articlesList = RedFamParser.heading_parser( heading ) + famhash = RedFamParser.calc_famhash( articlesList ) + + # Check for existing objects in DB first in current redpage + redfam = redpage.redfams.get(famhash) + + with RedFamParser.session.no_autoflush: + if not redfam: + # Otherwise in db table + redfam = RedFamParser.session.query(RedFamParser).filter( + RedFamParser.famhash == famhash ).one_or_none() + + if redfam: + # Existing redfams need to be updated + redfam.update( articlesList, str(heading), redpage, isarchive, beginning, ending ) - # Create the RedFam object - RedFamParser( heading, page, isarchive, beginning, ending ) + else: + # Create the RedFam object + redfam = RedFamParser( articlesList, str(heading).strip(), redpage.page, isarchive, beginning, ending ) + return redfam @classmethod def extract_dates( cls, text, isarchive=False ): @@ -615,16 +691,16 @@ class RedFamWorker( RedFam ): mysql_data[ 'status' ], mysql_data[ 'famhash' ], mysql_data[ 'heading' ] ) - self._mysql.data = mysql_data +# #~ self._mysql.data = mysql_data - # Set up article status - index = 0 - for article in self._articlesList: - raw_status = mysql_data[ "article" + str(index) + "_status" ] - if not raw_status: - raw_status = str() - self._article_parse_status( raw_status, index ) - index += 1 + #~ # Set up article status + #~ index = 0 + #~ for article in self.articlesList: + #~ raw_status = mysql_data[ "article" + str(index) + "_status" ] + #~ if not raw_status: + #~ raw_status = str() + #~ self._article_parse_status( raw_status, index ) + #~ index += 1 # Get related RedPage-Information self.redpageid = mysql_data[ 'pageid' ] diff --git a/lib/redpage.py b/lib/redpage.py index b4361b9..558cd8c 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -30,15 +30,23 @@ import mwparserfromhell as mwparser import jogobot # noqa -from lib.mysqlred import MysqlRedPage -from lib.redfam import RedFamParser +#~ from lib.mysqlred import Column, Integer, String, Text, DateTime, ForeignKey, ColumnList, Status +from lib.mysqlred import MysqlRedPage, relationship, MutableSet #MysqlRedFam, Base, composite, +from lib.redfam import RedFam, RedFamParser +from sqlalchemy.orm.collections import attribute_mapped_collection -class RedPage: +class RedPage( MysqlRedPage ): """ Class for handling redundance discussion pages and archives """ + #TODO POLYMORPHISM? of BASEClass + redfams = relationship( + "RedFamParser", order_by=RedFamParser.famhash, + back_populates="redpage", + collection_class=attribute_mapped_collection( "famhash" ) ) + def __init__( self, page=None, pageid=None, archive=False ): """ Generate a new RedPage object based on the given pywikibot page object @@ -49,57 +57,91 @@ class RedPage: @type pageid int """ - self._status = set() - # Safe the pywikibot page object - self.page = page - self.pageid = pageid - self._archive = archive + if page: + self._page = page + pageid = self._page.pageid - self.__handle_db( ) - self.is_page_changed() + super().__init__( + pageid=pageid, + revid=self.page._revid, + pagetitle=self.page.title(), + status=MutableSet() ) #TODO EMPTY MutableSet() necessary? + #~ self._status = set() - self._parsed = None + if archive: + self.status.add("archived") - def __handle_db( self ): - """ - Handles opening of db connection - """ + #~ self._archive = archive - # We need a connection to our mysqldb - if self.page: - self.__mysql = MysqlRedPage( self.page._pageid ) - self.pageid = self.page._pageid - elif self.pageid: - self.__mysql = MysqlRedPage( self.pageid ) - self.page = pywikibot.Page( pywikibot.Site(), - self.__mysql.data['pagetitle'] ) - self.page.exists() - else: - raise ValueError( "Page NOR pagid provided!" ) + #~ self.pageid = pageid + #~ self.revid = self.page._revid + #~ self.p + #~ self.status = MutableSet() + +# self.__handle_db( ) + #~ self.is_page_changed() + + #~ self._parsed = None + + self.session.add(self) + + #~ def __handle_db( self ): + #~ """ + #~ Handles opening of db connection + #~ """ + + #~ # We need a connection to our mysqldb + #~ if self.page: + #~ self.__mysql = MysqlRedPage( self.page._pageid ) + #~ self.pageid = self.page._pageid + #~ elif self.pageid: + #~ self.__mysql = MysqlRedPage( self.pageid ) + #~ self.page = pywikibot.Page( pywikibot.Site(), + #~ self.pagetitle ) + #~ self.page.exists() + #~ else: + #~ raise ValueError( "Page NOR pagid provided!" ) + + #~ if not self.__mysql.data: + #~ self.__mysql.add_page( self.page.title(), self.page._revid ) + + def update( self, page ): + + self._page = page + self.revid = page._revid + self.pagetitle = page.title() - if not self.__mysql.data: - self.__mysql.add_page( self.page.title(), self.page._revid ) + @property + def page(self): + if not hasattr(self,"_page"): + self._page = pywikibot.Page( pywikibot.Site(), self.pagetitle ) + + return self._page + + @property + def archive(self): + return self.has_status("archived") def is_page_changed( self ): """ Check wether the page was changed since last run """ - - if( self.__mysql.data != { 'pageid': self.page._pageid, - 'revid': self.page._revid, - 'pagetitle': self.page.title(), - 'status': self.__mysql.data[ 'status' ] } ): - self._changed = True - else: - self._changed = False + self._changed = self.changedp() + #~ if( self.__mysql.data != { 'pageid': self.page._pageid, + #~ 'revid': self.page._revid, + #~ 'pagetitle': self.page.title(), + #~ 'status': self.__mysql.data[ 'status' ] } ): + #~ self._changed = True + #~ else: + #~ self._changed = False def is_archive( self ): """ Detects wether current page is an archive of discussions """ - if( self._archive or ( u"/Archiv" in self.page.title() ) or + if( self.archive or ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ) ): @@ -111,8 +153,7 @@ class RedPage: """ Decides wether current RedPage needs to be parsed or not """ - - if( self._changed or self.__mysql.data[ 'status' ] == "" ): + if( self.changedp() or not self.has_status("parsed") ): return True else: return False @@ -140,31 +181,34 @@ class RedPage: yield fam else: + self.status.add("parsed") self._parsed = True - self.__update_db() + #~ self.__update_db() - def __update_db( self ): - """ - Updates the page meta data in mysql db - """ - if( self._parsed or not self._changed ): - self.add_status( "open" ) + #~ def __update_db( self ): + #~ """ + #~ Updates the page meta data in mysql db + #~ """ + #~ if( self._parsed or not self._changed ): + #~ self.add_status( "open" ) - if( self.is_archive() ): - self.remove_status( "open" ) - self.add_status( "archived" ) - else: - self._status = set() + #~ if( self.is_archive() ): + #~ self.remove_status( "open" ) + #~ self.add_status( "archived" ) + #~ else: + #~ pass + #~ self._status = set() - self.__mysql.update_page( self.page._revid, self.page.title(), - self._raw_status() ) + #~ self.__mysql.update_page( self.page._revid, self.page.title(), + #~ self._raw_status() ) @classmethod def flush_db_cache( cls ): """ Calls flush method of Mysql Interface class """ - MysqlRedPage.flush() + cls.session.commit() + #~ MysqlRedPage.flush() def add_status(self, status): """ @@ -173,7 +217,7 @@ class RedPage: @param status Statusstring to add @type status str """ - self._status.add(status) + self.status.add(status) def remove_status(self, status, weak=True): """ @@ -186,9 +230,9 @@ class RedPage: @type bool """ if weak: - self._status.discard(status) + self.status.discard(status) else: - self._status.remove(status) + self.status.remove(status) def has_status(self, status): """ @@ -198,25 +242,25 @@ class RedPage: @type status str @returns True if status is present else False """ - if status in self._status: + if status in self.status: return True else: return False - def _parse_status(self, raw_status ): - """ - Sets status based on comma separated list + #~ def _parse_status(self, raw_status ): + #~ """ + #~ Sets status based on comma separated list - @param raw_status Commaseparated string of stati (from DB) - @type raw_status str - """ - self._status = set( raw_status.strip().split(",")) + #~ @param raw_status Commaseparated string of stati (from DB) + #~ @type raw_status str + #~ """ + #~ self._status = set( raw_status.strip().split(",")) - def _raw_status( self ): - """ - Returns status as commaseparated string (to save in DB) + #~ def _raw_status( self ): + #~ """ + #~ Returns status as commaseparated string (to save in DB) - @returns Raw status string - @rtype str - """ - return ",".join( self._status ) + #~ @returns Raw status string + #~ @rtype str + #~ """ + #~ return ",".join( self._status ) From 467f829af2f8a24222a5da3f2823ad53b2de3166 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 7 Mar 2017 10:54:10 +0100 Subject: [PATCH 02/11] Some cleanups Remove old commented out code from manual mysql solution --- bots/reddiscparser.py | 2 +- lib/mysqlred.py | 336 +----------------------------------------- lib/redfam.py | 180 +--------------------- lib/redpage.py | 97 +----------- 4 files changed, 12 insertions(+), 603 deletions(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index c789d86..336cd9f 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -153,7 +153,7 @@ class DiscussionParserBot( else: redpage = RedPage( self.current_page ) - #~ # Check whether parsing is needed + # Check whether parsing is needed if redpage.is_parsing_needed(): # Count families for failure analysis fam_counter = 0 diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 8257822..3710219 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -67,6 +67,7 @@ session = Session() family = "dewpbeta" + class Mysql(object): session = session @declared_attr @@ -122,6 +123,7 @@ class ColumnList( list, MutableComposite ): """ return self + class Status( types.TypeDecorator ): impl = types.String @@ -157,7 +159,6 @@ class Status( types.TypeDecorator ): return Status(self.impl.length) - class MysqlRedFam( Mysql, Base ): famhash = Column( String(64), primary_key=True, unique=True ) @@ -243,6 +244,7 @@ class MysqlRedFam( Mysql, Base ): def articlesStatus(self, articlesStatus): self.__articlesStatus = ColumnList(articlesStatus) + class MysqlRedPage( Mysql, Base ): pageid = Column( Integer, unique=True, primary_key=True ) revid = Column( Integer, unique=True, nullable=False ) @@ -254,338 +256,8 @@ class MysqlRedPage( Mysql, Base ): collection_class=attribute_mapped_collection("famhash")) -Base.metadata.create_all(engine) - -#~ class MysqlRed: - #~ """ - #~ Basic interface class, containing opening of connection - - #~ Specific querys should be defined in descendant classes per data type - #~ """ - - #~ # Save mysqldb-connection as class attribute to use only one - #~ # in descendant classes - #~ connection = False - #~ db_hostname = config.db_hostname - #~ db_port = config.db_port - #~ db_username = config.db_username - #~ db_password = config.db_password - #~ db_name = config.db_username + jogobot.config['db_suffix'] - #~ db_table_prefix = False - - #~ # Class variables for storing cached querys - #~ _cached_update_data = [] - #~ _update_query = '' - #~ _cached_insert_data = {} - #~ _insert_query = '' - - #~ def __init__( self ): - #~ """ - #~ Opens a connection to MySQL-DB - - #~ @returns mysql-stream MySQL Connection - #~ """ - - #~ # Needs to be generated after Parsing of Args (not at import time) - #~ if not type(self).db_table_prefix: - #~ type(self).db_table_prefix = \ - #~ pywikibot.Site().family.dbName(pywikibot.Site().code) - - #~ # Now we can setup prepared queries - #~ self._prepare_queries() - - #~ # Connect to mysqldb only once - #~ if not type( self ).connection: - - #~ type( self ).connection = mysqldb.connect( - #~ host=type( self ).db_hostname, - #~ port=type( self ).db_port, - #~ user=type( self ).db_username, - #~ passwd=type( self ).db_password, - #~ db=type( self ).db_name ) - - #~ # Register callback for warnig if exit with cached db write querys - #~ atexit.register( type(self).warn_if_not_flushed ) - - #~ def __del__( self ): - #~ """ - #~ Before deleting class, close connection to MySQL-DB - #~ """ - - #~ type( self ).connection.close() - - #~ def _prepare_queries( self ): - #~ """ - #~ Used to replace placeholders in prepared queries - #~ """ - #~ type(self)._update_query = type(self)._update_query.format( - #~ prefix=type(self).db_table_prefix) - #~ type(self)._insert_query = type(self)._insert_query.format( - #~ prefix=type(self).db_table_prefix) - - #~ @classmethod - #~ def flush( cls ): - #~ """ - #~ Run cached querys - #~ """ - #~ if not cls.connection: - #~ raise MysqlRedConnectionError( "No connection exists!" ) - - #~ cursor = cls.connection.cursor() - - #~ # Execute insert query - #~ if cls._cached_insert_data: - #~ # Since cls._cached_insert_data is a dict, we need to have a custom - #~ # Generator to iterate over it - #~ cursor.executemany( cls._insert_query, - #~ ( cls._cached_insert_data[ key ] - #~ for key in cls._cached_insert_data ) ) - #~ # Reset after writing - #~ cls._cached_insert_data = {} - - #~ # Execute update query - #~ # Use executemany since update could not be reduced to one query - #~ if cls._cached_update_data: - #~ cursor.executemany( cls._update_query, cls._cached_update_data ) - #~ # Reset after writing - #~ cls._cached_update_data = [] - - #~ # Commit db changes - #~ if cls._cached_insert_data or cls._cached_update_data: - #~ cls.connection.commit() - - #~ @classmethod - #~ def warn_if_not_flushed(cls): - #~ """ - #~ Outputs a warning if there are db write querys cached and not flushed - #~ before exiting programm! - #~ """ - #~ if cls._cached_update_data or cls._cached_insert_data: - #~ jogobot.output( "Cached Database write querys not flushed!!! " + - #~ "Data loss is possible!", "WARNING" ) - - -#~ class MysqlRedPage( MysqlRed ): - #~ """ - #~ MySQL-db Interface for handling querys for RedPages - #~ """ - - #~ # Class variables for storing cached querys - #~ # '{prefix}' will be replaced during super().__init__() - #~ _cached_update_data = [] - #~ _update_query = 'UPDATE `{prefix}_redpages` \ -#~ SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;' - - #~ _cached_insert_data = {} - #~ _insert_query = 'INSERT INTO `{prefix}_redpages` \ -#~ ( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? );' - - #~ def __init__( self, pageid ): - #~ """ - #~ Creates a new instance, runs __init__ of parent class - #~ """ - - #~ super().__init__( ) - - #~ self.__pageid = int( pageid ) - - #~ self.data = self.get_page() - - #~ def __del__( self ): - #~ """ - #~ Needed to prevent descendant classes of MYSQL_RED from deleting - #~ connection to db - #~ """ - #~ pass - - #~ def get_page( self ): - #~ """ - #~ Retrieves a red page row from MySQL-Database for given page_id - - #~ @param int pageid MediaWiki page_id for page to retrieve - - #~ @returns tuple Tuple with data for given page_id - #~ bool FALSE if none found - #~ """ - - #~ cursor = type( self ).connection.cursor(mysqldb.DictCursor) - - #~ cursor.execute( - #~ 'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format( - #~ prefix=type(self).db_table_prefix), ( self.__pageid, ) ) - - #~ res = cursor.fetchone() - - #~ if res: - #~ return res - #~ else: - #~ return False - - #~ def add_page( self, pagetitle, revid, status=0 ): - #~ """ - #~ Inserts a red page row in MySQL-Database for given pageid - - #~ @param int revid MediaWiki current revid - #~ @param str pagetitle MediaWiki new pagetitle - #~ @param int status Page parsing status - #~ """ - - #~ insert_data = { self.__pageid: ( self.__pageid, pagetitle, - #~ revid, status ) } - - #~ type( self )._cached_insert_data.update( insert_data ) - - #~ # Manualy construct self.data dict - #~ self.data = { 'pageid': self.__pageid, 'revid': revid, - #~ 'pagetitle': pagetitle, 'status': status } - - #~ def update_page( self, revid=None, pagetitle=None, status=0 ): - #~ """ - #~ Updates the red page row in MySQL-Database for given page_id - #~ @param int revid MediaWiki current rev_id - #~ @param str pagetitle MediaWiki new page_title - #~ @param int status Page parsing status - #~ """ - - #~ if not pagetitle: - #~ pagetitle = self.data[ 'pagetitle' ] - #~ if not revid: - #~ revid = self.data[ 'revid' ] - - #~ type( self )._cached_update_data.append( ( pagetitle, revid, - #~ status, self.__pageid ) ) - - -#~ class MysqlRedFam( MysqlRed ): - #~ """ - #~ MySQL-db Interface for handling querys for RedFams - #~ """ - - #~ # Class variables for storing cached querys - #~ _cached_update_data = [] - #~ _update_query = 'UPDATE `{prefix}_redfams` \ -#~ SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ -#~ `status`= ? WHERE `famhash` = ?;' - - #~ _cached_insert_data = {} - #~ _insert_query = 'INSERT INTO `{prefix}_redfams` \ -#~ ( famhash, redpageid, beginning, ending, status, heading, \ -#~ article0, article1, article2, article3, article4, article5, article6, \ -#~ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' - - #~ def __init__( self, famhash=None ): - #~ """ - #~ Creates a new instance, runs __init__ of parent class - #~ """ - - #~ self.__famhash = famhash - - #~ super().__init__( ) - - #~ def __del__( self ): - #~ """ - #~ Needed to prevent descendant classes of MYSQL_RED from deleting - #~ connection to db - #~ """ - #~ pass - - #~ def get_fam( self, famhash ): - #~ """ - #~ Retrieves a red family row from MySQL-Database for given fam_hash - - #~ @returns dict Dictionairy with data for given fam hash - #~ False if none found - #~ """ - #~ self.__famhash = famhash - - #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - - #~ cursor.execute( - #~ 'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'. - #~ format( prefix=type(self).db_table_prefix), ( famhash, ) ) - - #~ self.data = cursor.fetchone() - - #~ def add_fam( self, articlesList, heading, redpageid, - #~ beginning, ending=None, status=0 ): - - #~ data = [ self.__famhash, redpageid, beginning, ending, - #~ status, heading ] - - #~ for article in articlesList: - #~ data.append( str( article ) ) - - #~ while len( data ) < 14: - #~ data.append( None ) - - #~ data = tuple( data ) - - #~ insert_data = { self.__famhash: data } - #~ type( self )._cached_insert_data.update( insert_data ) - - #~ # Manualy construct self.data dict - #~ data_keys = ( 'famhash', 'redpageid', 'beginning', 'ending', - #~ 'status', 'heading', 'article0', 'article1', 'article2', - #~ 'article3', 'article4', 'article5', 'article6', - #~ 'article7' ) - #~ self.data = dict( zip( data_keys, data ) ) - - #~ def update_fam( self, redpageid, heading, beginning, ending, status ): - #~ """ - #~ Updates the red fam row in MySQL-Database for given fam_hash - - #~ @param int redpageid MediaWiki page_id - #~ @param datetime beginning Timestamp of beginning - #~ qparam datetime ending Timestamp of ending of - #~ @param int status red_fam status - #~ """ - - #~ type( self )._cached_update_data.append( ( redpageid, heading, - #~ beginning, ending, status, - #~ self.__famhash ) ) - - #~ def get_by_status( self, status ): - #~ """ - #~ Generator witch fetches redFams with given status from DB - #~ """ - - #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - - #~ cursor.execute( - #~ 'SELECT * FROM `{prefix}_redfams` WHERE `status` = LIKE %?%;'. - #~ format( prefix=type( self ).db_table_prefix), ( status, ) ) - - #~ while True: - #~ res = cursor.fetchmany( 1000 ) - #~ if not res: - #~ break - #~ for row in res: - #~ yield row - - #~ def get_by_status_and_ending( self, status, ending ): - #~ """ - #~ Generator witch fetches redFams with given status from DB - #~ """ - - #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - - #~ cursor.execute( ( - #~ 'SELECT * ' + - #~ 'FROM `{prefix}_redfams` `F` ' + - #~ 'INNER JOIN `{prefix}_redpages` `P` ' + - #~ 'ON `F`.`status` = ? ' + - #~ 'AND `F`.`ending` >= ? ' + - #~ 'AND `F`.`redpageid` = `P`.`pageid`;').format( - #~ prefix=type( self ).db_table_prefix), - #~ ( status, ending ) ) - - #~ while True: - #~ res = cursor.fetchmany( 1000 ) - #~ if not res: - #~ break - #~ for row in res: - #~ yield row +Base.metadata.create_all(engine) class MysqlRedError(Exception): diff --git a/lib/redfam.py b/lib/redfam.py index 526f902..d4f00be 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -35,8 +35,7 @@ import pywikibot # noqa from pywikibot.tools import deprecated # noqa import jogobot -#~ from lib.mysqlred import Column, Integer, String, Text, DateTime, ForeignKey, ColumnList, Status -from lib.mysqlred import MysqlRedFam, MutableSet, ColumnList #, Mysql, Base, relationship, composite, +from lib.mysqlred import MysqlRedFam class RedFam( MysqlRedFam ): @@ -45,7 +44,7 @@ class RedFam( MysqlRedFam ): """ def __init__( self, articlesList, beginning, ending=None, redpageid=None, - status=MutableSet(), famhash=None, heading=None ): + status=None, famhash=None, heading=None ): """ Generates a new RedFam object @@ -61,34 +60,9 @@ class RedFam( MysqlRedFam ): # Having pywikibot.Site() is a good idea most of the time self.site = pywikibot.Site() - # Database interface - #self._mysql = MysqlRedFam( famhash ) - - # Initial attribute values - #~ self.articlesList = articlesList - #~ self.beginning = beginning - #~ self.ending = ending - #~ self.redpageid = redpageid -#~ # self._status = set() -#~ # self._status = self._parse_status(status) - #~ self.famhash = famhash - #~ self.heading = heading - #self.status = status - - #articlesStatus = ColumnList([ MutableSet() for x in range(0,8) ]) - - #~ # Calculates the sha1 hash over self._articlesList to - #~ # rediscover known redundance families - #~ self.calc_famhash() - - #~ if not status: - #~ status = MutableSet() - super().__init__( articlesList=articlesList, beginning=beginning, ending=ending, redpageid=redpageid, famhash=famhash, heading=heading, status=status, articlesStatus=None ) - #super().__init__() - def __repr__( self ): """ Returns repression str of RedFam object @@ -137,35 +111,12 @@ class RedFam( MysqlRedFam ): else: self.famhash = type(self).calc_famhash(self.articlesList) - #~ def changed( self ): - #~ """ - #~ Checks wether anything has changed and maybe triggers db update - #~ """ - - #~ # On archived redfams do not delete possibly existing ending - #~ if( not self.ending and "archived" in self._status and - #~ self._mysql.data[ 'ending' ] ): - - #~ self._ending = self._mysql.data[ 'ending' ] - - #~ # Since status change means something has changed, update database - #~ if( self._raw_status != self._mysql.data[ 'status' ] or - #~ self._beginning != self._mysql.data[ 'beginning' ] or - #~ self._ending != self._mysql.data[ 'ending' ] or - #~ self._red_page_id != self._mysql.data[ 'redpageid' ] or - #~ self._heading != self._mysql.data[ 'heading' ]): - - #~ self._mysql.update_fam( self._redpageid, self._heading, - #~ self._beginning, self._ending, - #~ self._raw_status() ) - @classmethod def flush_db_cache( cls ): """ Calls flush method of Mysql Interface class """ cls.session.commit() - #~ MysqlRedFam.flush() def add_status(self, status): """ @@ -204,24 +155,6 @@ class RedFam( MysqlRedFam ): else: return False - #~ def _parse_status(self, raw_status ): - #~ """ - #~ Sets status based on comma separated list - - #~ @param raw_status Commaseparated string of stati (from DB) - #~ @type raw_status str - #~ """ - #~ self._status = set( raw_status.strip().split(",")) - - #~ def _raw_status( self ): - #~ """ - #~ Returns status as commaseparated string (to save in DB) - - #~ @returns Raw status string - #~ @rtype str - #~ """ - #~ return ",".join( self._status ) - def article_add_status(self, status, index=None, title=None ): """ Adds a status specified by status, to article (identified by title @@ -292,46 +225,6 @@ class RedFam( MysqlRedFam ): else: raise IndexError( "No index given or wrong format!") - def _article_parse_status(self, raw_status, index=None, title=None ): - """ - Sets status based on comma separated list to articles (identified by - title or index in articlesList) status set - - @param status Statusstring to set - @type status str - @param index Add to article with index in articlesList - @type index int - @param title Add to article with title in articlesList - @type title str - """ - if title and not index: - index = self._articlesList.index( title ) - - if isinstance( index, int ) and index < len(self._articlesList): - self._article_status[index] = set( raw_status.strip().split(",")) - else: - raise IndexError( "No index given or wrong format!") - - def _article_raw_status( self, index=None, title=None ): - """ - Returns status as commaseparated string (to save in DB) of article - (identified by title or index in articlesList) status set - - @param index Get from article with index in articlesList - @type index int - @param title Get from article with title in articlesList - @type title str - @returns Raw status string - @rtype str - """ - if title and not index: - index = self._articlesList.index( title ) - - if isinstance( index, int ) and index < len(self._articlesList): - return ",".join( self._article_status[index] ) - else: - raise IndexError( "No index given or wrong format!") - class RedFamParser( RedFam ): """ @@ -369,54 +262,14 @@ class RedFamParser( RedFam ): str strptime parseable string """ - # Parse the provided heading of redundance section - # to set self._articlesList - #~ self.heading = str(heading) - #~ self.articlesList = articlesList - - #~ # Catch sections with more then 8 articles, print error - #~ if len( self.articlesList ) > 8: - #~ # For repression in output we need to know the fam hash - #~ self.calc_famhash() - - #~ jogobot.output( - #~ ( "\03{{lightred}}" + - #~ "Maximum number of articles in red_fam exceeded, " + - #~ "maximum number is 8, {number:d} were given \n {repress}" - #~ ).format( datetime=datetime.now().strftime( - #~ "%Y-%m-%d %H:%M:%S" ), number=len( self._articlesList ), - #~ repress=repr( self ) ), - #~ "WARNING" ) - - #~ # Only save the first 8 articles -#~ # self.articlesList = self.articlesList[:8] - # Calculates the sha1 hash over self._articlesList to # rediscover known redundance families famhash = type(self).calc_famhash(articlesList) - #~ obj = self.session.query(RedFamParser).filter(RedFamParser.famhash == self.famhash ).one_or_none() - #~ if obj: - #~ self = obj - - # Set object attributes: - #~ self.redpageid = redpage._pageid self._redpagearchive = redpagearchive -# self.famhash = None - - # Method self.add_beginning sets self._beginning directly - #~ self.add_beginning( beginning ) - - #~ # Method self.add_ending sets self._ending directly - #~ if( ending ): - #~ self.add_ending( ending ) - #~ else: - #~ # If no ending was provided set to None - #~ self.ending = None - - #~ self.status = MutableSet() + # Parse Timestamps beginning = self.__datetime(beginning) if ending: ending = self.__datetime(ending) @@ -429,31 +282,8 @@ class RedFamParser( RedFam ): self.check_status() self.session.add(self) - # Open database connection, ask for data if existing, - # otherwise create entry -# self.__handle_db() - - - - # Triggers db update if anything changed -# self.changed() - - #~ def __handle_db( self ): - #~ """ - #~ Handles opening of db connection - #~ """ - - #~ # We need a connection to our mysqldb - #~ self._mysql = MysqlRedFam( ) - #~ self._mysql.get_fam( self._famhash ) - - #~ if not self._mysql.data: - #~ self._mysql.add_fam( self._articlesList, self._heading, - #~ self._redpageid, self._beginning, - #~ self._ending ) - def update( self, articlesList, heading, redpage, redpagearchive, beginning, ending=None): @@ -490,8 +320,6 @@ class RedFamParser( RedFam ): return [ str( link.title ) for link in heading.ifilter_wikilinks() ] - - def add_beginning( self, beginning ): """ Adds the beginning date of a redundance diskussion to the object @@ -780,8 +608,6 @@ class RedFamWorker( RedFam ): self._article_raw_status( index=index ) index += 1 - print( repr(self) ) - def get_disc_link( self ): """ Constructs and returns the link to Redundancy discussion diff --git a/lib/redpage.py b/lib/redpage.py index 558cd8c..fa1c695 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -30,8 +30,7 @@ import mwparserfromhell as mwparser import jogobot # noqa -#~ from lib.mysqlred import Column, Integer, String, Text, DateTime, ForeignKey, ColumnList, Status -from lib.mysqlred import MysqlRedPage, relationship, MutableSet #MysqlRedFam, Base, composite, +from lib.mysqlred import MysqlRedPage, relationship from lib.redfam import RedFam, RedFamParser from sqlalchemy.orm.collections import attribute_mapped_collection @@ -60,7 +59,6 @@ class RedPage( MysqlRedPage ): # Safe the pywikibot page object if page: self._page = page - pageid = self._page.pageid super().__init__( pageid=pageid, @@ -69,48 +67,15 @@ class RedPage( MysqlRedPage ): status=MutableSet() ) #TODO EMPTY MutableSet() necessary? #~ self._status = set() - if archive: - self.status.add("archived") - - #~ self._archive = archive - - #~ self.pageid = pageid - #~ self.revid = self.page._revid - #~ self.p - #~ self.status = MutableSet() - -# self.__handle_db( ) - #~ self.is_page_changed() - - #~ self._parsed = None + self.is_archive() self.session.add(self) - #~ def __handle_db( self ): - #~ """ - #~ Handles opening of db connection - #~ """ - - #~ # We need a connection to our mysqldb - #~ if self.page: - #~ self.__mysql = MysqlRedPage( self.page._pageid ) - #~ self.pageid = self.page._pageid - #~ elif self.pageid: - #~ self.__mysql = MysqlRedPage( self.pageid ) - #~ self.page = pywikibot.Page( pywikibot.Site(), - #~ self.pagetitle ) - #~ self.page.exists() - #~ else: - #~ raise ValueError( "Page NOR pagid provided!" ) - - #~ if not self.__mysql.data: - #~ self.__mysql.add_page( self.page.title(), self.page._revid ) - def update( self, page ): - self._page = page self.revid = page._revid self.pagetitle = page.title() + self.is_archive() @property def page(self): @@ -123,24 +88,10 @@ class RedPage( MysqlRedPage ): def archive(self): return self.has_status("archived") - def is_page_changed( self ): - """ - Check wether the page was changed since last run - """ - self._changed = self.changedp() - #~ if( self.__mysql.data != { 'pageid': self.page._pageid, - #~ 'revid': self.page._revid, - #~ 'pagetitle': self.page.title(), - #~ 'status': self.__mysql.data[ 'status' ] } ): - #~ self._changed = True - #~ else: - #~ self._changed = False - def is_archive( self ): """ Detects wether current page is an archive of discussions """ - if( self.archive or ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ) ): @@ -153,10 +104,7 @@ class RedPage( MysqlRedPage ): """ Decides wether current RedPage needs to be parsed or not """ - if( self.changedp() or not self.has_status("parsed") ): - return True - else: - return False + return self.changedp() or not self.has_status("parsed") def parse( self ): """ @@ -183,24 +131,6 @@ class RedPage( MysqlRedPage ): else: self.status.add("parsed") self._parsed = True - #~ self.__update_db() - - #~ def __update_db( self ): - #~ """ - #~ Updates the page meta data in mysql db - #~ """ - #~ if( self._parsed or not self._changed ): - #~ self.add_status( "open" ) - - #~ if( self.is_archive() ): - #~ self.remove_status( "open" ) - #~ self.add_status( "archived" ) - #~ else: - #~ pass - #~ self._status = set() - - #~ self.__mysql.update_page( self.page._revid, self.page.title(), - #~ self._raw_status() ) @classmethod def flush_db_cache( cls ): @@ -208,7 +138,6 @@ class RedPage( MysqlRedPage ): Calls flush method of Mysql Interface class """ cls.session.commit() - #~ MysqlRedPage.flush() def add_status(self, status): """ @@ -246,21 +175,3 @@ class RedPage( MysqlRedPage ): return True else: return False - - #~ def _parse_status(self, raw_status ): - #~ """ - #~ Sets status based on comma separated list - - #~ @param raw_status Commaseparated string of stati (from DB) - #~ @type raw_status str - #~ """ - #~ self._status = set( raw_status.strip().split(",")) - - #~ def _raw_status( self ): - #~ """ - #~ Returns status as commaseparated string (to save in DB) - - #~ @returns Raw status string - #~ @rtype str - #~ """ - #~ return ",".join( self._status ) From bf8e47f916ee632e5c4f56a6d1b1e2f69a84bb35 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 7 Mar 2017 10:55:44 +0100 Subject: [PATCH 03/11] Improve new status API Make sure state changes are only detected as such by sqlalchemy if they are real changes --- lib/mysqlred.py | 63 +++++++++++++++++++++++++++++++++++++++++++++++-- lib/redpage.py | 14 +++++------ 2 files changed, 68 insertions(+), 9 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 3710219..46fa811 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -85,7 +85,54 @@ class Mysql(object): suffix = "s" return cls._tableprefix + name + cls._tablesuffix def changedp(self): - return self in self.session.dirty + return self.session.is_modified(self) + + +class MutableSet(MutableSet): + """ + Extended version of the mutable set for our states + """ + + def has(self, item): + """ + Check if item is in set + + @param item Item to check + """ + return item in self + + def add(self, item): + """ + Extended add method, which only result in changed object if there is + really an item added. + + @param item Item to add + """ + if not item in self: + super().add(item) + + def discard(self, item): + """ + Wrapper for extended remove below + + @param item Item to discard + """ + self.remove(item) + + def remove(self, item, weak=True ): + """ + Extended remove method, which only results in changed object if there + is really an item removed. Additionally, combine remove and discard! + + @param item Item to remove/discard + @param weak Set to false to use remove, else discard behavior + """ + if item in self: + if weak: + super().discard(item) + else: + super().remove(item) + class ColumnList( list, MutableComposite ): """ @@ -249,13 +296,25 @@ class MysqlRedPage( Mysql, Base ): pageid = Column( Integer, unique=True, primary_key=True ) revid = Column( Integer, unique=True, nullable=False ) pagetitle = Column( String(255), nullable=False ) - status = Column( MutableSet.as_mutable(Status(255)), nullable=True ) + __status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) redfams = relationship( "MysqlRedFam", order_by=MysqlRedFam.famhash, back_populates="redpage", collection_class=attribute_mapped_collection("famhash")) + @property + def status( self ): + """ + Current fam status + """ + return self.__status + @status.setter + def status( self, status ): + if status: + self.__status = MutableSet( status ) + else: + self.__status = MutableSet() Base.metadata.create_all(engine) diff --git a/lib/redpage.py b/lib/redpage.py index fa1c695..cba4268 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -61,11 +61,11 @@ class RedPage( MysqlRedPage ): self._page = page super().__init__( - pageid=pageid, - revid=self.page._revid, - pagetitle=self.page.title(), - status=MutableSet() ) #TODO EMPTY MutableSet() necessary? - #~ self._status = set() + pageid=self._page.pageid, + revid=self._page._revid, + pagetitle=self._page.title(), + status=None + ) self.is_archive() @@ -95,9 +95,9 @@ class RedPage( MysqlRedPage ): if( self.archive or ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ) ): - - return True + self.status.add("archive") else: + self.status.discard("archive") return False def is_parsing_needed( self ): From 89b50e3312a59827fdb454335324c8735cac95c2 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 7 Mar 2017 12:06:11 +0100 Subject: [PATCH 04/11] Remove old status API Now we use the methods of status object directly --- bots/reddiscparser.py | 3 +- lib/redfam.py | 95 +++++++++++-------------------------------- lib/redpage.py | 45 ++------------------ 3 files changed, 29 insertions(+), 114 deletions(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 336cd9f..2e203ba 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -161,8 +161,7 @@ class DiscussionParserBot( # Iterate over returned generator with redfam sections for fam in redpage.parse(): # Run RedFamParser on section text - RedFamParser.parser( fam, redpage, - redpage.is_archive() ) + RedFamParser.parser( fam, redpage, redpage.archive ) fam_counter += 1 diff --git a/lib/redfam.py b/lib/redfam.py index d4f00be..763bfcc 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -84,6 +84,12 @@ class RedFam( MysqlRedFam ): @classmethod def calc_famhash(cls, articlesList ): + """ + Calculates the SHA-1 hash for the articlesList of redundance family. + Since we don't need security SHA-1 is just fine. + + @returns str String with the hexadecimal hash digest + """ h = hashlib.sha1() # Since articlesList attr of RedFam will have always 8 Members we @@ -95,22 +101,6 @@ class RedFam( MysqlRedFam ): return h.hexdigest() - def c_famhash( self ): - """ - Calculates the SHA-1 hash for the articlesList of redundance family. - Since we don't need security SHA-1 is just fine. - - @returns str String with the hexadecimal hash digest - """ - print( type( self ) ) - - if self.famhash and type(self).calc_famhash(self.articlesList) != self.famhash: - raise RedFamHashError( self.famhash, h.hexdigest() ) - elif self.famhash: - return - else: - self.famhash = type(self).calc_famhash(self.articlesList) - @classmethod def flush_db_cache( cls ): """ @@ -118,43 +108,6 @@ class RedFam( MysqlRedFam ): """ cls.session.commit() - def add_status(self, status): - """ - Adds a status specified by status, to status set - - @param status Statusstring to add - @type status str - """ - self.status.add(status) - - def remove_status(self, status, weak=True): - """ - Removes a status, specified by status from set. If weak is set to - False it will throw a KeyError when trying to remove a status not set. - - @param status Statusstring to add - @type status str - @param weak Change behavior on missing status - @type bool - """ - if weak: - self.status.discard(status) - else: - self.status.remove(status) - - def has_status(self, status): - """ - Returns True, if redfam has given status - - @param status Statusstring to check - @type status str - @returns True if status is present else False - """ - if status in self.status: - return True - else: - return False - def article_add_status(self, status, index=None, title=None ): """ Adds a status specified by status, to article (identified by title @@ -267,7 +220,7 @@ class RedFamParser( RedFam ): famhash = type(self).calc_famhash(articlesList) # Set object attributes: - self._redpagearchive = redpagearchive + self.redpage = redpage # Parse Timestamps beginning = self.__datetime(beginning) @@ -275,7 +228,7 @@ class RedFamParser( RedFam ): ending = self.__datetime(ending) - super().__init__( articlesList, beginning, ending=ending, redpageid=redpage._pageid, + super().__init__( articlesList, beginning, ending=ending, redpageid=redpage.page._pageid, famhash=famhash, heading=heading ) # Check status changes @@ -294,7 +247,7 @@ class RedFamParser( RedFam ): self.add_beginning( beginning ) - if( ending ): + if ending: self.add_ending( ending ) self._redpagearchive = redpagearchive @@ -372,16 +325,16 @@ class RedFamParser( RedFam ): # No ending, discussion is running: # Sometimes archived discussions also have no detectable ending - if not self.ending and not self._redpagearchive: - self.add_status("open") + if not self.ending and not self.redpage.archive: + self.status.add("open") else: - self.remove_status("open") - if not self._redpagearchive: - self.add_status("done") + self.status.remove("open") + if not self.redpage.archive: + self.status.add("done") else: - self.remove_status("done") - self.remove_status("open") - self.add_status("archived") + self.status.remove("done") + self.status.remove("open") + self.status.add("archived") @classmethod def is_section_redfam_cb( cls, heading ): @@ -413,7 +366,7 @@ class RedFamParser( RedFam ): text = mwparser.parse( text ) # Extract heading text - heading = next( text.ifilter_headings() ).title + heading = next( text.ifilter_headings() ).title.strip() # Extract beginnig and maybe ending (beginning, ending) = RedFamParser.extract_dates( text, isarchive ) @@ -448,7 +401,7 @@ class RedFamParser( RedFam ): else: # Create the RedFam object - redfam = RedFamParser( articlesList, str(heading).strip(), redpage.page, isarchive, beginning, ending ) + redfam = RedFamParser( articlesList, str(heading), redpage, isarchive, beginning, ending ) return redfam @classmethod @@ -593,13 +546,13 @@ class RedFamWorker( RedFam ): """ for article in self._articlesList: if self.article_has_status( "note_rej", title=article ): - self.add_status( "note_rej" ) + self.status.add( "note_rej" ) if self.article_has_status( "sav_err", title=article ): - self.add_status( "sav_err" ) + self.status.add( "sav_err" ) - if not self.has_status( "sav_err" ) and \ - not self.has_status( "note_rej" ): - self.add_status( "marked" ) + if not self.status.has( "sav_err" ) and \ + not self.status.has( "note_rej" ): + self.status.add( "marked" ) self._mysql.data[ 'status' ] = self._raw_status() index = 0 diff --git a/lib/redpage.py b/lib/redpage.py index cba4268..f9f0aa8 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -86,25 +86,25 @@ class RedPage( MysqlRedPage ): @property def archive(self): - return self.has_status("archived") + self.is_archive() + return self.status.has("archive") def is_archive( self ): """ Detects wether current page is an archive of discussions """ - if( self.archive or ( u"/Archiv" in self.page.title() ) or + if( ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ) ): self.status.add("archive") else: self.status.discard("archive") - return False def is_parsing_needed( self ): """ Decides wether current RedPage needs to be parsed or not """ - return self.changedp() or not self.has_status("parsed") + return self.changedp() or not self.status.has("parsed") def parse( self ): """ @@ -138,40 +138,3 @@ class RedPage( MysqlRedPage ): Calls flush method of Mysql Interface class """ cls.session.commit() - - def add_status(self, status): - """ - Adds a status specified by status, to status set - - @param status Statusstring to add - @type status str - """ - self.status.add(status) - - def remove_status(self, status, weak=True): - """ - Removes a status, specified by status from set. If weak is set to - False it will throw a KeyError when trying to remove a status not set. - - @param status Statusstring to add - @type status str - @param weak Change behavior on missing status - @type bool - """ - if weak: - self.status.discard(status) - else: - self.status.remove(status) - - def has_status(self, status): - """ - Returns True, if redfam has given status - - @param status Statusstring to check - @type status str - @returns True if status is present else False - """ - if status in self.status: - return True - else: - return False From 43e31c108a408af39434572129208b63e4c178c9 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 7 Mar 2017 14:51:55 +0100 Subject: [PATCH 05/11] Working RedFamWorker query Modify RedfamWorker class to work with new DB API --- lib/mysqlred.py | 10 +++--- lib/redfam.py | 90 ++++++++++++++++++++++++++----------------------- 2 files changed, 53 insertions(+), 47 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 46fa811..1f92026 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -227,7 +227,7 @@ class MysqlRedFam( Mysql, Base ): Integer, ForeignKey( "dewpbeta_redpages.pageid" ), nullable=False ) beginning = Column( DateTime, nullable=False ) ending = Column( DateTime, nullable=True ) - __status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) + _status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) __article0_status = Column( 'article0_status', MutableSet.as_mutable(Status(64)), nullable=True ) @@ -250,7 +250,7 @@ class MysqlRedFam( Mysql, Base ): __article3_status, __article4_status, __article5_status, __article6_status, __article7_status ) - redpage = relationship( "RedPage", back_populates="redfams" ) + redpage = relationship( "MysqlRedPage", back_populates="redfams" ) @property def articlesList(self): @@ -271,14 +271,14 @@ class MysqlRedFam( Mysql, Base ): """ Current fam status """ - return self.__status + return self._status @status.setter def status( self, status ): if status: - self.__status = MutableSet( status ) + self._status = MutableSet( status ) else: - self.__status = MutableSet() + self._status = MutableSet() @property def articlesStatus(self): diff --git a/lib/redfam.py b/lib/redfam.py index 763bfcc..69b68c7 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -121,10 +121,10 @@ class RedFam( MysqlRedFam ): @type title str """ if title and not index: - index = self._articlesList.index( title ) + index = self.articlesList.index( title ) - if isinstance( index, int ) and index < len(self._articlesList): - self._article_status[index].add(status) + if isinstance( index, int ) and index < len(self.articlesList): + self.articlesStatus[index].add(status) else: raise IndexError( "No index given or wrong format!") @@ -145,13 +145,13 @@ class RedFam( MysqlRedFam ): @type bool """ if title and not index: - index = self._articlesList.index( title ) + index = self.articlesList.index( title ) - if isinstance( index, int ) and index < len(self._articlesList): + if isinstance( index, int ) and index < len(self.articlesList): if weak: - self._article_status[index].discard(status) + self.articlesStatus[index].discard(status) else: - self._article_status[index].remove(status) + self.articlesStatus[index].remove(status) else: raise IndexError( "No index given or wrong format!") @@ -168,10 +168,10 @@ class RedFam( MysqlRedFam ): @type title str """ if title and not index: - index = self._articlesList.index( title ) + index = self.articlesList.index( title ) - if isinstance( index, int ) and index < len(self._articlesList): - if status in self._article_status[index]: + if isinstance( index, int ) and index < len(self.articlesList): + if status in self.articlesStatus[index]: return True else: return False @@ -458,19 +458,20 @@ class RedFamWorker( RedFam ): """ def __init__( self, mysql_data ): - articlesList = [] + #~ articlesList = [] - for key in sorted( mysql_data.keys() ): - if 'article' in key and 'status' not in key and mysql_data[ key ]: - articlesList.append( mysql_data[ key ] ) + #~ for key in sorted( mysql_data.keys() ): + #~ if 'article' in key and 'status' not in key and mysql_data[ key ]: + #~ articlesList.append( mysql_data[ key ] ) - # Preset article status list with empty sets for existing articles - self._article_status = [set() for x in range(0, len(articlesList))] + #~ # Preset article status list with empty sets for existing articles + #~ self._article_status = [set() for x in range(0, len(articlesList))] - super().__init__( articlesList, mysql_data[ 'beginning' ], - mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], - mysql_data[ 'status' ], mysql_data[ 'famhash' ], - mysql_data[ 'heading' ] ) + #~ super().__init__( articlesList, mysql_data[ 'beginning' ], + #~ mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], + #~ mysql_data[ 'status' ], mysql_data[ 'famhash' ], + #~ mysql_data[ 'heading' ] ) + super().__init__() # #~ self._mysql.data = mysql_data @@ -510,8 +511,12 @@ class RedFamWorker( RedFam ): """ # Iterate over articles in redfam - for article in self._articlesList: - page = pywikibot.Page(pywikibot.Link(article), self.site) + for article in self.articlesList: + # Not all list elements contain articles + if not article: + break + + page = pywikibot.Page(pywikibot.Link(article), pywikibot.Site()) # Exclude by article status for status in exclude_article_status: @@ -544,7 +549,10 @@ class RedFamWorker( RedFam ): """ Sets status to 3 when worked on """ - for article in self._articlesList: + for article in self.articlesList: + if not article: + break + if self.article_has_status( "note_rej", title=article ): self.status.add( "note_rej" ) if self.article_has_status( "sav_err", title=article ): @@ -554,13 +562,6 @@ class RedFamWorker( RedFam ): not self.status.has( "note_rej" ): self.status.add( "marked" ) - self._mysql.data[ 'status' ] = self._raw_status() - index = 0 - for article in self._articlesList: - self._mysql.data[ "article" + str(index) + 'status' ] = \ - self._article_raw_status( index=index ) - index += 1 - def get_disc_link( self ): """ Constructs and returns the link to Redundancy discussion @@ -570,7 +571,7 @@ class RedFamWorker( RedFam ): """ # We need to Replace Links with their linktext - anchor_code = mwparser.parse( self._mysql.data[ 'heading' ].strip() ) + anchor_code = mwparser.parse( self.heading.strip() ) for link in anchor_code.ifilter_wikilinks(): if link.text: text = link.text @@ -583,7 +584,7 @@ class RedFamWorker( RedFam ): anchor_code.replace( " ", "_" ) # We try it with out any more parsing as mw will do while parsing page - return ( self.redpagetitle + "#" + + return ( self.redpage.pagetitle + "#" + str(anchor_code).strip() ) def generate_disc_notice_template( self ): @@ -603,7 +604,9 @@ class RedFamWorker( RedFam ): param_cnt = 3 # Iterate over articles in redfam - for article in self._articlesList: + for article in self.articlesList: + if not article: + break # Make sure to only use 8 articles (max. param 10) if param_cnt > 10: break @@ -614,11 +617,11 @@ class RedFamWorker( RedFam ): param_cnt += 1 # Add begin - begin = self._mysql.data[ 'beginning' ].strftime( "%B %Y" ) + begin = self.beginning.strftime( "%B %Y" ) template.add( "Beginn", begin, True ) # Add end (if not same as begin) - end = self._mysql.data[ 'ending' ].strftime( "%B %Y" ) + end = self.ending.strftime( "%B %Y" ) if not end == begin: template.add( "Ende", end, True ) @@ -650,13 +653,16 @@ class RedFamWorker( RedFam ): Yield red_fams stored in db by given status which have an ending after given one """ - mysql = MysqlRedFam() - for fam in mysql.get_by_status_and_ending( status, ending ): - try: - yield cls( fam ) - except RedFamHashError: - print(fam) - raise + from sqlalchemy import text + + for redfam in RedFamWorker.session.query(RedFamWorker).filter( + #~ RedFamWorker._status.like('archived'), + #RedFamWorker._status.like("%{0:s}%".format(status)), + text("status LIKE '%archived%'"), + RedFamWorker.ending >= ending + ): + + yield redfam class RedFamError( Exception ): From 844fee52aec378bd9b16e649fd8e437ee93d939e Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Wed, 8 Mar 2017 00:01:36 +0100 Subject: [PATCH 06/11] Make markpages using new DB/Class structure Update markpages and RedFamWorker-Code to use the new sqlalchemy based DB ORM Interface --- bots/markpages.py | 5 ++++- lib/redfam.py | 31 ++----------------------------- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index b7b45c0..664f5d4 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -87,6 +87,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() for redfam in self.redfams: redfam.update_status() + RedFamWorker.flush_db_cache() + + @property def redfams(self): """ @@ -168,7 +171,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() save_ret = self.put_current( self.new_text, summary=summary ) # Status - if add_ret is None or add_ret and save_ret: + if add_ret is None or ( add_ret and save_ret ): self.current_page.redfam.article_add_status( "marked", title=self.current_page.title(withNamespace=False)) diff --git a/lib/redfam.py b/lib/redfam.py index 69b68c7..8dae7ec 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -78,6 +78,7 @@ class RedFam( MysqlRedFam ): ", red_page_id=" + repr( self.redpageid ) + \ ", status=" + repr( self.status ) + \ ", fam_hash=" + repr( self.famhash ) + \ + ", articlesStatus=" + repr( self.articlesStatus ) + \ " )" return __repr @@ -456,38 +457,10 @@ class RedFamWorker( RedFam ): Handles working with redundance families stored in database where discussion is finished """ - def __init__( self, mysql_data ): + def __init__( self ): - #~ articlesList = [] - - #~ for key in sorted( mysql_data.keys() ): - #~ if 'article' in key and 'status' not in key and mysql_data[ key ]: - #~ articlesList.append( mysql_data[ key ] ) - - #~ # Preset article status list with empty sets for existing articles - #~ self._article_status = [set() for x in range(0, len(articlesList))] - - #~ super().__init__( articlesList, mysql_data[ 'beginning' ], - #~ mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], - #~ mysql_data[ 'status' ], mysql_data[ 'famhash' ], - #~ mysql_data[ 'heading' ] ) super().__init__() -# #~ self._mysql.data = mysql_data - - #~ # Set up article status - #~ index = 0 - #~ for article in self.articlesList: - #~ raw_status = mysql_data[ "article" + str(index) + "_status" ] - #~ if not raw_status: - #~ raw_status = str() - #~ self._article_parse_status( raw_status, index ) - #~ index += 1 - - # Get related RedPage-Information - self.redpageid = mysql_data[ 'pageid' ] - self.redpagetitle = mysql_data[ 'pagetitle' ] - # Make sure locale is set to 'de_DE.UTF-8' to prevent problems # with wrong month abreviations in strptime locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') From 9ba7d2e51755733cda9357d30832dfc40216af20 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Wed, 8 Mar 2017 00:04:15 +0100 Subject: [PATCH 07/11] Change redfam generator filters Change and clear up the filters in redfam generator to keep track of article status and use positive conditionals --- lib/redfam.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/lib/redfam.py b/lib/redfam.py index 8dae7ec..8be9cf3 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -491,6 +491,28 @@ class RedFamWorker( RedFam ): page = pywikibot.Page(pywikibot.Link(article), pywikibot.Site()) + # Filter existing pages if requested with filter_existing=False + if page.exists(): + self.article_remove_status( "deleted", title=article ) + if filter_existing is False: + continue + # Filter non existing Pages if requested with filter_existing=True + else: + self.article_add_status( "deleted", title=article ) + if filter_existing: + continue + + # Filter redirects if requested with filter_redirects=True + if page.isRedirectPage(): + self.article_add_status( "redirect", title=article ) + if filter_redirects: + continue + # Filter noredirects if requested with filter_redirects=False + else: + self.article_remove_status("redirect", title=article ) + if filter_redirects is False: + continue + # Exclude by article status for status in exclude_article_status: if self.article_has_status( status, title=article ): @@ -501,20 +523,6 @@ class RedFamWorker( RedFam ): if not self.article_has_status( status, title=article ): continue - # Filter non existing Pages if requested with filter_existing=True - if filter_existing and not page.exists(): - continue - # Filter existing pages if requested with filter_existing=False - elif filter_existing is False and page.exists(): - continue - - # Filter redirects if requested with filter_redirects=True - if filter_redirects and page.isRedirectPage(): - continue - # Filter noredirects if requested with filter_redirects=False - elif filter_redirects is False and not page.isRedirectPage(): - continue - # Yield filtered pages yield page From e16925197cb1a71e63e2d2604caa73abe274f2e5 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Wed, 8 Mar 2017 18:38:15 +0100 Subject: [PATCH 08/11] Fix pep8.. compliance To be concordant with the coding styles fix pep8 compliance --- bots/markpages.py | 1 - bots/reddiscparser.py | 3 ++- lib/mysqlred.py | 60 +++++++++++++++++++++---------------------- lib/redfam.py | 58 ++++++++++++++++++++++++----------------- lib/redpage.py | 8 +++--- 5 files changed, 70 insertions(+), 60 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 664f5d4..0fbaded 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -89,7 +89,6 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() RedFamWorker.flush_db_cache() - @property def redfams(self): """ diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 2e203ba..9179841 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -146,7 +146,8 @@ class DiscussionParserBot( return # Initiate RedPage object - redpage = RedPage.session.query(RedPage).filter(RedPage.pageid == self.current_page.pageid ).one_or_none() + redpage = RedPage.session.query(RedPage).filter( + RedPage.pageid == self.current_page.pageid ).one_or_none() if redpage: redpage.update( self.current_page ) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 1f92026..232dc7c 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -25,22 +25,27 @@ Provides interface classes for communication of redundances bot with mysql-db """ -# Prefere using oursql then MySQLdb -try: - import oursql as mysqldb -except ImportError: - import MySQLdb as mysqldb +import atexit # noqa -import atexit - -import pywikibot +import pywikibot # noqa from pywikibot import config import jogobot - -from sqlalchemy import create_engine +from sqlalchemy import ( + create_engine, Column, Integer, String, Text, DateTime, ForeignKey ) +from sqlalchemy import text # noqa from sqlalchemy.engine.url import URL +from sqlalchemy.ext.declarative import ( + declarative_base, declared_attr, has_inherited_table ) +from sqlalchemy.ext.mutable import MutableComposite, MutableSet +from sqlalchemy.orm import sessionmaker, relationship, composite +from sqlalchemy.orm.collections import attribute_mapped_collection +import sqlalchemy.types as types + + +Base = declarative_base() + url = URL( "mysql+oursql", username=config.db_username, password=config.db_password, @@ -50,18 +55,6 @@ url = URL( "mysql+oursql", engine = create_engine(url, echo=True) -from sqlalchemy.ext.declarative import ( - declarative_base, declared_attr, has_inherited_table ) -Base = declarative_base() - -from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey - -from sqlalchemy.orm import sessionmaker, relationship, composite -from sqlalchemy.ext.mutable import MutableComposite, MutableSet -from sqlalchemy.orm.collections import attribute_mapped_collection -import sqlalchemy.types as types - - Session = sessionmaker(bind=engine) session = Session() @@ -70,20 +63,22 @@ family = "dewpbeta" class Mysql(object): session = session + @declared_attr def _tableprefix(cls): return family + "_" + @declared_attr def _tablesuffix(cls): return "s" + @declared_attr def __tablename__(cls): if has_inherited_table(cls): return None - prefix = family + "_" name = cls.__name__[len("Mysql"):].lower() - suffix = "s" return cls._tableprefix + name + cls._tablesuffix + def changedp(self): return self.session.is_modified(self) @@ -108,7 +103,7 @@ class MutableSet(MutableSet): @param item Item to add """ - if not item in self: + if item not in self: super().add(item) def discard(self, item): @@ -187,8 +182,11 @@ class Status( types.TypeDecorator ): elif isinstance(value, String ) or value is None: return value else: - raise ProgrammingError - + raise TypeError( + "Value should be an instance of one of {0:s},".format( + str( [type(MutableSet()), type(String()), type(None)] ) ) + + "given value was an instance of {1:s}".format( + str(type(value))) ) def process_result_value(self, value, dialect): """ @@ -226,8 +224,9 @@ class MysqlRedFam( Mysql, Base ): redpageid = Column( Integer, ForeignKey( "dewpbeta_redpages.pageid" ), nullable=False ) beginning = Column( DateTime, nullable=False ) - ending = Column( DateTime, nullable=True ) - _status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) + ending = Column( DateTime, nullable=True ) + _status = Column( 'status', MutableSet.as_mutable(Status(255)), + nullable=True ) __article0_status = Column( 'article0_status', MutableSet.as_mutable(Status(64)), nullable=True ) @@ -296,7 +295,8 @@ class MysqlRedPage( Mysql, Base ): pageid = Column( Integer, unique=True, primary_key=True ) revid = Column( Integer, unique=True, nullable=False ) pagetitle = Column( String(255), nullable=False ) - __status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) + __status = Column( 'status', MutableSet.as_mutable(Status(255)), + nullable=True ) redfams = relationship( "MysqlRedFam", order_by=MysqlRedFam.famhash, back_populates="redpage", diff --git a/lib/redfam.py b/lib/redfam.py index 8be9cf3..5c31364 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -35,7 +35,7 @@ import pywikibot # noqa from pywikibot.tools import deprecated # noqa import jogobot -from lib.mysqlred import MysqlRedFam +from lib.mysqlred import MysqlRedFam, text class RedFam( MysqlRedFam ): @@ -60,8 +60,16 @@ class RedFam( MysqlRedFam ): # Having pywikibot.Site() is a good idea most of the time self.site = pywikibot.Site() - super().__init__( articlesList=articlesList, beginning=beginning, ending=ending, redpageid=redpageid, - famhash=famhash, heading=heading, status=status, articlesStatus=None ) + super().__init__( + articlesList=articlesList, + beginning=beginning, + ending=ending, + redpageid=redpageid, + famhash=famhash, + heading=heading, + status=status, + articlesStatus=None + ) def __repr__( self ): """ @@ -228,23 +236,25 @@ class RedFamParser( RedFam ): if ending: ending = self.__datetime(ending) - - super().__init__( articlesList, beginning, ending=ending, redpageid=redpage.page._pageid, - famhash=famhash, heading=heading ) + super().__init__( articlesList, + beginning, + ending=ending, + redpageid=redpage.page._pageid, + famhash=famhash, + heading=heading ) # Check status changes self.check_status() self.session.add(self) - def update( self, articlesList, heading, redpage, redpagearchive, - beginning, ending=None): + beginning, ending=None ): - self.articlesList = articlesList; - self.heading = heading; - self.redpage = redpage; - self.redpageid = redpage.pageid; + self.articlesList = articlesList + self.heading = heading + self.redpage = redpage + self.redpageid = redpage.pageid self.add_beginning( beginning ) @@ -271,8 +281,7 @@ class RedFamParser( RedFam ): heading = mwparser.parse( str( heading ) ) # Save destinations of wikilinks in headings - return [ str( link.title ) for link - in heading.ifilter_wikilinks() ] + return [ str( link.title ) for link in heading.ifilter_wikilinks() ] def add_beginning( self, beginning ): """ @@ -398,11 +407,13 @@ class RedFamParser( RedFam ): if redfam: # Existing redfams need to be updated - redfam.update( articlesList, str(heading), redpage, isarchive, beginning, ending ) + redfam.update( articlesList, str(heading), redpage, isarchive, + beginning, ending ) else: # Create the RedFam object - redfam = RedFamParser( articlesList, str(heading), redpage, isarchive, beginning, ending ) + redfam = RedFamParser( articlesList, str(heading), + redpage, isarchive, beginning, ending ) return redfam @classmethod @@ -465,7 +476,8 @@ class RedFamWorker( RedFam ): # with wrong month abreviations in strptime locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') - def article_generator(self, filter_existing=None, filter_redirects=None, + def article_generator(self, # noqa + filter_existing=None, filter_redirects=None, exclude_article_status=[], onlyinclude_article_status=[] ): """ @@ -602,7 +614,7 @@ class RedFamWorker( RedFam ): template.add( "Beginn", begin, True ) # Add end (if not same as begin) - end = self.ending.strftime( "%B %Y" ) + end = self.ending.strftime( "%B %Y" ) if not end == begin: template.add( "Ende", end, True ) @@ -634,14 +646,12 @@ class RedFamWorker( RedFam ): Yield red_fams stored in db by given status which have an ending after given one """ - from sqlalchemy import text - for redfam in RedFamWorker.session.query(RedFamWorker).filter( - #~ RedFamWorker._status.like('archived'), - #RedFamWorker._status.like("%{0:s}%".format(status)), + # NOT WORKING WITH OBJECT NOTATION + # RedFamWorker._status.like('archived'), + # RedFamWorker._status.like("%{0:s}%".format(status)), text("status LIKE '%archived%'"), - RedFamWorker.ending >= ending - ): + RedFamWorker.ending >= ending ): yield redfam diff --git a/lib/redpage.py b/lib/redpage.py index f9f0aa8..3678111 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -31,8 +31,8 @@ import mwparserfromhell as mwparser import jogobot # noqa from lib.mysqlred import MysqlRedPage, relationship -from lib.redfam import RedFam, RedFamParser from sqlalchemy.orm.collections import attribute_mapped_collection +from lib.redfam import RedFamParser class RedPage( MysqlRedPage ): @@ -40,7 +40,7 @@ class RedPage( MysqlRedPage ): Class for handling redundance discussion pages and archives """ - #TODO POLYMORPHISM? of BASEClass + # TODO POLYMORPHISM? of BASEClass redfams = relationship( "RedFamParser", order_by=RedFamParser.famhash, back_populates="redpage", @@ -65,7 +65,7 @@ class RedPage( MysqlRedPage ): revid=self._page._revid, pagetitle=self._page.title(), status=None - ) + ) self.is_archive() @@ -79,7 +79,7 @@ class RedPage( MysqlRedPage ): @property def page(self): - if not hasattr(self,"_page"): + if not hasattr(self, "_page"): self._page = pywikibot.Page( pywikibot.Site(), self.pagetitle ) return self._page From 3fe47e666f9db09be9b1eab4a11620e8ea71ea65 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Wed, 8 Mar 2017 18:41:02 +0100 Subject: [PATCH 09/11] Fix polymorphism problem with relationships Since we are using subclasses of the ORM mapped classes, disable typechecks for ORM relations --- lib/mysqlred.py | 8 +++++--- lib/redpage.py | 9 +-------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 232dc7c..4f6101e 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -249,7 +249,8 @@ class MysqlRedFam( Mysql, Base ): __article3_status, __article4_status, __article5_status, __article6_status, __article7_status ) - redpage = relationship( "MysqlRedPage", back_populates="redfams" ) + redpage = relationship( "MysqlRedPage", enable_typechecks=False, + back_populates="redfams" ) @property def articlesList(self): @@ -299,8 +300,9 @@ class MysqlRedPage( Mysql, Base ): nullable=True ) redfams = relationship( - "MysqlRedFam", order_by=MysqlRedFam.famhash, back_populates="redpage", - collection_class=attribute_mapped_collection("famhash")) + "MysqlRedFam", enable_typechecks=False, + back_populates="redpage", order_by=MysqlRedFam.famhash, + collection_class=attribute_mapped_collection("famhash") ) @property def status( self ): diff --git a/lib/redpage.py b/lib/redpage.py index 3678111..69f02b8 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -30,8 +30,7 @@ import mwparserfromhell as mwparser import jogobot # noqa -from lib.mysqlred import MysqlRedPage, relationship -from sqlalchemy.orm.collections import attribute_mapped_collection +from lib.mysqlred import MysqlRedPage from lib.redfam import RedFamParser @@ -40,12 +39,6 @@ class RedPage( MysqlRedPage ): Class for handling redundance discussion pages and archives """ - # TODO POLYMORPHISM? of BASEClass - redfams = relationship( - "RedFamParser", order_by=RedFamParser.famhash, - back_populates="redpage", - collection_class=attribute_mapped_collection( "famhash" ) ) - def __init__( self, page=None, pageid=None, archive=False ): """ Generate a new RedPage object based on the given pywikibot page object From 281f1c49a8f13349084107389128ad2714e8b089 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Thu, 9 Mar 2017 00:00:17 +0100 Subject: [PATCH 10/11] mysqlred: Set family via pywikibot Get family/language part of table names from PyWikiBot Site --- lib/mysqlred.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 4f6101e..1760fda 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -58,7 +58,7 @@ engine = create_engine(url, echo=True) Session = sessionmaker(bind=engine) session = Session() -family = "dewpbeta" +family = pywikibot.Site().family.dbName(pywikibot.Site().code) class Mysql(object): @@ -222,7 +222,7 @@ class MysqlRedFam( Mysql, Base ): heading = Column( Text, nullable=False ) redpageid = Column( - Integer, ForeignKey( "dewpbeta_redpages.pageid" ), nullable=False ) + Integer, ForeignKey( family + "_redpages.pageid" ), nullable=False ) beginning = Column( DateTime, nullable=False ) ending = Column( DateTime, nullable=True ) _status = Column( 'status', MutableSet.as_mutable(Status(255)), From 4aaacf144314cbc5c83eed566e38fc428525fdd4 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Thu, 9 Mar 2017 10:13:56 +0100 Subject: [PATCH 11/11] Add redfams to redpage-obj after parsing To have redfams available for updates immediately after parsing. Double redfams then will be seen as Update. Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=108 FS#108] --- lib/redfam.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/redfam.py b/lib/redfam.py index 5c31364..ca10e87 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -414,7 +414,9 @@ class RedFamParser( RedFam ): # Create the RedFam object redfam = RedFamParser( articlesList, str(heading), redpage, isarchive, beginning, ending ) - return redfam + + # Add redfam to redpage object + redpage.redfams.set( redfam ) @classmethod def extract_dates( cls, text, isarchive=False ):