Browse Source

Apply changes to data structure

See related ticket

Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=94 FS#94]
develop
Jonathan Golder 8 years ago
parent
commit
6149dcdb8b
  1. 2
      bots/markpages.py
  2. 98
      lib/mysqlred.py
  3. 156
      lib/redfam.py
  4. 77
      lib/redpage.py

2
bots/markpages.py

@ -80,7 +80,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
jogobot.config["red.markpages"]["mark_done_after"],
"%Y-%m-%d" )
self.__redfams = RedFamWorker.gen_by_status_and_ending(
2, end_after)
"archived", end_after)
return self.__redfams

98
lib/mysqlred.py

@ -156,21 +156,21 @@ class MysqlRedPage( MysqlRed ):
# Class variables for storing cached querys
# '{prefix}' will be replaced during super().__init__()
_cached_update_data = []
_update_query = 'UPDATE `{prefix}_red_pages` \
SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'
_update_query = 'UPDATE `{prefix}_redpages` \
SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;'
_cached_insert_data = {}
_insert_query = 'INSERT INTO `{prefix}_red_pages` \
( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );'
_insert_query = 'INSERT INTO `{prefix}_redpages` \
( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? );'
def __init__( self, page_id ):
def __init__( self, pageid ):
"""
Creates a new instance, runs __init__ of parent class
"""
super().__init__( )
self.__page_id = int( page_id )
self.__pageid = int( pageid )
self.data = self.get_page()
@ -185,7 +185,7 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'
"""
Retrieves a red page row from MySQL-Database for given page_id
@param int page_id MediaWiki page_id for page to retrieve
@param int pageid MediaWiki page_id for page to retrieve
@returns tuple Tuple with data for given page_id
bool FALSE if none found
@ -194,8 +194,8 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'
cursor = type( self ).connection.cursor(mysqldb.DictCursor)
cursor.execute(
'SELECT * FROM `{prefix}_red_pages` WHERE `page_id` = ?;'.format(
prefix=type(self).db_table_prefix), ( self.__page_id, ) )
'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format(
prefix=type(self).db_table_prefix), ( self.__pageid, ) )
res = cursor.fetchone()
@ -204,40 +204,40 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'
else:
return False
def add_page( self, page_title, rev_id, status=0 ):
def add_page( self, pagetitle, revid, status=0 ):
"""
Inserts a red page row in MySQL-Database for given page_id
Inserts a red page row in MySQL-Database for given pageid
@param int rev_id MediaWiki current rev_id
@param str page_title MediaWiki new page_title
@param int revid MediaWiki current revid
@param str pagetitle MediaWiki new pagetitle
@param int status Page parsing status
"""
insert_data = { self.__page_id: ( self.__page_id, page_title,
rev_id, status ) }
insert_data = { self.__pageid: ( self.__pageid, pagetitle,
revid, status ) }
type( self )._cached_insert_data.update( insert_data )
# Manualy construct self.data dict
self.data = { 'page_id': self.__page_id, 'rev_id': rev_id,
'page_title': page_title, 'status': status }
self.data = { 'pageid': self.__pageid, 'revid': revid,
'pagetitle': pagetitle, 'status': status }
def update_page( self, rev_id=None, page_title=None, status=0 ):
def update_page( self, revid=None, pagetitle=None, status=0 ):
"""
Updates the red page row in MySQL-Database for given page_id
@param int rev_id MediaWiki current rev_id
@param str page_title MediaWiki new page_title
@param int revid MediaWiki current rev_id
@param str pagetitle MediaWiki new page_title
@param int status Page parsing status
"""
if not page_title:
page_title = self.data[ 'page_title' ]
if not rev_id:
rev_id = self.data[ 'rev_id' ]
if not pagetitle:
pagetitle = self.data[ 'pagetitle' ]
if not revid:
revid = self.data[ 'revid' ]
type( self )._cached_update_data.append( ( page_title, rev_id,
status, self.__page_id ) )
type( self )._cached_update_data.append( ( pagetitle, revid,
status, self.__pageid ) )
class MysqlRedFam( MysqlRed ):
@ -247,22 +247,22 @@ class MysqlRedFam( MysqlRed ):
# Class variables for storing cached querys
_cached_update_data = []
_update_query = 'UPDATE `{prefix}_red_families` \
SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \
`status`= ? WHERE `fam_hash` = ?;'
_update_query = 'UPDATE `{prefix}_redfams` \
SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \
`status`= ? WHERE `famhash` = ?;'
_cached_insert_data = {}
_insert_query = 'INSERT INTO `{prefix}_red_families` \
( fam_hash, red_page_id, beginning, ending, status, heading, \
_insert_query = 'INSERT INTO `{prefix}_redfams` \
( famhash, redpageid, beginning, ending, status, heading, \
article0, article1, article2, article3, article4, article5, article6, \
article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
def __init__( self, fam_hash=None ):
def __init__( self, famhash=None ):
"""
Creates a new instance, runs __init__ of parent class
"""
self.__fam_hash = fam_hash
self.__famhash = famhash
super().__init__( )
@ -273,27 +273,27 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
"""
pass
def get_fam( self, fam_hash ):
def get_fam( self, famhash ):
"""
Retrieves a red family row from MySQL-Database for given fam_hash
@returns dict Dictionary with data for given fam hash
False if none found
"""
self.__fam_hash = fam_hash
self.__famhash = famhash
cursor = type( self ).connection.cursor( mysqldb.DictCursor )
cursor.execute(
'SELECT * FROM `{prefix}_red_families` WHERE `fam_hash` = ?;'.
format( prefix=type(self).db_table_prefix), ( fam_hash, ) )
'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'.
format( prefix=type(self).db_table_prefix), ( famhash, ) )
self.data = cursor.fetchone()
def add_fam( self, articlesList, heading, red_page_id,
def add_fam( self, articlesList, heading, redpageid,
beginning, ending=None, status=0 ):
data = [ self.__fam_hash, red_page_id, beginning, ending,
data = [ self.__famhash, redpageid, beginning, ending,
status, heading ]
for article in articlesList:
@ -304,29 +304,29 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
data = tuple( data )
insert_data = { self.__fam_hash: data }
insert_data = { self.__famhash: data }
type( self )._cached_insert_data.update( insert_data )
# Manualy construct self.data dict
data_keys = ( 'fam_hash', 'red_page_id', 'beginning', 'ending',
data_keys = ( 'fam_hash', 'redpageid', 'beginning', 'ending',
'status', 'heading', 'article0', 'article1', 'article2',
'article3', 'article4', 'article5', 'article6',
'article7' )
self.data = dict( zip( data_keys, data ) )
def update_fam( self, red_page_id, heading, beginning, ending, status ):
def update_fam( self, redpageid, heading, beginning, ending, status ):
"""
Updates the red fam row in MySQL-Database for given fam_hash
@param int red_page_id MediaWiki page_id
@param int redpageid MediaWiki page_id
@param datetime beginning Timestamp of beginning
@param datetime ending Timestamp of ending
@param int status red_fam status
"""
type( self )._cached_update_data.append( ( red_page_id, heading,
type( self )._cached_update_data.append( ( redpageid, heading,
beginning, ending, status,
self.__fam_hash ) )
self.__famhash ) )
def get_by_status( self, status ):
"""
@ -336,7 +336,7 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
cursor = type( self ).connection.cursor( mysqldb.DictCursor )
cursor.execute(
'SELECT * FROM `{prefix}_red_families` WHERE `status` = ?;'.format(
'SELECT * FROM `{prefix}_redfams` WHERE `status` = ?;'.format(
prefix=type( self ).db_table_prefix), ( status, ) )
while True:
@ -355,11 +355,11 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
cursor.execute( (
'SELECT * ' +
'FROM `{prefix}_red_families` `F` ' +
'INNER JOIN `{prefix}_red_pages` `P` ' +
'FROM `{prefix}_redfams` `F` ' +
'INNER JOIN `{prefix}_redpages` `P` ' +
'ON `F`.`status` = ? ' +
'AND `F`.`ending` >= ? '
'AND `F`.`red_page_id` = `P`.`page_id`;').format(
'AND `F`.`redpageid` = `P`.`pageid`;').format(
prefix=type( self ).db_table_prefix), ( status, ending ) )
while True:

156
lib/redfam.py

@ -43,8 +43,8 @@ class RedFam:
Basic class for RedFams, containing the basic data structure
"""
def __init__( self, articlesList, beginning, ending=None, red_page_id=None,
status=0, fam_hash=None, heading=None ):
def __init__( self, articlesList, beginning, ending=None, redpageid=None,
status=None, famhash=None, heading=None ):
"""
Generates a new RedFam object
@ -52,7 +52,7 @@ class RedFam:
@param beginning datetime Beginning date
@param ending datetime Ending date
@param red_page_id int MW pageid of containing RedPage
@param status int Status of RedFam
@param status str Status of RedFam
@param fam_hash str SHA1 hash of articlesList
@param heading str Original heading of RedFam (Link)
"""
@ -61,20 +61,20 @@ class RedFam:
self.site = pywikibot.Site()
# Database interface
self._mysql = MysqlRedFam( fam_hash )
self._mysql = MysqlRedFam( famhash )
# Initial attribute values
self._articlesList = articlesList
self._beginning = beginning
self._ending = ending
self._red_page_id = red_page_id
self._status = status
self._fam_hash = fam_hash
self._redpageid = redpageid
self._status = self._parse_status(status)
self._famhash = famhash
self._heading = heading
# Calculates the sha1 hash over self._articlesList to
# rediscover known redundance families
self.calc_fam_hash()
self.calc_famhash()
def __repr__( self ):
"""
@ -88,14 +88,14 @@ class RedFam:
", heading=" + repr( self._heading ) + \
", beginning=" + repr( self._beginning ) + \
", ending=" + repr( self._ending ) + \
", red_page_id=" + repr( self._red_page_id ) + \
", red_page_id=" + repr( self._redpageid ) + \
", status=" + repr( self._status ) + \
", fam_hash=" + repr( self._fam_hash ) + \
", fam_hash=" + repr( self._famhash ) + \
" )"
return __repr
def calc_fam_hash( self ):
def calc_famhash( self ):
"""
Calculates the SHA-1 hash for the articlesList of redundance family.
Since we don't need security SHA-1 is just fine.
@ -106,35 +106,35 @@ class RedFam:
h = hashlib.sha1()
h.update( str( self._articlesList[:8] ).encode('utf-8') )
if self._fam_hash and h.hexdigest() != self._fam_hash:
raise RedFamHashError( self._fam_hash, h.hexdigest() )
if self._famhash and h.hexdigest() != self._famhash:
raise RedFamHashError( self._famhash, h.hexdigest() )
elif self._fam_hash:
elif self._famhash:
return
else:
self._fam_hash = h.hexdigest()
self._famhash = h.hexdigest()
def changed( self ):
"""
Checks whether anything has changed and maybe triggers db update
"""
# On archived red_fams do not delete possibly existing ending
if( not self._ending and self._status > 1 and
# On archived redfams do not delete possibly existing ending
if( not self._ending and "archived" in self._status and
self._mysql.data[ 'ending' ] ):
self._ending = self._mysql.data[ 'ending' ]
# Since status change means something has changed, update database
if( self._status != self._mysql.data[ 'status' ] or
if( self._raw_status != self._mysql.data[ 'status' ] or
self._beginning != self._mysql.data[ 'beginning' ] or
self._ending != self._mysql.data[ 'ending' ] or
self._red_page_id != self._mysql.data[ 'red_page_id' ] or
self._red_page_id != self._mysql.data[ 'redpageid' ] or
self._heading != self._mysql.data[ 'heading' ]):
self._mysql.update_fam( self._red_page_id, self._heading,
self._mysql.update_fam( self._redpageid, self._heading,
self._beginning, self._ending,
self._status )
self._raw_status() )
@classmethod
def flush_db_cache( cls ):
@ -143,6 +143,61 @@ class RedFam:
"""
MysqlRedFam.flush()
def add_status(self, status):
    """
    Put a single status string into this object's status set.

    Adding a status that is already present leaves the set unchanged.

    @param status Status string to add
    @type status str
    """
    # Merging a one-element tuple has the same effect as set.add()
    self._status.update((status,))
def remove_status(self, status, weak=True):
    """
    Take a status string out of this object's status set.

    With weak=True (default) a status that is not set is silently
    ignored; with weak=False a missing status raises KeyError.

    @param status Status string to remove
    @type status str
    @param weak Ignore a missing status instead of raising
    @type weak bool
    """
    if not weak:
        # Strict mode: set.remove() raises KeyError when absent
        self._status.remove(status)
        return

    self._status.discard(status)
def has_status(self, status):
    """
    Tell whether the given status is currently set on this object.

    @param status Status string to check
    @type status str
    @returns True if status is present else False
    @rtype bool
    """
    # The membership test already yields a bool; no if/else needed
    return status in self._status
def _parse_status(self, raw_status):
    """
    Set the status set from a comma separated list and return it.

    The parsed set is stored in self._status and also returned, so
    both ``self._parse_status(x)`` and
    ``self._status = self._parse_status(x)`` leave a set in place.
    None and empty strings yield an empty set; empty list items and
    whitespace around items are dropped.

    @param raw_status Comma separated string of statuses (from DB)
    @type raw_status str or None
    @returns Set of status strings
    @rtype set
    """
    if not raw_status:
        # None / "" mean "no status"; avoid producing {''}
        parsed = set()
    else:
        parsed = { item.strip() for item in raw_status.split(",")
                   if item.strip() }

    self._status = parsed
    return parsed
def _raw_status( self ):
    """
    Return the status set as comma separated string (to save in DB).

    Items are sorted before joining because set iteration order is
    arbitrary; without sorting the same set could serialize to
    different strings and look like a change.

    @returns Raw status string, empty string for an empty set
    @rtype str
    """
    return ",".join( sorted( self._status ) )
class RedFamParser( RedFam ):
"""
@ -165,15 +220,15 @@ class RedFamParser( RedFam ):
wurde gewünscht von:"
__done_notice2 = "{{Erledigt|"
def __init__( self, heading, red_page, red_page_archive,
def __init__( self, heading, redpage, redpagearchive,
beginning, ending=None ):
"""
Creates a RedFam object based on data collected while parsing red_pages
combined with possibly former known data from db
@param red_fam_heading str Wikitext heading of section
@param red_page page Pywikibot.page object
@param red_page_archive bool Is red_page an archive
@param redfam_heading str Wikitext heading of section
@param redpage page Pywikibot.page object
@param redpagearchive bool Is red_page an archive
@param beginning datetime Timestamp of beginning
str as strptime parseable string
@param ending datetime Timestamp of ending
@ -181,9 +236,9 @@ class RedFamParser( RedFam ):
"""
# Set object attributes:
self._red_page_id = red_page._pageid
self._red_page_archive = red_page_archive
self._fam_hash = None
self._redpageid = redpage._pageid
self._redpagearchive = redpagearchive
self._famhash = None
# Method self.add_beginning sets self._beginning directly
self.add_beginning( beginning )
@ -195,7 +250,7 @@ class RedFamParser( RedFam ):
# If no ending was provided set to None
self._ending = None
self._status = None
self._status = set()
# Parse the provided heading of redundance section
# to set self._articlesList
@ -204,7 +259,7 @@ class RedFamParser( RedFam ):
# Calculates the sha1 hash over self._articlesList to
# rediscover known redundance families
self.calc_fam_hash()
self.calc_famhash()
# Open database connection, ask for data if existing,
# otherwise create entry
@ -223,11 +278,11 @@ class RedFamParser( RedFam ):
# We need a connection to our mysqldb
self._mysql = MysqlRedFam( )
self._mysql.get_fam( self._fam_hash )
self._mysql.get_fam( self._famhash )
if not self._mysql.data:
self._mysql.add_fam( self._articlesList, self._heading,
self._red_page_id, self._beginning,
self._redpageid, self._beginning,
self._ending )
def heading_parser( self, heading ):
@ -253,7 +308,7 @@ class RedFamParser( RedFam ):
# Catch sections with more then 8 articles, print error
if len( self._articlesList ) > 8:
# For repression in output we need to know the fam hash
self.calc_fam_hash()
self.calc_famhash()
jogobot.output(
( "\03{{lightred}}" +
@ -317,21 +372,18 @@ class RedFamParser( RedFam ):
- 3 and greater status was set by worker script, do not change it
"""
# Do not change stati set by worker script etc.
if not self._mysql.data['status'] > 2:
# No ending, discussion is running:
# Sometimes archived discussions also have no detectable ending
if not self._ending and not self._red_page_archive:
self._status = 0
else:
if not self._red_page_archive:
self._status = 1
else:
self._status = 2
# No ending, discussion is running:
# Sometimes archived discussions also have no detectable ending
if not self._ending and not self._redpagearchive:
self.add_status("open")
else:
self._status = self._mysql.data[ 'status' ]
self.remove_status("open")
if not self._redpagearchive:
self.add_status("done")
else:
self.remove_status("done")
self.remove_status("open")
self.add_status("archived")
@classmethod
def is_section_redfam_cb( cls, heading ):
@ -444,15 +496,15 @@ class RedFamWorker( RedFam ):
articlesList.append( mysql_data[ key ] )
super().__init__( articlesList, mysql_data[ 'beginning' ],
mysql_data[ 'ending' ], mysql_data[ 'red_page_id' ],
mysql_data[ 'status' ], mysql_data[ 'fam_hash' ],
mysql_data[ 'ending' ], mysql_data[ 'redpageid' ],
mysql_data[ 'status' ], mysql_data[ 'famhash' ],
mysql_data[ 'heading' ] )
self._mysql.data = mysql_data
# Get related RedPage-Information
self.redpageid = mysql_data[ 'page_id' ]
self.redpagetitle = mysql_data[ 'page_title' ]
self.redpageid = mysql_data[ 'pageid' ]
self.redpagetitle = mysql_data[ 'pagetitle' ]
# Make sure locale is set to 'de_DE.UTF-8' to prevent problems
# with wrong month abreviations in strptime
@ -499,7 +551,7 @@ class RedFamWorker( RedFam ):
Sets status to 3 when worked on
"""
self._status = 3
pass
def get_disc_link( self ):
"""

77
lib/redpage.py

@ -49,6 +49,8 @@ class RedPage:
@type pageid int
"""
self._status = set()
# Save the pywikibot page object
self.page = page
self.pageid = pageid
@ -71,7 +73,7 @@ class RedPage:
elif self.pageid:
self.__mysql = MysqlRedPage( self.pageid )
self.page = pywikibot.Page( pywikibot.Site(),
self.__mysql.data['page_title'] )
self.__mysql.data['pagetitle'] )
self.page.exists()
else:
raise ValueError( "Page NOR pagid provided!" )
@ -84,9 +86,9 @@ class RedPage:
Check wether the page was changed since last run
"""
if( self.__mysql.data != { 'page_id': self.page._pageid,
'rev_id': self.page._revid,
'page_title': self.page.title(),
if( self.__mysql.data != { 'pageid': self.page._pageid,
'revid': self.page._revid,
'pagetitle': self.page.title(),
'status': self.__mysql.data[ 'status' ] } ):
self._changed = True
else:
@ -110,7 +112,7 @@ class RedPage:
Decides wether current RedPage needs to be parsed or not
"""
if( self._changed or self.__mysql.data[ 'status' ] == 0 ):
if( self._changed or self.__mysql.data[ 'status' ] == "" ):
return True
else:
return False
@ -146,14 +148,16 @@ class RedPage:
Updates the page meta data in mysql db
"""
if( self._parsed or not self._changed ):
status = 1
self.add_status( "open" )
if( self.is_archive() ):
status = 2
self.remove_status( "open" )
self.add_status( "archived" )
else:
status = 0
self._status = set()
self.__mysql.update_page( self.page._revid, self.page.title(), status )
self.__mysql.update_page( self.page._revid, self.page.title(),
self._raw_status() )
@classmethod
def flush_db_cache( cls ):
@ -161,3 +165,58 @@ class RedPage:
Calls flush method of Mysql Interface class
"""
MysqlRedPage.flush()
def add_status(self, status):
    """
    Put a single status string into this object's status set.

    Adding a status that is already present leaves the set unchanged.

    @param status Status string to add
    @type status str
    """
    # Merging a one-element tuple has the same effect as set.add()
    self._status.update((status,))
def remove_status(self, status, weak=True):
    """
    Take a status string out of this object's status set.

    With weak=True (default) a status that is not set is silently
    ignored; with weak=False a missing status raises KeyError.

    @param status Status string to remove
    @type status str
    @param weak Ignore a missing status instead of raising
    @type weak bool
    """
    if not weak:
        # Strict mode: set.remove() raises KeyError when absent
        self._status.remove(status)
        return

    self._status.discard(status)
def has_status(self, status):
    """
    Tell whether the given status is currently set on this object.

    @param status Status string to check
    @type status str
    @returns True if status is present else False
    @rtype bool
    """
    # The membership test already yields a bool; no if/else needed
    return status in self._status
def _parse_status(self, raw_status):
    """
    Set the status set from a comma separated list and return it.

    The parsed set is stored in self._status and also returned.
    None and empty strings yield an empty set; empty list items and
    whitespace around items are dropped, so an empty DB value does
    not become a set containing the empty string.

    @param raw_status Comma separated string of statuses (from DB)
    @type raw_status str or None
    @returns Set of status strings
    @rtype set
    """
    if not raw_status:
        # None / "" mean "no status"; avoid producing {''}
        parsed = set()
    else:
        parsed = { item.strip() for item in raw_status.split(",")
                   if item.strip() }

    self._status = parsed
    return parsed
def _raw_status( self ):
    """
    Return the status set as comma separated string (to save in DB).

    Items are sorted before joining because set iteration order is
    arbitrary; without sorting the same set could serialize to
    different strings between runs.

    @returns Raw status string, empty string for an empty set
    @rtype str
    """
    return ",".join( sorted( self._status ) )

Loading…
Cancel
Save