Files
jogobot-red/redfam.py
GOLDERWEB – Jonathan Golder 79dbde2413 Provide Replacement to @deprecated() as str
Since switching to pywikibot-master (or Python 3.5, see the ticket below),
the @deprecated decorator requires a str as its parameter instead of a
callable object, as was passed before.

Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=69 FS#69]
2016-08-23 21:23:24 +02:00

526 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# redfam.py
#
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides classes for working with RedFams
"""
import hashlib
import locale
import re
from datetime import datetime
import mwparserfromhell as mwparser # noqa
import pywikibot # noqa
from pywikibot.tools import deprecated # noqa
import jogobot
from mysqlred import MysqlRedFam
class RedFam:
    """
    Basic class for RedFams, containing the basic data structure
    """

    def __init__(self, articlesList, beginning, ending=None, red_page_id=None,
                 status=0, fam_hash=None, heading=None):
        """
        Generates a new RedFam object

        @param articlesList list      List of articles
        @param beginning    datetime  Beginning date
        @param ending       datetime  Ending date
        @param red_page_id  int       MW pageid of containing RedPage
        @param status       int       Status of RedFam
        @param fam_hash     str       SHA1 hash of articlesList
        @param heading      str       Original heading of RedFam (Link)

        @raises RedFamHashError  If a fam_hash was given which does not match
                                 the hash calculated from articlesList
        """
        # Initial attribute values
        self._articlesList = articlesList
        self._beginning = beginning
        self._ending = ending
        self._red_page_id = red_page_id
        self._status = status
        self._fam_hash = fam_hash
        self._heading = heading

        # Calculate (or verify) the sha1 hash over self._articlesList to
        # rediscover known redundance families
        self.calc_fam_hash()

    def __repr__(self):
        """
        Returns the representation str of the RedFam object

        @returns str    repr() string
        """
        return (
            "RedFam( articlesList={!r}, heading={!r}, beginning={!r}, "
            "ending={!r}, red_page_id={!r}, status={!r}, fam_hash={!r} )"
        ).format(self._articlesList, self._heading, self._beginning,
                 self._ending, self._red_page_id, self._status,
                 self._fam_hash)

    def calc_fam_hash(self):
        """
        Calculates the SHA-1 hash for the articlesList of redundance family.
        Since we don't need security SHA-1 is just fine.

        Only the first 8 entries of the articles list are hashed. If the
        object already has a fam_hash, the calculated value is only used to
        verify it; otherwise the calculated value is stored as fam_hash.

        @returns str    String with the hexadecimal hash digest
        @raises RedFamHashError  If the stored fam_hash differs from the
                                 calculated one
        """
        h = hashlib.sha1()
        h.update(str(self._articlesList[:8]).encode('utf-8'))
        calculated = h.hexdigest()

        if not self._fam_hash:
            # Nothing known yet, adopt the calculated hash
            self._fam_hash = calculated
        elif calculated != self._fam_hash:
            raise RedFamHashError(self._fam_hash, calculated)

        # Return the digest as promised by the documented contract
        return self._fam_hash

    @classmethod
    def flush_db_cache(cls):
        """
        Calls flush method of Mysql Interface class
        """
        MysqlRedFam.flush()
class RedFamParser(RedFam):
    """
    Provides an interface to RedFam for adding/updating redundance families
    while parsing redundance pages
    """

    # Define the timestamp format
    __timestamp_format = jogobot.config['redundances']['timestamp_format']

    # Define section heading re.pattern
    __sectionhead_pat = re.compile(r"^(.*\[\[.+\]\].*\[\[.+\]\].*)")

    # Define timestamp re.pattern
    __timestamp_pat = re.compile(jogobot.config['redundances']
                                 ['timestamp_regex'])

    # Text patterns for recognition of done-notices
    __done_notice = ":<small>Archivierung dieses Abschnittes \
wurde gewünscht von:"
    __done_notice2 = "{{Erledigt|"

    def __init__(self, heading, red_page_id, red_page_archive,
                 beginning, ending=None):
        """
        Creates a RedFam object based on data collected while parsing red_pages
        combined with possibly former known data from db

        @param heading            str       Wikitext heading of section
        @param red_page_id        int       MediaWiki page_id
        @param red_page_archive   bool      Is red_page an archive
        @param beginning          datetime  Timestamp of beginning
                                  str       as strptime parseable string
        @param ending             datetime  Timestamp of ending
                                  str       strptime parseable string
        """
        # Set object attributes:
        self._red_page_id = red_page_id
        self._red_page_archive = red_page_archive
        self._fam_hash = None

        # Method self.add_beginning sets self._beginning directly
        self.add_beginning(beginning)

        # Method self.add_ending sets self._ending directly
        if ending:
            self.add_ending(ending)
        else:
            # If no ending was provided set to None
            self._ending = None

        # Must exist before heading_parser runs, since that method may
        # call repr( self ) which reads the status
        self._status = None

        # Parse the provided heading of redundance section
        # to set self._articlesList
        self.heading_parser(heading)

        # Calculates the sha1 hash over self._articlesList to
        # rediscover known redundance families
        self.calc_fam_hash()

        # Open database connection, ask for data if existing,
        # otherwise create entry
        self.__handle_db()

        # Check status changes
        self.status()

        # Triggers db update if anything changed
        self.changed()

    def __handle_db(self):
        """
        Handles opening of db connection

        Asks the db for an entry with our fam_hash and adds a new entry if
        the db interface holds no data afterwards
        """
        # We need a connection to our mysqldb
        self.__mysql = MysqlRedFam()
        self.__mysql.get_fam(self._fam_hash)

        if not self.__mysql.data:
            self.__mysql.add_fam(self._articlesList, self._heading,
                                 self._red_page_id, self._beginning,
                                 self._ending)

    def heading_parser(self, heading):
        """
        Parses given red_fam_heading string and saves articles list

        At most 8 articles are kept; if the heading links more, a warning
        is written via jogobot.output and the list is truncated.

        @param heading    Heading of RedFam-Section
        @type heading     wikicode or mwparser-parseable
        """
        # Parse heading with mwparser if needed
        if not isinstance(heading, mwparser.wikicode.Wikicode):
            heading = mwparser.parse(heading)

        # Save heading as string
        self._heading = str(heading)

        # Save destinations of wikilinks in headings
        self._articlesList = [str(link.title) for link
                              in heading.ifilter_wikilinks()]

        # Catch sections with more than 8 articles, print error
        if len(self._articlesList) > 8:
            # For representation in output we need to know the fam hash
            self.calc_fam_hash()

            jogobot.output(
                ("\03{{lightred}}" +
                 "Maximum number of articles in red_fam exceeded, " +
                 "maximum number is 8, {number:d} were given \n {repress}"
                 ).format(number=len(self._articlesList),
                          repress=repr(self)),
                "WARNING")

            # Only save the first 8 articles
            self._articlesList = self._articlesList[:8]

    def add_beginning(self, beginning):
        """
        Adds the beginning date of a redundance discussion to the object

        @param beginning    datetime or str    Beginning date
        """
        self._beginning = self.__datetime(beginning)

    def add_ending(self, ending):
        """
        Adds the ending date of a redundance discussion to the object.

        @param ending    datetime or str    Ending date
        """
        self._ending = self.__datetime(ending)

    def __datetime(self, timestamp):
        """
        Decides whether given timestamp is a parseable string or a
        datetime object and returns a datetime object in both cases

        @param datetime timestamp   Datetime object
               str      timestamp   Parseable string with timestamp

        @returns datetime   Datetime object
        """
        # Make sure locale is set to 'de_DE.UTF-8' to prevent problems
        # with wrong month abbreviations in strptime
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

        if isinstance(timestamp, datetime):
            return timestamp

        return datetime.strptime(timestamp,
                                 type(self).__timestamp_format)

    def status(self):
        """
        Handles detection of correct status
        These stati are distinguished:
         - 0 Discussion running --> no ending, page is not an archive
         - 1 Discussion over --> ending present, page is not an archive
         - 2 Discussion archived --> ending (normally) present, page is archive
         - 3 and greater status was set by worker script, do not change it
        """
        db_status = self.__mysql.data['status']

        # Do not change stati set by worker script etc.
        if db_status > 2:
            self._status = db_status
        elif self._red_page_archive:
            # Sometimes archived discussions have no detectable ending,
            # so an archive page always means status 2
            self._status = 2
        elif self._ending:
            self._status = 1
        else:
            # No ending, discussion is running
            self._status = 0

    def changed(self):
        """
        Checks whether anything has changed and maybe triggers db update
        """
        data = self.__mysql.data

        # On archived red_fams do not delete possibly existing ending
        if not self._ending and self._status > 1 and data['ending']:
            self._ending = data['ending']

        # Since status change means something has changed, update database
        if (self._status != data['status'] or
                self._beginning != data['beginning'] or
                self._ending != data['ending'] or
                self._red_page_id != data['red_page_id'] or
                self._heading != data['heading']):
            self.__mysql.update_fam(self._red_page_id, self._heading,
                                    self._beginning, self._ending,
                                    self._status)

    @classmethod
    @deprecated
    def is_sectionheading(cls, line):
        """
        Checks whether given line is a red_fam section heading

        @param str line     String to check

        @returns bool       Returns True if it is a section heading
        """
        return bool(cls.__sectionhead_pat.search(str(line)))

    @classmethod
    def parser(cls, text, pageid, isarchive=False):
        """
        Handles parsing of redfam section

        Extracts the heading and the dates of the section and creates a
        RedFamParser object from them (which handles the db update itself).
        If the section contains no timestamp, no beginning can be
        determined and creating the object fails with a TypeError raised
        by strptime.

        @param text       Text of RedFam-Section
        @type text        wikicode or mwparser-parseable
        @param pageid     MediaWiki page_id of the containing page
        @type pageid      int
        @param isarchive  Is the containing page an archive
        @type isarchive   bool
        """
        # Parse text with mwparser if needed
        if not isinstance(text, mwparser.wikicode.Wikicode):
            text = mwparser.parse(text)

        # Extract heading text
        heading = next(text.ifilter_headings()).title

        # Extract beginning and maybe ending
        (beginning, ending) = RedFamParser.extract_dates(text, isarchive)

        # Create the RedFam object
        RedFamParser(heading, pageid, isarchive, beginning, ending)

    @staticmethod
    def __rebuild_timestamp(groups):
        """
        Reconstructs a timestamp string from the regex match groups, since
        some timestamps in the wikitext are broken

        @param groups   First four match groups of the timestamp regex
        @type groups    tuple of strs

        @returns str    Timestamp string
        """
        return (groups[0] + ", " + groups[1] + ". " +
                groups[2] + ". " + groups[3])

    @classmethod
    def extract_dates(cls, text, isarchive=False):
        """
        Returns tuple of the first and maybe last timestamp of a section.
        Last timestamp is only returned if there is a done notice or param
        *isarchive* is set to 'True'

        @param text         Text to search in
        @type text          Any Type castable to str
        @param isarchive    If true skip searching done notice (on archivepages)
        @type isarchive     bool

        @returns            (beginning, ending); each element is None if it
                            could not be determined
        @returntype         tuple of strs (or None)
        """
        # Work on the plain string for both regex and done-notice search
        text = str(text)

        # Match all timestamps
        matches = cls.__timestamp_pat.findall(text)

        # Without any timestamp there is neither beginning nor ending
        if not matches:
            return (None, None)

        # First one is beginning
        beginning = cls.__rebuild_timestamp(matches[0])

        # Last one maybe is ending
        # Done notice format 1
        # Done notice format 2
        # Or on archivepages
        if (cls.__done_notice in text or
                cls.__done_notice2 in text or
                isarchive):
            ending = cls.__rebuild_timestamp(matches[-1])
        else:
            ending = None

        return (beginning, ending)

    @classmethod
    @deprecated('extract_dates')
    def is_beginning(cls, line):
        """
        Returns the first timestamp found in line, otherwise None

        @param str line     String to search in

        @returns str        Timestamp, otherwise None
        """
        return cls.extract_dates(line)[0]

    @classmethod
    @deprecated('extract_dates')
    def is_ending(cls, line, isarchive=False):
        """
        Returns the timestamp of done notice ( if one ), otherwise None

        @param line         String to search in
        @type line          str
        @param isarchive    If true skip searching done notice (on archivepages)
        @type isarchive     bool

        @returns            Timestamp, otherwise None
        @returntype         str
        """
        # Pass isarchive on, otherwise the parameter has no effect
        return cls.extract_dates(line, isarchive)[1]

    @classmethod
    @deprecated('extract_dates')
    def is_ending2(cls, line):
        """
        Returns the last timestamp found in line, otherwise None

        @param str line     String to search in

        @returns str        Timestamp, otherwise None
        """
        return cls.extract_dates(line, True)[1]
class RedFamWorker(RedFam):
    """
    Handles working with redundance families stored in database
    where discussion is finished
    """

    def __init__(self, mysql_data):
        """
        Builds the RedFam from a db row

        @param mysql_data   Mapping with the keys 'beginning', 'ending',
                            'red_page_id', 'status', 'fam_hash', 'heading'
                            and the article columns (keys containing
                            'article')
        """
        # Collect the non-empty article columns in sorted key order
        articlesList = [
            mysql_data[column]
            for column in sorted(mysql_data.keys())
            if 'article' in column and mysql_data[column]
        ]

        super().__init__(
            articlesList,
            beginning=mysql_data['beginning'],
            ending=mysql_data['ending'],
            red_page_id=mysql_data['red_page_id'],
            status=mysql_data['status'],
            fam_hash=mysql_data['fam_hash'],
            heading=mysql_data['heading'],
        )

    @classmethod
    def list_by_status(cls, status):
        """
        Lists red_fams stored in db by given status

        Prints every matching fam; on a hash mismatch the raw db row is
        printed before the error is re-raised.

        @param status   Status to select the red_fams by
        """
        db = MysqlRedFam()
        for row in db.get_by_status(status):
            try:
                worker = cls(row)
                print(worker)
            except RedFamHashError:
                print(row)
                raise
class RedFamError(Exception):
    """
    Base class for all Errors of RedFam-Module
    """

    def __init__(self, message=None):
        """
        Handles Instantiation of RedFamError's

        @param message  str  Error message, a generic default is used if
                             none (or an empty one) is given
        """
        self.message = (
            message or "An Error occured while executing a RedFam action"
        )

    def __str__(self):
        """
        Output of error message

        @returns str    The error message
        """
        return self.message
class RedFamHashError(RedFamError):
    """
    Raised when given RedFamHash does not match with calculated
    """

    def __init__(self, givenHash, calculatedHash):
        """
        Builds the error message from both hashes

        @param givenHash        str  The fam_hash that was provided
        @param calculatedHash   str  The fam_hash that was calculated
        """
        # Adjacent literals instead of a backslash continuation, so the
        # message does not depend on the indentation of the source line
        message = (
            "Given fam_hash ('{given}') does not match with "
            "calculated ('{calc}')"
        ).format(given=givenHash, calc=calculatedHash)

        super().__init__(message)
class RedFamHeadingError(RedFamError):
    """
    Raised when given RedFamHeading does not match __sectionhead_pat Regex
    """

    def __init__(self, heading):
        """
        Builds the error message for the heading that could not be parsed

        @param heading  str  The section heading that did not match
        """
        template = (
            "Error while trying to parse section heading. Given heading "
            "'{heading}' does not match RegEx"
        )
        super().__init__(template.format(heading=heading))