Files
jogobot-red/red_fam.py

310 lines
9.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib
import re
import locale
from datetime import datetime
import pywikibot
from mysql_red import MYSQL_RED_FAM
class RED_FAM:
    """
    Basic data holder for a redundance family

    Stores the values handed to the constructor and provides a readable
    representation. Subclasses may set the same attributes on their own
    without calling this constructor.
    """

    def __init__( self, fam_hash=None, articlesList=None, red_page_id=None, beginning=None, ending=None, status=0 ):
        """
        Generates a new RED_FAM object

        @param fam_hash      str       Hash identifying the redundance family
        @param articlesList  list      List of articles of redundance family
        @param red_page_id   int       Id of the page containing the redundance family
        @param beginning     datetime  Beginning date of redundance discussion
        @param ending        datetime  Ending date of redundance discussion
        @param status        int       Status code of the redundance family
        """
        # All values are stored exactly as given; without these assignments
        # __repr__ had no attributes to read and raised AttributeError.
        self._fam_hash = fam_hash
        self._articlesList = articlesList
        self._red_page_id = red_page_id
        self._beginning = beginning
        self._ending = ending
        self._status = status
        # TODO: normalising beginning/ending via add_beginning()/add_ending()
        # and database handling were sketched here by the original author but
        # are not implemented for the base class.

    def __repr__( self ):
        """
        Returns a string of the form
        "RED_FAM( <articlesList>[, beginning=...][, ending=...], status=<status> )"

        beginning and ending are only included when they are set (truthy).

        @returns str Representation of the object
        """
        # getattr keeps repr usable for debugging even on objects whose
        # construction did not (yet) set every attribute
        beginning = getattr( self, "_beginning", None )
        ending = getattr( self, "_ending", None )

        parts = [ repr( getattr( self, "_articlesList", None ) ) ]
        if beginning:
            parts.append( "beginning=" + repr( beginning ) )
        if ending:
            parts.append( "ending=" + repr( ending ) )
        parts.append( "status=" + repr( getattr( self, "_status", None ) ) )

        return "RED_FAM( " + ", ".join( parts ) + " )"
class RED_FAM_PARSER( RED_FAM ):
    """
    Interface for adding/updating redundance families while redundance pages
    are being parsed

    Besides the instance API the class offers classmethods to recognise
    section headings and timestamps in single lines of wikitext.
    """

    # strptime format of the (reconstructed) timestamps
    __timestamp_format = "%H:%M, %d. %b. %Y (%Z)"

    # Section heading containing at least two wikilinks
    __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" )

    # Timestamp with German month abbreviation and CET/CEST marker
    __timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? (\d{4}) (\(CES?T\))" )

    # Destination part of a wikilink (everything up to "]]" or "|")
    __wikilink_pat = re.compile( r"\[\[([^\[\]\|]*)(\]\]|\|)" )

    # Text fragments marking a line as done-notice
    __done_notice = ":<small>Archivierung dieses Abschnittes wurde gewünscht von:"
    __done_notice2 = "{{Erledigt|"

    def __init__( self, heading, red_page_id, red_page_archive, beginning, ending=None ):
        """
        Creates the object from data collected while parsing a red_page and
        synchronises it with the database entry of the same family

        @param heading           str       Wikitext heading of redundance section
        @param red_page_id       int       MediaWiki page_id of red_page containing red_fam
        @param red_page_archive  bool      Is red_page an archive
        @param beginning         datetime  Timestamp of beginning of redundance discussion
                                 str       The same as string in __timestamp_format
        @param ending            datetime  Timestamp of ending of redundance discussion
                                 str       The same as string in __timestamp_format
        @raises ValueError  If heading is not a valid section heading
        """
        self._red_page_id = red_page_id
        self._red_page_archive = red_page_archive

        # add_beginning / add_ending store the value as datetime
        self.add_beginning( beginning )
        if ending:
            self.add_ending( ending )
        else:
            self._ending = None

        # The real status is detected by self.status() further down
        self._status = None

        # Sets self._heading and self._articlesList
        self.heading_parser( heading )

        # Sets self._fam_hash, used to rediscover known redundance families
        self.fam_hash()

        # Fetch existing db data or create the entry
        self.__handle_db()

        # Detect the status, then write changes (if any) to the db
        self.status()
        self.changed()

    def __handle_db( self ):
        """
        Creates the MYSQL_RED_FAM object for the hash of this family and adds
        a new db entry if it does not provide any data
        """
        self.__mysql = MYSQL_RED_FAM( self._fam_hash )

        if self.__mysql.data:
            return

        self.__mysql.add_fam( self._articlesList, self._heading, self._red_page_id, self._beginning, self._ending )

    def heading_parser( self, heading ):
        """
        Parses the given section heading, stores its content (without the
        equal signs) in self._heading and the link destinations of the
        contained wikilinks in self._articlesList

        If more than 8 articles are found a warning is printed and only the
        first 8 are kept.

        @param heading str Wikitext heading of redundance section
        @raises ValueError If heading does not match the section heading pattern
        """
        found = self.__sectionhead_pat.search( heading )
        if not found:
            raise ValueError( "Heading is not valid" )

        # Heading content is kept for generating section links later
        self._heading = found.group(2).lstrip()

        # findall yields ( destination, delimiter ) tuples, we need the first
        self._articlesList = [ target for target, _ in self.__wikilink_pat.findall( self._heading ) ]

        if len( self._articlesList ) <= 8:
            return

        # Warn first (repr still shows the full list), truncate afterwards
        now = datetime.now().strftime("%Y-%m-%d %H:%M:%S (%Z)")
        pywikibot.output( "{datetime} \03{{lightred}}[WARNING] Maximum number of articles in red_fam exceeded, maximum number is 8, {number:d} were given\n{repress}".format( datetime=now, number=len( self._articlesList ), repress=repr( self ) ) )
        self._articlesList = self._articlesList[:8]

    def fam_hash( self ):
        """
        Calculates the SHA-1 hash over the string representation of
        self._articlesList and stores the hexadecimal digest in
        self._fam_hash (nothing is returned)

        The hash is only an identifier, not a security feature.
        """
        articles = str( self._articlesList ).encode('utf-8')
        self._fam_hash = hashlib.sha1( articles ).hexdigest()

    def add_beginning( self, beginning ):
        """
        Stores the beginning date of the redundance discussion as datetime
        in self._beginning

        @param beginning datetime Beginning date of redundance discussion
                         str      The same as string in __timestamp_format
        """
        self._beginning = self.__datetime( beginning )

    def add_ending( self, ending ):
        """
        Stores the ending date of the redundance discussion as datetime
        in self._ending

        @param ending datetime Ending date of redundance discussion
                      str      The same as string in __timestamp_format
        """
        self._ending = self.__datetime( ending )

    def __datetime( self, timestamp ):
        """
        Returns a datetime object for the given timestamp; datetime objects
        are passed through, everything else is parsed with __timestamp_format

        @param timestamp datetime Datetime object
                         str      Parseable string in format __timestamp_format
        @returns datetime Datetime object
        """
        # The locale is set (process-wide) on every call, even if nothing has
        # to be parsed; the month abbreviations to parse are German
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

        if not isinstance( timestamp, datetime ):
            timestamp = datetime.strptime( timestamp, type( self ).__timestamp_format )
        return timestamp

    def status( self ):
        """
        Detects the status and stores it in self._status

        Possible stati:
        - 0 Discussion is running  --> no ending, page is not an archive
        - 1 Discussion is over     --> ending present, page is not an archive
        - 2 Discussion is archived --> page is an archive (ending may be missing)
        - 3 and greater            --> taken over unchanged from the database
        """
        # Stati greater than 2 were not set by the parser, keep them
        if self.__mysql.data['status'] > 2:
            self._status = self.__mysql.data[ 'status' ]
            return

        if self._red_page_archive:
            # Archived discussions sometimes have no detectable ending
            self._status = 2
        elif self._ending:
            self._status = 1
        else:
            self._status = 0

    def changed( self ):
        """
        Compares status, beginning, ending, red_page_id and heading with the
        data from the database and triggers a db update if any of them differs
        """
        # On archived red_fams keep an ending already known by the database
        if not self._ending and self._status > 1 and self.__mysql.data[ 'ending' ]:
            self._ending = self.__mysql.data[ 'ending' ]

        needs_update = (
            self._status != self.__mysql.data[ 'status' ]
            or self._beginning != self.__mysql.data[ 'beginning' ]
            or self._ending != self.__mysql.data[ 'ending' ]
            or self._red_page_id != self.__mysql.data[ 'red_page_id' ]
            or self._heading != self.__mysql.data[ 'heading' ]
        )
        if needs_update:
            self.__mysql.update_fam( self._red_page_id, self._heading, self._beginning, self._ending, self._status )

    @staticmethod
    def __rebuild_timestamp( parts ):
        """
        Joins the five parts captured by __timestamp_pat (time, day, month,
        year, timezone) to a string in the form "<time>, <day>. <month>. <year> <timezone>"

        @param parts tuple Captured groups of one timestamp match
        @returns str Reconstructed timestamp
        """
        return "{0}, {1}. {2}. {3} {4}".format( *parts )

    @classmethod
    def is_sectionheading( cls, line ):
        """
        Checks whether the given line is a red_fam section heading

        @param line str String to check
        @returns bool True if it is a section heading, otherwise False
        """
        return bool( cls.__sectionhead_pat.search( line ) )

    @classmethod
    def is_beginning( cls, line ):
        """
        Returns the first timestamp found in line, otherwise None

        @param line str String to search in
        @returns str Reconstructed timestamp, otherwise None
        """
        found = cls.__timestamp_pat.search( line )
        if not found:
            return None

        # Some timestamps are broken, so rebuild them from the match groups
        return cls.__rebuild_timestamp( found.groups() )

    @classmethod
    def is_ending( cls, line ):
        """
        Returns the first timestamp of a line containing one of the
        done-notices, otherwise None

        @param line str String to search in
        @returns str Reconstructed timestamp, otherwise None
        """
        if cls.__done_notice not in line and cls.__done_notice2 not in line:
            return None

        found = cls.__timestamp_pat.search( line )
        if not found:
            return None

        # Some timestamps are broken, so rebuild them from the match groups
        return cls.__rebuild_timestamp( found.groups() )

    @classmethod
    def is_ending2( cls, line ):
        """
        Returns the last timestamp found in line, otherwise None

        @param line str String to search in
        @returns str Reconstructed timestamp, otherwise None
        """
        stamps = cls.__timestamp_pat.findall( line )
        if not stamps:
            return None

        # Some timestamps are broken, so rebuild them from the match groups
        return cls.__rebuild_timestamp( stamps[-1] )
class RED_FAM_WORKER( RED_FAM ):
    """
    Subclass of RED_FAM without any behaviour of its own so far

    According to the original author's note it is meant for working with
    redundance families stored in the database whose discussion is finished.
    """