#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Redundance family objects built while parsing redundance pages."""

import hashlib
import locale
import re
from datetime import datetime

from mysql_red import MYSQL_RED_FAM


class RED_FAM:
    """Base class for redundance families."""

    def __init__(self, fam_hash=None, articlesList=None, red_page_id=None,
                 beginning=None, ending=None, status=0):
        """
        Generates a new RED_FAM object

        All arguments are currently accepted but ignored; the attribute
        handling below is still disabled.

        @param articlesList list     List of articles of redundance family
        @param beginning    datetime Beginning date of redundance discussion
        @param ending       datetime Ending date of redundance discussion
        """
        # TODO: disabled draft of the attribute handling, kept for reference
        #if( beginning ):
        #    self.add_beginning( beginning )
        #    self._beginning = None
        #if( ending ):
        #    self.add_ending( ending )
        #else:
        #    self._ending = None
        #self._status = status  # __TODO__ STATUS CODE
        #self._handle_db()

    def __repr__(self):
        """
        Builds a debug representation of the redundance family

        Attributes that have not been set yet are treated as None, so that
        repr() never raises on a partially initialised object.

        @returns str "RED_FAM( <articles>[, beginning=..][, ending=..], status=.. )"
        """
        beginning = getattr(self, "_beginning", None)
        ending = getattr(self, "_ending", None)

        parts = [repr(getattr(self, "_articlesList", None))]
        if beginning:
            parts.append("beginning=" + repr(beginning))
        if ending:
            parts.append("ending=" + repr(ending))
        parts.append("status=" + repr(getattr(self, "_status", None)))

        return "RED_FAM( " + ", ".join(parts) + " )"


class RED_FAM_PARSER(RED_FAM):
    """
    Provides an interface to RED_FAM for adding/updating redundance families
    while parsing redundance pages
    """

    # Define the timestamp format
    __timestamp_format = "%H:%M, %d. %b. %Y (%Z)"
    # Catch missing point after month abbreviation
    __timestamp_format2 = "%H:%M, %d. %b %Y (%Z)"

    # Define section heading re.pattern
    __sectionhead_pat = re.compile(r"={3,4}[^=]*={3,4}")

    # Define timestamp re.pattern
    # The dots after day and month are escaped so that only text which the
    # strptime formats above can parse is recognised as a timestamp
    __timestamp_pat = re.compile(
        r"\d{2}:\d{2}, (\d{1,2}\. "
        r"(Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez)\.? \d{4}) "
        r"\(CES?T\)"
    )

    # Pattern for the destination of wikilinks (text up to "]]" or "|")
    __wikilink_pat = re.compile(r"\[\[([^\[\]\|]*)(\]\]|\|)")

    # Text patterns for recognition of done-notices
    __done_notice = ":Archivierung dieses Abschnittes wurde gewünscht von:"
    __done_notice2 = "{{Erledigt|"

    def __init__(self, red_fam_heading, red_page_id, red_page_archive,
                 beginning, ending=None):
        """
        Creates a RED_FAM object based on data collected while parsing
        red_pages combined with possibly former known data from db

        @param red_fam_heading  string   Wikitext heading of redundance section
        @param red_page_id      int      MediaWiki page_id of red_page
                                         containing red_fam
        @param red_page_archive bool     Is red_page an archive
        @param beginning        datetime Timestamp of beginning of redundance
                                         discussion
                                string   Same timestamp as strptime parseable
                                         string
        @param ending           datetime Timestamp of ending of redundance
                                         discussion
                                string   Same timestamp as strptime parseable
                                         string
        """
        # Set object attributes
        self._red_page_id = red_page_id
        self._red_page_archive = red_page_archive

        # Method self.add_beginning sets self._beginning directly
        self.add_beginning(beginning)

        # Method self.add_ending sets self._ending directly
        if ending:
            self.add_ending(ending)
        else:
            # If no ending was provided set to None
            self._ending = None

        # Parse the provided heading to set self._articlesList
        self.heading_parser(red_fam_heading)

        # Calculate the sha1 hash over self._articlesList to rediscover
        # known redundance families
        self.fam_hash()

        # Open database connection, ask for data if existing,
        # otherwise create entry
        self.__handle_db()

        # Check status changes
        self.status()

        # Triggers db update if anything changed
        self.changed()

    def __handle_db(self):
        """
        Handles opening of db connection

        Creates the MYSQL_RED_FAM object for the family hash and adds a new
        entry if it holds no data for that hash.
        """
        # We need a connection to our mysqldb
        self.__mysql = MYSQL_RED_FAM(self._fam_hash)

        if not self.__mysql.data:
            self.__mysql.add_fam(self._articlesList, self._red_page_id,
                                 self._beginning, self._ending)

    def heading_parser(self, red_fam_heading):
        """
        Parses given red_fam_heading string and saves articles list

        @param red_fam_heading string Wikitext heading of redundance section
        """
        # findall yields one tuple per wikilink; the link destination is its
        # first [0] element
        self._articlesList = [
            link[0]
            for link in type(self).__wikilink_pat.findall(red_fam_heading)
        ]

    def fam_hash(self):
        """
        Calculates the SHA-1 hash for the articlesList of redundance family
        and stores it in self._fam_hash.
        Since we don't need security SHA-1 is just fine.

        @returns str String with the hexadecimal hash digest
        """
        h = hashlib.sha1()
        h.update(str(self._articlesList).encode('utf-8'))
        self._fam_hash = h.hexdigest()
        return self._fam_hash

    def add_beginning(self, beginning):
        """
        Adds the beginning date of a redundance discussion to the object

        @param beginning datetime Beginning date of redundance discussion
                         str      Parseable string with that timestamp
        """
        self._beginning = self.__datetime(beginning)

    def add_ending(self, ending):
        """
        Adds the ending date of a redundance discussion to the object

        @param ending datetime Ending date of redundance discussion
                      str      Parseable string with that timestamp
        """
        self._ending = self.__datetime(ending)

    def __datetime(self, timestamp):
        """
        Decides whether given timestamp is a parseable string or a
        datetime object and returns a datetime object in both cases

        @param timestamp datetime Datetime object
                         str      Parseable string with timestamp in format
                                  __timestamp_format or __timestamp_format2

        @returns datetime Datetime object
        @raises ValueError if the string matches neither timestamp format
        """
        # German month abbreviations are needed for %b.
        # NOTE(review): this changes the process-wide locale on every call
        # and fails if de_DE.UTF-8 is not installed - confirm this is wanted
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

        if isinstance(timestamp, datetime):
            return timestamp

        try:
            return datetime.strptime(timestamp,
                                     type(self).__timestamp_format)
        except ValueError:
            # Catch missing point after month abbreviation
            return datetime.strptime(timestamp,
                                     type(self).__timestamp_format2)

    def status(self):
        """
        Handles detection of correct status
        There are three possible stati:
        - 0 Discussion is running --> no ending, page is not an archive
        - 1 Discussion is over --> ending present, page is not an archive
        - 2 Discussion is archived --> ending (normally) present, page is
            an archive
        - 3 and greater status was set by worker script, do not change it
        """
        # NOTE(review): assumes self.__mysql.data has a 'status' entry even
        # directly after add_fam() - confirm against MYSQL_RED_FAM
        db_status = self.__mysql.data['status']

        # Do not change stati set by worker script etc.
        if db_status > 2:
            self._status = db_status
        elif self._red_page_archive:
            # Sometimes archived discussions have no detectable ending
            self._status = 2
        elif self._ending:
            self._status = 1
        else:
            # No ending, discussion is running
            self._status = 0

    def changed(self):
        """
        Checks whether anything has changed and maybe triggers db update
        """
        # On archived red_fams do not delete possibly existing ending
        if (not self._ending and self._status > 1
                and self.__mysql.data['ending']):
            self._ending = self.__mysql.data['ending']

        # Since status change means something has changed, update database
        if (self._status != self.__mysql.data['status']
                or self._beginning != self.__mysql.data['beginning']
                or self._ending != self.__mysql.data['ending']
                or self._red_page_id != self.__mysql.data['red_page_id']):
            self.__mysql.update_fam(self._red_page_id, self._beginning,
                                    self._ending, self._status)

    @classmethod
    def is_sectionheading(cls, line):
        """
        Checks whether given line is a red_fam section heading

        @param line string String to check

        @returns bool Returns True if it is a section heading, otherwise False
        """
        return bool(cls.__sectionhead_pat.search(line))

    @classmethod
    def is_beginning(cls, line):
        """
        Returns the first timestamp found in line, otherwise None

        @param line str String to search in

        @returns str Timestamp, otherwise None
        """
        result = cls.__timestamp_pat.search(line)
        if result:
            return result.group()
        return None

    @classmethod
    def is_ending(cls, line):
        """
        Returns the timestamp of done notice ( if one ), otherwise None

        @param line str String to search in

        @returns str Timestamp, otherwise None
        """
        if (cls.__done_notice in line) or (cls.__done_notice2 in line):
            result = cls.__timestamp_pat.search(line)
            if result:
                return result.group()
        return None


class RED_FAM_WORKER(RED_FAM):
    """
    Handles working with redundance families stored in database
    where discussion is finished
    """
    pass