From 30e1672557732f3734c0c8293af2e46c55bdf19a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 10 Sep 2015 11:23:15 +0200 Subject: [PATCH] Implement methods for parsing red_fams Add Interfaces for RED_FAM --- red_fam.py | 166 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 128 insertions(+), 38 deletions(-) diff --git a/red_fam.py b/red_fam.py index 9cc358b..77afd4c 100644 --- a/red_fam.py +++ b/red_fam.py @@ -1,13 +1,15 @@ import hashlib +import re from datetime import datetime +from mysql_red import MYSQL_RED_FAM + class RED_FAM: - # Define the timestamp format - __timestamp_format = "%H:%M, %d. %b. %Y (%Z)" + - def __init__( self, articlesList, beginning=None, ending=None, status=0 ): + def __init__( self, fam_hash=None, articlesList=None, red_page_id=None, beginning=None, ending=None, status=0 ): """ Generates a new RED_FAM object @@ -16,23 +18,109 @@ class RED_FAM: @param ending datetime Ending date of redundance diskussion """ - self.__articlesList = articlesList + #if( beginning ): + # self.add_beginning( beginning ) + # self._beginning = None - self.__hash = self.__get_fam_hash( ) + #if( ending ): + # self.add_ending( ending ) + #else: + # self._ending = None + + #self._status = status # __TODO__ STATUS CODE - if( beginning ): - self.add_beginning( beginning ) + #self._handle_db() + + + + def __repr__( self ): + + if( self._beginning ): + beginning = ", beginning=" + repr( self._beginning ) else: - self.__beginning = None + beginning = "" + + if( self._ending ): + ending = ", ending=" + repr( self._ending ) + else: + ending = "" + __repr = "RED_FAM( " + repr( self._articlesList ) + beginning + ending + ", status=" + repr( self._status ) + " )" + + return __repr + +class RED_FAM_PARSER( RED_FAM ): + """ + Provides an interface to RED_FAM for adding/updating redundance families while parsing redundance pages + """ + + # Define the timestamp format + __timestamp_format = "%H:%M, %d. %b. 
%Y (%Z)" + + def __init__( self, red_fam_heading, red_page_id, red_page_archive, beginning, ending=None ): + """ + Creates a RED_FAM object based on data collected while parsing red_pages combined with any previously known data from the db + + @param red_fam_heading string String with wikitext heading of redundance section + @param red_page_id int MediaWiki page_id of red_page containing red_fam + @param red_page_archive bool Is red_page an archive + @param beginning datetime Timestamp of beginning of redundance discussion + string Timestamp of beginning of redundance discussion as strptime-parseable string + @param ending datetime Timestamp of ending of redundance discussion + string Timestamp of ending of redundance discussion as strptime-parseable string + """ + ## Set object attributes: + self._red_page_id = red_page_id + self._red_page_archive = red_page_archive + + # Method self.add_beginning sets self._beginning directly + self.add_beginning( beginning ) + + # Method self.add_ending sets self._ending directly if( ending ): self.add_ending( ending ) else: - self.__ending = None - - self.__status = status # __TODO__ STATUS CODE + #If no ending was provided set to None + self._ending = None - def __get_fam_hash( self ): + # Parse the provided heading of redundance section to set self._articlesList + self.heading_parser( red_fam_heading ) + + # Calculates the sha1 hash over self._articlesList to rediscover known redundance families + self.fam_hash() + + # Open database connection, ask for data if existing, otherwise create entry + self.__handle_db() + + # Check status changes + self.status() + + # Since status change means something has changed, update database + if( self._status != self.__mysql.data[ 'status' ] ): + self.__mysql.update_fam( self._red_page_id, self._beginning, self._ending, self._status ) + + def __handle_db( self ): + """ + Handles opening of db connection + """ + # We need a connection to our mysqldb + self.__mysql = MYSQL_RED_FAM( self._fam_hash ) 
+ + if not self.__mysql.data: + self.__mysql.add_fam( self._articlesList, self._red_page_id, self._beginning, self._ending ) + + def heading_parser( self, red_fam_heading): + """ + Parses given red_fam_heading string and saves articles list + """ + + # Predefine a pattern for wikilinks' destination + wikilink_pat = re.compile( r"\[\[([^\[\]\|]*)(\]\]|\|)" ) + + # We get the pages in first [0] element iterating over wikilink_pat.findall( line ) + self._articlesList = [ link[0] for link in wikilink_pat.findall( red_fam_heading ) ] + + def fam_hash( self ): """ Calculates the SHA-1 hash for the articlesList of redundance family. Since we don't need security SHA-1 is just fine. @@ -41,10 +129,10 @@ class RED_FAM: """ h = hashlib.sha1() - h.update( str( self.__articlesList ).encode('utf-8') ) + h.update( str( self._articlesList ).encode('utf-8') ) - return h.hexdigest() - + self._fam_hash= h.hexdigest() + def add_beginning( self, beginning ): """ Adds the beginning date of a redundance diskussion to the object and sets changed to True @@ -52,9 +140,7 @@ class RED_FAM: @param datetime datetime Beginning date of redundance diskussion """ - self.__beginning = self.__datetime( beginning ) - - self.__changed = True + self._beginning = self.__datetime( beginning ) def add_ending( self, ending ): """ @@ -63,11 +149,11 @@ class RED_FAM: @param datetime datetime Ending date of redundance diskussion """ - self.__ending = self.__datetime( ending ) + self._ending = self.__datetime( ending ) - self.__status = 2 #__TODO__ STATUS NUMBER - self.__changed = True - + self._status = 1 #__TODO__ STATUS NUMBER + self._changed = True + def __datetime( self, timestamp ): """ Decides wether given timestamp is a parseable string or a datetime object and returns a datetime object in both cases @@ -82,22 +168,26 @@ class RED_FAM: return timestamp else: return datetime.strptime( timestamp, type( self ).__timestamp_format ) - - def __repr__( self ): - - if( self.__beginning ): - beginning = ", 
beginning=" + repr( self.__beginning ) - else: - beginning = "" - if( self.__ending ): - ending = ", ending=" + repr( self.__ending ) + def status( self ): + """ + Handles detection of correct status + """ + # Discussion running --> Status ? --> 0 ! _ending + # Discussion finished --> Status 0 --> 1 _ending + # Discussion archived --> Status 0/1 --> 2 ??? + + if not self._ending: + self._status = 0 else: - ending = "" - - __repr = "RED_FAM( " + repr( self.__articlesList ) + beginning + ending + ", status=" + repr( self.__status ) + " )" - - return __repr - -x = RED_FAM( [ "Test", "Foo", "Bar" ] ) + if not self._red_page_archive: + self._status = 1 + else: + self._status = 2 + +class RED_FAM_WORKER( RED_FAM ): + """ + Handles working with redundance families stored in database where discussion is finished + """ + pass print( repr( x ) )