diff --git a/mysql_red.py b/mysql_red.py
index f6d9535..3293912 100644
--- a/mysql_red.py
+++ b/mysql_red.py
@@ -150,17 +150,17 @@ class MYSQL_RED_FAM( MYSQL_RED ):
         else:
             return False
 
-    def add_fam( self, articlesList, red_page_id, beginning, ending=None, status=0 ):
+    def add_fam( self, articlesList, heading, red_page_id, beginning, ending=None, status=0 ):
 
         cursor = type( self ).connection.cursor()
 
-        query = 'INSERT INTO `red_families` ( fam_hash, red_page_id, beginning, ending, status, article0, article1, article2, article3, article4, article5, article6, article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
-        data = [ str( self.__fam_hash ), red_page_id, beginning, ending, status ]
+        query = 'INSERT INTO `red_families` ( fam_hash, red_page_id, beginning, ending, status, heading, article0, article1, article2, article3, article4, article5, article6, article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
+        data = [ str( self.__fam_hash ), red_page_id, beginning, ending, status, heading ]
 
         for article in articlesList:
             data.append( str( article ) )
 
-        while len( data ) < 13:
+        while len( data ) < 14:
             data.append( None )
 
         data = tuple( data )
@@ -171,7 +171,7 @@ class MYSQL_RED_FAM( MYSQL_RED ):
 
         self.data = self.get_fam()
 
-    def update_fam( self, red_page_id, beginning, ending, status ):
+    def update_fam( self, red_page_id, heading, beginning, ending, status ):
         """
         Updates the red fam row in MySQL-Database for given fam_hash
 
@@ -183,8 +183,8 @@ class MYSQL_RED_FAM( MYSQL_RED ):
 
         cursor = type( self ).connection.cursor()
 
-        query = 'UPDATE `red_families` SET `red_page_id` = ?, `beginning` = ?, `ending` = ?, `status`= ? WHERE `fam_hash` = ?;'
-        data = ( int(red_page_id ), beginning, ending, int( status ), self.__fam_hash )
+        query = 'UPDATE `red_families` SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, `status`= ? WHERE `fam_hash` = ?;'
+        data = ( int(red_page_id ), str( heading ), beginning, ending, int( status ), self.__fam_hash )
 
         cursor.execute( query, data)
 
diff --git a/red_fam.py b/red_fam.py
index 072321e..0eb16fb 100644
--- a/red_fam.py
+++ b/red_fam.py
@@ -61,7 +61,7 @@ class RED_FAM_PARSER( RED_FAM ):
     __timestamp_format = "%H:%M, %d. %b. %Y (%Z)"
 
     # Define section heading re.pattern
-    __sectionhead_pat = re.compile( r"^=+.*\[\[.+\]\].*\[\[.+\]\].*=+" )
+    __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" )
 
     # Define timestamp re.pattern
     __timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? (\d{4}) (\(CES?T\))" )
@@ -70,7 +70,7 @@ class RED_FAM_PARSER( RED_FAM ):
     __done_notice = ":Archivierung dieses Abschnittes wurde gewünscht von:"
     __done_notice2 = "{{Erledigt|"
 
-    def __init__( self, red_fam_heading, red_page_id, red_page_archive, beginning, ending=None ):
+    def __init__( self, heading, red_page_id, red_page_archive, beginning, ending=None ):
         """
         Creates a RED_FAM object based on data collected while parsing red_pages
         combined with possibly former known data from db
@@ -97,7 +97,7 @@ class RED_FAM_PARSER( RED_FAM ):
             self._ending = None
 
         # Parse the provided heading of redundance section to set self._articlesList
-        self.heading_parser( red_fam_heading )
+        self.heading_parser( heading )
 
         # Calculates the sha1 hash over self._articlesList to rediscover known redundance families
         self.fam_hash()
@@ -119,9 +119,9 @@ class RED_FAM_PARSER( RED_FAM ):
         self.__mysql = MYSQL_RED_FAM( self._fam_hash )
 
         if not self.__mysql.data:
-            self.__mysql.add_fam( self._articlesList, self._red_page_id, self._beginning, self._ending )
+            self.__mysql.add_fam( self._articlesList, self._heading, self._red_page_id, self._beginning, self._ending )
 
-    def heading_parser( self, red_fam_heading):
+    def heading_parser( self, heading ):
         """
         Parses given red_fam_heading string and saves articles list
         """
@@ -129,8 +129,15 @@ class RED_FAM_PARSER( RED_FAM ):
 
         # Predefine a pattern for wikilinks' destination
         wikilink_pat = re.compile( r"\[\[([^\[\]\|]*)(\]\]|\|)" )
+        # Parse content of heading for generating section links later
+        match = self.__sectionhead_pat.search( heading )
+        if match:
+            self._heading = match.group(2).lstrip()
+        else:
+            raise ValueError( "Heading is not valid" )
+
         # We get the pages in first [0] element iterating over wikilink_pat.findall( line )
-        self._articlesList = [ link[0] for link in wikilink_pat.findall( red_fam_heading ) ]
+        self._articlesList = [ link[0] for link in wikilink_pat.findall( self._heading ) ]
 
     def fam_hash( self ):
         """
@@ -218,8 +225,8 @@ class RED_FAM_PARSER( RED_FAM ):
 
         # Since status change means something has changed, update database
-        if( self._status != self.__mysql.data[ 'status' ] or self._beginning != self.__mysql.data[ 'beginning' ] or self._ending != self.__mysql.data[ 'ending' ] or self._red_page_id != self.__mysql.data[ 'red_page_id' ] ):
-            self.__mysql.update_fam( self._red_page_id, self._beginning, self._ending, self._status )
+        if( self._status != self.__mysql.data[ 'status' ] or self._beginning != self.__mysql.data[ 'beginning' ] or self._ending != self.__mysql.data[ 'ending' ] or self._red_page_id != self.__mysql.data[ 'red_page_id' ] or self._heading != self.__mysql.data[ 'heading' ]):
+            self.__mysql.update_fam( self._red_page_id, self._heading, self._beginning, self._ending, self._status )
 
     @classmethod
     def is_sectionheading( cls, line ):
         """