From 99f050acd33ada72aaa0c9bff7a78b62452ddce0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 16:22:19 +0200 Subject: [PATCH] Add workaround to detect ending datetime if there is no done notice --- red_fam.py | 16 +++++++++++++++- red_page.py | 12 ++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/red_fam.py b/red_fam.py index 2215dc6..c332eac 100644 --- a/red_fam.py +++ b/red_fam.py @@ -65,7 +65,7 @@ class RED_FAM_PARSER( RED_FAM ): __sectionhead_pat = re.compile( r"^=+.*\[\[.+\]\].*\[\[.+\]\].*=+$" ) # Define timestamp re.pattern - __timestamp_pat = re.compile( r"\d{2}:\d{2}, (\d{1,2}. (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? \d{4}) \(CES?T\)" ) + __timestamp_pat = re.compile( r"(\d{2}:\d{2}, \d{1,2}. (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? \d{4} \(CES?T\))" ) # Textpattern for recognisation of done-notices __done_notice = ":Archivierung dieses Abschnittes wurde gewünscht von:" @@ -270,6 +270,20 @@ class RED_FAM_PARSER( RED_FAM ): if result: return result.group() return None + + @classmethod + def is_ending2( cls, line ): + """ + Returns the last timestamp found in line, otherwise None (findall yields one tuple per match; its first item is the whole timestamp) + @param str line String to search for timestamps + + @returns str Full text of the last timestamp in line, None if there is none + """ + result = cls.__timestamp_pat.findall( line ) + if result: + return result[-1][0] + else: + return None class RED_FAM_WORKER( RED_FAM ): """ diff --git a/red_page.py b/red_page.py index 3f870d0..d719994 100644 --- a/red_page.py +++ b/red_page.py @@ -85,9 +85,15 @@ class RED_PAGE: # Before working with next red_fam create the object for the one before (if one) if( fam_heading and beginning ): - try: + # No ending detected so far: walk text_lines backwards from index i-1 down to last_fam and take the last timestamp of the first line that has one + if not ending: + j = i + while (j > last_fam) and not ending: + j -= 1 + ending = RED_FAM_PARSER.is_ending2( text_lines[ j ] ) + + red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) - except: pass # Save line 
number for last detected Redundance-Family @@ -116,6 +122,8 @@ class RED_PAGE: red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) #~ except: #~ pass + # Set status of red_page to parsed + self.__parsed = True def __update_db( self ): """ Updates the page meta data in mysql db