From a97e2cea5a9f3f5a3ef9cb3263905b3484f00d38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 23:47:35 +0200 Subject: [PATCH] Since some timestamps are broken we need to reconstruct them by regex match groups Prevents ValueErrors of datetime.strptime in most cases --- red_fam.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/red_fam.py b/red_fam.py index e7df494..072321e 100644 --- a/red_fam.py +++ b/red_fam.py @@ -59,13 +59,12 @@ class RED_FAM_PARSER( RED_FAM ): # Define the timestamp format __timestamp_format = "%H:%M, %d. %b. %Y (%Z)" - __timestamp_format2 = "%H:%M, %d. %b %Y (%Z)" # Catch missing point after month abreviation # Define section heading re.pattern __sectionhead_pat = re.compile( r"^=+.*\[\[.+\]\].*\[\[.+\]\].*=+" ) # Define timestamp re.pattern - __timestamp_pat = re.compile( r"(\d{2}:\d{2}, \d{1,2}. (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? \d{4} \(CES?T\))" ) + __timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? (\d{4}) (\(CES?T\))" ) # Textpattern for recognisation of done-notices __done_notice = ":Archivierung dieses Abschnittes wurde gewünscht von:" @@ -178,11 +177,7 @@ class RED_FAM_PARSER( RED_FAM ): if( isinstance( timestamp, datetime ) ): return timestamp else: - # Catch missing point after month abreviation - try: - result = datetime.strptime( timestamp, type( self ).__timestamp_format ) - except ValueError: - result = datetime.strptime( timestamp, type( self ).__timestamp_format2 ) + result = datetime.strptime( timestamp, type( self ).__timestamp_format ) return result def status( self ): @@ -251,9 +246,11 @@ class RED_FAM_PARSER( RED_FAM ): @returns str Timestamp, otherwise None """ - result = cls.__timestamp_pat.search( line ) - if result: - return result.group() + match = cls.__timestamp_pat.search( line ) + if match: + # Since some timestamps are broken we need to reconstruct them by regex match groups + result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". " + match.group(4) + " " + match.group(5) + return result else: return None @@ -266,9 +263,11 @@ class RED_FAM_PARSER( RED_FAM ): @returns str Timestamp, otherwise None """ if ( cls.__done_notice in line ) or ( cls.__done_notice2 in line ): - result = cls.__timestamp_pat.search( line ) - if result: - return result.group() + match = cls.__timestamp_pat.search( line ) + if match: + # Since some timestamps are broken we need to reconstruct them by regex match groups + result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". " + match.group(4) + " " + match.group(5) + return result return None @classmethod @@ -279,9 +278,11 @@ class RED_FAM_PARSER( RED_FAM ): @returns str Timestamp, otherwise None """ - result = cls.__timestamp_pat.findall( line ) - if result: - return result[-1][0] + matches = cls.__timestamp_pat.findall( line ) + if matches: + # Since some timestamps are broken we need to reconstruct them by regex match groups + result = matches[-1][0] + ", " + matches[-1][1] + ". " + matches[-1][2] + ". " + matches[-1][3] + " " + matches[-1][4] + return result else: return None