Since some timestamps are broken, we need to reconstruct them from regex match groups

This prevents ValueError exceptions from datetime.strptime in most cases
This commit is contained in:
2015-09-11 23:47:35 +02:00
parent 6e64d8448e
commit a97e2cea5a

View File

@@ -59,13 +59,12 @@ class RED_FAM_PARSER( RED_FAM ):
# Define the timestamp format
__timestamp_format = "%H:%M, %d. %b. %Y (%Z)"
__timestamp_format2 = "%H:%M, %d. %b %Y (%Z)" # Catch missing point after month abreviation
# Define section heading re.pattern
__sectionhead_pat = re.compile( r"^=+.*\[\[.+\]\].*\[\[.+\]\].*=+" )
# Define timestamp re.pattern
__timestamp_pat = re.compile( r"(\d{2}:\d{2}, \d{1,2}. (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? \d{4} \(CES?T\))" )
__timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? (\d{4}) (\(CES?T\))" )
# Text pattern for recognition of done notices
__done_notice = ":<small>Archivierung dieses Abschnittes wurde gewünscht von:"
@@ -178,11 +177,7 @@ class RED_FAM_PARSER( RED_FAM ):
if( isinstance( timestamp, datetime ) ):
return timestamp
else:
# Catch missing dot after month abbreviation
try:
result = datetime.strptime( timestamp, type( self ).__timestamp_format )
except ValueError:
result = datetime.strptime( timestamp, type( self ).__timestamp_format2 )
result = datetime.strptime( timestamp, type( self ).__timestamp_format )
return result
def status( self ):
@@ -251,9 +246,11 @@ class RED_FAM_PARSER( RED_FAM ):
@returns str Timestamp, otherwise None
"""
result = cls.__timestamp_pat.search( line )
if result:
return result.group()
match = cls.__timestamp_pat.search( line )
if match:
# Since some timestamps are broken we need to reconstruct them by regex match groups
result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". " + match.group(4) + " " + match.group(5)
return result
else:
return None
@@ -266,9 +263,11 @@ class RED_FAM_PARSER( RED_FAM ):
@returns str Timestamp, otherwise None
"""
if ( cls.__done_notice in line ) or ( cls.__done_notice2 in line ):
result = cls.__timestamp_pat.search( line )
if result:
return result.group()
match = cls.__timestamp_pat.search( line )
if match:
# Since some timestamps are broken we need to reconstruct them by regex match groups
result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". " + match.group(4) + " " + match.group(5)
return result
return None
@classmethod
@@ -279,9 +278,11 @@ class RED_FAM_PARSER( RED_FAM ):
@returns str Timestamp, otherwise None
"""
result = cls.__timestamp_pat.findall( line )
if result:
return result[-1][0]
matches = cls.__timestamp_pat.findall( line )
if matches:
# Since some timestamps are broken we need to reconstruct them by regex match groups
result = matches[-1][0] + ", " + matches[-1][1] + ". " + matches[-1][2] + ". " + matches[-1][3] + " " + matches[-1][4]
return result
else:
return None