#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib
import re
import locale
from datetime import datetime
import pywikibot
from mysql_red import MYSQL_RED_FAM
class RED_FAM:
    """
    Basic representation of a redundance family (a set of articles that are
    discussed as being redundant to each other).
    """

    def __init__(self, fam_hash=None, articlesList=None, red_page_id=None,
                 beginning=None, ending=None, status=0):
        """
        Generates a new RED_FAM object

        All arguments are stored unchanged on the instance, so that
        __repr__ works on a freshly constructed object.

        @param fam_hash      str       Hash identifying the redundance family
        @param articlesList  list      List of articles of redundance family
        @param red_page_id   int       page_id of the page holding the family
        @param beginning     datetime  Beginning date of redundance discussion
        @param ending        datetime  Ending date of redundance discussion
        @param status                  Status code of the family (default 0)
        """
        self._fam_hash = fam_hash
        self._articlesList = articlesList
        self._red_page_id = red_page_id
        self._beginning = beginning
        self._ending = ending
        self._status = status

        # TODO: normalising beginning/ending via add_beginning()/add_ending()
        #       and the database handling were only sketched here and are
        #       not active yet; the status codes are not finalised either.

    def __repr__(self):
        """
        Returns a constructor-like description of the family

        beginning and ending are only listed when they are set (truthy).

        @returns str  e.g. "RED_FAM( ['A', 'B'], status=0 )"
        """
        parts = [repr(self._articlesList)]

        if self._beginning:
            parts.append("beginning=" + repr(self._beginning))

        if self._ending:
            parts.append("ending=" + repr(self._ending))

        parts.append("status=" + repr(self._status))

        return "RED_FAM( " + ", ".join(parts) + " )"
class RED_FAM_PARSER(RED_FAM):
    """
    Provides an interface to RED_FAM for adding / updating redundance
    families while parsing redundance pages
    """

    # strptime format of the timestamps handed to add_beginning/add_ending;
    # it matches the strings rebuilt by is_beginning/is_ending/is_ending2
    __timestamp_format = "%H:%M, %d. %b. %Y"

    # Section heading: same number of "=" on both sides and at least two
    # wikilinks in between
    __sectionhead_pat = re.compile(r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1")

    # Timestamp; groups are (time, day, month abbreviation, year).
    # The separators after day and month are matched loosely (".", ".?")
    # so that slightly broken timestamps are still found.
    __timestamp_pat = re.compile(
        r"(\d{2}:\d{2}), (\d{1,2}). "
        r"(Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? (\d{4})")

    # Destination of a wikilink: text after "[[" up to "]]" or "|"
    __wikilink_pat = re.compile(r"\[\[([^\[\]\|]*)(\]\]|\|)")

    # Text markers for recognition of done-notices
    __done_notice = ":<small>Archivierung dieses Abschnittes wurde gewünscht von:"
    __done_notice2 = "{{Erledigt|"

    def __init__(self, heading, red_page_id, red_page_archive, beginning,
                 ending=None):
        """
        Creates a RED_FAM object based on data collected while parsing
        red_pages, combined with possibly already known data from the db

        @param heading           str       Wikitext heading of the section
        @param red_page_id       int       page_id of red_page containing
                                           the red_fam
        @param red_page_archive  bool      Is red_page an archive
        @param beginning         datetime  Beginning of the discussion
                                 str       or a string parseable with
                                           __timestamp_format
        @param ending            datetime  Ending of the discussion
                                 str       or a string parseable with
                                           __timestamp_format
        @raises ValueError  if heading is not a valid section heading
        """
        self._red_page_id = red_page_id
        self._red_page_archive = red_page_archive

        # add_beginning / add_ending set self._beginning / self._ending
        self.add_beginning(beginning)

        if ending:
            self.add_ending(ending)
        else:
            self._ending = None

        self._status = None

        # The following steps depend on each other, keep their order:
        # heading -> articles list -> hash -> db lookup -> status -> update
        self.heading_parser(heading)
        self.fam_hash()
        self.__handle_db()
        self.status()
        self.changed()

    def __handle_db(self):
        """
        Opens the db handle for this family and creates the db entry if
        the handle reports no data for self._fam_hash
        """
        self.__mysql = MYSQL_RED_FAM(self._fam_hash)

        # NOTE(review): status() and changed() read self.__mysql.data right
        # after this; presumably add_fam() populates it - confirm.
        if not self.__mysql.data:
            self.__mysql.add_fam(self._articlesList, self._heading,
                                 self._red_page_id, self._beginning,
                                 self._ending)

    def heading_parser(self, heading):
        """
        Parses given section heading and saves heading text and articles list

        Sets self._heading (heading text without the "=" markers, leading
        whitespace removed) and self._articlesList (wikilink destinations,
        truncated to the first 8).

        @param heading  str  Wikitext heading of redundance section
        @raises ValueError  if heading does not match the heading pattern
        """
        match = self.__sectionhead_pat.search(heading)
        if not match:
            raise ValueError("Heading is not valid")

        self._heading = match.group(2).lstrip()

        # findall yields (destination, terminator) tuples
        self._articlesList = [
            link[0] for link in self.__wikilink_pat.findall(self._heading)]

        # Sections with more than 8 articles: print a warning and truncate
        if len(self._articlesList) > 8:
            pywikibot.output(
                "{datetime} – \03{{lightred}}[WARNING] – Maximum number of "
                "articles in red_fam exceeded, maximum number is 8, "
                "{number:d} were given\n{repress}".format(
                    datetime=datetime.now().strftime("%Y-%m-%d %H:%M:%S (%Z)"),
                    number=len(self._articlesList),
                    repress=repr(self)))

            self._articlesList = self._articlesList[:8]

    def fam_hash(self):
        """
        Calculates the SHA-1 hash over str(self._articlesList) and stores it
        in self._fam_hash. Since we don't need security SHA-1 is just fine.

        @returns str  String with the hexadecimal hash digest
        """
        h = hashlib.sha1()
        h.update(str(self._articlesList).encode('utf-8'))
        self._fam_hash = h.hexdigest()

        return self._fam_hash

    def add_beginning(self, beginning):
        """
        Sets the beginning date of the redundance discussion

        @param beginning  datetime  Beginning date of the discussion
                          str       or a string in __timestamp_format
        """
        self._beginning = self.__datetime(beginning)

    def add_ending(self, ending):
        """
        Sets the ending date of the redundance discussion

        @param ending  datetime  Ending date of the discussion
                       str       or a string in __timestamp_format
        """
        self._ending = self.__datetime(ending)

    def __datetime(self, timestamp):
        """
        Returns a datetime object for a datetime object or a parseable string

        @param timestamp  datetime  Datetime object (returned unchanged)
                          str       Timestamp in format __timestamp_format
        @returns datetime  Datetime object
        @raises ValueError  (from strptime) if the string does not match
        """
        # The month abbreviations (%b) are German; note that this changes
        # the locale of the whole process on every call
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

        if isinstance(timestamp, datetime):
            return timestamp

        return datetime.strptime(timestamp, type(self).__timestamp_format)

    def status(self):
        """
        Handles detection of correct status and stores it in self._status

        Possible states:
        - 0 Discussion is running  --> no ending, page is not an archive
        - 1 Discussion is over     --> ending present, page is not an archive
        - 2 Discussion is archived --> page is an archive (archived
                                       discussions sometimes have no
                                       detectable ending)
        - 3 and greater            --> taken over unchanged from the db
        """
        stored = self.__mysql.data['status']

        if stored > 2:
            # Status from the db above 2 is never overwritten
            self._status = stored
        elif self._red_page_archive:
            self._status = 2
        elif self._ending:
            self._status = 1
        else:
            self._status = 0

    def changed(self):
        """
        Checks whether anything differs from the db data and, if so,
        triggers a db update
        """
        data = self.__mysql.data

        # On archived red_fams do not delete a possibly existing ending
        if not self._ending and self._status > 1 and data['ending']:
            self._ending = data['ending']

        if (self._status != data['status']
                or self._beginning != data['beginning']
                or self._ending != data['ending']
                or self._red_page_id != data['red_page_id']
                or self._heading != data['heading']):
            self.__mysql.update_fam(self._red_page_id, self._heading,
                                    self._beginning, self._ending,
                                    self._status)

    @staticmethod
    def __rebuild_timestamp(parts):
        """
        Rebuilds a clean timestamp string from the four groups of
        __timestamp_pat, since some timestamps on the pages are broken

        @param parts  tuple  (time, day, month abbreviation, year)
        @returns str  Timestamp in format __timestamp_format
        """
        return "{0}, {1}. {2}. {3}".format(*parts)

    @classmethod
    def is_sectionheading(cls, line):
        """
        Checks whether given line is a red_fam section heading

        @param line  str  String to check
        @returns bool  True if it is a section heading, otherwise False
        """
        return bool(cls.__sectionhead_pat.search(line))

    @classmethod
    def is_beginning(cls, line):
        """
        Returns the first timestamp found in line, otherwise None

        @param line  str  String to search in
        @returns str  Timestamp, otherwise None
        """
        match = cls.__timestamp_pat.search(line)
        if match:
            return cls.__rebuild_timestamp(match.groups())

        return None

    @classmethod
    def is_ending(cls, line):
        """
        Returns the first timestamp of a line containing a done-notice,
        otherwise None

        @param line  str  String to search in
        @returns str  Timestamp, otherwise None
        """
        if cls.__done_notice in line or cls.__done_notice2 in line:
            match = cls.__timestamp_pat.search(line)
            if match:
                return cls.__rebuild_timestamp(match.groups())

        return None

    @classmethod
    def is_ending2(cls, line):
        """
        Returns the last timestamp found in line, otherwise None

        @param line  str  String to search in
        @returns str  Timestamp, otherwise None
        """
        matches = cls.__timestamp_pat.findall(line)
        if matches:
            return cls.__rebuild_timestamp(matches[-1])

        return None
class RED_FAM_WORKER(RED_FAM):
    """
    Intended for working on redundance families stored in the database
    whose discussion is finished; currently adds nothing to RED_FAM.
    """