Files
jogobot-red/redfam.py

365 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# redfam.py
#
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides classes for working with RedFams
"""
import hashlib
import locale
import re
from datetime import datetime
import pywikibot
from .mysqlred import MysqlRedFam
class RedFam:
    """
    Basic class for RedFams, containing the basic data structure.

    Instances carry the attributes ``_fam_hash``, ``_articlesList``,
    ``_red_page_id``, ``_beginning``, ``_ending`` and ``_status``.
    Subclasses which do not call this constructor have to set the
    attributes read by ``__repr__`` themselves.
    """

    def __init__( self, fam_hash=None, articlesList=None, red_page_id=None,
                  beginning=None, ending=None, status=0 ):
        """
        Generates a new RedFam object

        The arguments are stored unchanged on the instance; no validation
        or conversion is done here.

        @param fam_hash      str       Hash identifying the family
        @param articlesList  list      List of articles
        @param red_page_id   int       Id of the page containing the family
        @param beginning     datetime  Beginning date
        @param ending        datetime  Ending date
        @param status        int       Status value, defaults to 0
        """
        # Previously the arguments were dropped, which made repr() fail with
        # an AttributeError on every plain RedFam instance
        self._fam_hash = fam_hash
        self._articlesList = articlesList
        self._red_page_id = red_page_id
        self._beginning = beginning
        self._ending = ending
        self._status = status

    def __repr__( self ):
        """
        Returns a constructor-like string representation

        Beginning and ending are only included if they are set (truthy),
        articles list and status are always included.

        @returns str  Representation of the object
        """
        parts = [ repr( self._articlesList ) ]

        if self._beginning:
            parts.append( "beginning=" + repr( self._beginning ) )

        if self._ending:
            parts.append( "ending=" + repr( self._ending ) )

        parts.append( "status=" + repr( self._status ) )

        return "RedFam( " + ", ".join( parts ) + " )"
class RedFamParser( RedFam ):
    """
    Provides an interface to RedFam for adding/updating redundance families
    while parsing redundance pages
    """

    # strptime format matching the timestamps rebuilt by the is_* helpers
    __timestamp_format = "%H:%M, %d. %b. %Y"

    # Section headings containing at least two wikilinks
    __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" )

    # Signature timestamps with German month abbreviations
    __timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? (\d{4})" )  # noqa

    # Text fragments which mark a line as done-notice
    __done_notice = ( ":<small>Archivierung dieses Abschnittes "
                      "wurde gewünscht von:" )
    __done_notice2 = "{{Erledigt|"

    def __init__( self, heading, red_page_id, red_page_archive,
                  beginning, ending=None ):
        """
        Creates a RedFam object based on data collected while parsing
        red_pages, combined with possibly already known data from db

        @param heading           str       Wikitext heading of section
        @param red_page_id       int       MediaWiki page_id
        @param red_page_archive  bool      Is red_page an archive
        @param beginning         datetime  Timestamp of beginning
                                 str       as strptime parseable string
        @param ending            datetime  Timestamp of ending
                                 str       as strptime parseable string
        """
        self._red_page_id = red_page_id
        self._red_page_archive = red_page_archive

        # add_beginning stores the converted value in self._beginning
        self.add_beginning( beginning )

        # Without a (truthy) ending the attribute stays None, otherwise
        # add_ending replaces it with the converted value
        self._ending = None
        if ending:
            self.add_ending( ending )

        # Real status is detected below, once db data is available
        self._status = None

        # Sets self._heading and self._articlesList
        self.heading_parser( heading )

        # Sets self._fam_hash, needed for looking up the family in db
        self.fam_hash()

        # Fetch the db entry or create it if the family is unknown
        self.__handle_db()

        # Detect status and write changes back to db if there are any
        self.status()
        self.changed()

    def __handle_db( self ):
        """
        Opens the db interface for this family and adds the family to db
        if the interface reports no data for its hash
        """
        self.__mysql = MysqlRedFam( self._fam_hash )

        # Falsy data is treated as "family not yet known"
        if self.__mysql.data:
            return

        self.__mysql.add_fam( self._articlesList, self._heading,
                              self._red_page_id, self._beginning,
                              self._ending )

    def heading_parser( self, heading ):
        """
        Parses the given section heading, saves the heading text and the
        list of linked articles

        Only the first 8 articles are kept, a warning is printed if the
        heading contains more.

        @param heading  str  Wikitext heading of section
        @raises ValueError   If heading is not a valid red_fam heading
        """
        match = self.__sectionhead_pat.search( heading )
        if not match:
            raise ValueError( "Heading is not valid" )

        # Heading text without the surrounding equal signs, needed for
        # generating section links later
        self._heading = match.group(2).lstrip()

        # The first group of each wikilink is the link destination
        wikilink_pat = re.compile( r"\[\[([^\[\]\|]*)(\]\]|\|)" )
        self._articlesList = [ link.group(1) for link
                               in wikilink_pat.finditer( self._heading ) ]

        if len( self._articlesList ) <= 8:
            return

        # Too many articles: report the complete family, then truncate
        message = ( "{datetime} \03{{lightred}}[WARNING] "
                    "Maximum number of articles in red_fam exceeded, "
                    "maximum number is 8, {number:d} were given\n"
                    "{repress}" )
        pywikibot.output( message.format(
            datetime=datetime.now().strftime( "%Y-%m-%d %H:%M:%S" ),
            number=len( self._articlesList ), repress=repr( self ) ) )

        self._articlesList = self._articlesList[:8]

    def fam_hash( self ):
        """
        Calculates the SHA-1 hash over the string representation of the
        articlesList and stores the hexadecimal digest in self._fam_hash.
        Since we don't need security SHA-1 is just fine.

        Nothing is returned, the result is only available via the attribute.
        """
        serialized = str( self._articlesList ).encode('utf-8')
        self._fam_hash = hashlib.sha1( serialized ).hexdigest()

    def add_beginning( self, beginning ):
        """
        Adds the beginning date of a redundance discussion to the object

        @param beginning  datetime  Beginning date
                          str       as strptime parseable string
        """
        converted = self.__datetime( beginning )
        self._beginning = converted

    def add_ending( self, ending ):
        """
        Adds the ending date of a redundance discussion to the object.

        @param ending  datetime  Ending date
                       str       as strptime parseable string
        """
        converted = self.__datetime( ending )
        self._ending = converted

    def __datetime( self, timestamp ):
        """
        Returns a datetime object for a given datetime object or
        parseable timestamp string

        NOTE: The locale is switched to 'de_DE.UTF-8' on every call, also
        for datetime objects. This changes the locale of the whole process
        and fails if that locale is not available on the system.

        @param timestamp  datetime  Datetime object
                          str       Parseable string with timestamp
        @returns datetime           Datetime object
        """
        # German locale is required for the month abbreviations in strptime
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

        if isinstance( timestamp, datetime ):
            return timestamp

        return datetime.strptime( timestamp,
                                  type( self ).__timestamp_format )

    def status( self ):
        """
        Handles detection of correct status

        Possible values:
        - 0 Discussion running --> no ending, page is not an archive
        - 1 Discussion over --> ending present, page is not an archive
        - 2 Discussion archived --> ending (normally) present, page is archive
        - 3 and greater status was set by worker script, do not change it
        """
        db_status = self.__mysql.data[ 'status' ]

        if db_status > 2:
            # Set by worker script etc., keep it
            self._status = db_status
        elif self._red_page_archive:
            # Archived discussions sometimes have no detectable ending,
            # therefore the ending is not checked here
            self._status = 2
        elif self._ending:
            self._status = 1
        else:
            self._status = 0

    def changed( self ):
        """
        Checks whether anything differs from the data in db and triggers
        a db update in this case
        """
        db = self.__mysql

        # On archived red_fams do not delete a possibly existing ending
        if( not self._ending and self._status > 1
                and db.data[ 'ending' ] ):
            self._ending = db.data[ 'ending' ]

        # Pairs of current value and name of the db field to compare with
        fields = ( ( self._status, 'status' ),
                   ( self._beginning, 'beginning' ),
                   ( self._ending, 'ending' ),
                   ( self._red_page_id, 'red_page_id' ),
                   ( self._heading, 'heading' ) )

        # One difference is enough to write everything back
        if any( value != db.data[ key ] for value, key in fields ):
            db.update_fam( self._red_page_id, self._heading,
                           self._beginning, self._ending,
                           self._status )

    @staticmethod
    def __join_timestamp( parts ):
        """
        Rebuilds a clean timestamp string from the four match groups
        (time, day, month, year) of the timestamp pattern

        @param parts  tuple  Match groups of the timestamp pattern
        @returns str         Timestamp like "12:34, 5. Jan. 2015"
        """
        time, day, month, year = parts
        return "{0}, {1}. {2}. {3}".format( time, day, month, year )

    @classmethod
    def is_sectionheading( cls, line ):
        """
        Checks whether given line is a red_fam section heading

        @param line  str  String to check
        @returns bool     Returns True if it is a section heading
        """
        return bool( cls.__sectionhead_pat.search( line ) )

    @classmethod
    def is_beginning( cls, line ):
        """
        Returns the first timestamp found in line, otherwise None

        @param line  str  String to search in
        @returns str      Timestamp, otherwise None
        """
        match = cls.__timestamp_pat.search( line )
        if not match:
            return None

        # Since some timestamps are broken they are rebuilt from the
        # regex match groups
        return cls.__join_timestamp( match.groups() )

    @classmethod
    def is_ending( cls, line ):
        """
        Returns the first timestamp of a line containing a done notice,
        otherwise None

        @param line  str  String to search in
        @returns str      Timestamp, otherwise None
        """
        if cls.__done_notice not in line and cls.__done_notice2 not in line:
            return None

        match = cls.__timestamp_pat.search( line )
        if not match:
            return None

        # Since some timestamps are broken they are rebuilt from the
        # regex match groups
        return cls.__join_timestamp( match.groups() )

    @classmethod
    def is_ending2( cls, line ):
        """
        Returns the last timestamp found in line, otherwise None

        @param line  str  String to search in
        @returns str      Timestamp, otherwise None
        """
        matches = cls.__timestamp_pat.findall( line )
        if not matches:
            return None

        # Since some timestamps are broken they are rebuilt from the
        # regex match groups
        return cls.__join_timestamp( matches[-1] )
class RedFamWorker( RedFam ):
    """
    Handles working with redundance families stored in database
    where discussion is finished

    Does not add or override anything yet, the complete behaviour is
    inherited from RedFam.
    """