#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# redfam.py
#
# Copyright 2015 GOLDERWEB – Jonathan Golder
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides classes for working with RedFams
"""

import hashlib
import locale
import re
from datetime import datetime

import mwparserfromhell as mwparser  # noqa
import pywikibot  # noqa
from pywikibot.tools import deprecated  # noqa

import jogobot
from lib.mysqlred import MysqlRedFam


class RedFam:
    """
    Basic class for RedFams, containing the basic data structure
    """

    def __init__( self, articlesList, beginning, ending=None,
                  red_page_id=None, status=0, fam_hash=None, heading=None ):
        """
        Generates a new RedFam object

        @param articlesList list List of articles
        @param beginning datetime Beginning date
        @param ending datetime Ending date
        @param red_page_id int MW pageid of containing RedPage
        @param status int Status of RedFam
        @param fam_hash str SHA1 hash of articlesList
        @param heading str Original heading of RedFam (Link)
        """

        # Database interface
        self._mysql = MysqlRedFam( fam_hash )

        # Initial attribute values
        self._articlesList = articlesList
        self._beginning = beginning
        self._ending = ending
        self._red_page_id = red_page_id
        self._status = status
        self._fam_hash = fam_hash
        self._heading = heading

        # Calculates the sha1 hash over self._articlesList to
        # rediscover known redundance families
        self.calc_fam_hash()

    def __repr__( self ):
        """
        Returns representation str of RedFam object

        @returns str repr() string
        """
        __repr = "RedFam( " + \
                 "articlesList=" + repr( self._articlesList ) + \
                 ", heading=" + repr( self._heading ) + \
                 ", beginning=" + repr( self._beginning ) + \
                 ", ending=" + repr( self._ending ) + \
                 ", red_page_id=" + repr( self._red_page_id ) + \
                 ", status=" + repr( self._status ) + \
                 ", fam_hash=" + repr( self._fam_hash ) + \
                 " )"

        return __repr

    def calc_fam_hash( self ):
        """
        Calculates the SHA-1 hash for the articlesList of redundance family
        and stores it in self._fam_hash.

        Since we don't need security SHA-1 is just fine.

        Only the first 8 articles are hashed, matching the hard limit
        enforced in RedFamParser.heading_parser().

        @raises RedFamHashError if a fam_hash was already set but does not
                                match the calculated one
        """
        h = hashlib.sha1()
        h.update( str( self._articlesList[:8] ).encode('utf-8') )

        if self._fam_hash and h.hexdigest() != self._fam_hash:
            raise RedFamHashError( self._fam_hash, h.hexdigest() )
        elif self._fam_hash:
            # Hash already known and verified, nothing to do
            return
        else:
            self._fam_hash = h.hexdigest()

    def changed( self ):
        """
        Checks wether anything has changed and maybe triggers db update
        """

        # On archived red_fams do not delete possibly existing ending
        if( not self._ending and self._status > 1 and
                self._mysql.data[ 'ending' ] ):

            self._ending = self._mysql.data[ 'ending' ]

        # Since status change means something has changed, update database
        if( self._status != self._mysql.data[ 'status' ] or
                self._beginning != self._mysql.data[ 'beginning' ] or
                self._ending != self._mysql.data[ 'ending' ] or
                self._red_page_id != self._mysql.data[ 'red_page_id' ] or
                self._heading != self._mysql.data[ 'heading' ]):

            self._mysql.update_fam( self._red_page_id, self._heading,
                                    self._beginning, self._ending,
                                    self._status )

    @classmethod
    def flush_db_cache( cls ):
        """
        Calls flush method of Mysql Interface class
        """
        MysqlRedFam.flush()


class RedFamParser( RedFam ):
    """
    Provides an interface to RedFam for adding/updating redundance families
    while parsing redundance pages
    """

    # Define the timestamp format
    __timestamp_format = jogobot.config['redundances']['timestamp_format']

    # Define section heading re.pattern
    __sectionhead_pat = re.compile( r"^(.*\[\[.+\]\].*\[\[.+\]\].*)" )

    # Define timestamp re.pattern
    __timestamp_pat = re.compile(
        jogobot.config['redundances']['timestamp_regex'] )

    # Textpattern for recognition of done-notices
    __done_notice = ":Archivierung dieses Abschnittes \
wurde gewünscht von:"
    __done_notice2 = "{{Erledigt|"

    def __init__( self, heading, red_page, red_page_archive,
                  beginning, ending=None ):
        """
        Creates a RedFam object based on data collected while parsing
        red_pages combined with possibly former known data from db

        @param heading          str  Wikitext heading of section
        @param red_page         page Pywikibot.page object
        @param red_page_archive bool Is red_page an archive
        @param beginning        datetime Timestamp of beginning
                                str as strptime parseable string
        @param ending           datetime Timestamp of ending
                                str strptime parseable string
        """

        # Set object attributes:
        self._red_page_id = red_page._pageid
        self._red_page_archive = red_page_archive
        self._fam_hash = None

        # Method self.add_beginning sets self._beginning directly
        self.add_beginning( beginning )

        # Method self.add_ending sets self._ending directly
        if( ending ):
            self.add_ending( ending )
        else:
            # If no ending was provided set to None
            self._ending = None

        self._status = None

        # Parse the provided heading of redundance section
        # to set self._articlesList
        self.heading_parser( heading )

        # Calculates the sha1 hash over self._articlesList to
        # rediscover known redundance families
        self.calc_fam_hash()

        # Open database connection, ask for data if existing,
        # otherwise create entry
        self.__handle_db()

        # Check status changes
        self.status()

        # Triggers db update if anything changed
        self.changed()

    def __handle_db( self ):
        """
        Handles opening of db connection
        """

        # We need a connection to our mysqldb
        self._mysql = MysqlRedFam( )
        self._mysql.get_fam( self._fam_hash )

        if not self._mysql.data:
            self._mysql.add_fam( self._articlesList, self._heading,
                                 self._red_page_id, self._beginning,
                                 self._ending )

    def heading_parser( self, heading ):
        """
        Parses given red_fam_heading string and saves articles list

        @param heading Heading of RedFam-Section
        @type heading  wikicode or mwparser-parseable
        """

        # Save heading as string
        self._heading = str( heading )

        # Parse string heading with mwparse again everytime
        # In some cases the given wikicode is broken due to syntax errors
        # (Task FS#77)
        heading = mwparser.parse( self._heading )

        # Save destinations of wikilinks in headings
        self._articlesList = [ str( link.title ) for link
                               in heading.ifilter_wikilinks() ]

        # Catch sections with more than 8 articles, print error
        if len( self._articlesList ) > 8:
            # For representation in output we need to know the fam hash
            self.calc_fam_hash()

            jogobot.output(
                ( "\03{{lightred}}" +
                  "Maximum number of articles in red_fam exceeded, " +
                  "maximum number is 8, {number:d} were given \n {repress}"
                  ).format( datetime=datetime.now().strftime(
                            "%Y-%m-%d %H:%M:%S" ),
                            number=len( self._articlesList ),
                            repress=repr( self ) ),
                "WARNING" )

            # Only save the first 8 articles
            self._articlesList = self._articlesList[:8]

    def add_beginning( self, beginning ):
        """
        Adds the beginning date of a redundance discussion to the object

        @param beginning datetime Beginning date
        """
        self._beginning = self.__datetime( beginning )

    def add_ending( self, ending ):
        """
        Adds the ending date of a redundance discussion to the object.

        @param ending datetime Ending date
        """
        self._ending = self.__datetime( ending )

    def __datetime( self, timestamp ):
        """
        Decides whether given timestamp is a parseable string or a
        datetime object and returns a datetime object in both cases

        @param timestamp datetime Datetime object
                         str      Parseable string with timestamp

        @returns datetime Datetime object
        """
        # Make sure locale is set to 'de_DE.UTF-8' to prevent problems
        # with wrong month abreviations in strptime
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

        if( isinstance( timestamp, datetime ) ):
            return timestamp
        else:
            result = datetime.strptime( timestamp,
                                        type( self ).__timestamp_format )
            return result

    def status( self ):
        """
        Handles detection of correct status

        There are three possible stati:
            - 0 Discussion running --> no ending, page is not an archive
            - 1 Discussion over --> ending present, page is not an archive
            - 2 Discussion archived --> ending (normaly) present, page is
              archive
            - 3 and greater status was set by worker script, do not change it
        """
        # Do not change stati set by worker script etc.
        # NOTE(review): assumes self._mysql.data['status'] is an int here;
        # a None value would raise TypeError on the comparison — verify
        # what add_fam() stores for new families
        if not self._mysql.data['status'] > 2:
            # No ending, discussion is running:
            # Sometimes archived discussions also have no detectable ending
            if not self._ending and not self._red_page_archive:
                self._status = 0
            else:
                if not self._red_page_archive:
                    self._status = 1
                else:
                    self._status = 2
        else:
            self._status = self._mysql.data[ 'status' ]

    @classmethod
    def is_section_redfam_cb( cls, heading ):
        """
        Used as callback for wikicode.get_sections in redpage.parse
        to select sections which are redfams
        """
        # Because of strange behavior in some cases, parse heading again
        # (Task FS#77)
        heading = mwparser.parse( str( heading ) )

        # Make sure we have min. two wikilinks in heading to assume a redfam
        if len( heading.filter_wikilinks() ) >= 2:
            return True
        else:
            return False

    @classmethod
    def parser( cls, text, page, isarchive=False ):
        """
        Handles parsing of redfam section

        @param text Text of RedFam-Section
        @type text  wikicode or mwparser-parseable
        @param page Pywikibot page object of containing reddisc
        @param isarchive bool Whether the page is an archive
        """

        # Parse heading with mwparse if needed
        if not isinstance( text, mwparser.wikicode.Wikicode ):
            text = mwparser.parse( text )

        # Extract heading text
        heading = next( text.ifilter_headings() ).title

        # Extract beginnig and maybe ending
        (beginning, ending) = RedFamParser.extract_dates( text, isarchive )

        # Missing beginning (Task: FS#76)
        # Use first day of month of reddisc
        if not beginning:
            match = re.search(
                jogobot.config["redundances"]["reddiscs_onlyinclude_re"],
                page.title() )
            if match:
                beginning = datetime.strptime(
                    "01. {month} {year}".format( month=match.group(1),
                                                 year=match.group(2)),
                    "%d. %B %Y" )

        # Create the RedFam object
        RedFamParser( heading, page, isarchive, beginning, ending )

    @classmethod
    def extract_dates( cls, text, isarchive=False ):
        """
        Returns tuple of the first and maybe last timestamp of a section.
        Last timestamp is only returned if there is a done notice or param
        *isarchiv* is set to 'True'

        @param text      Text to search in
        @type text       Any Type castable to str
        @param isarchive If true skip searching done notice (on archivepages)
        @type isarchive  bool

        @returns Timestamps, otherwise None
        @returntype tuple of strs
        """

        # Match all timestamps
        matches = cls.__timestamp_pat.findall( str( text ) )
        if matches:

            # First one is beginning
            # Since some timestamps are broken we need to reconstruct them
            # by regex match groups
            beginning = ( matches[0][0] + ", " + matches[0][1] + ". " +
                          matches[0][2] + ". " + matches[0][3] )

            # Last one maybe is ending
            # Done notice format 1
            # Done notice format 2
            # Or on archivepages
            if ( cls.__done_notice in text or
                 cls.__done_notice2 in text or
                 isarchive ):

                ending = ( matches[-1][0] + ", " + matches[-1][1] + ". " +
                           matches[-1][2] + ". " + matches[-1][3] )
            else:
                ending = None

        # Missing dates (Task: FS#76)
        else:
            beginning = None
            ending = None

        return (beginning, ending)


class RedFamWorker( RedFam ):
    """
    Handles working with redundance families stored in database
    where discussion is finished
    """
    def __init__( self, mysql_data ):

        # Collect articles from numbered 'articleN' columns, skipping
        # empty slots
        articlesList = []
        for key in sorted( mysql_data.keys() ):
            if 'article' in key and mysql_data[ key ]:
                articlesList.append( mysql_data[ key ] )

        super().__init__( articlesList, mysql_data[ 'beginning' ],
                          mysql_data[ 'ending' ], mysql_data[ 'red_page_id' ],
                          mysql_data[ 'status' ], mysql_data[ 'fam_hash' ],
                          mysql_data[ 'heading' ] )

        self._mysql.data = mysql_data

        # Get related RedPage-Information
        self.redpageid = mysql_data[ 'page_id' ]
        self.redpagetitle = mysql_data[ 'page_title' ]

        # Make sure locale is set to 'de_DE.UTF-8' to prevent problems
        # with wrong month abreviations in strptime
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

    def update_status( self ):
        """
        Sets status to 3 when worked on
        """
        self._status = 3

    def get_disc_link( self ):
        """
        Constructs and returns the link to Redundancy discussion

        @returns Link to discussion
        @rtype   str
        """

        # We need to Replace Links with their linktext
        anchor_code = mwparser.parse( self._mysql.data[ 'heading' ].strip() )
        for link in anchor_code.ifilter_wikilinks():
            if link.text:
                text = link.text
            else:
                text = link.title
            anchor_code.replace( link, text )

        # Whitespace is replaced with underscores
        anchor_code.replace( " ", "_" )

        # We try it with out any more parsing as mw will do while parsing page
        return ( self.redpagetitle + "#" + str(anchor_code).strip() )

    @classmethod
    def list_by_status( cls, status ):
        """
        Lists red_fams stored in db by given status
        """

        mysql = MysqlRedFam()
        for fam in mysql.get_by_status( status ):
            try:
                print( cls( fam ) )
            except RedFamHashError:
                print(fam)
                raise

    @classmethod
    def gen_by_status_and_ending( cls, status, ending ):
        """
        Yield red_fams stored in db by given status which have an ending
        after given one
        """

        mysql = MysqlRedFam()
        for fam in mysql.get_by_status_and_ending( status, ending ):
            try:
                yield cls( fam )
            except RedFamHashError:
                print(fam)
                raise


class RedFamError( Exception ):
    """
    Base class for all Errors of RedFam-Module
    """

    def __init__( self, message=None ):
        """
        Handles Instantiation of RedFamError's
        """
        if not message:
            self.message = "An Error occured while executing a RedFam action"
        else:
            self.message = message

    def __str__( self ):
        """
        Output of error message
        """
        return self.message


class RedFamHashError( RedFamError ):
    """
    Raised when given RedFamHash does not match with calculated
    """

    def __init__( self, givenHash, calculatedHash ):
        # Fixed: closing parenthesis after '{calc}' was missing, producing
        # an unbalanced error message
        message = "Given fam_hash ('{given}') does not match with \
calculated ('{calc}')".format( given=givenHash, calc=calculatedHash )
        super().__init__( message )


class RedFamHeadingError ( RedFamError ):
    """
    Raised when given RedFamHeading does not match __sectionhead_pat Regex
    """

    def __init__( self, heading ):
        message = "Error while trying to parse section heading. Given heading \
'{heading}' does not match RegEx".format( heading=heading )
        super().__init__( message )