jogobot-red/redpage.py

#!/usr/bin/env python3
# -*- coding: utf-8  -*-
#
#  redpage.py
#
#  Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#
"""
Provides a class for handling redundance discussion pages and archives
"""

import pywikibot  # noqa

from .mysqlred import MysqlRedPage
from .redfam import RedFamParser


class RedPage:
    """
    Class for handling redundance discussion pages and archives

    On construction the stored meta data of the page is loaded (or created)
    through MysqlRedPage, the page is parsed for redundance families when
    necessary and the stored meta data is updated afterwards.
    """

    def __init__( self, page, archive=False ):
        """
        Generate a new RedPage object based on the given pywikibot page object

        @param    page       page    Pywikibot/MediaWiki page object for page
        @param    archive    bool    Treat the page as an archive, regardless
                                     of its title or content
        """

        # Save the pywikibot page object
        self.page = page
        self._archive = archive

        # Only a completed parse() run sets this to True
        self._parsed = None

        self.__handle_db( )

        self.is_page_changed()

        # Parse if the page differs from the stored record or if the record
        # has status 0 (the status __update_db writes for unparsed pages)
        if( self._changed or self.__mysql.data[ 'status' ] == 0 ):
            self.parse()

        self.__update_db()

    def __handle_db( self ):
        """
        Handles opening of db connection

        Creates the record for the page if the db does not know it yet
        """

        # We need a connection to our mysqldb
        self.__mysql = MysqlRedPage( self.page._pageid )

        # No data means the page is not known yet, so add it
        if not self.__mysql.data:
            self.__mysql.add_page( self.page.title(), self.page._revid )

    def is_page_changed( self ):
        """
        Check whether the page was changed since last run

        The result is stored in self._changed, nothing is returned
        """

        stored = self.__mysql.data

        # The status is taken from the stored record itself, so it can never
        # cause a difference; only page id, revision id and title count
        current = { 'page_id': self.page._pageid,
                    'rev_id': self.page._revid,
                    'page_title': self.page.title(),
                    'status': stored[ 'status' ] }

        self._changed = ( stored != current )

    def is_archive( self ):
        """
        Detects whether current page is an archive of discussions

        A page counts as archive if it was flagged as one on construction,
        if its title contains "/Archiv" or if its text contains an
        "Archiv" template

        @returns    bool    True if page is an archive, otherwise False
        """

        return bool( self._archive or
                     ( u"/Archiv" in self.page.title() ) or
                     ( "{{Archiv}}" in self.page.text ) or
                     ( "{{Archiv|" in self.page.text ) )

    def parse( self ):
        """
        Handles the parsing process

        Walks through the lines of the page text, detects the sections of
        redundance families and creates a RedFamParser object for each
        section which has a heading and a beginning date. Afterwards the
        db cache of RedFamParser is flushed and self._parsed is set to True.
        """

        # Since the page text is a string we need to split it in lines
        text_lines = self.page.text.split( "\n" )
        last_index = len( text_lines ) - 1

        # The page text is not modified while parsing, so it is sufficient
        # to detect the archive state once instead of once per family
        archive = self.is_archive()

        fam_heading = None
        beginning = None
        ending = None

        # Line index of the last detected family heading. None (and not 0)
        # marks "no family yet", otherwise a heading on the very first line
        # could not be distinguished from that state
        last_fam = None

        # Iterate over the lines of the page
        for i, line in enumerate( text_lines ):

            # Check whether we have a "Redundance-Family"-Section heading
            if RedFamParser.is_sectionheading( line ):

                # Save line number and heading of the detected family
                last_fam = i
                fam_heading = line

                # Defined (re)initialisation of dates
                beginning = None
                ending = None

            # Check whether we are currently in a "Redundance-Family"-Section
            # (the heading line itself does not belong to the section body)
            if last_fam is not None and i > last_fam:

                # Look for the beginning date first, after it was found
                # (in a former iteration) look for a done-notice
                if not beginning:
                    beginning = RedFamParser.is_beginning( line )
                elif not ending:
                    ending = RedFamParser.is_ending( line )

            # Detect end of red_fam section: end of file or the next line is
            # a new section heading (checking the index first prevents from
            # running out of index)
            section_ends = ( i == last_index or
                             RedFamParser.is_sectionheading(
                                 text_lines[ i + 1 ] ) )

            if not section_ends:
                continue

            # Sections without heading or beginning date are no families
            if not ( fam_heading and beginning ):
                continue

            # Maybe we can find an ending by other means if we have None yet
            # (No done notice on archive pages). Search backwards from the
            # last line of the section up to the line after the heading
            if not ending and archive:
                for j in range( i, last_fam, -1 ):
                    ending = RedFamParser.is_ending2( text_lines[ j ] )
                    if ending:
                        break

            # Create the RedFam object; the instance is not needed here
            # NOTE(review): presumably the constructor registers the family
            # for the db - confirm in redfam.py
            RedFamParser( fam_heading, self.page._pageid, archive,
                          beginning, ending )

        RedFamParser.flush_db_cache()
        self._parsed = True

    def __update_db( self ):
        """
        Updates the page meta data in mysql db

        Written status: 0 if the page was changed but not parsed, 2 if it is
        an archive, otherwise 1
        """

        if( self._changed and not self._parsed ):
            status = 0
        elif( self.is_archive() ):
            status = 2
        else:
            status = 1

        self.__mysql.update_page( self.page._revid, self.page.title(), status )

    @classmethod
    def flush_db_cache( cls ):
        """
        Calls flush method of Mysql Interface class
        """
        MysqlRedPage.flush()