jogobot-red/bots/reddiscparser.py


								#!/usr/bin/env python3

								# -*- coding: utf-8  -*-

								#

								#  reddiscparser.py

								#

								#  Copyright 2017 Jonathan Golder <jonathan@golderweb.de>

								#

								#  This program is free software; you can redistribute it and/or modify

								#  it under the terms of the GNU General Public License as published by

								#  the Free Software Foundation; either version 2 of the License, or

								#  (at your option) any later version.

								#

								#  This program is distributed in the hope that it will be useful,

								#  but WITHOUT ANY WARRANTY; without even the implied warranty of

								#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

								#  GNU General Public License for more details.

								#

								#  You should have received a copy of the GNU General Public License

								#  along with this program; if not, write to the Free Software

								#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,

								#  MA 02110-1301, USA.

								#

								#

								"""

								Bot to parse all reddisc pages in given Generator or configured categories

								"""


								import re


								import pywikibot  # noqa

								from pywikibot import pagegenerators  # noqa

								from pywikibot.bot import ExistingPageBot, NoRedirectPageBot


								import jogobot


								from lib.redpage import RedPageParser

								from lib.redfam import RedFamParser


								class DiscussionParserBot(
								        # CurrentPageBot,  # via next two sets 'current_page' on each treat()
								        ExistingPageBot,  # CurrentPageBot only treats existing pages
								        NoRedirectPageBot ):  # class which only treats non-redirects
								    """
								    Bot class which initialises the parsing process of redundancy discussions

								    The pages to work on are taken from the generators already present in
								    the given GeneratorFactory. If it holds none, generators for the
								    categories listed in jogobot.config["redundances"]["redpage_cats"] are
								    used instead.
								    """

								    # RegEx a page title has to match to be treated (filters wrong pages)
								    onlyinclude_re = re.compile(
								        jogobot.config["redundances"]["reddiscs_onlyinclude_re"] )

								    def __init__( self, genFactory, **kwargs ):
								        """
								        Constructor

								        Builds the page generator out of genFactory and hands it over to
								        the super class constructor.

								        Parameters:
								            @param  genFactory  GenFactory with parsed pagegenerator args to
								                                build generator
								            @type  genFactory  pagegenerators.GeneratorFactory
								            @param  **kwargs  Additional args (accepted, but currently not
								                              forwarded to the super class)
								            @type  iterable
								        """

								        # Copy needed args
								        self.genFactory = genFactory

								        # Build generator with genFactory (sets self.gen on success)
								        self.build_generator()

								        # Run super class init with the built generator
								        super().__init__( generator=self.gen )

								    def build_generator( self ):
								        """
								        Builds generator to work on, based on self.genFactory

								        On success the resulting generator is stored in self.gen, wrapped
								        into a PreloadingGenerator. If no generator could be built, the
								        pywikibot help is shown instead.
								        """
								        # Check whether there are generators waiting for factoring, if not
								        # use configured categories
								        if not self.genFactory.gens:
								            self.apply_conf_cat_generators()

								        # Create combined Generator (Union of all Generators)
								        gen = self.genFactory.getCombinedGenerator()

								        if gen:
								            # The preloading generator is responsible for downloading multiple
								            # pages from the wiki simultaneously.
								            self.gen = pagegenerators.PreloadingGenerator( gen )
								        else:
								            # NOTE(review): self.gen is not assigned on this path, so the
								            # read of self.gen in __init__ raises AttributeError right
								            # after the help was shown - confirm whether this is intended
								            pywikibot.showHelp()

								    def apply_conf_cat_generators( self ):
								        """
								        Builds generators for categories which are read from jogobot.config

								        For each category in jogobot.config["redundances"]["redpage_cats"]
								        a category generator is requested from self.genFactory and, if one
								        was returned, appended to self.genFactory.gens.
								        """
								        # Create Generators for configured Categories
								        for category in jogobot.config["redundances"]["redpage_cats"]:
								            gen = self.genFactory.getCategoryGen(
								                category, gen_func=pagegenerators.CategorizedPageGenerator )

								            # If there is one, append to genFactory
								            if gen:
								                self.genFactory.gens.append( gen )

								    def run( self ):
								        """
								        Controls the overall parsing process, using super class for page
								        switch

								        Needed to do things before/after treating pages is done
								        """
								        # Any exception raised while treating pages propagates unchanged
								        # and skips the flush below
								        super().run()

								        # If successfully parsed all pages in cat, flush db write cache
								        RedPageParser.flush_db_cache()

								    def treat_page( self ):
								        """
								        Handles work on current page

								        Pages listed in jogobot.config["redundances"]["redpage_exclude"]
								        and pages whose title does not match onlyinclude_re are skipped.
								        For all others the matching RedPageParser object is fetched (by
								        pageid) or created, and - if it reports that parsing is needed -
								        each section it yields is handed over to RedFamParser.
								        """
								        title = self.current_page.title()

								        # Short circuit excluded pages
								        if title in jogobot.config["redundances"]["redpage_exclude"]:
								            return

								        # Exclude pages which do not match pattern
								        if not type( self ).onlyinclude_re.search( title ):
								            return

								        # Initiate RedPage object, reusing an already stored one if present
								        redpage = RedPageParser.session.query( RedPageParser ).filter(
								            RedPageParser.pageid == self.current_page.pageid ).one_or_none()

								        if redpage:
								            redpage.update( self.current_page )
								        else:
								            redpage = RedPageParser( self.current_page )

								        # Check whether parsing is needed
								        if not redpage.is_parsing_needed():
								            return

								        # Count families for failure analysis
								        fam_counter = 0

								        # Iterate over returned generator with redfam sections
								        for fam in redpage.parse():
								            # Run RedFamParser on section text
								            RedFamParser.parser( fam, redpage, redpage.archive )
								            fam_counter += 1

								        # Only reached if the whole page was parsed without an exception
								        if fam_counter:
								            # Flush db write cache
								            RedFamParser.flush_db_cache()
								            jogobot.output( "Page [[{reddisc}]] parsed".format(
								                reddisc=redpage.page.title() ) )
								        else:
								            # A page without any redfam is unexpected, so warn about it
								            jogobot.output(
								                "\03{red}" + "Page [[{reddisc}]], ".format(
								                    reddisc=redpage.page.title() ) +
								                "containing no redfam, parsed!",
								                "WARNING" )