#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
#  red.py
#
#  Copyright 2016 GOLDERWEB – Jonathan Golder
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#
# NOTE(review): this file was added in the same change that moved
# reddiscparser.py to bots/reddiscparser.py, and its header originally
# still named reddiscparser.py - presumably it was copied from there.
# Confirm which of the two files is the authoritative one.
"""
Script to parse all reddisc pages in configured categories
"""

import os
import sys
import re

import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot

import jogobot

from lib import redpage
from lib import redfam


class DiscussionParserBot(
        # CurrentPageBot,  # via next two sets 'current_page' on each treat()
        ExistingPageBot,  # CurrentPageBot only treats existing pages
        NoRedirectPageBot):  # class which only treats non-redirects
    """
    Bot class which initialises the parsing process of redundancy
    discussions.

    Each page delivered by the generator is filtered by title (exclude list
    and include pattern from ``jogobot.config["redundances"]``), wrapped in
    a ``redpage.RedPage`` and, if that object reports parsing is needed,
    every section it yields is handed to ``redfam.RedFamParser.parser``.
    """

    # RegEx to filter wrong pages; compiled once when the class is created,
    # so jogobot.config must already be available at import time
    onlyinclude_re = re.compile(
        jogobot.config["redundances"]["reddiscs_onlyinclude_re"])

    def __init__(self, generator):
        """
        Constructor

        Parameters:
            @param generator: The page generator that determines on which
                              pages to work.
            @type generator: generator.
        """
        super().__init__(generator=generator)

    def run(self):
        """
        Controls the overall parsing process, using the super class for
        switching between pages.

        Exists so that work can be done after all pages were treated: when
        the super class ``run()`` returns without raising, the RedPage db
        write cache is flushed. Any exception propagates unchanged and the
        flush is skipped.
        """
        super().run()

        # Only reached if the run above did not raise
        redpage.RedPage.flush_db_cache()

    def treat_page(self):
        """
        Handles work on the current page.

        Pages whose title is listed in the ``redpage_exclude`` config entry
        or does not match ``onlyinclude_re`` are skipped silently.
        """
        title = self.current_page.title()

        # Short circuit excluded pages
        if title in jogobot.config["redundances"]["redpage_exclude"]:
            return

        # Exclude pages which do not match the include pattern
        if not self.onlyinclude_re.search(title):
            return

        # Initiate RedPage object
        red_page = redpage.RedPage(self.current_page)

        # Nothing to do if the page does not need (re-)parsing
        if not red_page.is_parsing_needed():
            return

        # Count families for failure analysis
        fam_counter = 0

        # Iterate over returned generator with redfam sections
        for fam in red_page.parse():

            # Run RedFamParser on section text
            redfam.RedFamParser.parser(
                fam, red_page.page, red_page.is_archive())

            fam_counter += 1

        # The loop never breaks, so this point is reached exactly when the
        # whole page was parsed without an exception.
        if fam_counter:
            # At least one family parsed: flush db write cache
            redfam.RedFamParser.flush_db_cache()
            jogobot.output("Page [[{reddisc}]] parsed".format(
                reddisc=red_page.page.title()))
        else:
            # The colour tag is concatenated instead of being part of the
            # format string, since "{red}" must not be seen by str.format()
            jogobot.output(
                "\03{red}" + "Page [[{reddisc}]], ".format(
                    reddisc=red_page.page.title()) +
                "containing no redfam, parsed!",
                "WARNING")


def apply_conf_cat_generators(genFactory):
    """
    Builds generators for the categories which are read from
    ``jogobot.config["redundances"]["redpage_cats"]``.

    Parameters:
        @param genFactory: The GeneratorFactory to which the built
                           generators should be added.
        @type genFactory: pagegenerators.GeneratorFactory
    """
    # Create generators for configured categories
    for category in jogobot.config["redundances"]["redpage_cats"]:
        cgen = genFactory.getCategoryGen(
            category, gen_func=pagegenerators.CategorizedPageGenerator)

        # If there is one, append to genFactory
        if cgen:
            genFactory.gens.append(cgen)


def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    If the command line does not select any pages, the categories
    configured in jogobot.config are used instead; if there is still no
    generator, the pywikibot help is shown.

    @param args: command line arguments
    @type args: list of unicode
    """

    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # Get the jogobot-task_slug (basename of current file without extension)
    task_slug = os.path.splitext(os.path.basename(__file__))[0]

    # Before run, we need to check whether we are currently active or not
    try:
        # Will throw Exception if disabled/blocked
        # NOTE(review): the check is currently commented out, so the two
        # handlers below can never trigger - confirm whether intentional.
        # jogobot.is_active( task_slug )
        pass

    except jogobot.jogobot.Blocked as exc:
        jogobot.output("\03{lightpurple} %s (%s)" % (exc, type(exc)),
                       "CRITICAL")
        return

    except jogobot.jogobot.Disabled as exc:
        jogobot.output("\03{red} %s (%s)" % (exc, type(exc)),
                       "ERROR")
        return

    # Bot/Task is active

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    gen_factory = pagegenerators.GeneratorFactory()

    # Parse command line arguments
    for arg in local_args:
        if arg.startswith("-always"):
            # Recognised so it is not passed to the factory, but it has
            # no effect at the moment
            continue
        gen_factory.handleArg(arg)

    # Check whether there are generators waiting for factoring, if not
    # use configured categories
    if not gen_factory.gens:
        apply_conf_cat_generators(gen_factory)

    # Create combined generator (union of all generators)
    gen = gen_factory.getCombinedGenerator()

    if not gen:
        pywikibot.showHelp()
        return

    # Log beginning of parsing
    jogobot.output("{task_slug} invoked".format(task_slug=task_slug))

    # The preloading generator is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = pagegenerators.PreloadingGenerator(gen)
    DiscussionParserBot(gen).run()


if __name__ == "__main__":
    main()