diff --git a/bots/__init__.py b/bots/__init__.py new file mode 100644 index 0000000..9327388 --- /dev/null +++ b/bots/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index f9b2059..7f66a2f 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -22,15 +22,13 @@ # # """ -Script to parse all reddisc pages in configured categories +Bot to parse all reddisc pages in given Generator or configured categories """ -import os -import sys import re -import pywikibot -from pywikibot import pagegenerators +import pywikibot # noqa +from pywikibot import pagegenerators # noqa from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot @@ -127,104 +125,3 @@ class DiscussionParserBot( reddisc=red_page.page.title() ) + "containing no redfam, parsed!", "WARNING" ) - - -def apply_conf_cat_generators( genFactory ): - """ - Builds generators for categories which are read from jogobot.config - - Parameters: - @param genFactory: The GeneratorFactory to which the builded generators - should be added. - @type genFactory: pagegenerators.GeneratorFactory - """ - # Create Generators for configured Categories - for category in jogobot.config["redundances"]["redpage_cats"]: - cgen = genFactory.getCategoryGen( - category, gen_func=pagegenerators.CategorizedPageGenerator) - - # If there is one, append to genFactory - if cgen: - genFactory.gens.append(cgen) - - -def main(*args): - """ - Process command line arguments and invoke bot. - - If args is an empty list, sys.argv is used. - - @param args: command line arguments - @type args: list of unicode - """ - - # Process global arguments to determine desired site - local_args = pywikibot.handle_args(args) - - # Get the jogobot-task_slug (basename of current file without ending) - task_slug = os.path.basename(__file__)[:-len(".py")] - - # Before run, we need to check wether we are currently active or not - try: - # Will throw Exception if disabled/blocked - # jogobot.is_active( task_slug ) - pass - - except jogobot.jogobot.Blocked: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), - "CRITICAL" ) - - except jogobot.jogobot.Disabled: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{red} %s (%s)" % (value, type ), - "ERROR" ) - - # Bot/Task is active - else: - - # This factory is responsible for processing command line arguments - # that are also used by other scripts and that determine on which pages - # to work on. - genFactory = pagegenerators.GeneratorFactory() - # The generator gives the pages that should be worked upon. - gen = None - - # If always is True, bot won't ask for confirmation of edit (automode) - # always = False - - # If force_reload is True, bot will always parse Countrylist regardless - # if parsing is needed or not - # force_reload = False - - # Parse command line arguments - for arg in local_args: - if arg.startswith("-always"): - # always = True - pass - else: - genFactory.handleArg(arg) - - if not gen: - - # Check wether there are generators waiting for factoring, if not - # use configured categories - if not genFactory.gens: - apply_conf_cat_generators( genFactory ) - - # Create combined Generator (Union of all Generators) - gen = genFactory.getCombinedGenerator() - - if gen: - # Log beginning of parsing - jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) ) - - # The preloading generator is responsible for downloading multiple - # pages from the wiki simultaneously. - gen = pagegenerators.PreloadingGenerator(gen) - DiscussionParserBot( gen ).run() - else: - pywikibot.showHelp() - -if( __name__ == "__main__" ): - main() diff --git a/red.py b/red.py index f9b2059..bee76b8 100644 --- a/red.py +++ b/red.py @@ -22,111 +22,17 @@ # # """ -Script to parse all reddisc pages in configured categories +Wrapper script to invoke all redundances bot tasks """ import os import sys -import re import pywikibot from pywikibot import pagegenerators -from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot - -from lib import redpage -from lib import redfam - - -class DiscussionParserBot( - # CurrentPageBot, # via next two sets 'current_page' on each treat() - ExistingPageBot, # CurrentPageBot only treats existing pages - NoRedirectPageBot ): # class which only treats non-redirects - """ - Botclass witch initialises the parsing process of Redundancy Discussions - """ - - # RegEx to filter wrong pages - onlyinclude_re = re.compile( - jogobot.config["redundances"]["reddiscs_onlyinclude_re"] ) - - def __init__( self, generator ): - """ - Constructor - - Parameters: - @param generator: The page generator that determines on which pages - to work. - @type generator: generator. - """ - super( DiscussionParserBot, self ).__init__(generator=generator) - - def run( self ): - """ - Controls the overal parsing process, using super class for page switch - - Needed to do things before/after treating pages is done - """ - try: - - super( DiscussionParserBot, self ).run() - - except: - raise - - else: - - # If successfully parsed all pages in cat, flush db write cache - redpage.RedPage.flush_db_cache() - - def treat_page( self ): - """ - Handles work on current page - """ - - # Short circuit excluded pages - if self.current_page.title() in ( - jogobot.config["redundances"]["redpage_exclude"] ): - - return - - # Exclude pages which does not match pattern - if not type(self).onlyinclude_re.search( self.current_page.title() ): - - return - - # Initiate RedPage object - red_page = redpage.RedPage( self.current_page ) - - # Check whether parsing is needed - if red_page.is_parsing_needed(): - - # Count families for failure analysis - fam_counter = 0 - - # Iterate over returned generator with redfam sections - for fam in red_page.parse(): - - # Run RedFamParser on section text - redfam.RedFamParser.parser( fam, red_page.page, - red_page.is_archive() ) - - fam_counter += 1 - - else: - # If successfully parsed whole page, flush - # db write cache - if( fam_counter ): - redfam.RedFamParser.flush_db_cache() - jogobot.output( "Page [[{reddisc}]] parsed".format( - reddisc=red_page.page.title() ) ) - else: - jogobot.output( - "\03{red}" + "Page [[{reddisc}]], ".format( - reddisc=red_page.page.title() ) + - "containing no redfam, parsed!", - "WARNING" ) +from bots.reddiscparser import DiscussionParserBot def apply_conf_cat_generators( genFactory ):