Browse Source

Reflect structure changes in code

Since the bot class has been moved to a separate dir/file, we need to make
some changes to restore functionality

Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82]
develop
Jonathan Golder 7 years ago
parent
commit
b88efb6bdd
  1. 2
      bots/__init__.py
  2. 109
      bots/reddiscparser.py
  3. 98
      red.py

2
bots/__init__.py

@ -0,0 +1,2 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

109
bots/reddiscparser.py

@ -22,15 +22,13 @@
#
#
"""
Script to parse all reddisc pages in configured categories
Bot to parse all reddisc pages in given Generator or configured categories
"""
import os
import sys
import re
import pywikibot
from pywikibot import pagegenerators
import pywikibot # noqa
from pywikibot import pagegenerators # noqa
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
import jogobot
@ -127,104 +125,3 @@ class DiscussionParserBot(
reddisc=red_page.page.title() ) +
"containing no redfam, parsed!",
"WARNING" )
def apply_conf_cat_generators( genFactory ):
    """
    Add one page generator per configured category to the given factory.

    The category names are read from
    jogobot.config["redundances"]["redpage_cats"].

    Parameters:
    @param genFactory: The GeneratorFactory to which the built generators
                       should be added.
    @type  genFactory: pagegenerators.GeneratorFactory
    """
    configured_cats = jogobot.config["redundances"]["redpage_cats"]

    for cat_name in configured_cats:
        cat_gen = genFactory.getCategoryGen(
            cat_name, gen_func=pagegenerators.CategorizedPageGenerator )

        # Nothing to register if the factory returned no generator
        if not cat_gen:
            continue

        genFactory.gens.append( cat_gen )
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Pages to work on are taken from the generators requested on the command
    line; if none were requested, the configured categories are used instead.
    If no generator can be built at all, the pywikibot help is shown.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # Get the jogobot-task_slug (basename of current file without ending)
    task_slug = os.path.basename(__file__)[:-len(".py")]

    # Before run, we need to check whether we are currently active or not
    try:
        # Will throw Exception if disabled/blocked
        # NOTE: the check is currently commented out, so neither handler
        # below can trigger until it is re-enabled.
        # jogobot.is_active( task_slug )
        pass

    except jogobot.jogobot.Blocked as blocked:
        jogobot.output(
            "\03{lightpurple} %s (%s)" % ( blocked, type(blocked) ),
            "CRITICAL" )

    except jogobot.jogobot.Disabled as disabled:
        jogobot.output(
            "\03{red} %s (%s)" % ( disabled, type(disabled) ),
            "ERROR" )

    # Bot/Task is active
    else:
        # This factory is responsible for processing command line arguments
        # that are also used by other scripts and that determine on which
        # pages to work on.
        genFactory = pagegenerators.GeneratorFactory()

        # Parse command line arguments
        for arg in local_args:
            # "-always" is accepted but currently has no effect
            if arg.startswith("-always"):
                continue

            genFactory.handleArg(arg)

        # Check whether there are generators waiting for factoring, if not
        # use configured categories
        if not genFactory.gens:
            apply_conf_cat_generators( genFactory )

        # Create combined Generator (Union of all Generators)
        gen = genFactory.getCombinedGenerator()

        if gen:
            # Log beginning of parsing
            jogobot.output(
                "{task_slug} invoked".format(task_slug=task_slug) )

            # The preloading generator is responsible for downloading
            # multiple pages from the wiki simultaneously.
            gen = pagegenerators.PreloadingGenerator(gen)
            DiscussionParserBot( gen ).run()
        else:
            pywikibot.showHelp()
# Only invoke the bot when this file is executed as a script
if __name__ == "__main__":
    main()

98
red.py

@ -22,111 +22,17 @@
#
#
"""
Script to parse all reddisc pages in configured categories
Wrapper script to invoke all redundances bot tasks
"""
import os
import sys
import re
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
import jogobot
from lib import redpage
from lib import redfam
class DiscussionParserBot(
        # CurrentPageBot,  # via next two sets 'current_page' on each treat()
        ExistingPageBot,   # CurrentPageBot only treats existing pages
        NoRedirectPageBot ):  # class which only treats non-redirects
    """
    Bot class which initialises the parsing process of Redundancy Discussions
    """

    # RegEx to filter wrong pages
    onlyinclude_re = re.compile(
        jogobot.config["redundances"]["reddiscs_onlyinclude_re"] )

    def __init__( self, generator ):
        """
        Constructor

        Parameters:
        @param generator: The page generator that determines on which pages
                          to work.
        @type generator: generator.
        """
        super( DiscussionParserBot, self ).__init__(generator=generator)

    def run( self ):
        """
        Controls the overall parsing process, using super class for page
        switch

        Needed to do things before/after treating pages is done
        """
        super( DiscussionParserBot, self ).run()

        # Only reached if the super class run() raised nothing:
        # all pages were treated, so flush db write cache
        redpage.RedPage.flush_db_cache()

    def treat_page( self ):
        """
        Handles work on current page

        Excluded pages and pages whose title does not match the configured
        pattern are skipped. Every redfam section of a page that needs
        parsing is handed to the RedFamParser.
        """
        title = self.current_page.title()

        # Short circuit excluded pages
        if title in jogobot.config["redundances"]["redpage_exclude"]:
            return

        # Exclude pages which do not match pattern
        if not type(self).onlyinclude_re.search( title ):
            return

        # Initiate RedPage object
        red_page = redpage.RedPage( self.current_page )

        # Check whether parsing is needed
        if not red_page.is_parsing_needed():
            return

        # Count families for failure analysis
        fam_counter = 0

        # Iterate over returned generator with redfam sections
        for fam in red_page.parse():
            # Run RedFamParser on section text
            redfam.RedFamParser.parser( fam, red_page.page,
                                        red_page.is_archive() )
            fam_counter += 1

        # Whole page was parsed without an exception at this point
        if fam_counter:
            # Flush db write cache
            redfam.RedFamParser.flush_db_cache()
            jogobot.output( "Page [[{reddisc}]] parsed".format(
                reddisc=red_page.page.title() ) )
        else:
            jogobot.output(
                "\03{red}" + "Page [[{reddisc}]], ".format(
                    reddisc=red_page.page.title() ) +
                "containing no redfam, parsed!",
                "WARNING" )
from bots.reddiscparser import DiscussionParserBot
def apply_conf_cat_generators( genFactory ):

Loading…
Cancel
Save