Browse Source

Prepare new structure to use subtasks

To have only one entry point for the bot, we want a single file
(red.py) which calls the specific task class from the bots dir with a
standardized call.

Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82]
develop
Jonathan Golder 8 years ago
parent
commit
177a8f920f
  1. 0
      bots/reddiscparser.py
  2. 230
      red.py

0
reddiscparser.py → bots/reddiscparser.py

230
red.py

@ -0,0 +1,230 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# reddiscparser.py
#
# Copyright 2016 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Script to parse all reddisc pages in configured categories
"""
import os
import sys
import re
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
import jogobot
from lib import redpage
from lib import redfam
class DiscussionParserBot(
        # CurrentPageBot,  # via next two sets 'current_page' on each treat()
        ExistingPageBot,  # CurrentPageBot only treats existing pages
        NoRedirectPageBot):  # class which only treats non-redirects
    """
    Bot class which initialises the parsing process of Redundancy Discussions

    Each page delivered by the generator is handed to treat_page(), which
    filters it against the configured exclude list and include pattern
    before parsing it.
    """

    # RegEx to filter wrong pages (pattern is read from jogobot.config once,
    # when the class is created)
    onlyinclude_re = re.compile(
        jogobot.config["redundances"]["reddiscs_onlyinclude_re"])

    def __init__(self, generator):
        """
        Constructor

        Parameters:
            @param generator: The page generator that determines on which
                              pages to work.
            @type generator: generator.
        """
        super().__init__(generator=generator)

    def run(self):
        """
        Controls the overall parsing process, using super class for page switch

        Needed to do things before/after treating pages is done
        """
        # An exception raised by the super class propagates unchanged, so the
        # flush below is only reached when super().run() returned normally.
        super().run()

        # If successfully parsed all pages in cat, flush db write cache
        redpage.RedPage.flush_db_cache()

    def treat_page(self):
        """
        Handles work on current page

        Pages listed in the configured exclude list, pages whose title does
        not match onlyinclude_re and pages for which RedPage reports that no
        parsing is needed are skipped silently.
        """
        title = self.current_page.title()

        # Short circuit excluded pages
        if title in jogobot.config["redundances"]["redpage_exclude"]:
            return

        # Exclude pages which do not match pattern
        if not type(self).onlyinclude_re.search(title):
            return

        # Initiate RedPage object
        red_page = redpage.RedPage(self.current_page)

        # Check whether parsing is needed
        if not red_page.is_parsing_needed():
            return

        # Count families for failure analysis
        fam_counter = 0

        # Iterate over returned generator with redfam sections
        for fam in red_page.parse():
            # Run RedFamParser on section text
            redfam.RedFamParser.parser(
                fam, red_page.page, red_page.is_archive())
            fam_counter += 1

        # The loop contains no break, so reaching this point means the whole
        # page was parsed without an exception.
        if fam_counter:
            # Flush db write cache
            redfam.RedFamParser.flush_db_cache()
            jogobot.output("Page [[{reddisc}]] parsed".format(
                reddisc=red_page.page.title()))
        else:
            # A page without any redfam is unexpected, so log it as warning
            jogobot.output(
                "\03{red}" + "Page [[{reddisc}]], ".format(
                    reddisc=red_page.page.title()) +
                "containing no redfam, parsed!",
                "WARNING")
def apply_conf_cat_generators(genFactory):
    """
    Registers one page generator per category configured in jogobot.config

    Parameters:
        @param genFactory: The GeneratorFactory which receives the generators
                           built for the configured categories.
        @type genFactory: pagegenerators.GeneratorFactory
    """
    configured_cats = jogobot.config["redundances"]["redpage_cats"]

    for cat_name in configured_cats:
        # Let the factory build the generator for this category
        category_gen = genFactory.getCategoryGen(
            cat_name, gen_func=pagegenerators.CategorizedPageGenerator)

        # Nothing was built for this category, so there is nothing to add
        if not category_gen:
            continue

        genFactory.gens.append(category_gen)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Arguments starting with "-always" are accepted but ignored; every other
    local argument is passed to the GeneratorFactory. If no generator results
    from the arguments, the categories configured in jogobot.config are used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # Get the jogobot-task_slug (basename of current file without ending)
    task_slug = os.path.splitext(os.path.basename(__file__))[0]

    # Before run, we need to check whether we are currently active or not
    try:
        # Will throw Exception if disabled/blocked
        # NOTE: the check is currently switched off, so the handlers below
        # cannot trigger until it is re-enabled.
        # jogobot.is_active( task_slug )
        pass

    except jogobot.jogobot.Blocked as err:
        jogobot.output(
            "\03{lightpurple} %s (%s)" % (err, type(err)), "CRITICAL")

    except jogobot.jogobot.Disabled as err:
        jogobot.output("\03{red} %s (%s)" % (err, type(err)), "ERROR")

    # Bot/Task is active
    else:
        # This factory is responsible for processing command line arguments
        # that are also used by other scripts and that determine on which
        # pages to work on.
        genFactory = pagegenerators.GeneratorFactory()

        # Parse command line arguments
        for arg in local_args:
            if arg.startswith("-always"):
                # Automode switch is recognised but has no effect here
                continue
            genFactory.handleArg(arg)

        # Check whether there are generators waiting for factoring, if not
        # use configured categories
        if not genFactory.gens:
            apply_conf_cat_generators(genFactory)

        # Create combined Generator (Union of all Generators)
        gen = genFactory.getCombinedGenerator()

        if gen:
            # Log beginning of parsing
            jogobot.output("{task_slug} invoked".format(task_slug=task_slug))

            # The preloading generator is responsible for downloading
            # multiple pages from the wiki simultaneously.
            gen = pagegenerators.PreloadingGenerator(gen)
            DiscussionParserBot(gen).run()
        else:
            pywikibot.showHelp()
# Only start the bot when this file is executed as a script
if __name__ == "__main__":
    main()
Loading…
Cancel
Save