Browse Source
To have only one entry point for the bot, we want a single file (red.py) which calls the specific task class from the bots dir with a standardized call. Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] develop
Jonathan Golder
8 years ago
2 changed files with 230 additions and 0 deletions
@ -0,0 +1,230 @@ |
|||
#!/usr/bin/env python3 |
|||
# -*- coding: utf-8 -*- |
|||
# |
|||
# reddiscparser.py |
|||
# |
|||
# Copyright 2016 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de> |
|||
# |
|||
# This program is free software; you can redistribute it and/or modify |
|||
# it under the terms of the GNU General Public License as published by |
|||
# the Free Software Foundation; either version 2 of the License, or |
|||
# (at your option) any later version. |
|||
# |
|||
# This program is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
# GNU General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU General Public License |
|||
# along with this program; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, |
|||
# MA 02110-1301, USA. |
|||
# |
|||
# |
|||
""" |
|||
Script to parse all reddisc pages in configured categories |
|||
""" |
|||
|
|||
import os |
|||
import sys |
|||
import re |
|||
|
|||
import pywikibot |
|||
from pywikibot import pagegenerators |
|||
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot |
|||
|
|||
import jogobot |
|||
|
|||
from lib import redpage |
|||
from lib import redfam |
|||
|
|||
|
|||
class DiscussionParserBot(
        # CurrentPageBot,  # via next two sets 'current_page' on each treat()
        ExistingPageBot,   # CurrentPageBot only treats existing pages
        NoRedirectPageBot ):  # class which only treats non-redirects
    """
    Bot class which initialises the parsing process of redundancy
    discussions.

    For every page delivered by the generator, treat_page() filters the page
    by title and hands it to redpage.RedPage / redfam.RedFamParser.
    """

    # RegEx to filter wrong pages; compiled once when the class is defined,
    # from the pattern configured in jogobot.config
    onlyinclude_re = re.compile(
        jogobot.config["redundances"]["reddiscs_onlyinclude_re"] )

    def __init__( self, generator ):
        """
        Constructor

        Parameters:
            @param generator: The page generator that determines on which
                              pages to work.
            @type generator: generator.
        """
        super().__init__( generator=generator )

    def run( self ):
        """
        Controls the overall parsing process, using the super class for
        switching between pages.

        Overridden so that work can be done after all pages were treated.
        Any exception raised by the super class run() propagates unchanged;
        in that case the RedPage db write cache is NOT flushed.
        """
        super().run()

        # Only reached if all pages were parsed without an exception:
        # flush db write cache
        redpage.RedPage.flush_db_cache()

    def treat_page( self ):
        """
        Handles work on the current page.

        Pages listed in the "redpage_exclude" config entry and pages whose
        title does not match onlyinclude_re are skipped. For the remaining
        pages every redfam section yielded by RedPage.parse() is passed to
        RedFamParser.parser(), provided RedPage reports that parsing is
        needed.
        """
        title = self.current_page.title()

        # Short circuit excluded pages
        if title in jogobot.config["redundances"]["redpage_exclude"]:
            return

        # Exclude pages which do not match the pattern
        if not type(self).onlyinclude_re.search( title ):
            return

        # Initiate RedPage object
        red_page = redpage.RedPage( self.current_page )

        # Nothing to do if the page does not need to be parsed
        if not red_page.is_parsing_needed():
            return

        # Count families for failure analysis
        fam_counter = 0

        # Iterate over returned generator with redfam sections
        for fam in red_page.parse():

            # Run RedFamParser on section text
            redfam.RedFamParser.parser( fam, red_page.page,
                                        red_page.is_archive() )
            fam_counter += 1

        # The loop never breaks, so this point is only reached once the
        # whole page was parsed without an exception.
        if fam_counter:
            # At least one redfam was parsed: flush db write cache
            redfam.RedFamParser.flush_db_cache()
            jogobot.output( "Page [[{reddisc}]] parsed".format(
                reddisc=red_page.page.title() ) )
        else:
            # "\03{red}" is kept out of the format() call on purpose, its
            # braces must not be interpreted as a replacement field
            jogobot.output(
                "\03{red}" + "Page [[{reddisc}]], ".format(
                    reddisc=red_page.page.title() ) +
                "containing no redfam, parsed!",
                "WARNING" )
|||
|
|||
|
|||
def apply_conf_cat_generators( genFactory ):
    """
    Adds one category generator per configured category to genFactory.

    The category names are read from
    jogobot.config["redundances"]["redpage_cats"].

    Parameters:
        @param genFactory: The GeneratorFactory to which the built
                           generators should be added.
        @type genFactory: pagegenerators.GeneratorFactory
    """
    configured_cats = jogobot.config["redundances"]["redpage_cats"]

    for cat_name in configured_cats:

        # Let the factory build the generator for this category
        cat_gen = genFactory.getCategoryGen(
            cat_name, gen_func=pagegenerators.CategorizedPageGenerator )

        # Nothing was built for this category, go on with the next one
        if not cat_gen:
            continue

        genFactory.gens.append( cat_gen )
|||
|
|||
|
|||
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Pages to work on are taken from the page generator arguments given on
    the command line; if there are none, the categories configured in
    jogobot.config are used instead (see apply_conf_cat_generators).

    @param args: command line arguments
    @type args: list of unicode
    """

    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # Get the jogobot-task_slug (basename of current file without ending)
    task_slug = os.path.basename(__file__)[:-len(".py")]

    # Before run, we need to check whether we are currently active or not.
    # NOTE: the check itself is currently commented out, so neither of the
    # handlers below can fire until it is re-enabled.
    try:
        # Will throw Exception if disabled/blocked
        # jogobot.is_active( task_slug )
        pass

    except jogobot.jogobot.Blocked as blocked:
        jogobot.output(
            "\03{lightpurple} %s (%s)" % (blocked, type(blocked)),
            "CRITICAL" )
        return

    except jogobot.jogobot.Disabled as disabled:
        jogobot.output(
            "\03{red} %s (%s)" % (disabled, type(disabled)),
            "ERROR" )
        return

    # Bot/Task is active

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    # Parse command line arguments
    for arg in local_args:
        # "-always" is accepted but has no effect at the moment; every
        # other argument is handed to the generator factory
        if not arg.startswith("-always"):
            genFactory.handleArg(arg)

    # Check whether there are generators waiting for factoring, if not
    # use configured categories
    if not genFactory.gens:
        apply_conf_cat_generators( genFactory )

    # Create combined Generator (Union of all Generators)
    gen = genFactory.getCombinedGenerator()

    if not gen:
        # No pages to work on
        pywikibot.showHelp()
        return

    # Log beginning of parsing
    jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) )

    # The preloading generator is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = pagegenerators.PreloadingGenerator(gen)
    DiscussionParserBot( gen ).run()
|||
|
|||
# Only invoke the bot when this file is executed as a script
if __name__ == "__main__":
    main()
Loading…
Reference in new issue