Files
jogobot-red/bots/reddiscparser.py
GOLDERWEB – Jonathan Golder b88efb6bdd Reflect structure changes in Code
Since bot class is moved to separate dir/file we need to do some changes
to rebuild functionality

Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82]
2016-08-27 15:40:09 +02:00

128 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# reddiscparser.py
#
# Copyright 2016 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Bot to parse all reddisc pages in given Generator or configured categories
"""
import re
import pywikibot # noqa
from pywikibot import pagegenerators # noqa
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
import jogobot
from lib import redpage
from lib import redfam
class DiscussionParserBot(
        # CurrentPageBot,  # via next two sets 'current_page' on each treat()
        ExistingPageBot,  # CurrentPageBot only treats existing pages
        NoRedirectPageBot):  # class which only treats non-redirects
    """
    Bot class which initialises the parsing process of redundancy discussions.

    Each page delivered by the generator is filtered by title (exclude list
    and include pattern from the jogobot config), wrapped in a
    ``redpage.RedPage`` and, if that object reports parsing as needed, split
    into sections which are handed to ``redfam.RedFamParser``.
    """

    # RegEx to filter wrong pages; the pattern is read from the jogobot
    # config once, when the class body is executed (i.e. at import time).
    onlyinclude_re = re.compile(
        jogobot.config["redundances"]["reddiscs_onlyinclude_re"])

    def __init__(self, generator):
        """
        Constructor

        Parameters:
            @param generator: The page generator that determines on which
                              pages to work.
            @type generator: generator.
        """
        super().__init__(generator=generator)

    def run(self):
        """
        Controls the overall parsing process, using super class for page
        switch.

        Needed to do things before/after treating pages is done. Any
        exception raised by the parent ``run()`` propagates unchanged; in
        that case the RedPage db write cache is NOT flushed.
        """
        super().run()

        # Only reached if the parent run() returned without raising:
        # all pages were handled, so flush the db write cache.
        redpage.RedPage.flush_db_cache()

    def treat_page(self):
        """
        Handles work on current page

        Pages are skipped silently when their title is listed in the
        ``redpage_exclude`` config entry or does not match
        ``onlyinclude_re``. For the remaining pages a ``RedPage`` object is
        created and, if it reports that parsing is needed, every section
        yielded by ``RedPage.parse()`` is passed to
        ``RedFamParser.parser()``.
        """
        redundances_cfg = jogobot.config["redundances"]
        title = self.current_page.title()

        # Short circuit excluded pages
        if title in redundances_cfg["redpage_exclude"]:
            return

        # Exclude pages which do not match the pattern
        if not type(self).onlyinclude_re.search(title):
            return

        # Initiate RedPage object
        red_page = redpage.RedPage(self.current_page)

        # Nothing to do if RedPage reports that no parsing is needed
        if not red_page.is_parsing_needed():
            return

        # Count families for failure analysis
        fam_counter = 0

        # Iterate over returned generator with redfam sections
        for fam in red_page.parse():
            # Run RedFamParser on section text
            redfam.RedFamParser.parser(
                fam, red_page.page, red_page.is_archive())
            fam_counter += 1

        # The loop has no break, so this point is reached whenever the whole
        # page was parsed without an exception.
        reddisc_title = red_page.page.title()

        if fam_counter:
            # At least one family was handed to the parser: flush its
            # db write cache and report success.
            redfam.RedFamParser.flush_db_cache()
            jogobot.output(
                "Page [[{reddisc}]] parsed".format(reddisc=reddisc_title))
        else:
            # No family found on a page that needed parsing: emit a warning.
            # The colour prefix is concatenated (not formatted) on purpose,
            # since "{red}" must not be treated as a format field.
            jogobot.output(
                "\03{red}" +
                "Page [[{reddisc}]], ".format(reddisc=reddisc_title) +
                "containing no redfam, parsed!",
                "WARNING")