Check reddisc page titles against regex

This prevents parsing pages that were wrongly placed in the configured
categories or that were passed in via command-line parameters.
Parsing such pages results in unexpected behaviour.

Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75]
This commit is contained in:
2016-08-24 15:27:42 +02:00
parent 0ea1b0039d
commit dcc4851513

View File

@@ -27,6 +27,7 @@ Script to parse all reddisc pages in configured categories
import os
import sys
import re
import pywikibot
from pywikibot import pagegenerators
@@ -46,6 +47,10 @@ class DiscussionParserBot(
Bot class which initialises the parsing process of Redundancy Discussions
"""
# RegEx to filter wrong pages
onlyinclude_re = re.compile(
jogobot.config["redundances"]["reddiscs_onlyinclude_re"] )
def __init__( self, generator ):
"""
Constructor
@@ -86,6 +91,11 @@ class DiscussionParserBot(
return
# Exclude pages which do not match the pattern
if not type(self).onlyinclude_re.search( self.current_page.title() ):
return
# Initiate RedPage object
red_page = redpage.RedPage( self.current_page )
@@ -102,7 +112,7 @@ class DiscussionParserBot(
# If successfully parsed whole page, flush
# db write cache
redfam.RedFamParser.flush_db_cache()
jogobot.output( "Page [[{redisc}]] parsed".format(
jogobot.output( "Page [[{reddisc}]] parsed".format(
reddisc=red_page.page.title() ) )