Check reddisc page titles against regex

This prevents parsing of pages which have been wrongly categorized in the
configured categories or which are given via command-line parameters.
Parsing such pages results in unexpected behaviour.

Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75]
This commit is contained in:
2016-08-24 15:27:42 +02:00
parent 0ea1b0039d
commit dcc4851513

View File

@@ -27,6 +27,7 @@ Script to parse all reddisc pages in configured categories
import os import os
import sys import sys
import re
import pywikibot import pywikibot
from pywikibot import pagegenerators from pywikibot import pagegenerators
@@ -46,6 +47,10 @@ class DiscussionParserBot(
Botclass witch initialises the parsing process of Redundancy Discussions Botclass witch initialises the parsing process of Redundancy Discussions
""" """
# RegEx to filter wrong pages
onlyinclude_re = re.compile(
jogobot.config["redundances"]["reddiscs_onlyinclude_re"] )
def __init__( self, generator ): def __init__( self, generator ):
""" """
Constructor Constructor
@@ -86,6 +91,11 @@ class DiscussionParserBot(
return return
# Exclude pages which do not match pattern
if not type(self).onlyinclude_re.search( self.current_page.title() ):
return
# Initiate RedPage object # Initiate RedPage object
red_page = redpage.RedPage( self.current_page ) red_page = redpage.RedPage( self.current_page )
@@ -102,7 +112,7 @@ class DiscussionParserBot(
# If successfully parsed whole page, flush # If successfully parsed whole page, flush
# db write cache # db write cache
redfam.RedFamParser.flush_db_cache() redfam.RedFamParser.flush_db_cache()
jogobot.output( "Page [[{redisc}]] parsed".format( jogobot.output( "Page [[{reddisc}]] parsed".format(
reddisc=red_page.page.title() ) ) reddisc=red_page.page.title() ) )