Check reddisc page titles against regex
Prevents parsing of pages that have been wrongly categorized in the configured categories or that are passed via command-line parameters, since parsing such pages results in unexpected behaviour. Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75]
This commit is contained in:
@@ -27,6 +27,7 @@ Script to parse all reddisc pages in configured categories
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
|
||||
import pywikibot
|
||||
from pywikibot import pagegenerators
|
||||
@@ -46,6 +47,10 @@ class DiscussionParserBot(
|
||||
Bot class which initialises the parsing process of Redundancy Discussions
|
||||
"""
|
||||
|
||||
# RegEx to filter wrong pages
|
||||
onlyinclude_re = re.compile(
|
||||
jogobot.config["redundances"]["reddiscs_onlyinclude_re"] )
|
||||
|
||||
def __init__( self, generator ):
|
||||
"""
|
||||
Constructor
|
||||
@@ -86,6 +91,11 @@ class DiscussionParserBot(
|
||||
|
||||
return
|
||||
|
||||
# Exclude pages which do not match the pattern
|
||||
if not type(self).onlyinclude_re.search( self.current_page.title() ):
|
||||
|
||||
return
|
||||
|
||||
# Initiate RedPage object
|
||||
red_page = redpage.RedPage( self.current_page )
|
||||
|
||||
@@ -102,7 +112,7 @@ class DiscussionParserBot(
|
||||
# If successfully parsed whole page, flush
|
||||
# db write cache
|
||||
redfam.RedFamParser.flush_db_cache()
|
||||
jogobot.output( "Page [[{redisc}]] parsed".format(
|
||||
jogobot.output( "Page [[{reddisc}]] parsed".format(
|
||||
reddisc=red_page.page.title() ) )
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user