From ac54aea69832baa92d4bcb3cac86f7adf6b1991d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 20:02:48 +0200 Subject: [PATCH] Use callback to detect redfam.section Detecting redfam-Sections via RegExp caused some false positives due to wrongly formatted wikisyntax. See Task Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=78 FS#78] --- redfam.py | 16 ++++++++++++++++ redpage.py | 5 +++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/redfam.py b/redfam.py index 41e6367..b58b94a 100644 --- a/redfam.py +++ b/redfam.py @@ -341,6 +341,22 @@ class RedFamParser( RedFam ): else: return False + @classmethod + def is_section_redfam_cb( cls, heading ): + """ + Used as callback for wikicode.get_sections in redpage.parse to + select sections which are redfams + """ + # Because of strange behavior in some cases, parse heading again + # (Task FS#77) + heading = mwparser.parse( str( heading ) ) + + # Make sure we have min. two wikilinks in heading to assume a redfam + if len( heading.filter_wikilinks() ) >= 2: + return True + else: + return False + @classmethod def parser( cls, text, page, isarchive=False ): """ diff --git a/redpage.py b/redpage.py index 2b93ae8..6bb6cc4 100644 --- a/redpage.py +++ b/redpage.py @@ -28,9 +28,10 @@ Provides a class for handling redundance discussion pages and archives import pywikibot # noqa import mwparserfromhell as mwparser -import jogobot +import jogobot # noqa from mysqlred import MysqlRedPage +from redfam import RedFamParser class RedPage: @@ -116,7 +117,7 @@ class RedPage: # include_lead = if true include first section (intro) # include_heading = if true include heading fams = self.wikicode.get_sections( - matches=jogobot.config["redundances"]["section_heading_regex"], + matches=RedFamParser.is_section_redfam_cb, include_lead=False, include_headings=True ) # Iterate over RedFam