Browse Source

Implement normalisation

Replace wrong dash symbols and normalize space
develop
Jonathan Golder 4 years ago
parent
commit
7208c70e26
  1. 55
      teamstation.py

55
teamstation.py

@ -33,6 +33,8 @@ import re
import pywikibot
from pywikibot.bot import CurrentPageBot
import mwparserfromhell as mwparser
import jogobot
@ -85,6 +87,59 @@ class TeamstationBot( CurrentPageBot ): # sets 'current_page' on each treat()
Handles work on current page
"""
# Parse article text with mwparser
self.current_page.wikicode = mwparser.parse( self.current_page.text)
# Normalize param 1 (jahre)
self.normalize_jahre()
def normalize_jahre(self):
    """
    Normalize param 1 (jahre) of every Team-Station template on the page.

    Walks the templates in ``self.current_page.wikicode`` and, for each one
    named Team-Station that has a param 1, searches its value for a year
    range of the form ``<start> <dash> <end>``:

    * ``<start>`` is a four-digit year beginning with 1 or 2, or the
      literal placeholder ``{{0|0000}}``
    * ``<dash>`` (optional) is any of the dash look-alikes listed in the
      pattern below
    * ``<end>`` (optional) is a four-digit year beginning with 1 or 2

    The value is rebuilt as start + en dash (U+2013) + end without any
    whitespace in between. A missing dash and a missing end year are each
    replaced by a single space.

    The wikicode is modified in place and nothing is returned. Templates
    without param 1, or whose value contains no start year, are left
    untouched.
    """
    # Compile once; the pattern is the same for every template on the page
    jahre_pattern = re.compile(
        r"\s*(\{\{0\|0000\}\}|(?:1|2)\d{3})"
        r"\s*(–|‐|-|‑|‒|—|―|˗)?"
        r"\s*((?:1|2)\d{3})?")

    # Iterate occurrences of template Team-Station
    for occ in self.current_page.wikicode.ifilter_templates(
            matches=r"^\{\{Team-Station"):

        # The filter above is only a prefix match, so skip templates with
        # a longer name. Surrounding whitespace (e.g. a line break after
        # the name) is ignored so such occurrences are not skipped.
        if str(occ.name).strip() != "Team-Station":
            continue

        # Make sure param 1 exists
        if not occ.has(1):
            continue

        # Get parts of value; skip if no years found
        mo = jahre_pattern.search(str(occ.get(1).value))
        if not mo:
            continue

        # Group 1 is mandatory in the pattern, so start is always set here;
        # only the dash and the end year may be missing (None).
        start, dash, end = mo.groups()

        # Overwrite old param value with normalized value
        # NOTE(review): only the matched parts are written back, so text
        # outside the match is not carried over - confirm this is intended.
        occ.add(1, "".join((
            start,
            "\u2013" if dash else " ",
            end if end else " ",
        )))
def main(*args):
"""

Loading…
Cancel
Save