diff --git a/README.md b/README.md new file mode 100644 index 0000000..a864139 --- /dev/null +++ b/README.md @@ -0,0 +1,30 @@ +# wiki-jogobot-teamstation +This is a [Pywikibot](https://www.mediawiki.org/wiki/Manual:Pywikibot) based [Wikipedia Bot](https://de.wikipedia.org/wiki/Wikipedia:Bots) +of [User:JogoBot](https://de.wikipedia.org/wiki/Benutzer:JogoBot) on the +[German Wikipedia](https://de.wikipedia.org/wiki/Wikipedia:Hauptseite). + +It is used to correct errors in usage of [Template Team-Station](https://de.wikipedia.org/wiki/Vorlage:Team-Station). + +## Requirements +* Python 3.4+ (at least it is only tested with those) +* pywikibot-core 2.0 +* mwparserfromhell 0.5.1 +* [jogobot-core module](https://git.golderweb.de/wiki/jogobot) used as submodule + +## Usage +``` +python teamstation.py -family:wikipedia +``` +_Important_: Do not use `-always` since there are many special cases which are not detected. Always check the diff! + +## Versions +* v0.1 Semi-automatic correction of wrong dash symbols in time spans + +## Bugs +[jogobot-teamstation Issues](https://git.golderweb.de/wiki/jogobot-teamstation/issues) + +## License +GPLv3+ + +## Author Information +Copyright 2018 Jonathan Golder diff --git a/teamstation.py b/teamstation.py new file mode 100644 index 0000000..e1da67e --- /dev/null +++ b/teamstation.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# teamstation.py +# +# Copyright 2018 Jonathan Golder +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Bot to correct errors in usage of
[Template Team-Station](https://de.wikipedia.org/wiki/Vorlage:Team-Station).
"""

import os
import locale
import re

import pywikibot
from pywikibot.bot import CurrentPageBot

import mwparserfromhell as mwparser

import jogobot


class TeamstationBot( CurrentPageBot ):  # sets 'current_page' on each treat()
    """
    Bot to correct errors in usage of [Template Team-Station]
    (https://de.wikipedia.org/wiki/Vorlage:Team-Station).

    Currently the only task is to replace wrong dash symbols in the time
    span given as param 1 (jahre) with an en dash.
    """

    # Search query selecting articles whose Team-Station time span starts
    # with a year followed by one of the wrong dash symbols
    wrong_dash_search = r"hastemplate:Team-Station insource:/Team-Station *\| *[12][0-9]{3} *(‐|-|‑|‒|—|―|˗)/"

    # Edit summary used when saving the corrected page
    wrong_dash_summary = "Bot: ([[Benutzer:JogoBot/Teamstation|teamstation]]) Korrigiere falschen [[Halbgeviertstrich#Bis-Strich|Bis-Strich]]"

    # Parts of a time span: start (a year or the "{{0|0000}}" placeholder),
    # optional dash symbol (correct or wrong one), optional end year.
    # Compiled once since it is applied to every template occurrence.
    _jahre_pattern = re.compile(
        r"\s*(\{\{0\|0000\}\}|(?:1|2)\d{3})\s*(–|‐|-|‑|‒|—|―|˗)?\s*((?:1|2)\d{3})?")

    def __init__( self, genFactory, **kwargs ):
        """
        Constructor

        Parameters:
            @param genFactory GenFactory with parsed pagegenerator args to
                              build generator
            @type genFactory pagegenerators.GeneratorFactory
            @param **kwargs Additional args (accepted, but currently not
                            forwarded to the super class)
            @type iterable
        """

        # Copy needed args
        self.genFactory = genFactory

        # Build generator with genFactory
        self.build_generator()

        # Run super class init with the generator built above
        super( TeamstationBot, self ).__init__(generator=self.gen)

    def build_generator( self ):
        """
        Builds the page generator the bot works on and stores it in
        self.gen
        """

        # Search articles with wrong dash symbols used in time span
        self.genFactory.gens.append(
            pywikibot.pagegenerators.SearchPageGenerator(
                type(self).wrong_dash_search, namespaces=[0]
            )
        )

        # Use this to create the generator the bot should work on
        self.gen = self.genFactory.getCombinedGenerator()

    def treat_page( self ):
        """
        Handles work on current page
        """

        # Parse article text with mwparser
        self.current_page.wikicode = mwparser.parse( self.current_page.text )

        # Normalize param 1 (jahre)
        self.normalize_jahre()

        # Convert wikicode back to string to save
        self.new_text = str( self.current_page.wikicode )

        # Save
        self.put_current(self.new_text, summary=type(self).wrong_dash_summary)

    def normalize_jahre(self):
        """
        Replace wrong dash symbols and do some space normalisations in param 1
        (jahre) of template Team-Station

        Works in place on self.current_page.wikicode, which must have been
        set before (see treat_page).
        """

        # Iterate occurrences of template Team-Station
        for occ in self.current_page.wikicode.ifilter_templates(
                matches=r"^\{\{Team-Station" ):

            # Short circuit wrong templates (the filter regex above also
            # accepts longer names starting with "Team-Station").
            # matches() ignores surrounding whitespace and the case of the
            # first letter, so "{{Team-Station |...}}" is not skipped.
            if not occ.name.matches("Team-Station"):
                continue

            # Make sure param 1 exists
            if not occ.has(1):
                continue

            # Get param 1
            jahre = occ.get(1)

            # Normalize rarely used "bis 2000" notation.
            # Wikicode.replace() modifies the value in place (unlike
            # str.replace()), so the result does not need to be assigned.
            if "bis" in jahre.value:
                jahre.value.replace("bis", "{{0|0000}}\u2013")

            # Get parts of value
            mo = type(self)._jahre_pattern.search(str(jahre.value))

            # Short circuit if no years found
            if not mo:
                continue

            # The start is mandatory in the pattern, dash and end year
            # may be missing (None)
            start, dash, end = mo.groups()

            # Reconstruct normalized value with correct en dash; missing
            # parts are filled with a single space each
            jahre_n = start + ("\u2013" if dash else " ") + (end or " ")

            # Overwrite old param value with normalized value
            occ.add(1, jahre_n)


def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """

    # Make sure locale is set to 'de_DE.UTF-8' to prevent problems
    # with wrong month abbreviations in strptime
    locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # Get the jogobot-task_slug (basename of current file without ending)
    task_slug = os.path.basename(__file__)[:-len(".py")]

    # Actually not needed since we only run semi-automatically
    # Before run, we need to check whether we are currently active or not
    #~ if not jogobot.bot.active( task_slug ):
    #~     return

    # Parse local args; only the generator factory is needed here
    ( _subtask, genFactory, _subtask_args ) = jogobot.bot.parse_local_args(
        local_args, None )

    # Init Bot
    bot = jogobot.bot.init_bot( task_slug, None, TeamstationBot, genFactory)

    # Run bot
    jogobot.bot.run_bot( task_slug, None, bot )


if __name__ == "__main__":
    main()