2 changed files with 222 additions and 0 deletions
@ -0,0 +1,30 @@ |
|||
# wiki-jogobot-teamstation |
|||
This is a [Pywikibot](https://www.mediawiki.org/wiki/Manual:Pywikibot) based [Wikipedia Bot](https://de.wikipedia.org/wiki/Wikipedia:Bots) |
|||
of [User:JogoBot](https://de.wikipedia.org/wiki/Benutzer:JogoBot) on the |
|||
[German Wikipedia](https://de.wikipedia.org/wiki/Wikipedia:Hauptseite). |
|||
|
|||
It is used to correct errors in usage of [Template Team-Station](https://de.wikipedia.org/wiki/Vorlage:Team-Station). |
|||
|
|||
## Requirements |
|||
* Python 3.4+ (only tested with these versions)
|||
* pywikibot-core 2.0 |
|||
* mwparserfromhell 0.5.1 |
|||
* [jogobot-core module](https://git.golderweb.de/wiki/jogobot) used as submodule |
|||
|
|||
## Usage |
|||
``` |
|||
python teamstation.py -family:wikipedia |
|||
``` |
|||
_Important_: Do not use `-always`, since there are many special cases which are not detected. Always check the diff!
|||
|
|||
## Versions |
|||
* v0.1 Semi-automatic correction of wrong dash symbols in time spans
|||
|
|||
## Bugs |
|||
[jogobot-teamstation Issues](https://git.golderweb.de/wiki/jogobot-teamstation/issues) |
|||
|
|||
## License |
|||
GPLv3+ |
|||
|
|||
## Author Information |
|||
Copyright 2018 Jonathan Golder <jonathan@golderweb.de> |
@ -0,0 +1,192 @@ |
|||
#!/usr/bin/env python |
|||
# -*- coding: utf-8 -*- |
|||
# |
|||
# teamstation.py |
|||
# |
|||
# Copyright 2018 Jonathan Golder <jonathan@golderweb.de> |
|||
# |
|||
# This program is free software; you can redistribute it and/or modify |
|||
# it under the terms of the GNU General Public License as published by |
|||
# the Free Software Foundation; either version 2 of the License, or |
|||
# (at your option) any later version. |
|||
# |
|||
# This program is distributed in the hope that it will be useful, |
|||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
# GNU General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU General Public License |
|||
# along with this program; if not, write to the Free Software |
|||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, |
|||
# MA 02110-1301, USA. |
|||
# |
|||
# |
|||
""" |
|||
Bot to correct errors in usage of |
|||
[Template Team-Station](https://de.wikipedia.org/wiki/Vorlage:Team-Station). |
|||
""" |
|||
|
|||
import os |
|||
import locale |
|||
import re |
|||
|
|||
import pywikibot |
|||
from pywikibot.bot import CurrentPageBot |
|||
|
|||
import mwparserfromhell as mwparser |
|||
|
|||
import jogobot |
|||
|
|||
|
|||
class TeamstationBot( CurrentPageBot ):  # sets 'current_page' on each treat()
    """
    Bot to correct errors in usage of [Template Team-Station]
    (https://de.wikipedia.org/wiki/Vorlage:Team-Station).

    The only correction implemented is the normalisation of param 1
    (jahre): wrong dash symbols in time spans are replaced by an en dash
    (U+2013) and surrounding whitespace is normalised.
    """

    # Search query handed to SearchPageGenerator: pages using the template
    # whose first parameter starts with a year followed by a wrong dash
    wrong_dash_search = r"hastemplate:Team-Station insource:/Team-Station *\| *[12][0-9]{3} *(‐|-|‑|‒|—|―|˗)/"

    # Edit summary used when saving a corrected page
    wrong_dash_summary = "Bot: ([[Benutzer:JogoBot/Teamstation|teamstation]]) Korrigiere falschen [[Halbgeviertstrich#Bis-Strich|Bis-Strich]]"

    # Splits the value of param 1 into
    #   group 1: start year, or the "{{0|0000}}" placeholder (mandatory)
    #   group 2: any dash symbol, correct or wrong (optional)
    #   group 3: end year (optional)
    jahre_pattern = re.compile(
        r"\s*(\{\{0\|0000\}\}|(?:1|2)\d{3})\s*(–|‐|-|‑|‒|—|―|˗)?\s*((?:1|2)\d{3})?" )

    def __init__( self, genFactory, **kwargs ):
        """
        Constructor

        Parameters:
            @param genFactory GenFactory with parsed pagegenerator args to
                              build generator
            @type genFactory pagegenerators.GeneratorFactory
            @param **kwargs Additional args; accepted for interface
                            compatibility but not forwarded to the super
                            class
            @type iterable
        """

        # Keep the factory, build_generator() works on it
        self.genFactory = genFactory

        # Build generator with genFactory (sets self.gen)
        self.build_generator()

        # Run super class init with the built generator
        super( TeamstationBot, self ).__init__(generator=self.gen)

    def build_generator( self ):
        """
        Builds the page generator the bot works on and stores it in self.gen

        The search for wrong dash symbols is appended to whatever generators
        were already registered on the factory.
        """

        # Search articles (namespace 0) with wrong dash symbols used in
        # time span
        self.genFactory.gens.append(
            pywikibot.pagegenerators.SearchPageGenerator(
                type(self).wrong_dash_search, namespaces=[0]
            )
        )

        # Use this to create the generator the bot should work on
        self.gen = self.genFactory.getCombinedGenerator()

    def treat_page( self ):
        """
        Handles work on current page

        Parses the page text, normalizes param 1 of every Team-Station
        occurrence and hands the resulting text to put_current().
        """

        # Parse article text with mwparser; the parsed tree is attached to
        # the page object so normalize_jahre() can reach it
        self.current_page.wikicode = mwparser.parse( self.current_page.text )

        # Normalize param 1 (jahre)
        self.normalize_jahre()

        # Convert wikicode back to string to save
        self.new_text = str( self.current_page.wikicode )

        # Save
        self.put_current(self.new_text, summary=type(self).wrong_dash_summary)

    def normalize_jahre( self ):
        """
        Replace wrong dash symbols and do some space normalisations in param 1
        (jahre) of template Team-Station

        Works in place on self.current_page.wikicode, which must have been
        set before (see treat_page()).
        """

        # Iterate occurrences of template Team-Station
        for occ in self.current_page.wikicode.ifilter_templates(
                matches=r"^\{\{Team-Station" ):

            # Short circuit wrong templates: the filter regex also hits
            # templates whose name merely starts with "Team-Station".
            # matches() ignores surrounding whitespace, so occurrences
            # written as "{{Team-Station |..." (which the search query
            # explicitly covers) are no longer skipped.
            if not occ.name.matches( "Team-Station" ):
                continue

            # Make sure param 1 exists
            if not occ.has(1):
                continue

            # Get param 1
            jahre = occ.get(1)

            # Normalize rarely used "bis 2000" notation.
            # Wikicode.replace() modifies the value in place, so there is
            # no return value to assign.
            if "bis" in jahre.value:
                jahre.value.replace("bis", "{{0|0000}}\u2013")

            # Get parts of value
            mo = type(self).jahre_pattern.search( str(jahre.value) )

            # Short circuit if no years found
            if not mo:
                continue

            # Reconstruct normalized value with correct en dash.
            # Group 1 is mandatory in the pattern, so it is always set here;
            # a missing dash or end year is replaced by a space.
            # NOTE(review): the " " placeholders are reproduced as found;
            # confirm the intended padding width.
            jahre_n = [
                mo.group(1),
                "\u2013" if mo.group(2) else " ",
                mo.group(3) if mo.group(3) else " ",
            ]

            # Overwrite old param value with normalized value
            occ.add(1, "".join(jahre_n))
|||
|
|||
def main(*args):
    """
    Evaluate the command line arguments and start the bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """

    # Force locale 'de_DE.UTF-8' to prevent problems with wrong month
    # abbreviations in strptime
    locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

    # Global arguments are processed first to determine the desired site;
    # what is returned are the arguments local to this script
    remaining_args = pywikibot.handle_args(args)

    # The jogobot task slug is the basename of this file without ending
    script_name = os.path.basename(__file__)
    task_slug = script_name[:-len(".py")]

    # The check whether the task is currently active is deliberately
    # disabled, since the bot only runs semi-automatically
    #~ if not jogobot.bot.active( task_slug ):
    #~     return

    # Parse local args to get information about the subtask; only the
    # generator factory is used afterwards
    _subtask, gen_factory, _subtask_args = jogobot.bot.parse_local_args(
        remaining_args, None )

    # Init bot
    bot = jogobot.bot.init_bot(
        task_slug, None, TeamstationBot, gen_factory )

    # Run bot
    jogobot.bot.run_bot( task_slug, None, bot )
|||
|
|||
|
|||
# Only start the bot when this file is executed as a script
if __name__ == "__main__":
    main()
Loading…
Reference in new issue