Jonathan Golder
7 years ago
8 changed files with 1256 additions and 187 deletions
@ -0,0 +1,4 @@ |
|||||
|
[submodule "jogobot"] |
||||
|
path = jogobot |
||||
|
url = git@github.com:golderweb/wiki-jogobot-core.git |
||||
|
branch = test-v1 |
@ -0,0 +1,21 @@ |
|||||
|
# wiki-jogobot-charts |
||||
|
This is a [Pywikibot](https://www.mediawiki.org/wiki/Manual:Pywikibot) based [Wikipedia Bot](https://de.wikipedia.org/wiki/Wikipedia:Bots) |
||||
|
of [User:JogoBot](https://de.wikipedia.org/wiki/Benutzer:JogoBot) on the |
||||
|
[German Wikipedia](https://de.wikipedia.org/wiki/Wikipedia:Hauptseite). |
||||
|
|
||||
|
A more detailed description can be found on [JogoBot's Wikipedia user page](https://de.wikipedia.org/wiki/Benutzer:JogoBot/Charts). |
||||
|
|
||||
|
## Requirements |
||||
|
* Python 3.4+ (it has only been tested with these versions) |
||||
|
* pywikibot-core 2.0 |
||||
|
* [jogobot-core module](https://github.com/golderweb/wiki-jogobot-core) used as submodule |
||||
|
* [Isoweek module](https://pypi.python.org/pypi/isoweek) |
||||
|
|
||||
|
## Bugs |
||||
|
[wiki-jogobot-charts on fs.golderweb.de (de)](https://fs.golderweb.de/proj20) |
||||
|
|
||||
|
## License |
||||
|
GPLv3+ |
||||
|
|
||||
|
## Author Information |
||||
|
Copyright 2016 Jonathan Golder <jonathan@golderweb.de> |
@ -0,0 +1,585 @@ |
|||||
|
#!/usr/bin/env python3 |
||||
|
# -*- coding: utf-8 -*- |
||||
|
# |
||||
|
# countrylist.py |
||||
|
# |
||||
|
# Copyright 2016 Jonathan Golder <jonathan@golderweb.de> |
||||
|
# |
||||
|
# This program is free software; you can redistribute it and/or modify |
||||
|
# it under the terms of the GNU General Public License as published by |
||||
|
# the Free Software Foundation; either version 2 of the License, or |
||||
|
# (at your option) any later version. |
||||
|
# |
||||
|
# This program is distributed in the hope that it will be useful, |
||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
|
# GNU General Public License for more details. |
||||
|
# |
||||
|
# You should have received a copy of the GNU General Public License |
||||
|
# along with this program; if not, write to the Free Software |
||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, |
||||
|
# MA 02110-1301, USA. |
||||
|
# |
||||
|
# |
||||
|
""" |
||||
|
Provides a class for handling charts list per country and year |
||||
|
""" |
||||
|
|
||||
|
import re |
||||
|
import locale |
||||
|
from datetime import datetime |
||||
|
|
||||
|
from isoweek import Week |
||||
|
|
||||
|
import pywikibot |
||||
|
import mwparserfromhell as mwparser |
||||
|
|
||||
|
import jogobot |
||||
|
|
||||
|
|
||||
|
class CountryList():
    """
    Handles charts list per country and year
    """

    def __init__( self, wikilink ):
        """
        Generate new instance of class

        Checks whether the page the given wikilink points to exists and
        extracts the year of the list from the page title

        @param wikilink    Wikilink object by mwparser linking CountryList

        @raises CountryListError    If the page does not exist or no year
                                    could be found in its title
        """

        # Generate pywikibot site object
        # @TODO: Maybe store it outside???
        self.site = pywikibot.Site()

        # Set locale to 'de_DE.UTF-8'
        # NOTE: this switches the locale for the whole process
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

        # Generate pywikibot page object and keep the wikilink it came from
        self.page = pywikibot.Page( self.site, wikilink.title )
        self.wikilink = wikilink

        # Check if page exists
        if not self.page.exists():
            raise CountryListError( "CountryList " +
                                    str(wikilink.title) + " does not exists!" )

        # Initialise attributes which are filled while parsing
        for attr in ( "wikicode", "entry", "chartein", "_chartein_raw",
                      "_titel_raw", "titel", "interpret", "_interpret_raw" ):
            setattr( self, attr, None )

        self.parsed = False

        # Try to find year
        self.find_year()

    def is_parsing_needed( self, revid ):
        """
        Check if current revid of CountryList differs from given one

        @param revid    Revid to check against

        @return    True     Given revid differs from current revid
                   False    Given revid is equal to current revid
        """
        return revid != self.page.latest_revision_id

    def find_year( self ):
        """
        Try to find the year related to CountryList using regex

        The year is expected as four digits in parentheses in the page title

        @raises CountryListError    If the title contains no such year
        """
        match = re.search( r"^.+\((\d{4})\)", self.page.title() )

        if not match:
            raise CountryListError( "CountryList year is errorneous!" )

        self.year = int( match.group(1) )

    def parse( self ):
        """
        Handles the parsing process
        """

        # Remember which revision the parsed data belongs to
        self.revid = self.page.latest_revision_id

        # Parse page with mwparser
        self.generate_wikicode()

        # Select latest entry
        self.get_latest_entry()

        # Prepare chartein, titel, interpret
        self.prepare_chartein()
        self.prepare_titel()
        self.prepare_interpret()

        # For easy detecting whether we have parsed self
        self.parsed = True

        # Log parsed page
        jogobot.output( "Parsed revision {revid} of page [[{title}]]".format(
            revid=self.revid, title=self.page.title() ) )

    def detect_belgian( self ):
        """
        Detect whether current entry is one of the belgian ones
        (Wallonien/Flandern)

        @return    Name of the page section to look in ("Wallonie" or
                   "Flandern"), None for non-belgian lists
        """
        # Check if belgian province name is in link text or title
        haystacks = ( str( self.wikilink.text ), str( self.wikilink.title ) )

        for needle, section in ( ( "Wallonien", "Wallonie" ),
                                 ( "Flandern", "Flandern" ) ):
            if any( needle in haystack for haystack in haystacks ):
                return section

        return None

    def generate_wikicode( self ):
        """
        Runs mwparser on page.text to get mwparser.objects
        """
        self.wikicode = mwparser.parse( self.page.text )

    def get_latest_entry( self ):
        """
        Get latest list entry template object

        @raises CountryListError    If the Singles section, the wrapping
                                    template or any entry is missing
        """

        # For belgian list we need to select subsection of country
        belgian = self.detect_belgian()

        # Select Singles-Section
        # Catch Error if we have none
        try:
            if belgian:
                singles_section = self.wikicode.get_sections(
                    matches=belgian )[0].get_sections( matches="Singles" )[0]
            else:
                singles_section = self.wikicode.get_sections(
                    matches="Singles" )[0]

        except IndexError:
            raise CountryListError( "No Singles-Section found!")

        # Since we have multiple categories in some countries we need
        # to select the first wrapping template
        try:
            wrapping = next( singles_section.ifilter_templates(
                matches="Nummer-eins-Hits" ) )
        except StopIteration:
            raise CountryListError( "Wrapping template is missing!")

        # Select the last occurrence of template "Nummer-eins-Hits Zeile" in
        # wrapper template; a previously selected entry is kept if no row
        # is found
        rows = wrapping.get("Inhalt").value.filter_templates(
            matches="Nummer-eins-Hits Zeile" )
        if rows:
            self.entry = rows[-1]

        # Check if we have found something
        if not self.entry:
            raise CountryListError( self.page.title() )

    def get_year_correction( self ):
        """
        Reads value of Jahr parameter for correcting week numbers near to
        year changes

        @return    1 for "+1", -1 for "-1", 0 if the param is missing or has
                   any other value
        """
        if self.entry.has( "Jahr" ):

            # Read the *value* of the param; the string form of the param
            # object itself also contains the "Jahr=" key and would never
            # compare equal to "+1" / "-1"
            jahr = str( self.entry.get( "Jahr" ).value ).strip()

            if jahr == "+1":
                return 1
            elif jahr == "-1":
                return -1

        # None or wrong parameter value
        return 0

    def prepare_chartein( self ):
        """
        Checks whether self._chartein_raw is a date or a week number and
        calculates related datetime object
        """

        # If self._chartein_raw is not set, get it
        if not self._chartein_raw:
            self.get_chartein_value()

        # Numeric string means week number
        if self._chartein_raw.isnumeric():

            # Calculate date of monday in given week of the (maybe
            # corrected) year of this list
            monday = Week( self.year + self.get_year_correction(),
                           int( self._chartein_raw ) ).monday()

            # Week.monday() yields a plain date, lift it to datetime so
            # both branches produce the same type
            self.chartein = datetime.combine( monday, datetime.min.time() )

        # Complete date string present
        else:
            self.chartein = datetime.strptime( self._chartein_raw,
                                               "%Y-%m-%d" )

    def _get_entry_param( self, name ):
        """
        Returns the value of a param of the latest entry without ref-tags

        @param name    Name of the template param
        @type name     str

        @return    Value of the param (mwparser wikicode)

        @raises CountryListEntryError    If param is not present
        """
        if not self.entry.has( name ):
            raise CountryListEntryError(
                "Template Parameter '" + name + "' is missing!" )

        value = self.entry.get( name ).value

        # Remove possible ref-tags; filter_tags returns a list, so removing
        # nodes while looping does not disturb the iteration
        for ref in value.filter_tags( matches="ref" ):
            value.remove( ref )

        return value

    def get_chartein_value( self ):
        """
        Reads value of chartein parameter
        If param is not present raise Error
        """
        self._chartein_raw = str( self._get_entry_param( "Chartein" ) ).strip()

    def prepare_titel( self ):
        """
        Loads and prepares Titel of latest entry
        """

        # If self._titel_raw is not set, get it
        if not self._titel_raw:
            self.get_titel_value()

        # Try to find a wikilink for Titel on countrylist
        if "[[" not in self._titel_raw:
            self.titel = self._search_links( str(self._titel_raw) )
        else:
            self.titel = self._titel_raw

    def get_titel_value( self ):
        """
        Reads value of Titel parameter
        If param is not present raise Error
        """
        self._titel_raw = str( self._get_entry_param( "Titel" ) ).strip()

    def prepare_interpret( self ):
        """
        Loads and prepares Interpret of latest entry
        """

        # If self._interpret_raw is not set, get it
        if not self._interpret_raw:
            self.get_interpret_value()

        # Interpret name separating words
        seps = ( "feat.", "&" )

        # List of concatenated interpret names, alternating with separators
        # (names on even positions, separators on odd ones)
        parts = [ " ", ]
        # Positions in parts which have no wikilink yet
        indexes = list()
        index = 0

        # Reconcatenate interpret names word by word
        for word in self._interpret_raw.split():

            if word in seps:
                # Count up index 2 times ( Separator + next Name )
                index += 2
                parts.append( word )
                parts.append( " " )
                continue

            # Name part, append to current name without extra whitespace
            parts[-1] = ( parts[-1] + " " + word ).strip()

            # We only need to work on it, if no wikilink is present
            if index not in indexes and "[[" not in parts[-1]:
                indexes.append( index )

        # If we have indexes without links, search for links
        if indexes:
            parts = self._search_links( parts, indexes )

            # Join the collected links
            self.interpret = " ".join( parts )

        # Nothing to do, just use raw
        else:
            self.interpret = self._interpret_raw

    def get_interpret_value( self ):
        """
        Reads value of Interpret parameter
        If param is not present raise Error

        Templates SortKeyName and SortKey are replaced by the text (or link)
        they stand for
        """
        value = self._get_entry_param( "Interpret" )

        # Handle SortKeyName and SortKey
        for template in value.ifilter_templates( matches="SortKey" ):

            if template.name.matches( "SortKeyName" ):
                # Params: 1 = Surname, 2 = Name, 3 = Link-Dest (optional)
                label = ( str(template.get(1).value) + " " +
                          str(template.get(2).value) )

                # Differing Link-Destination is provided as param 3
                if template.has(3):
                    interpret_link = mwparser.nodes.wikilink.Wikilink(
                        str(template.get(3).value), label )

                # Default Link-Dest [[Surname Name]]
                else:
                    interpret_link = mwparser.nodes.wikilink.Wikilink( label )

                # Replace Template with link
                value.replace( template, interpret_link )

            # SortKey: replace with text from param 2 if present
            elif template.has(2):
                value.replace( template, template.get(2).value )

            # Else remove SortKey (text should follow behind SortKey)
            else:
                value.replace( template, None )

            # Normally won't be needed as there should be only one
            # SortKey-Template but ... it's a wiki
            break

        # Remove whitespace
        self._interpret_raw = str( value ).strip()

    def _search_links( self, keywords, indexes=None ):
        """
        Search matching wikilinks for keyword(s) in CountryList's wikicode

        @param keywords: One or more keywords to search for
        @type keywords: str, list
        @param indexes: List with numeric indexes for items of keywords to work
                        on only
        @type indexes: list of ints
        @return:        List or String with replaced keywords
        @return type:   str, list
        """

        # Maybe convert keywords string to list
        string = isinstance( keywords, str )
        if string:
            keywords = [ keywords, ]

        # If indexes worklist was not provided, work on all elements.
        # Work on a copy so the list of the caller stays untouched
        if not indexes:
            pending = list( range( len( keywords ) ) )
        else:
            pending = list( indexes )

        # Iterate over wikilinks of refpage and try to find related links
        for wikilink in self.wikicode.ifilter_wikilinks():

            for index in pending:

                # Check whether wikilink matches
                if( keywords[index] == wikilink.text or
                        keywords[index] == wikilink.title ):

                    # Overwrite name with complete wikilink
                    keywords[index] = str( wikilink )

                    # Remove index from worklist; leaving the loop right
                    # away keeps removing while iterating safe
                    pending.remove( index )
                    break

            # If worklist is empty, stop iterating over wikilinks
            if not pending:
                break

        # Choose whether to return list or string based on input type
        if string:
            return str( keywords[0] )

        return keywords

    def __str__( self ):
        """
        Returns str representation for Object
        """
        if self.parsed:
            return ("CountryList( Link = \"{link}\", Revid = \"{revid}\", " +
                    "Interpret = \"{interpret}\", Titel = \"{titel}\", " +
                    "Chartein = \"{chartein}\" )").format(
                        link=repr(self.wikilink),
                        revid=self.revid,
                        interpret=self.interpret,
                        titel=self.titel,
                        chartein=repr(self.chartein))
        else:
            return "CountryList( Link = \"{link}\" )".format(
                link=repr(self.wikilink))
||||
|
|
||||
|
|
||||
|
class CountryListError( Exception ):
    """
    Base exception for errors occurring in class CountryList
    """
||||
|
|
||||
|
|
||||
|
class CountryListEntryError( CountryListError ):
    """
    Exception for errors occurring in class CountryList which are related to
    a single list entry
    """
||||
|
|
||||
|
|
||||
|
class CountryListUnitTest():
    """
    Defines Test-Functions for CountryList-Module
    """

    # Predefined test pages on the wiki together with the values which are
    # expected after parsing them
    testcases = ( { "Link": mwparser.nodes.Wikilink( "Benutzer:JogoBot/Charts/Tests/Liste der Nummer-eins-Hits in Frankreich (2015)" ),  # noqa
                    "revid": 148453827,
                    "interpret": "[[Adele (Sängerin)|Adele]]",
                    "titel": "[[Hello (Adele-Lied)|Hello]]",
                    "chartein": datetime( 2015, 10, 23 ) },
                  { "Link": mwparser.nodes.Wikilink( "Benutzer:JogoBot/Charts/Tests/Liste der Nummer-eins-Hits in Belgien (2015)", "Wallonien"),  # noqa
                    "revid": 148455281,
                    "interpret": "[[Nicky Jam]] & [[Enrique Iglesias (Sänger)|Enrique Iglesias]]",  # noqa
                    "titel": "El perdón",
                    "chartein": datetime( 2015, 9, 12 ) } )

    def __init__( self, page=None ):
        """
        Constructor

        Stores a wikilink to page in attribute page_link, or None if no page
        was given
        """
        self.page_link = mwparser.nodes.Wikilink( page ) if page else None

    def treat( self ):
        """
        Start testing either manually with page provided by cmd-arg page or
        automatically with predefined test case
        """
        if not self.page_link:
            self.auto_test()
        else:
            self.man_test()

    def auto_test( self ):
        """
        Run automatic tests with predefined test data from wiki

        Raises an Exception on the first value which does not match
        """
        for case in type(self).testcases:

            self.countrylist = CountryList( case["Link"] )

            # The saved revid must be reported as up to date, any other
            # revid as outdated
            if( self.countrylist.is_parsing_needed( case["revid"] ) or not
                    self.countrylist.is_parsing_needed( case["revid"] + 1 ) ):
                raise Exception(
                    "CountryList.is_parsing_needed() does not work!" )

            self.countrylist.parse()

            # Compare every expected value against the parsed attribute of
            # the same name
            for key, expected in case.items():

                if key == "Link":
                    continue

                actual = getattr( self.countrylist, key )

                if not expected == actual:
                    raise Exception( key + " – " + str( actual ) )

    def man_test( self ):
        """
        Run manual test with page given in parameter
        """
        self.countrylist = CountryList( self.page_link )
        self.countrylist.parse()

        print( self.countrylist )

        hint = ( "Since we have no data to compare, you need to manually " +
                 "check data above against given page to ensure correct " +
                 "working of module!" )
        print( hint )
||||
|
|
||||
|
|
||||
|
def main(*args):
    """
    Handling direct calls --> unittest

    @param args    Command line arguments; "-page:<title>" selects a page
                   for the manual test, without it the predefined test
                   cases are run
    """
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # Without a -page argument the automatic tests are run; the name must
    # be bound in any case, otherwise the call below fails
    page = None

    # Parse command line arguments
    for arg in local_args:
        if arg.startswith("-page:"):
            page = arg[ len("-page:"): ]

    # Call unittest-class
    test = CountryListUnitTest( page )
    test.treat()
||||
|
|
||||
|
# Run main() (the unit test entry point) when the module is executed directly
if __name__ == "__main__":
    main()
@ -0,0 +1,411 @@ |
|||||
|
#!/usr/bin/env python3 |
||||
|
# -*- coding: utf-8 -*- |
||||
|
# |
||||
|
# summarypage.py |
||||
|
# |
||||
|
# Copyright 2016 Jonathan Golder <jonathan@golderweb.de> |
||||
|
# |
||||
|
# This program is free software; you can redistribute it and/or modify |
||||
|
# it under the terms of the GNU General Public License as published by |
||||
|
# the Free Software Foundation; either version 2 of the License, or |
||||
|
# (at your option) any later version. |
||||
|
# |
||||
|
# This program is distributed in the hope that it will be useful, |
||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
|
# GNU General Public License for more details. |
||||
|
# |
||||
|
# You should have received a copy of the GNU General Public License |
||||
|
# along with this program; if not, write to the Free Software |
||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, |
||||
|
# MA 02110-1301, USA. |
||||
|
# |
||||
|
# |
||||
|
""" |
||||
|
Provides classes for handling Charts summary page |
||||
|
""" |
||||
|
|
||||
|
from datetime import datetime, timedelta |
||||
|
|
||||
|
# import pywikibot |
||||
|
import mwparserfromhell as mwparser |
||||
|
|
||||
|
import jogobot |
||||
|
|
||||
|
from countrylist import CountryList, CountryListError |
||||
|
|
||||
|
|
||||
|
class SummaryPage():
    """
    Handles summary page related actions
    """

    def __init__( self, text, force_reload=False ):
        """
        Create Instance

        @param text:            Page Text of summarypage
        @type text:             str
        @param force_reload:    If given, countrylists will be always parsed
                                regardless if needed or not
        @type force_reload:     bool
        """
        # Force parsing of countrylist
        self.force_reload = force_reload

        # Parse Text with mwparser
        self.wikicode = mwparser.parse( text )

    def treat( self ):
        """
        Handles parsing/editing of text
        """

        # Work on every template object for Template "/Eintrag"
        for entry in self.wikicode.filter_templates( matches="/Eintrag" ):

            handler = SummaryPageEntry( entry,
                                        force_reload=self.force_reload )
            handler.treat()

            # The resulting template is a different object than the
            # original one, so it has to be put into the wikicode explicitly
            result = handler.get_entry()
            self.wikicode.replace( entry, result.template )

    def get_new_text( self ):
        """
        If writing page is needed, return new text, otherwise False
        """

        # Information whether writing is needed is kept in class attribute
        # of SummaryPageEntry
        if not SummaryPageEntry.write_needed:
            return False

        # Convert wikicode back to string and return
        return str( self.wikicode )
||||
|
|
||||
|
|
||||
|
class SummaryPageEntry():
    """
    Provides a generic wrapper for summary page entry template
    """

    # Set to True as soon as one entry differs from its saved state
    write_needed = False

    def __init__( self, entry, force_reload=False ):
        """
        Constructor

        @param entry:           Entry template of summarypage entry
        @type entry:            mwparser.template
        @param force_reload:    If given, countrylists will be always parsed
                                regardless if needed or not
        @type force_reload:     bool
        """
        self.old_entry = SummaryPageEntryTemplate( entry )
        self.new_entry = SummaryPageEntryTemplate( )

        # Filled by get_countrylist()
        self.countrylist = None

        # Force parsing of countrylist
        self.force_reload = force_reload

    def treat( self ):
        """
        Controls parsing/update-sequence of entry
        """
        # Get CountryList-Object
        self.get_countrylist()

        # Only update entry if the country list was (re-)parsed
        if self.countrylist.parsed:

            self.correct_chartein()

            self.update_params()

            self.is_write_needed()

    def _replace_year( self, old_year, new_year ):
        """
        Replaces a year in the title of the countrylist wikilink

        @param old_year    Year to search for
        @type old_year     int
        @param new_year    Year to insert instead
        @type new_year     int
        """
        # NOTE(review): Wikicode.replace() appears to match whole nodes, so
        # it would not swap a year inside the title text. Confirm against
        # the library docs; rebuilding the title string works either way.
        title = str( self.countrylist_wikilink.title )
        self.countrylist_wikilink.title = title.replace( str( old_year ),
                                                         str( new_year ) )

    def get_countrylist( self ):
        """
        Get the CountryList-Object for current entry

        If the linked list is from last year, the list of the current year
        is tried first, with fallback to the old one

        @raises SummaryPageEntryError    If no CountryList could be loaded
        """

        # Make sure the attribute exists even if loading fails below
        self.countrylist = None

        # Get wikilink to related countrylist
        self.get_countrylist_wikilink()

        # Get saved revision of related countrylist
        self.get_countrylist_saved_revid()

        current_year = datetime.now().year
        last_year = current_year - 1

        # If list is from last year, replace year
        if str( last_year ) in str( self.countrylist_wikilink.title ):
            jogobot.output( "Trying to use new years list for [[{page}]]"
                            .format( page=self.countrylist_wikilink.title ) )

            self._replace_year( last_year, current_year )

        # Try to get current years list
        try:
            self.countrylist = CountryList( self.countrylist_wikilink )

            self.maybe_parse_countrylist()

        # Maybe fallback to last years list
        except CountryListError:

            if str( current_year ) in str( self.countrylist_wikilink.title ):
                # Format the whole message, not only its last part
                jogobot.output( ( "New years list for [[{page}]] does not " +
                                  "exist, fall back to old list!" ).format(
                                      page=self.countrylist_wikilink.title ) )

                self._replace_year( current_year, last_year )

                self.countrylist = CountryList( self.countrylist_wikilink )

                self.maybe_parse_countrylist()

        if not self.countrylist:
            raise SummaryPageEntryError( "CountryList does not exists!" )

    def maybe_parse_countrylist( self ):
        """
        Parse countrylist if page-object exists and if parsing is needed or
        param -force-reload is set
        """

        # Fast return if no countrylist-object
        if not self.countrylist:
            return

        # Parse if needed or forced
        if( self.countrylist.is_parsing_needed( self.countrylist_revid ) or
                self.force_reload ):
            self.countrylist.parse()

    def get_countrylist_wikilink( self ):
        """
        Load wikilink to related countrylist

        @raises SummaryPageEntryError    If param Liste is missing or does
                                         not contain a wikilink
        """
        if not self.old_entry.Liste:
            raise SummaryPageEntryError( "Parameter Liste is not present!")

        try:
            self.countrylist_wikilink = next(
                self.old_entry.Liste.ifilter_wikilinks() )
        except StopIteration:
            raise SummaryPageEntryError(
                "Parameter Liste does not contain valid wikilink!" )

    def get_countrylist_saved_revid( self ):
        """
        Load saved revid of related countrylist if Param is present

        A missing, empty or non-numeric value results in revid 0
        """
        self.countrylist_revid = 0

        if self.old_entry.Liste_Revision:
            try:
                self.countrylist_revid = int(
                    str( self.old_entry.Liste_Revision ).strip() )
            # Whitespace-only or otherwise unusable value, keep 0
            except ValueError:
                pass

    def update_params( self ):
        """
        Updates values of Parameters of template
        """
        self.new_entry.Liste = self.countrylist_wikilink
        self.new_entry.Liste_Revision = \
            self.countrylist.page.latest_revision_id
        self.new_entry.Interpret = self.countrylist.interpret
        self.new_entry.Titel = self.countrylist.titel
        self.new_entry.Chartein = self._corrected_chartein

        # Manually maintained params are carried over from the old entry
        self.new_entry.Korrektur = self.old_entry.Korrektur or ""
        self.new_entry.Hervor = self.old_entry.Hervor or ""

    def correct_chartein( self ):
        """
        Calculates the correct value of chartein, based on the chartein value
        from countrylist entry and param Korrektur of summarypage entry
        """
        days = 0

        # If param Korrektur is present and (after stripping) castable to
        # int use it, otherwise ignore it
        if self.old_entry.Korrektur:
            try:
                days = int( str( self.old_entry.Korrektur ).strip() )
            except ValueError:
                days = 0

        corrected = self.countrylist.chartein + timedelta( days=days )

        # Day of month without leading zero followed by month name
        self._corrected_chartein = corrected.strftime( "%d. %B" ).lstrip( "0" )

    def is_write_needed( self ):
        """
        Detects whether writing of entry is needed and stores information in
        Class-Attribute
        """
        changed = ( ( self.old_entry != self.new_entry ) and
                    self.countrylist.parsed )

        # Never reset the flag once another entry has requested a write
        type( self ).write_needed = changed or type( self ).write_needed

    def get_entry( self ):
        """
        Returns the new entry if CountryList was parsed otherwise returns the
        old one
        """
        if self.countrylist.parsed:
            return self.new_entry

        return self.old_entry
||||
|
|
||||
|
|
||||
|
class SummaryPageEntryTemplate():
    """
    Interface class for mwparser.template to simply use template params as
    Properties
    """

    # Names of the template params which are accessible as attributes
    params = ( "Liste", "Liste_Revision", "Interpret", "Titel", "Chartein",
               "Korrektur", "Hervor" )

    def __init__( self, template_obj=None ):
        """
        Creates Instance of Class for given mwparser.template object of
        SummaryPageEntry Template. If no object was given create empty one.

        @param template_obj    Object of SummaryPageEntry Template
        @type template_obj:    mwparser.template

        @raises SummaryPageEntryTemplateError    If template_obj is not a
                                                 mwparser template
        """

        # No object given, initialise an empty template
        if not template_obj:
            self.__initial_template()
            self.__initial = True
            return

        # Check if object has correct type
        if not isinstance( template_obj, mwparser.nodes.template.Template ):
            raise SummaryPageEntryTemplateError( "Wrong type given" )

        self.template = template_obj
        self.__initial = False

    def __initial_template( self ):
        """
        Builds the initial template
        """
        self.template = next( mwparser.parse(
            "{{Portal:Charts und Popmusik/"
            "Aktuelle Nummer-eins-Hits/Eintrag"
            "|Liste=|Liste_Revision=|Interpret=|Titel=NN"
            "|Chartein=|Korrektur=|Hervor=}}" ).ifilter_templates() )

    def __getattr__( self, name ):
        """
        Special getter for template params

        Only called for attributes which are not found the normal way

        @return    Value of the template param, False if the template does
                   not have it

        @raises AttributeError    If name is not one of the template params
        """
        if name not in type(self).params:
            raise AttributeError( name )

        if self.template.has( name ):
            return self.template.get( name ).value

        return False

    def __setattr__( self, name, value ):
        """
        Special setter for template params

        Template params are written to the template, everything else is
        stored as normal attribute
        """
        if name in type(self).params:
            self.__dict__[ 'template' ].add( name, value )
        else:
            object.__setattr__(self, name, value)

    def __ne__( self, other ):
        """
        Checks whether any template param value except for Liste_Revision
        differs between both templates

        @return    True if a param is missing or differs, otherwise False

        @raises SummaryPageEntryTemplateError    If none of both instances
                                                 was created empty
        """

        # Detect which of the two was initialised (without template object)
        # If none raise error
        if self.__initial:
            initial, cmpto = self, other
        elif other.__initial:
            initial, cmpto = other, self
        else:
            raise SummaryPageEntryTemplateError(
                "One of the compared instances must have been initial!" )

        # Iterate over each param of the initial template
        for param in initial.template.params:

            # Use the name of the param object directly instead of cutting
            # it out of the "name=value" string form
            name = str( param.name ).strip()

            # If param is missing, writing is needed
            if not cmpto.template.has( name ):
                return True

            # Do not compare List Revisions (not just write about Revids)
            if name == "Liste_Revision":
                continue

            # Compare other param values, if one unequal write is needed
            if( initial.template.get( name ).value.strip() !=
                    cmpto.template.get( name ).value.strip() ):
                return True

        # No difference found
        return False
||||
|
|
||||
|
|
||||
|
class SummaryPageError( Exception ):
    """
    Base exception for errors occurring in class SummaryPage
    """
||||
|
|
||||
|
|
||||
|
class SummaryPageEntryError( SummaryPageError ):
    """
    Exception for errors occurring in class SummaryPageEntry
    """
||||
|
|
||||
|
|
||||
|
class SummaryPageEntryTemplateError( SummaryPageError ):
    """
    Exception for errors occurring in class SummaryPageEntryTemplate
    """
Loading…
Reference in new issue