Browse Source

Merge branch 'release-1.0'

master v1.0
Jonathan Golder 7 years ago
parent
commit
db6e7fd246
  1. 2
      .gitignore
  2. 4
      .gitmodules
  3. 21
      README.md
  4. 2
      __init__.py
  5. 417
      charts.py
  6. 585
      countrylist.py
  7. 1
      jogobot
  8. 411
      summarypage.py

2
.gitignore

@ -62,3 +62,5 @@ target/
# Test
test.py
disabled

4
.gitmodules

@ -0,0 +1,4 @@
[submodule "jogobot"]
path = jogobot
url = git@github.com:golderweb/wiki-jogobot-core.git
branch = test-v1

21
README.md

@ -0,0 +1,21 @@
# wiki-jogobot-charts
This is a [Pywikibot](https://www.mediawiki.org/wiki/Manual:Pywikibot) based [Wikipedia Bot](https://de.wikipedia.org/wiki/Wikipedia:Bots)
of [User:JogoBot](https://de.wikipedia.org/wiki/Benutzer:JogoBot) on the
[German Wikipedia](https://de.wikipedia.org/wiki/Wikipedia:Hauptseite).
A more detailed description can be found on [JogoBot's Wikipedia user page](https://de.wikipedia.org/wiki/Benutzer:JogoBot/Charts).
## Requirements
* Python 3.4+ (older versions have not been tested)
* pywikibot-core 2.0
* [jogobot-core module](https://github.com/golderweb/wiki-jogobot-core) used as submodule
* [Isoweek module](https://pypi.python.org/pypi/isoweek)
## Bugs
[wiki-jogobot-charts on fs.golderweb.de (de)](https://fs.golderweb.de/proj20)
## License
GPLv3+
## Author Information
Copyright 2016 Jonathan Golder <jonathan@golderweb.de>

2
__init__.py

@ -3,7 +3,7 @@
#
# __init__.py
#
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
# Copyright 2016 Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by

417
charts.py

@ -3,7 +3,15 @@
#
# charts.py
#
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
# original version by:
#
# (C) Pywikibot team, 2006-2014 as basic.py
#
# Distributed under the terms of the MIT license.
#
# modified by:
#
# Copyright 2016 Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -22,208 +30,245 @@
#
#
"""
Provides a class for handling chart lists
Bot which automatically updates a ChartsSummaryPage like
[[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
CountryLists
The following parameters are supported:
&params;
-always If given, request for confirmation of edit is short circuited
Use for unattended run
-force-reload If given, countrylists will be always parsed regardless if
needed or not
"""
from datetime import datetime, timedelta
import locale
import os
import sys
from isoweek import Week
import pywikibot
from pywikibot import pagegenerators
import pywikibot # noqa
import mwparserfromhell as mwparser
import jogobot
from summarypage import SummaryPage
class Charts:
# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
'&params;': pagegenerators.parameterHelp
}
class ChartsBot( ):
"""
Class for handling chart lists
Bot which automatically updates a ChartsSummaryPage like
[[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
CountryLists
"""
def __init__( self ):
def __init__( self, generator, always, force_reload ):
"""
Generate a new ChartsList object based on given pywikibot page object
@param page page Pywikibot/MediaWiki page object for page
Constructor.
@param generator: the page generator that determines on which pages
to work
@type generator: generator
@param always: if True, request for confirmation of edit is short
circuited. Use for unattended run
@type always: bool
@param force-reload: If given, countrylists will be always parsed
regardless if needed or not
@type force-reload: bool
"""
self.generator = generator
self.always = always
# Force parsing of countrylist
self.force_reload = force_reload
# Output Information
jogobot.output( "Chartsbot invoked" )
# Save pywikibot site object
self.site = pywikibot.Site()
# Define edit summary
self.summary = jogobot.config["charts"]["edit_summary"].strip()
# Make sure summary starts with "Bot:"
if not self.summary[:len("Bot:")] == "Bot:":
self.summary = "Bot: " + self.summary.strip()
# Set locale to 'de_DE.UTF-8'
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
self.site = pywikibot.Site()
self.changed = None
# Safe the pywikibot page object
# self.page = page
self.open_overview()
self.parse_overview()
if self.changed:
self.save_overview()
def parse_charts_list( self, page ):
"""
Handles the parsing process
"""
# Parse charts list with mwparser
wikicode = mwparser.parse( page.text )
# Select the section "Singles"
singles_section = wikicode.get_sections( matches="Singles" )[0]
# Select the last occurence of template "Nummer-eins-Hits Zeile" in
# "Singles"-section
last_entry = singles_section.ifilter_templates(
matches="Nummer-eins-Hits Zeile" )
for last in last_entry:
pass
# Detect weather we have a date or a weeknumber for Template Param
# "Chartein"
if( last.get("Chartein").value.strip().isnumeric() ):
chartein = last.get("Chartein").value.strip()
def run(self):
"""Process each page from the generator."""
# Count skipped pages (redirect or missing)
skipped = 0
for page in self.generator:
if not self.treat(page):
skipped += 1
if skipped:
jogobot.output( "Chartsbot finished, {skipped} page(s) skipped"
.format( skipped=skipped ) )
else:
chartein = datetime.strptime( last.get("Chartein").value.strip(),
"%Y-%m-%d" )
title = last.get("Titel").value.strip()
interpret = last.get("Interpret").value.strip()
# Return collected data as tuple
return ( chartein, title, interpret )
def parse_overview( self ):
"""
Parses the given Charts-Overview-Page and returns the updated version
"""
# Parse text with mwparser to get access to nodes
wikicode = mwparser.parse( self.overview_text )
# Get mwparser.template objects for Template "/Eintrag"
for country in wikicode.ifilter_templates( matches="/Eintrag" ):
# Get mwparser.wikilink object
for link in country.get("Liste").value.ifilter_wikilinks():
# Create Page-Object for Chartslist
list_page = pywikibot.Page( self.site, link.title )
# Only use first wikilink in Template Param "Liste"
break
# Check if we have a saved revid
if not country.has( "Liste Revision" ):
try:
country.add( "Liste Revision", 0, before="Interpret" )
except ValueError:
country.add( "Liste Revision", 0 )
# Check if saved revid is unequal current revid
if( int( str( country.get( "Liste Revision" ).value ) ) !=
list_page.latest_revision_id ):
country = self.update_overview( country, list_page )
# If any param of any occurence of Template "/Eintrag" has changed,
# Save new version
# We need to convert mwparser-objects to string before saving
self.overview_text = str( wikicode )
def open_overview( self ):
"""
Opens the Charts-Overview-Page
"""
with open( "/home/joni/GOLDERWEB/Daten/Projekte/05_Wikimedia/62_BOT/bot/charts/test-data.wiki", "r" ) as fr: # noqa
self.overview_text = fr.read()
def update_overview( self, country, list_page ): # noqa
"""
Updates the templates given in county using data from given list_page
@param country wikicode-object with Template for country
@param list_page pywikibot-page-object for list-page
@returns wikicode-object with updated Template for country
"""
# Parse linked charts list for the country
data = self.parse_charts_list( list_page )
# Update "Liste Revision" param
self.changed = True
country.get( "Liste Revision" ).value = str(
list_page.latest_revision_id )
# For some countries we have weeknumbers instead of dates
if( isinstance( data[0], str ) ):
# Slice year out of link destination
year = int( list_page.title()[-5:-1] )
# Check if we have a param "Wochentag", otherwise add
if not country.has( "Wochentag" ):
country.add( "Wochentag", "" )
if( str( country.get( "Wochentag" ).value ).isnumeric() ):
days = int( str( country.get( "Wochentag" ).value ) )
else:
days = 0
# Calculate date of monday in given week and add number of
# days given in Template parameter "Wochentag" with monday
# as day (zero)
# We need double conversion since wikicode could not be casted
# as int directly
date = ( Week( year, int( data[0] ) ).monday() +
timedelta( days=days ) )
# Param Chartein contains a regular date
jogobot.output( "Chartsbot finished successfully" )
def treat(self, page):
"""Load the given page, does some changes, and saves it."""
text = self.load(page)
if not text:
return False
################################################################
# NOTE: Here you can modify the text in whatever way you want. #
################################################################
# Initialise and treat SummaryPageWorker
sumpage = SummaryPage( text, self.force_reload )
sumpage.treat()
# Check if editing is needed and if so get new text
if sumpage.get_new_text():
text = sumpage.get_new_text()
if not self.save(text, page, self.summary, False):
jogobot.output(u'Page %s not saved.' % page.title(asLink=True))
return True
def load(self, page):
"""Load the text of the given page."""
try:
# Load the page
text = page.get()
except pywikibot.NoPage:
jogobot.output( u"Page %s does not exist; skipping."
% page.title(asLink=True), "ERROR" )
except pywikibot.IsRedirectPage:
jogobot.output( u"Page %s is a redirect; skipping."
% page.title(asLink=True), "ERROR" )
else:
date = data[0]
# Check if param "Chartein" is present
if not country.has( "Chartein" ):
try:
country.add( "Chartein", "", before="Wochentag" )
except ValueError:
country.add( "Chartein", "" )
# Check if date has changed
if( date.strftime( "%d. %B" ).lstrip( "0" ) !=
country.get("Chartein").value ):
self.changed = True
country.get("Chartein").value = date.strftime( "%d. %B"
).lstrip( "0" )
# Check if param "Titel" is present
if not country.has( "Titel" ):
country.add( "Titel", "", before="Chartein" )
# Check if Titel has changed
if( data[1] != country.get( "Titel" ).value ):
self.changed = True
country.get( "Titel" ).value = data[1]
# Check if param "Intepret" is present
if not country.has( "Interpret" ):
country.add( "Interpret", "", before="Titel" )
# Check if Interpret has changed
if( data[2] != country.get( "Interpret" ).value ):
self.changed = True
country.get( "Interpret" ).value = data[2]
def save_overview( self ):
"""
Saves the current version of overview-text
"""
with open( "/home/joni/GOLDERWEB/Daten/Projekte/05_Wikimedia/62_BOT/bot/charts/test-data.wiki", "w" ) as fw: # noqa
fw.write( self.overview_text )
return text
return False
def save(self, text, page, comment=None, minorEdit=True,
botflag=True):
"""Update the given page with new text."""
# only save if something was changed (and not just revision)
if text != page.get():
# Show diff only in interactive mode or in verbose mode
if not self.always or pywikibot.config.verbose_output:
# Show the title of the page we're working on.
# Highlight the title in purple.
jogobot.output( u">>> \03{lightpurple}%s\03{default} <<<"
% page.title())
# show what was changed
pywikibot.showDiff(page.get(), text)
jogobot.output(u'Comment: %s' % comment)
def main():
Charts()
if self.always or pywikibot.input_yn(
u'Do you want to accept these changes?',
default=False, automatic_quit=False):
try:
page.text = text
# Save the page
page.save(summary=comment or self.comment,
minor=minorEdit, botflag=botflag)
except pywikibot.LockedPage:
jogobot.output( u"Page %s is locked; skipping."
% page.title(asLink=True), "ERROR" )
except pywikibot.EditConflict:
jogobot.output(
u'Skipping %s because of edit conflict'
% (page.title()), "ERROR")
except pywikibot.SpamfilterError as error:
jogobot.output(
u'Cannot change %s because of spam blacklist \
entry %s'
% (page.title(), error.url), "ERROR")
else:
return True
return False
def main(*args):
"""
Process command line arguments and invoke bot.
If args is an empty list, sys.argv is used.
@param args: command line arguments
@type args: list of unicode
"""
# Process global arguments to determine desired site
local_args = pywikibot.handle_args(args)
# Get the jogobot-task_slug (basename of current file without ending)
task_slug = os.path.basename(__file__)[:-len(".py")]
# Before run, we need to check wether we are currently active or not
try:
# Will throw Exception if disabled/blocked
jogobot.is_active( task_slug )
except jogobot.jogobot.Blocked:
(type, value, traceback) = sys.exc_info()
jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ),
"CRITICAL" )
except jogobot.jogobot.Disabled:
(type, value, traceback) = sys.exc_info()
jogobot.output( "\03{red} %s (%s)" % (value, type ),
"ERROR" )
# Bot/Task is active
else:
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
# to work on.
genFactory = pagegenerators.GeneratorFactory()
# The generator gives the pages that should be worked upon.
gen = None
# If always is True, bot won't ask for confirmation of edit (automode)
always = False
# If force_reload is True, bot will always parse Countrylist regardless
# if parsing is needed or not
force_reload = False
# Parse command line arguments
for arg in local_args:
if arg.startswith("-always"):
always = True
elif arg.startswith("-force-reload"):
force_reload = True
else:
pass
genFactory.handleArg(arg)
if not gen:
gen = genFactory.getCombinedGenerator()
if gen:
# The preloading generator is responsible for downloading multiple
# pages from the wiki simultaneously.
gen = pagegenerators.PreloadingGenerator(gen)
bot = ChartsBot(gen, always, force_reload)
if bot:
bot.run()
else:
pywikibot.showHelp()
if( __name__ == "__main__" ):
main()

585
countrylist.py

@ -0,0 +1,585 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# countrylist.py
#
# Copyright 2016 Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides a class for handling charts list per country and year
"""
import re
import locale
from datetime import datetime
from isoweek import Week
import pywikibot
import mwparserfromhell as mwparser
import jogobot
class CountryList():
"""
Handles charts list per country and year
"""
def __init__(self, wikilink):
    """
    Create a CountryList for the page the given wikilink points to

    @param wikilink: Link to the country list page
    @type wikilink: mwparser wikilink node

    @raise CountryListError: If the linked page does not exist or its
                             title contains no year (see find_year)
    """
    # @TODO: Maybe store the site object outside???
    self.site = pywikibot.Site()

    # Switch the whole process to the German locale
    locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

    # Page object and the link it was created from
    self.page = pywikibot.Page(self.site, wikilink.title)
    self.wikilink = wikilink

    # Without an existing page there is nothing to work on
    if not self.page.exists():
        message = "CountryList " + str(wikilink.title) + " does not exists!"
        raise CountryListError(message)

    # Everything filled in by parse() starts out empty
    self.wikicode = None
    self.entry = None
    self.chartein = None
    self._chartein_raw = None
    self._titel_raw = None
    self.titel = None
    self.interpret = None
    self._interpret_raw = None

    self.parsed = False

    # The year is taken from the page title
    self.find_year()
def is_parsing_needed(self, revid):
    """
    Tell whether the page has changed since the given revision

    @param revid: Revision id to compare the current one against
    @type revid: int

    @return: True if the current revision id differs from revid,
             False if both are equal
    @rtype: bool
    """
    return revid != self.page.latest_revision_id
def find_year(self):
    """
    Extract the year of the list from the page title

    The title is expected to contain a four digit year in parentheses,
    which is stored as int in self.year.

    @raise CountryListError: If the title contains no such year
    """
    found = re.search(r"^.+\((\d{4})\)", self.page.title())

    # Nothing matched, we can not go on without a year
    if found is None:
        raise CountryListError("CountryList year is errorneous!")

    self.year = int(found.group(1))
def parse(self):
    """
    Run the complete parsing sequence for the current page revision

    Afterwards chartein, titel and interpret are available and
    self.parsed is True.
    """
    # Remember which revision the extracted data belongs to
    self.revid = self.page.latest_revision_id

    # Order matters: wikicode -> latest entry -> prepared values
    steps = (
        self.generate_wikicode,
        self.get_latest_entry,
        self.prepare_chartein,
        self.prepare_titel,
        self.prepare_interpret,
    )
    for step in steps:
        step()

    # Marker for callers to detect that data is present
    self.parsed = True

    jogobot.output("Parsed revision {revid} of page [[{title}]]".format(
        title=self.page.title(), revid=self.revid))
def detect_belgian(self):
    """
    Detect whether the wikilink refers to one of the belgian regions

    Both link text and link title are searched.

    @return: "Wallonie" or "Flandern" if the related region name was
             found, otherwise None
    @rtype: str or None
    """
    haystacks = (str(self.wikilink.text), str(self.wikilink.title))

    # ( name searched in the link, value returned )
    regions = (("Wallonien", "Wallonie"), ("Flandern", "Flandern"))

    for needle, section in regions:
        if any(needle in haystack for haystack in haystacks):
            return section

    return None
def generate_wikicode(self):
    """
    Parse the text of the page with mwparser and keep the result in
    self.wikicode
    """
    page_text = self.page.text
    self.wikicode = mwparser.parse(page_text)
def get_latest_entry(self):
    """
    Find the last "Nummer-eins-Hits Zeile" template of the singles list
    and store it in self.entry

    For belgian lists the "Singles" section is searched inside the
    section of the detected region.

    @raise CountryListError: If there is no "Singles" section, no
                             wrapping template or no entry at all
    """
    region = self.detect_belgian()

    # Narrow down to the relevant "Singles" section
    try:
        scope = self.wikicode
        if region:
            scope = scope.get_sections(matches=region)[0]
        singles = scope.get_sections(matches="Singles")[0]
    except IndexError:
        raise CountryListError("No Singles-Section found!")

    # Some countries have multiple categories, so only the first wrapping
    # template is of interest
    try:
        wrapper = next(singles.ifilter_templates(
            matches="Nummer-eins-Hits"))
    except StopIteration:
        raise CountryListError("Wrapping template is missing!")

    # Walk through all rows of the wrapper, the last one wins
    rows = wrapper.get("Inhalt").value.ifilter_templates(
        matches="Nummer-eins-Hits Zeile")
    for row in rows:
        self.entry = row

    # Nothing found at all
    if not self.entry:
        raise CountryListError(self.page.title())
def get_year_correction(self):
    """
    Read parameter "Jahr" of the current entry, which corrects the year
    of week numbers near to year changes

    @return: 1 if the parameter value is "+1", -1 if it is "-1" and 0 if
             the parameter is missing or has any other value
    @rtype: int
    """
    if not self.entry.has("Jahr"):
        return 0

    # get() hands back the whole parameter, whose text form still carries
    # the "Jahr=" key. Only its value may be compared.
    jahr = str(self.entry.get("Jahr").value).strip()

    # Unknown values are treated like a missing parameter
    return {"+1": 1, "-1": -1}.get(jahr, 0)
def prepare_chartein(self):
    """
    Convert the raw value of parameter "Chartein" to a date

    A numeric raw value is treated as week number of self.year (plus
    year correction) and results in the monday of that week. Everything
    else must be a date string formatted like YYYY-MM-DD.
    """
    # Load the raw value first if that has not happened yet
    if not self._chartein_raw:
        self.get_chartein_value()

    raw = self._chartein_raw

    # Complete date string present
    if not raw.isnumeric():
        self.chartein = datetime.strptime(raw, "%Y-%m-%d")
        return

    # Week number: use the monday of that week
    year = self.year + self.get_year_correction()
    self.chartein = Week(year, int(raw)).monday()
def get_chartein_value(self):
    """
    Read the value of parameter "Chartein" of the current entry

    Ref-tags are removed from the parameter value and the remaining text
    is stored without surrounding whitespace in self._chartein_raw.

    @raise CountryListEntryError: If the parameter is not present
    """
    if not self.entry.has("Chartein"):
        raise CountryListEntryError(
            "Template Parameter 'Chartein' is missing!")

    value = self.entry.get("Chartein").value

    # filter_tags() returns a list, so removing nodes while looping can not
    # make the iteration skip a following ref-tag
    for ref in value.filter_tags(matches="ref"):
        value.remove(ref)

    self._chartein_raw = str(value).strip()
def prepare_titel(self):
    """
    Prepare the title of the latest entry and store it in self.titel

    If the raw value contains no wikilink, a matching link is searched
    on the country list page, otherwise the raw value is used as it is.
    """
    # Load the raw value first if that has not happened yet
    if not self._titel_raw:
        self.get_titel_value()

    raw = self._titel_raw
    self.titel = raw if "[[" in raw else self._search_links(str(raw))
def get_titel_value(self):
    """
    Read the value of parameter "Titel" of the current entry

    Ref-tags are removed from the parameter value and the remaining text
    is stored without surrounding whitespace in self._titel_raw.

    @raise CountryListEntryError: If the parameter is not present
    """
    if not self.entry.has("Titel"):
        raise CountryListEntryError(
            "Template Parameter 'Titel' is missing!")

    value = self.entry.get("Titel").value

    # filter_tags() returns a list, so removing nodes while looping can not
    # make the iteration skip a following ref-tag
    for ref in value.filter_tags(matches="ref"):
        value.remove(ref)

    self._titel_raw = str(value).strip()
def prepare_interpret(self):
    """
    Prepare the interpret of the latest entry and store it in
    self.interpret

    The raw value is split into interpret names at the separating words
    "feat." and "&". For every name without a wikilink a matching link
    is searched on the country list page. If all names are linked
    already, the raw value is used as it is.
    """
    # Load the raw value first if that has not happened yet
    if not self._interpret_raw:
        self.get_interpret_value()

    separators = ("feat.", "&")

    # Names and separators in order of appearance; names are located at
    # even positions, the separators between them at odd ones
    chunks = [" ", ]
    # Positions of names which have no wikilink yet
    unlinked = []
    position = 0

    for token in self._interpret_raw.split():
        if token in separators:
            # Skip separator and move on to the next name
            position += 2
            chunks.append(token)
            chunks.append(" ")
            continue

        # Append word to the current name, dropping needless whitespace
        chunks[-1] = (chunks[-1] + " " + token).strip()

        if "[[" not in chunks[-1] and position not in unlinked:
            unlinked.append(position)

    # Nothing to do, just use raw
    if not unlinked:
        self.interpret = self._interpret_raw
        return

    chunks = self._search_links(chunks, unlinked)
    self.interpret = " ".join(chunks)
def get_interpret_value(self):
    """
    Read the value of parameter "Interpret" of the current entry

    Ref-tags are removed and the first template matching "SortKey"
    (SortKeyName or SortKey) is replaced by a wikilink or plain text.
    The remaining text is stored without surrounding whitespace in
    self._interpret_raw.

    @raise CountryListEntryError: If the parameter is not present
    """
    if not self.entry.has("Interpret"):
        raise CountryListEntryError(
            "Template Parameter 'Interpret' is missing!")

    value = self.entry.get("Interpret").value

    # filter_tags() returns a list, so removing nodes while looping can not
    # make the iteration skip a following ref-tag
    for ref in value.filter_tags(matches="ref"):
        value.remove(ref)

    # Normally there should be only one SortKey-Template, so only the first
    # one is handled (but ... its a wiki)
    template = next(value.ifilter_templates(matches="SortKey"), None)

    if template is not None:
        if template.name == "SortKeyName":
            # Params 1 and 2 joined by a space form the name
            name = (str(template.get(1).value) + " " +
                    str(template.get(2).value))

            # Differing Link-Destination is provided as param 3,
            # otherwise the name itself is the destination
            if template.has(3):
                replacement = mwparser.nodes.wikilink.Wikilink(
                    str(template.get(3).value), name)
            else:
                replacement = mwparser.nodes.wikilink.Wikilink(name)

        # SortKey: use text from param 2 if present
        elif template.has(2):
            replacement = template.get(2).value

        # Else drop SortKey (text should follow behind SortKey)
        else:
            replacement = None

        value.replace(template, replacement)

    self._interpret_raw = str(value).strip()
def _search_links( self, keywords, indexes=None ):
"""
Search matching wikilinks for keyword(s) in CountryList's wikicode
@param keywords: One or more keywords to search for
@type keywords: str, list
@param indexes: List with numeric indexes for items of keywords to work
on only
@type indexes: list of ints
@return: List or String with replaced keywords
@return type: str, list
"""
# Maybe convert keywords string to list
if( isinstance( keywords, str ) ):
keywords = [ keywords, ]
string = True
else:
string = False
# If indexes worklist was not provided, work on all elements
if not indexes:
indexes = list(range( len( keywords ) ))
# Iterate over wikilinks of refpage and try to find related links
for wikilink in self.wikicode.ifilter_wikilinks():
# Iterate over interpret names
for index in indexes:
# Check wether wikilink matches
if( keywords[index] == wikilink.text or
keywords[index] == wikilink.title ):
# Overwrite name with complete wikilink
keywords[index] = str( wikilink )
# Remove index from worklist
indexes.remove( index )
# Other indexes won't also match
break
# If worklist is empty, stop iterating over wikilinks
if not indexes:
break
# Choose wether return list or string based on input type
if not string:
return keywords
else:
return str(keywords[0])
def __str__( self ):
"""
Returns str repression for Object
"""
if self.parsed:
return ("CountryList( Link = \"{link}\", Revid = \"{revid}\", " +
"Interpret = \"{interpret}\", Titel = \"{titel}\", " +
"Chartein = \"{chartein}\" )").format(
link=repr(self.wikilink),
revid=self.revid,
interpret=self.interpret,
titel=self.titel,
chartein=repr(self.chartein))
else:
return "CountryList( Link = \"{link}\" )".format(
link=repr(self.wikilink))
class CountryListError(Exception):
    """
    Base class for all errors raised while working with a CountryList
    """


class CountryListEntryError(CountryListError):
    """
    Error related to a single entry of a CountryList
    """
class CountryListUnitTest():
    """
    Test runner for the CountryList-Module, working on pages of the wiki
    """

    # Predefined test pages with the values expected after parsing
    testcases = ( { "Link": mwparser.nodes.Wikilink( "Benutzer:JogoBot/Charts/Tests/Liste der Nummer-eins-Hits in Frankreich (2015)" ),  # noqa
                    "revid": 148453827,
                    "interpret": "[[Adele (Sängerin)|Adele]]",
                    "titel": "[[Hello (Adele-Lied)|Hello]]",
                    "chartein": datetime( 2015, 10, 23 ) },
                  { "Link": mwparser.nodes.Wikilink( "Benutzer:JogoBot/Charts/Tests/Liste der Nummer-eins-Hits in Belgien (2015)", "Wallonien"),  # noqa
                    "revid": 148455281,
                    "interpret": "[[Nicky Jam]] & [[Enrique Iglesias (Sänger)|Enrique Iglesias]]",  # noqa
                    "titel": "El perdón",
                    "chartein": datetime( 2015, 9, 12 ) } )

    def __init__(self, page=None):
        """
        Store the page to test, if any

        @param page: Title of the page for a manual test; without it the
                     predefined test cases are used
        @type page: str
        """
        self.page_link = mwparser.nodes.Wikilink(page) if page else None

    def treat(self):
        """
        Run the manual test if a page was given, otherwise the automatic
        tests
        """
        runner = self.man_test if self.page_link else self.auto_test
        runner()

    def auto_test(self):
        """
        Check CountryList against every predefined test case

        Raises an Exception on the first deviation found.
        """
        for case in type(self).testcases:
            self.countrylist = CountryList(case["Link"])
            known_revid = case["revid"]

            # The known revid must be reported as unchanged, any other
            # one as changed
            if not (not self.countrylist.is_parsing_needed(known_revid) and
                    self.countrylist.is_parsing_needed(known_revid + 1)):
                raise Exception(
                    "CountryList.is_parsing_needed() does not work!")

            self.countrylist.parse()

            # Every key except the link names an attribute to compare
            for key, expected in case.items():
                if key == "Link":
                    continue

                if not expected == getattr(self.countrylist, key):
                    raise Exception(
                        key + str(getattr(self.countrylist, key)))

    def man_test(self):
        """
        Parse the page given to the constructor and print the result for
        manual checking
        """
        self.countrylist = CountryList(self.page_link)
        self.countrylist.parse()

        print(self.countrylist)
        print("Since we have no data to compare, you need to manually " +
              "check data above against given page to ensure correct " +
              "working of module!")
def main(*args):
    """
    Handling direct calls --> unittest

    With the argument "-page:<title>" the given page is parsed for a
    manual check, without it the predefined test cases are run.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args(args)

    # No page means automatic tests; the name must be bound in any case,
    # otherwise a call without "-page:" fails with a NameError
    page = None

    # Parse command line arguments
    for arg in local_args:
        if arg.startswith("-page:"):
            page = arg[len("-page:"):]

    # Call unittest-class
    test = CountryListUnitTest(page)
    test.treat()
if __name__ == "__main__":
main()

1
jogobot

@ -0,0 +1 @@
Subproject commit 9131235b7b1c976a068753f67064b9892a08e808

411
summarypage.py

@ -0,0 +1,411 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# summarypage.py
#
# Copyright 2016 Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides classes for handling Charts summary page
"""
from datetime import datetime, timedelta
# import pywikibot
import mwparserfromhell as mwparser
import jogobot
from countrylist import CountryList, CountryListError
class SummaryPage():
"""
Handles summary page related actions
"""
def __init__(self, text, force_reload=False):
    """
    Create Instance

    @param text: Page text of the summary page
    @type text: str
    @param force_reload: If True, countrylists will always be parsed,
                         regardless if needed or not
    @type force_reload: bool
    """
    # Handed on to every entry in treat()
    self.force_reload = force_reload

    # All further work happens on the parsed text
    self.wikicode = mwparser.parse(text)
def treat(self):
    """
    Update every "/Eintrag" template of the summary page
    """
    for template in self.wikicode.filter_templates(matches="/Eintrag"):
        worker = SummaryPageEntry(template,
                                  force_reload=self.force_reload)
        worker.treat()

        # The template object is recreated while treating, so the
        # original one has to be replaced in the wikicode explicitly
        updated = worker.get_entry().template
        self.wikicode.replace(template, updated)
def get_new_text(self):
    """
    Get the new page text, if writing the page is needed

    @return: New text of the page, or False if SummaryPageEntry does not
             flag any need for writing
    @rtype: str or bool
    """
    # The flag is kept on the class SummaryPageEntry, not per entry
    if not SummaryPageEntry.write_needed:
        return False

    # Convert wikicode back to string
    return str(self.wikicode)
class SummaryPageEntry():
"""
Provides a generic wrapper for summary page entry template
"""
write_needed = False
def __init__(self, entry, force_reload=False):
    """
    Constructor

    @param entry: Entry template of the summary page
    @type entry: mwparser.template
    @param force_reload: If True, countrylists will always be parsed,
                         regardless if needed or not
    @type force_reload: bool
    """
    # Force parsing of countrylist
    self.force_reload = force_reload

    # Wrapper around the existing template and an empty one for the
    # updated values
    self.old_entry = SummaryPageEntryTemplate(entry)
    self.new_entry = SummaryPageEntryTemplate()
def treat(self):
    """
    Controls parsing/update-sequence of entry

    The entry is only updated if the related country list was parsed.
    """
    self.get_countrylist()

    # Without freshly parsed data there is nothing to update
    if not self.countrylist.parsed:
        return

    self.correct_chartein()
    self.update_params()
    self.is_write_needed()
def get_countrylist(self):
    """
    Get the CountryList-Object for current entry

    If the linked list belongs to last year, the list of the current
    year is tried first, falling back to the linked one if that fails.

    @raise CountryListError: If no usable list could be loaded
    @raise SummaryPageEntryError: If no CountryList-Object is present
                                  afterwards
    """
    # Get wikilink to related countrylist
    self.get_countrylist_wikilink()

    # Get saved revision of related countrylist
    self.get_countrylist_saved_revid()

    # Make sure the attribute exists for the final check
    self.countrylist = None

    current_year = datetime.now().year
    this_year = str(current_year)
    last_year = str(current_year - 1)

    link = self.countrylist_wikilink
    linked_title = str(link.title)

    # If list is from last year, try the new years list.
    # The year is exchanged on the plain string, since replace() of the
    # wikicode title only matches complete nodes and works in place.
    switched = last_year in linked_title
    if switched:
        jogobot.output("Trying to use new years list for [[{page}]]"
                       .format(page=linked_title))
        link.title = linked_title.replace(last_year, this_year)

    try:
        self.countrylist = CountryList(link)
        self.maybe_parse_countrylist()

    except CountryListError:
        # No other list to try if the link was not changed by us
        if not switched:
            raise

        # Whole message is parenthesized so format() applies to all of it
        jogobot.output(("New years list for [[{page}]] does not " +
                        "exist, fall back to old list!").format(
                            page=link.title))

        # Fall back to last years list
        link.title = linked_title
        self.countrylist = CountryList(link)
        self.maybe_parse_countrylist()

    if not self.countrylist:
        raise SummaryPageEntryError("CountryList does not exists!")
def maybe_parse_countrylist(self):
    """
    Parse the countrylist if there is a countrylist object and parsing is
    either reported as needed for the saved revision id or forced via
    force_reload.
    """
    countrylist = self.countrylist

    # Without a countrylist object there is nothing to parse
    if not countrylist:
        return

    # The need check comes first, the force switch is only the fallback
    wanted = (countrylist.is_parsing_needed(self.countrylist_revid)
              or self.force_reload)
    if wanted:
        countrylist.parse()
def get_countrylist_wikilink(self):
    """
    Store the first wikilink found in param Liste of the old entry as
    countrylist_wikilink.

    @raise SummaryPageEntryError: if param Liste is falsy or does not
                                  contain any wikilink
    """
    # Guard: without a usable param Liste there is nothing to look at
    if not self.old_entry.Liste:
        raise SummaryPageEntryError( "Parameter Liste is not present!")

    try:
        wikilinks = self.old_entry.Liste.ifilter_wikilinks()
        # Only the first link is of interest
        self.countrylist_wikilink = next(wikilinks)
    except StopIteration:
        raise SummaryPageEntryError(
            "Parameter Liste does not contain valid wikilink!" )
def get_countrylist_saved_revid(self):
    """
    Load the saved revision id of the related countrylist from param
    Liste_Revision of the old entry into countrylist_revid.

    If the param is falsy, contains only whitespace or is not castable to
    int, the revision id is set to 0. Unusable values are ignored in the
    same way correct_chartein() ignores an unusable param Korrektur.
    """
    saved = self.old_entry.Liste_Revision

    try:
        # A whitespace-only value is truthy but strips down to "", which
        # int() rejects - that case is caught below as well
        self.countrylist_revid = int(str(saved).strip()) if saved else 0
    except ValueError:
        self.countrylist_revid = 0
def update_params(self):
    """
    Fill the params of the new entry.

    Liste, Liste_Revision, Interpret and Titel come from the countrylist
    link/object, Chartein is the corrected value calculated before, and
    Korrektur and Hervor are carried over from the old entry.
    """
    entry = self.new_entry

    entry.Liste = self.countrylist_wikilink
    entry.Liste_Revision = self.countrylist.page.latest_revision_id
    entry.Interpret = self.countrylist.interpret
    entry.Titel = self.countrylist.titel
    entry.Chartein = self._corrected_chartein

    # Params kept from the old entry; a falsy value becomes an empty string
    for name in ("Korrektur", "Hervor"):
        setattr(entry, name, getattr(self.old_entry, name) or "")
def correct_chartein(self):
    """
    Calculate the corrected value of chartein and store it in
    _corrected_chartein.

    The chartein value of the countrylist is shifted by the number of days
    given in param Korrektur of the old entry and formatted as "%d. %B"
    without leading zeros.
    """
    # No shift unless param Korrektur provides a usable number
    offset = 0

    if self.old_entry.Korrektur:
        try:
            # Use the value if it is (after stripping) castable to int
            offset = int(str(self.old_entry.Korrektur).strip())
        except ValueError:
            # Otherwise ignore it
            offset = 0

    shifted = self.countrylist.chartein + timedelta(days=offset)
    formatted = shifted.strftime("%d. %B")
    self._corrected_chartein = formatted.lstrip("0")
def is_write_needed(self):
    """
    Detect whether writing of this entry is needed and store the result in
    the class attribute write_needed.

    The class attribute is or-combined with its previous value, so once it
    is true it stays true.
    """
    cls = type(self)

    # Entry counts as changed only if it differs AND the list was parsed
    changed = ((self.old_entry != self.new_entry) and
               self.countrylist.parsed)

    cls.write_needed = changed or cls.write_needed
def get_entry(self):
    """
    Return the entry to use: the new one if the CountryList was parsed,
    the old one otherwise.
    """
    return self.new_entry if self.countrylist.parsed else self.old_entry
class SummaryPageEntryTemplate():
    """
    Interface class for mwparser.template to simply use template params as
    properties.

    Reading one of the names listed in ``params`` returns the value of that
    param of the wrapped template, or False if the template does not have
    it. Assigning to one of these names adds the value to the template.
    """

    # Class attribute: names of the template params exposed as attributes
    params = ("Liste", "Liste_Revision", "Interpret", "Titel", "Chartein",
              "Korrektur", "Hervor")

    def __init__(self, template_obj=None):
        """
        Create an instance for the given mwparser.template object of the
        summary page entry template. If no object was given, an empty
        initial template is created.

        @param template_obj: Object of summary page entry template
        @type template_obj: mwparser.template
        @raise SummaryPageEntryTemplateError: if a (truthy) object of the
                                              wrong type was given
        """
        if template_obj:
            # Only real template nodes are accepted
            if not isinstance(template_obj,
                              mwparser.nodes.template.Template):
                raise SummaryPageEntryTemplateError("Wrong type given")

            self.template = template_obj
            self.__initial = False
        else:
            self.__initial_template()
            self.__initial = True

    def __initial_template(self):
        """
        Build the initial (empty) template.
        """
        # Adjacent literals instead of a backslash continuation inside the
        # string: the text no longer depends on how the lines are indented
        self.template = next(mwparser.parse(
            "{{Portal:Charts und Popmusik/"
            "Aktuelle Nummer-eins-Hits/Eintrag"
            "|Liste=|Liste_Revision=|Interpret=|Titel=NN"
            "|Chartein=|Korrektur=|Hervor=}}").ifilter_templates())

    def __getattr__(self, name):
        """
        Special getter for template params.

        @return: Value of the param, or False if the template lacks it
        @raise AttributeError: for every name which is not a known param
        """
        if name not in type(self).params:
            # Name the attribute, so the error is traceable
            raise AttributeError(
                "{cls!r} object has no attribute {name!r}".format(
                    cls=type(self).__name__, name=name))

        if self.template.has(name):
            return self.template.get(name).value

        return False

    def __setattr__(self, name, value):
        """
        Special setter for template params; every other name is set as a
        normal instance attribute.
        """
        if name in type(self).params:
            self.__dict__['template'].add(name, value)
        else:
            object.__setattr__(self, name, value)

    def __ne__(self, other):
        """
        Compare the template param values except for Liste_Revision.

        One of the two instances must have been created as initial
        template; its params define which params are compared.

        @return: True if a param is missing in the other template or a
                 (stripped) value differs, False otherwise
        @raise SummaryPageEntryTemplateError: if none of the two instances
                                              is an initial one
        """
        # Leave comparison with foreign objects to Python's default
        # handling instead of failing on missing attributes
        if not isinstance(other, SummaryPageEntryTemplate):
            return NotImplemented

        # Detect which of the two was initialised (without template object)
        if self.__initial:
            initial, cmpto = self, other
        elif other.__initial:
            initial, cmpto = other, self
        else:
            raise SummaryPageEntryTemplateError(
                "One of the compared instances must have been initial!")

        for param in initial.template.params:
            # Use the name of the param object rather than cutting the
            # rendered text at the first "="
            name = str(param.name).strip()

            # If param is missing, writing is needed
            if not cmpto.template.has(name):
                return True

            # Do not compare list revisions (no write just for revids)
            if name == "Liste_Revision":
                continue

            # Compare other param values, if one is unequal write is needed
            if (initial.template.get(name).value.strip() !=
                    cmpto.template.get(name).value.strip()):
                return True

        # No difference found
        return False
class SummaryPageError(Exception):
    """
    Error type for problems occurring in class SummaryPage
    """
class SummaryPageEntryError(SummaryPageError):
    """
    Error type for problems occurring in class SummaryPageEntry
    """
class SummaryPageEntryTemplateError(SummaryPageError):
    """
    Error type for problems occurring in class SummaryPageEntryTemplate
    """
Loading…
Cancel
Save