You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
522 lines
17 KiB
522 lines
17 KiB
9 years ago
|
#!/usr/bin/env python3
|
||
|
# -*- coding: utf-8 -*-
|
||
|
#
|
||
|
# charts.py
|
||
|
#
|
||
|
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||
|
#
|
||
|
# This program is free software; you can redistribute it and/or modify
|
||
|
# it under the terms of the GNU General Public License as published by
|
||
|
# the Free Software Foundation; either version 3 of the License, or
|
||
|
# (at your option) any later version.
|
||
|
#
|
||
|
# This program is distributed in the hope that it will be useful,
|
||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
# GNU General Public License for more details.
|
||
|
#
|
||
|
# You should have received a copy of the GNU General Public License
|
||
|
# along with this program; if not, write to the Free Software
|
||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||
|
# MA 02110-1301, USA.
|
||
|
#
|
||
|
#
|
||
|
"""
|
||
9 years ago
|
Bot which automatically updates a ChartsSummaryPage like
|
||
|
[[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
|
||
|
CountryLists
|
||
|
|
||
|
The following parameters are supported:
|
||
|
|
||
|
&params;
|
||
|
|
||
|
-always If given, request for confirmation of edit is short circuited
|
||
|
Use for unattended run
|
||
9 years ago
|
"""
|
||
|
|
||
|
|
||
9 years ago
|
import locale
# Required by parse_ref_list / calculate_chartein (datetime.now, strptime,
# timedelta arithmetic); was missing from the original import block.
from datetime import datetime, timedelta

import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import Bot

import mwparserfromhell as mwparser

# TODO(review): `Week` (used in ChartsBot.calculate_chartein) is still
# undefined here — presumably `from isoweek import Week` (third-party);
# confirm the dependency before adding it.

from summarypage import SummaryPage
|
||
|
|
||
9 years ago
|
# This is required for the text that is shown when you run this script
# with the parameter -help: pywikibot substitutes the '&params;' marker in
# the module docstring with the page-generator parameter help text.
docuReplacements = {
    # Fixed mojibake: the key was garbled to '¶ms;' (HTML entity &para;);
    # the conventional pywikibot marker is '&params;'.
    '&params;': pagegenerators.parameterHelp
}
|
||
|
|
||
9 years ago
|
|
||
9 years ago
|
class ChartsBot( ):
    """
    Bot which automatically updates a ChartsSummaryPage like
    [[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
    CountryLists.
    """

    def __init__( self, generator, always ):
        """
        Constructor.

        @param generator: the page generator that determines on which pages
                          to work
        @type generator: generator
        @param always: if True, request for confirmation of edit is short
                       circuited. Use for unattended run
        @type always: bool
        """

        self.generator = generator
        self.always = always

        # Set the edit summary message (German: "Bot: update overview page
        # of number-one hits").  Runtime string — do not translate.
        self.site = pywikibot.Site()
        self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits"

        # Set locale to 'de_DE.UTF-8' so strftime("%B") in entry_changed
        # produces German month names.
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

    def run(self):
        """Process each page from the generator."""
        for page in self.generator:
            self.treat(page)

    def treat(self, page):
        """Load the given page, does some changes, and saves it."""
        text = self.load(page)
        if not text:
            return

        ################################################################
        # NOTE: Here you can modify the text in whatever way you want. #
        ################################################################

        # Initialise and treat SummaryPageWorker
        sumpage = SummaryPage( text )
        sumpage.treat()

        # Check if editing is needed and if so get new text.
        # NOTE(review): get_new_text() is called twice; presumably cheap,
        # but a single call bound to a local would be safer — confirm.
        if sumpage.get_new_text():
            text = sumpage.get_new_text()

        if not self.save(text, page, self.summary, False):
            pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))

    def load(self, page):
        """Load the text of the given page, or None if it is unusable."""
        try:
            # Load the page
            text = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist; skipping."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        else:
            return text
        return None

    def save(self, text, page, comment=None, minorEdit=True,
             botflag=True):
        """
        Update the given page with new text.

        Shows a colored diff, asks for confirmation (unless self.always)
        and returns True only when the page was actually saved.
        """
        # only save if something was changed (and not just revision)
        if text != page.get():
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            # show what was changed
            pywikibot.showDiff(page.get(), text)
            pywikibot.output(u'Comment: %s' % comment)
            # NOTE(review): self.dry is never assigned in this class — this
            # line raises AttributeError unless a base class provides it
            # (the file imports pywikibot.bot.Bot but does not inherit it).
            if not self.dry:
                if self.always or pywikibot.input_yn(
                        u'Do you want to accept these changes?',
                        default=False, automatic_quit=False):
                    try:
                        page.text = text
                        # Save the page.
                        # NOTE(review): self.comment is never defined either;
                        # only reached when comment is falsy — confirm.
                        page.save(summary=comment or self.comment,
                                  minor=minorEdit, botflag=botflag)
                    except pywikibot.LockedPage:
                        pywikibot.output(u"Page %s is locked; skipping."
                                         % page.title(asLink=True))
                    except pywikibot.EditConflict:
                        pywikibot.output(
                            u'Skipping %s because of edit conflict'
                            % (page.title()))
                    except pywikibot.SpamfilterError as error:
                        pywikibot.output(
                            u'Cannot change %s because of spam blacklist \
entry %s'
                            % (page.title(), error.url))
                    else:
                        return True
        return False

    def parse_overview( self, text ):
        """
        Parses the given Charts-Overview-Page and returns the updated version
        as wikitext (str).
        """

        # Parse text with mwparser to get access to nodes
        wikicode = mwparser.parse( text )

        # Get mwparser.template objects for Template "/Eintrag"
        for entry in wikicode.ifilter_templates( matches="/Eintrag" ):

            # Maybe complete entry template (adds missing params in place)
            self.entry_template_complete( entry )

            # Extract saved revision_id
            ref_list_revid = int(str( entry.get( "Liste Revision" ).value ))

            # Parse ref list; returns False when the list is unchanged
            data = self.parse_ref_list( self.get_entry_ref_list( entry ),
                                        ref_list_revid )

            # Check that parsing was not short circuited
            if data:
                data = self.calculate_chartein( entry, data )

                entry = self.entry_changed( entry, data )

            #~ # Check if saved revid is unequal to current revid
            #~ if( str( country.get( "Liste Revision" ).value ) !=
                #~ list_page.latest_revision_id ):
            #~
                #~ country = self.update_overview( country, list_page )

        # If any param of any occurence of Template "/Eintrag" has changed,
        # Save new version
        # We need to convert mwparser-objects to string before saving
        return str( wikicode )

    def parse_ref_list( self, ref_list_link , ref_list_revid):
        """
        Handles the parsing process of ref list.

        Returns False when the referenced list has not changed, otherwise a
        tuple ( chartein, title, interpret, latest_revision_id ).
        """
        # Create Page-Object for Chartslist
        ref_list_page = pywikibot.Page( self.site, ref_list_link.title )

        # Short circuit if current revision is same than saved
        if( ref_list_page.latest_revision_id == ref_list_revid ):
            return False

        # Try to find list related year: if the current year is not part of
        # the list title, assume last year's list.
        year = datetime.now().year
        if str( year ) not in ref_list_page.title():
            year = year - 1

        # Parse charts list with mwparser
        wikicode = mwparser.parse( ref_list_page.text )

        # Detect if we are on a belgian list (they have per-region sections)
        belgian = self.detect_belgium( ref_list_link )

        # Select the section "Singles"
        # For belgian list we need to select subsection of country
        if belgian:
            singles_section = wikicode.get_sections(
                matches=belgian )[0].get_sections( matches="Singles" )[0]
        else:
            singles_section = wikicode.get_sections( matches="Singles" )[0]

        # Select the last occurence of template "Nummer-eins-Hits Zeile" in
        # "Singles"-section
        entries = singles_section.filter_templates(
            matches="Nummer-eins-Hits Zeile" )

        # Check, whether we found some entries
        if not entries:
            # NOTE(review): 'page' is undefined in this scope — presumably
            # ref_list_page was meant; this raises NameError as written.
            raise ChartsListError( page.title() )
        else:
            last = entries[-1]

        # Detect whether we have a date or a weeknumber for Template Param
        # "Chartein"
        if( last.get("Chartein").value.strip().isnumeric() ):
            chartein = last.get("Chartein").value.strip()

            # Maybe there is a year correction for weeknumber
            if last.has( "Jahr" ):
                if last.get("Jahr").value.strip() == "+1":
                    year = year + 1
                elif last.get("Jahr").value.strip() == "-1":
                    year = year - 1

            chartein = ( year, chartein )
        else:
            chartein = datetime.strptime( last.get("Chartein").value.strip(),
                                          "%Y-%m-%d" )

        title = last.get("Titel").value.strip()

        # Work with interpret value to add missing links
        # Split them in words
        interpret = last.get("Interpret").value.strip()
        interpret_words = interpret.split()

        # Interpret name concatenating words (separators between artists)
        interpret_cat = ( "feat.", "&" )

        # Create empty list for concatenated interpret names
        interpreten_raw = [ " ", ]
        indexes = list()
        index = 0

        # Reconcatenate interpret names; 'indexes' collects positions of
        # names that are not already wikilinked.
        for word in interpret_words:

            if word not in interpret_cat:
                interpreten_raw[-1] = (interpreten_raw[-1] + " " + word).strip()

                if index not in indexes and "[[" not in interpreten_raw[-1]:
                    indexes.append( index )
            else:
                index += 2
                interpreten_raw.append( word )
                interpreten_raw.append( " " )

        # Copy raw list to overwrite.
        # NOTE(review): this is an alias, not a copy — mutations of
        # interpreten_raw below are visible through 'interpreten' too
        # (which the join at the end relies on).
        interpreten = interpreten_raw

        # Check if we have indexes without links
        if indexes:

            print( ref_list_page.title() )

            # Iterate over wikilinks of refpage and try to find related links
            for wikilink in wikicode.ifilter_wikilinks():

                # Iterate over interpret names to check whether wikilink
                # matches.
                # NOTE(review): indexes is mutated (remove) while being
                # iterated; the immediate 'break' limits the damage — verify.
                for index in indexes:

                    if interpreten_raw[index] == wikilink.text \
                       or interpreten_raw[index] == wikilink.title:

                        interpreten_raw[index] = str( wikilink )
                        indexes.remove( index )
                        break

                if not indexes:
                    break

        # Join the collected links
        sep = " "
        interpret = sep.join( interpreten )

        # Return collected data as tuple
        return ( chartein, title, interpret, ref_list_page.latest_revision_id )

    def detect_belgium( self, ref_list_link ):
        """
        Detect whether the current entry is one of the belgian regions;
        returns the section name ("Wallonie"/"Flandern") or None.
        """
        # Parse linked charts list for the country
        if "Wallonien" in str( ref_list_link.text ) \
           or "Wallonien" in str( ref_list_link.title):
            return "Wallonie"
        elif "Flandern" in str( ref_list_link.text ) \
             or "Flandern" in str( ref_list_link.title):
            return "Flandern"
        else:
            return None

    def update_overview( self, country, list_page ):  # noqa
        """
        Updates the templates given in country using data from given list_page

        @param country wikicode-object with Template for country
        @param list_page pywikibot-page-object for list-page

        @returns wikicode-object with updated Template for country

        NOTE(review): dead/unfinished code — ref_list_link and belgien are
        undefined here and self.parse_charts_list does not exist; only
        referenced from commented-out code in parse_overview.
        """

        data = self.parse_charts_list( ref_list_link, belgien )

    def get_entry_ref_list( self, entry ):
        """
        Return the wikilink to the charts list for the given entry,
        bumped to the current year when such a list page exists.
        """
        # Get mwparser.wikilink object
        link = next( entry.get("Liste").value.ifilter_wikilinks() )

        year = datetime.now().year

        old_link_title = link.title

        # If year in link is lower than current year replace it
        link.title = str(link.title).replace( str( year-1 ), str( year ) )

        if ( pywikibot.Page( self.site, link.title).exists() ):
            return link
        else:
            # New year's list does not exist (yet) — restore old title
            link.title = old_link_title
            return link

    def calculate_chartein( self, entry, data ):
        """
        Calculates the correct value for param chartein in entry;
        returns data with data[0] replaced by the computed date.
        """
        # If param Korrektur is present extract the value
        if( entry.has( "Korrektur" ) ):
            # If Korrektur is (after stripping) castable to int use it
            try:
                days = int( str( entry.get( "Korrektur" ).value ).strip() )
            # Otherwise, if casting fails, ignore it
            except ValueError:
                days = 0
        else:
            days = 0

        # For some countries we have weeknumbers instead of dates
        if( isinstance( data[0], tuple ) ):

            # Calculate date of monday in given week and add number of
            # days given in Template parameter "Korrektur" with monday
            # as day (zero).
            # NOTE(review): Week is not imported in this file — presumably
            # isoweek.Week; confirm and add the import.
            date = ( Week( data[0][0], int( data[0][1] ) ).monday() +
                     timedelta( days=days ) )

        # Param Chartein contains a regular date
        else:
            date = data[0] + timedelta( days=days )

        return (date,)+data[1:]

    def entry_template_complete( self, entry ):
        """
        Checks whether given entry template is complete, otherwise adds
        missing params (in reverse order, so 'before=' anchors exist).
        """

        # Check if param "Chartein" is present
        if not entry.has( "Chartein" ):
            try:
                entry.add( "Chartein", "", before="Korrektur" )
            except ValueError:
                # "Korrektur" param absent — append at the end instead
                entry.add( "Chartein", "" )

        # Check if param "Titel" is present
        if not entry.has( "Titel" ):
            entry.add( "Titel", "", before="Chartein" )

        # Check if param "Interpret" is present
        if not entry.has( "Interpret" ):
            entry.add( "Interpret", "", before="Titel" )

        # Check if we have a saved revid
        if not entry.has( "Liste Revision" ):
            entry.add( "Liste Revision", 0, before="Interpret" )

        return entry

    def entry_changed( self, entry, data ):
        """
        Checks whether given entry has changed and writes the new values
        ( chartein, title, interpret, revid ) into the template params.
        """

        # Check if date has changed; "%d. %B" + lstrip("0") gives e.g.
        # "1. Januar" (German month names via the locale set in __init__).
        if( data[0].strftime( "%d. %B" ).lstrip( "0" ) !=
                entry.get("Chartein").value ):

            entry.get("Chartein").value = data[0].strftime( "%d. %B"
                                                            ).lstrip( "0" )

        # Check if Titel has changed
        if( data[1] != entry.get( "Titel" ).value ):
            entry.get( "Titel" ).value = data[1]

        # Check if Interpret has changed
        if( data[2] != entry.get( "Interpret" ).value ):
            entry.get( "Interpret" ).value = data[2]

        # Update "Liste Revision" param unconditionally
        entry.get( "Liste Revision" ).value = str(
            data[3] )

        return entry
|
||
9 years ago
|
|
||
9 years ago
|
class ChartsError( Exception ):
    """Base class for all errors raised by the Charts module."""

    def __init__( self, message=None ):
        """Store the given message, falling back to a generic default."""
        default = "An Error occured while executing a Charts action"
        self.message = message if message else default

    def __str__( self ):
        """Return the stored error message."""
        return self.message
|
||
|
|
||
|
|
||
|
class ChartsListError( ChartsError ):
    """
    Raised when a given ChartsListPage does not contain valid entries.
    """

    def __init__( self, givenPage ):
        """
        Build the error message for the offending page.

        @param givenPage: title (or page object) of the invalid charts list
        """
        # Fixed typo in the original message ("CharstListPage").
        message = ( "Given ChartsListPage ('{given}') does not contain "
                    "valid entries".format( given=givenPage ) )

        super().__init__( message )
|
||
|
|
||
|
|
||
9 years ago
|
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Let pywikibot consume its global options (site selection etc.) first.
    local_args = pywikibot.handle_args(args)

    # Factory shared with other scripts: it turns page-selection options
    # into a page generator telling us which pages to work on.
    gen_factory = pagegenerators.GeneratorFactory()

    page_gen = None

    # When True the bot edits without asking for confirmation (automode).
    always = False

    # Everything pywikibot did not consume is either our own -always flag
    # or a generator option for the factory.
    for option in local_args:
        if option.startswith("-always"):
            always = True
        else:
            gen_factory.handleArg(option)

    if not page_gen:
        page_gen = gen_factory.getCombinedGenerator()

    if page_gen:
        # Fetch several pages per API request instead of one at a time.
        page_gen = pagegenerators.PreloadingGenerator(page_gen)
        bot = ChartsBot(page_gen, always)
        bot.run()
    else:
        pywikibot.showHelp()
|
||
9 years ago
|
|
||
|
# Run the bot only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|