345 lines
12 KiB
Python
345 lines
12 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
#
|
||
# charts.py
|
||
#
|
||
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||
#
|
||
# This program is free software; you can redistribute it and/or modify
|
||
# it under the terms of the GNU General Public License as published by
|
||
# the Free Software Foundation; either version 3 of the License, or
|
||
# (at your option) any later version.
|
||
#
|
||
# This program is distributed in the hope that it will be useful,
|
||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
# GNU General Public License for more details.
|
||
#
|
||
# You should have received a copy of the GNU General Public License
|
||
# along with this program; if not, write to the Free Software
|
||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||
# MA 02110-1301, USA.
|
||
#
|
||
#
|
||
"""
|
||
Provides a class for handling chart lists
|
||
"""
|
||
|
||
from datetime import datetime, timedelta
|
||
import locale
|
||
|
||
from isoweek import Week
|
||
|
||
import pywikibot
|
||
from pywikibot import pagegenerators
|
||
import mwparserfromhell as mwparser
|
||
|
||
# This is required for the text that is shown when you run this script
|
||
# with the parameter -help.
|
||
docuReplacements = {
|
||
'¶ms;': pagegenerators.parameterHelp
|
||
}
|
||
|
||
|
||
class Charts:
|
||
"""
|
||
Class for handling chart lists
|
||
"""
|
||
|
||
def __init__( self, generator, dry ):
|
||
"""
|
||
Constructor.
|
||
|
||
@param generator: The page generator that determines on which pages
|
||
to work.
|
||
@type generator: generator.
|
||
@param dry: If True, doesn't do any real changes, but only shows
|
||
what would have been changed.
|
||
@type dry: boolean.
|
||
"""
|
||
|
||
self.generator = generator
|
||
self.dry = dry
|
||
|
||
# Set the edit summary message
|
||
self.site = pywikibot.Site()
|
||
self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits"
|
||
|
||
# Set attribute to detect wether there was a real change
|
||
self.changed = None
|
||
|
||
# Set locale to 'de_DE.UTF-8'
|
||
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
|
||
|
||
def run(self):
|
||
"""Process each page from the generator."""
|
||
for page in self.generator:
|
||
self.treat(page)
|
||
|
||
def treat(self, page):
|
||
"""Load the given page, does some changes, and saves it."""
|
||
text = self.load(page)
|
||
if not text:
|
||
return
|
||
|
||
################################################################
|
||
# NOTE: Here you can modify the text in whatever way you want. #
|
||
################################################################
|
||
|
||
# If you find out that you do not want to edit this page, just return.
|
||
# Example: This puts the text 'Test' at the beginning of the page.
|
||
|
||
text = self.parse_overview( text )
|
||
|
||
if not self.save(text, page, self.summary, False):
|
||
pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))
|
||
|
||
def load(self, page):
|
||
"""Load the text of the given page."""
|
||
try:
|
||
# Load the page
|
||
text = page.get()
|
||
except pywikibot.NoPage:
|
||
pywikibot.output(u"Page %s does not exist; skipping."
|
||
% page.title(asLink=True))
|
||
except pywikibot.IsRedirectPage:
|
||
pywikibot.output(u"Page %s is a redirect; skipping."
|
||
% page.title(asLink=True))
|
||
else:
|
||
return text
|
||
return None
|
||
|
||
def save(self, text, page, comment=None, minorEdit=True,
|
||
botflag=True):
|
||
"""Update the given page with new text."""
|
||
# only save if something was changed (and not just revision)
|
||
if text != page.get() and self.changed:
|
||
# Show the title of the page we're working on.
|
||
# Highlight the title in purple.
|
||
pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
|
||
% page.title())
|
||
# show what was changed
|
||
pywikibot.showDiff(page.get(), text)
|
||
pywikibot.output(u'Comment: %s' % comment)
|
||
if not self.dry:
|
||
if True or pywikibot.input_yn(
|
||
u'Do you want to accept these changes?',
|
||
default=False, automatic_quit=False):
|
||
try:
|
||
page.text = text
|
||
# Save the page
|
||
page.save(summary=comment or self.comment,
|
||
minor=minorEdit, botflag=botflag)
|
||
except pywikibot.LockedPage:
|
||
pywikibot.output(u"Page %s is locked; skipping."
|
||
% page.title(asLink=True))
|
||
except pywikibot.EditConflict:
|
||
pywikibot.output(
|
||
u'Skipping %s because of edit conflict'
|
||
% (page.title()))
|
||
except pywikibot.SpamfilterError as error:
|
||
pywikibot.output(
|
||
u'Cannot change %s because of spam blacklist \
|
||
entry %s'
|
||
% (page.title(), error.url))
|
||
else:
|
||
return True
|
||
return False
|
||
|
||
def parse_charts_list( self, page, belgien=False ):
|
||
"""
|
||
Handles the parsing process
|
||
"""
|
||
|
||
# Parse charts list with mwparser
|
||
wikicode = mwparser.parse( page.text )
|
||
|
||
# Select the section "Singles"
|
||
if belgien:
|
||
singles_section = wikicode.get_sections(
|
||
matches=belgien )[0].get_sections( matches="Singles" )[0]
|
||
else:
|
||
singles_section = wikicode.get_sections( matches="Singles" )[0]
|
||
|
||
# Select the last occurence of template "Nummer-eins-Hits Zeile" in
|
||
# "Singles"-section
|
||
last_entry = singles_section.ifilter_templates(
|
||
matches="Nummer-eins-Hits Zeile" )
|
||
for last in last_entry:
|
||
pass
|
||
|
||
# Detect weather we have a date or a weeknumber for Template Param
|
||
# "Chartein"
|
||
if( last.get("Chartein").value.strip().isnumeric() ):
|
||
chartein = last.get("Chartein").value.strip()
|
||
else:
|
||
chartein = datetime.strptime( last.get("Chartein").value.strip(),
|
||
"%Y-%m-%d" )
|
||
|
||
title = last.get("Titel").value.strip()
|
||
interpret = last.get("Interpret").value.strip()
|
||
|
||
# Return collected data as tuple
|
||
return ( chartein, title, interpret )
|
||
|
||
def parse_overview( self, text ):
|
||
"""
|
||
Parses the given Charts-Overview-Page and returns the updated version
|
||
"""
|
||
|
||
# Parse text with mwparser to get access to nodes
|
||
wikicode = mwparser.parse( text )
|
||
|
||
# Get mwparser.template objects for Template "/Eintrag"
|
||
for country in wikicode.ifilter_templates( matches="/Eintrag" ):
|
||
|
||
# Get mwparser.wikilink object
|
||
for link in country.get("Liste").value.ifilter_wikilinks():
|
||
# Create Page-Object for Chartslist
|
||
list_page = pywikibot.Page( self.site, link.title )
|
||
# Only use first wikilink in Template Param "Liste"
|
||
break
|
||
|
||
# Check if we have a saved revid
|
||
if not country.has( "Liste Revision" ):
|
||
try:
|
||
country.add( "Liste Revision", 0, before="Interpret" )
|
||
except ValueError:
|
||
country.add( "Liste Revision", 0 )
|
||
|
||
# Check if saved revid is unequal to current revid
|
||
if( str( country.get( "Liste Revision" ).value ) !=
|
||
list_page.latest_revision_id ):
|
||
|
||
country = self.update_overview( country, list_page )
|
||
|
||
# If any param of any occurence of Template "/Eintrag" has changed,
|
||
# Save new version
|
||
# We need to convert mwparser-objects to string before saving
|
||
return str( wikicode )
|
||
|
||
def update_overview( self, country, list_page ): # noqa
|
||
"""
|
||
Updates the templates given in county using data from given list_page
|
||
|
||
@param country wikicode-object with Template for country
|
||
@param list_page pywikibot-page-object for list-page
|
||
|
||
@returns wikicode-object with updated Template for country
|
||
"""
|
||
|
||
# Parse linked charts list for the country
|
||
if "Wallonien" in str( country.get( "Liste" ).value ):
|
||
belgien = "Wallonie"
|
||
elif "Flandern" in str( country.get( "Liste" ).value ):
|
||
belgien = "Flandern"
|
||
else:
|
||
belgien = None
|
||
|
||
data = self.parse_charts_list( list_page, belgien )
|
||
|
||
# Update "Liste Revision" param
|
||
country.get( "Liste Revision" ).value = str(
|
||
list_page.latest_revision_id )
|
||
|
||
# If param Korrektur is present extract the value
|
||
if( country.has( "Korrektur" ) and
|
||
str( country.get( "Korrektur" ).value ).isnumeric() ):
|
||
days = int( str( country.get( "Korrektur" ).value ) )
|
||
else:
|
||
days = 0
|
||
|
||
# For some countries we have weeknumbers instead of dates
|
||
if( isinstance( data[0], str ) ):
|
||
|
||
# Slice year out of link destination
|
||
year = int( list_page.title()[-5:-1] )
|
||
|
||
# Calculate date of monday in given week and add number of
|
||
# days given in Template parameter "Wochentag" with monday
|
||
# as day (zero)
|
||
# We need double conversion since wikicode could not be casted
|
||
# as int directly
|
||
date = ( Week( year, int( data[0] ) ).monday() +
|
||
timedelta( days=days ) )
|
||
|
||
# Param Chartein contains a regular date
|
||
else:
|
||
date = data[0] + timedelta( days=days )
|
||
|
||
# Check if param "Chartein" is present
|
||
if not country.has( "Chartein" ):
|
||
try:
|
||
country.add( "Chartein", "", before="Wochentag" )
|
||
except ValueError:
|
||
country.add( "Chartein", "" )
|
||
|
||
# Check if date has changed
|
||
if( date.strftime( "%d. %B" ).lstrip( "0" ) !=
|
||
country.get("Chartein").value ):
|
||
country.get("Chartein").value = date.strftime( "%d. %B"
|
||
).lstrip( "0" )
|
||
self.changed = True
|
||
|
||
# Check if param "Titel" is present
|
||
if not country.has( "Titel" ):
|
||
country.add( "Titel", "", before="Chartein" )
|
||
|
||
# Check if Titel has changed
|
||
if( data[1] != country.get( "Titel" ).value ):
|
||
country.get( "Titel" ).value = data[1]
|
||
self.changed = True
|
||
|
||
# Check if param "Intepret" is present
|
||
if not country.has( "Interpret" ):
|
||
country.add( "Interpret", "", before="Titel" )
|
||
|
||
# Check if Interpret has changed
|
||
if( data[2] != country.get( "Interpret" ).value ):
|
||
country.get( "Interpret" ).value = data[2]
|
||
self.changed = True
|
||
|
||
|
||
def main(*args):
|
||
"""
|
||
Process command line arguments and invoke bot.
|
||
|
||
If args is an empty list, sys.argv is used.
|
||
|
||
@param args: command line arguments
|
||
@type args: list of unicode
|
||
"""
|
||
# Process global arguments to determine desired site
|
||
local_args = pywikibot.handle_args(args)
|
||
|
||
# This factory is responsible for processing command line arguments
|
||
# that are also used by other scripts and that determine on which pages
|
||
# to work on.
|
||
genFactory = pagegenerators.GeneratorFactory()
|
||
# The generator gives the pages that should be worked upon.
|
||
gen = None
|
||
# If dry is True, doesn't do any real changes, but only show
|
||
# what would have been changed.
|
||
dry = False
|
||
|
||
# Parse command line arguments
|
||
for arg in local_args:
|
||
if arg.startswith("-dry"):
|
||
dry = True
|
||
else:
|
||
genFactory.handleArg(arg)
|
||
|
||
if not gen:
|
||
gen = genFactory.getCombinedGenerator()
|
||
if gen:
|
||
# The preloading generator is responsible for downloading multiple
|
||
# pages from the wiki simultaneously.
|
||
gen = pagegenerators.PreloadingGenerator(gen)
|
||
bot = Charts(gen, dry)
|
||
bot.run()
|
||
else:
|
||
pywikibot.showHelp()
|
||
|
||
if( __name__ == "__main__" ):
|
||
main()
|