Compare commits
32 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1f70c42bdc | |||
| 43668ef8b7 | |||
| 1a06d20a75 | |||
| df67d7ca8a | |||
| eca0142f71 | |||
| d98bc5e2b5 | |||
| fec2143203 | |||
| f003b20d65 | |||
| fc96bb5b6c | |||
| def75876b0 | |||
| a7b1813d24 | |||
| 5d43234b11 | |||
| 6ac8c9298f | |||
| bdb48d6e79 | |||
| f06f04c5ef | |||
| cdd0c4c666 | |||
| ce0a06ae0a | |||
| 9225dc5f73 | |||
| 2e8b4273e7 | |||
| 41d3ca95ef | |||
| 87aee8c42a | |||
| d4ea57dae8 | |||
| 8858e81ee6 | |||
| 4a790912fc | |||
| abc30707b5 | |||
| 6ae8f4c6ad | |||
| 11bfb6807c | |||
| eedcefb215 | |||
| f0978d26e2 | |||
| 0a49b27d7f | |||
| f05889a15b | |||
| a69a3add73 |
344
charts.py
344
charts.py
@@ -1,344 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# charts.py
|
||||
#
|
||||
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
|
||||
Provides a class for handling chart lists
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
import locale
|
||||
|
||||
from isoweek import Week
|
||||
|
||||
import pywikibot
|
||||
from pywikibot import pagegenerators
|
||||
import mwparserfromhell as mwparser
|
||||
|
||||
# This is required for the text that is shown when you run this script
# with the parameter -help.
# The key is the literal placeholder "&params;" (ampersand, "params",
# semicolon).
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}
|
||||
|
||||
|
||||
class Charts:
    """
    Bot that refreshes a chart overview page from its linked chart lists.

    Every "/Eintrag" template on a treated page links a chart list. The
    last "Nummer-eins-Hits Zeile" template in the "Singles" section of that
    list is copied into the parameters "Chartein", "Titel" and "Interpret"
    of the "/Eintrag" template.
    """

    def __init__(self, generator, dry):
        """
        Constructor.

        @param generator: The page generator that determines on which pages
                          to work.
        @type generator: generator.
        @param dry: If True, doesn't do any real changes, but only shows
                    what would have been changed.
        @type dry: boolean.
        """
        self.generator = generator
        self.dry = dry

        # Site object and edit summary used for every save
        self.site = pywikibot.Site()
        self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits"

        # Becomes True as soon as a visible template parameter was changed
        self.changed = None

        # strftime( "%B" ) in update_overview depends on this locale
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

    def run(self):
        """Process each page from the generator."""
        for page in self.generator:
            self.treat(page)

    def treat(self, page):
        """Load the given page, update its entries and save it."""
        text = self.load(page)
        if not text:
            return

        text = self.parse_overview(text)

        if not self.save(text, page, self.summary, False):
            pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))

    def load(self, page):
        """
        Load the text of the given page.

        @return: the page text, or None if the page does not exist or is a
                 redirect
        """
        try:
            text = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist; skipping."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        else:
            return text
        return None

    def save(self, text, page, comment=None, minorEdit=True,
             botflag=True):
        """
        Update the given page with new text.

        @param text: new page text
        @param page: page object to write to
        @param comment: edit summary; falls back to self.summary
        @param minorEdit: passed to page.save as "minor"
        @param botflag: passed to page.save as "botflag"

        @return: True if the page was saved, otherwise False
        """
        # Only save if something visible was changed (not just the revision)
        if text == page.get() or not self.changed:
            return False

        # Show the title of the page we're working on, highlighted in purple
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        # Show what was changed
        pywikibot.showDiff(page.get(), text)
        pywikibot.output(u'Comment: %s' % comment)

        # Dry runs stop after showing the diff
        if self.dry:
            return False

        # No confirmation prompt: this bot always writes without asking
        try:
            page.text = text
            page.save(summary=comment or self.summary,
                      minor=minorEdit, botflag=botflag)
        except pywikibot.LockedPage:
            pywikibot.output(u"Page %s is locked; skipping."
                             % page.title(asLink=True))
        except pywikibot.EditConflict:
            pywikibot.output(u'Skipping %s because of edit conflict'
                             % (page.title()))
        except pywikibot.SpamfilterError as error:
            pywikibot.output(
                u'Cannot change %s because of spam blacklist entry %s'
                % (page.title(), error.url))
        else:
            return True
        return False

    def parse_charts_list(self, page, belgien=False):
        """
        Read the latest number-one entry from a chart list page.

        @param page: pywikibot page object of the chart list
        @param belgien: heading of the parent section which contains the
                        "Singles" section, or a falsy value to search the
                        whole page

        @return: tuple ( chartein, title, interpret ); chartein is a str
                 if the list stores a week number, otherwise a datetime

        @raise ValueError: if the section has no "Nummer-eins-Hits Zeile"
        """
        # Parse charts list with mwparser
        wikicode = mwparser.parse(page.text)

        # Select the section "Singles"
        if belgien:
            singles_section = wikicode.get_sections(
                matches=belgien)[0].get_sections(matches="Singles")[0]
        else:
            singles_section = wikicode.get_sections(matches="Singles")[0]

        # Select the last occurrence of template "Nummer-eins-Hits Zeile"
        # in the "Singles" section
        last = None
        for last in singles_section.ifilter_templates(
                matches="Nummer-eins-Hits Zeile"):
            pass

        if last is None:
            raise ValueError(
                "No template 'Nummer-eins-Hits Zeile' found in section "
                "'Singles'")

        # Detect whether we have a date or a week number for template
        # param "Chartein"
        chartein_raw = last.get("Chartein").value.strip()
        if chartein_raw.isnumeric():
            chartein = chartein_raw
        else:
            chartein = datetime.strptime(chartein_raw, "%Y-%m-%d")

        title = last.get("Titel").value.strip()
        interpret = last.get("Interpret").value.strip()

        # Return collected data as tuple
        return (chartein, title, interpret)

    def parse_overview(self, text):
        """
        Parses the given Charts-Overview-Page and returns the updated version

        @param text: wikitext of the overview page

        @return: wikitext with all "/Eintrag" templates brought up to date
        """
        # Parse text with mwparser to get access to nodes
        wikicode = mwparser.parse(text)

        # Get mwparser.template objects for Template "/Eintrag"
        for country in wikicode.ifilter_templates(matches="/Eintrag"):

            # Only the first wikilink in template param "Liste" is used
            link = next(country.get("Liste").value.ifilter_wikilinks(), None)

            # Without a linked list there is nothing to update from
            if link is None:
                continue

            # Create Page-Object for Chartslist
            list_page = pywikibot.Page(self.site, link.title)

            # Check if we have a saved revid
            if not country.has("Liste Revision"):
                try:
                    country.add("Liste Revision", 0, before="Interpret")
                except ValueError:
                    country.add("Liste Revision", 0)

            # Check if saved revid is unequal to current revid; both sides
            # are compared as strings since the template stores text
            saved_revid = str(country.get("Liste Revision").value).strip()
            if saved_revid != str(list_page.latest_revision_id):
                self.update_overview(country, list_page)

        # We need to convert mwparser-objects to string before saving
        return str(wikicode)

    def update_overview(self, country, list_page):  # noqa
        """
        Updates the template given in country using data from list_page

        Sets self.changed to True if "Chartein", "Titel" or "Interpret"
        got a new value.

        @param country wikicode-object with Template for country
        @param list_page pywikibot-page-object for list-page

        @returns wikicode-object with updated Template for country
        """
        # Belgian lists have one parent section per region
        liste = str(country.get("Liste").value)
        if "Wallonien" in liste:
            belgien = "Wallonie"
        elif "Flandern" in liste:
            belgien = "Flandern"
        else:
            belgien = None

        # Parse linked charts list for the country
        data = self.parse_charts_list(list_page, belgien)

        # Update "Liste Revision" param
        country.get("Liste Revision").value = str(
            list_page.latest_revision_id)

        # If param "Korrektur" is present and numeric, use it as number of
        # days to add; surrounding whitespace is ignored
        days = 0
        if country.has("Korrektur"):
            korrektur = str(country.get("Korrektur").value).strip()
            if korrektur.isnumeric():
                days = int(korrektur)

        # For some countries we have week numbers instead of dates
        if isinstance(data[0], str):
            # Slice year out of the page title (characters -5 to -2)
            year = int(list_page.title()[-5:-1])

            # Monday of the given week plus the correction in days
            date = (Week(year, int(data[0])).monday() +
                    timedelta(days=days))
        # Param Chartein contains a regular date
        else:
            date = data[0] + timedelta(days=days)

        # Check if param "Chartein" is present
        if not country.has("Chartein"):
            try:
                country.add("Chartein", "", before="Wochentag")
            except ValueError:
                country.add("Chartein", "")

        # Check if date has changed (day without leading zero, month name)
        date_text = date.strftime("%d. %B").lstrip("0")
        if date_text != country.get("Chartein").value:
            country.get("Chartein").value = date_text
            self.changed = True

        # Check if param "Titel" is present
        if not country.has("Titel"):
            country.add("Titel", "", before="Chartein")

        # Check if Titel has changed
        if data[1] != country.get("Titel").value:
            country.get("Titel").value = data[1]
            self.changed = True

        # Check if param "Interpret" is present
        if not country.has("Interpret"):
            country.add("Interpret", "", before="Titel")

        # Check if Interpret has changed
        if data[2] != country.get("Interpret").value:
            country.get("Interpret").value = data[2]
            self.changed = True

        return country
|
||||
|
||||
|
||||
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Let pywikibot consume its global arguments first
    remaining = pywikibot.handle_args(args)

    # The factory handles the page-selecting arguments shared with other
    # scripts
    factory = pagegenerators.GeneratorFactory()

    # With "-dry" the bot only shows what would have been changed
    dry = False

    for argument in remaining:
        if argument.startswith("-dry"):
            dry = True
            continue
        factory.handleArg(argument)

    pages = factory.getCombinedGenerator()

    # Without any pages to work on, show the help text instead
    if not pages:
        pywikibot.showHelp()
        return

    # Wrap the generator in a PreloadingGenerator before running the bot
    bot = Charts(pagegenerators.PreloadingGenerator(pages), dry)
    bot.run()


if __name__ == "__main__":
    main()
|
||||
203
chartsbot.py
Normal file
203
chartsbot.py
Normal file
@@ -0,0 +1,203 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# chartsbot.py
|
||||
#
|
||||
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
Bot which automatically updates a ChartsSummaryPage like
[[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
CountryLists

The following parameters are supported:

&params;

-always           If given, request for confirmation of edit is short circuited
                  Use for unattended run
"""


import locale

import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import Bot
import mwparserfromhell as mwparser

from summarypage import SummaryPage

# This is required for the text that is shown when you run this script
# with the parameter -help.
# The key must be identical to the "&params;" placeholder in the module
# docstring above.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}
|
||||
|
||||
|
||||
class ChartsBot():
    """
    Bot which automatically updates a ChartsSummaryPage like
    [[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
    CountryLists
    """

    def __init__(self, generator, always):
        """
        Constructor.

        @param generator: the page generator that determines on which pages
                          to work
        @type generator: generator
        @param always: if True, request for confirmation of edit is short
                       circuited. Use for unattended run
        @type always: bool
        """
        self.generator = generator
        self.always = always

        # Site object and edit summary used for every save
        self.site = pywikibot.Site()
        self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits"

        # Set locale to 'de_DE.UTF-8'
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

    def run(self):
        """Process each page from the generator."""
        for page in self.generator:
            self.treat(page)

    def treat(self, page):
        """Load the given page, let SummaryPage update it and save it."""
        text = self.load(page)
        if not text:
            return

        # Initialise and treat SummaryPage
        sumpage = SummaryPage(text)
        sumpage.treat()

        # Check if editing is needed and if so use the new text
        new_text = sumpage.get_new_text()
        if new_text:
            text = new_text

        if not self.save(text, page, self.summary, False):
            pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))

    def load(self, page):
        """
        Load the text of the given page.

        @return: the page text, or None if the page does not exist or is a
                 redirect
        """
        try:
            text = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist; skipping."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        else:
            return text
        return None

    def save(self, text, page, comment=None, minorEdit=True,
             botflag=True):
        """
        Update the given page with new text.

        @param text: new page text
        @param page: page object to write to
        @param comment: edit summary; falls back to self.summary
        @param minorEdit: passed to page.save as "minor"
        @param botflag: passed to page.save as "botflag"

        @return: True if the page was saved, otherwise False
        """
        # Only save if something was changed
        if text == page.get():
            return False

        # Show the title of the page we're working on, highlighted in purple
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        # Show what was changed
        pywikibot.showDiff(page.get(), text)
        pywikibot.output(u'Comment: %s' % comment)

        # Ask for confirmation unless running with -always
        if not (self.always or pywikibot.input_yn(
                u'Do you want to accept these changes?',
                default=False, automatic_quit=False)):
            return False

        try:
            page.text = text
            page.save(summary=comment or self.summary,
                      minor=minorEdit, botflag=botflag)
        except pywikibot.LockedPage:
            pywikibot.output(u"Page %s is locked; skipping."
                             % page.title(asLink=True))
        except pywikibot.EditConflict:
            pywikibot.output(u'Skipping %s because of edit conflict'
                             % (page.title()))
        except pywikibot.SpamfilterError as error:
            pywikibot.output(
                u'Cannot change %s because of spam blacklist entry %s'
                % (page.title(), error.url))
        else:
            return True
        return False
|
||||
|
||||
|
||||
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Let pywikibot consume its global arguments first
    remaining = pywikibot.handle_args(args)

    # The factory handles the page-selecting arguments shared with other
    # scripts
    factory = pagegenerators.GeneratorFactory()

    # With "-always" the bot won't ask for confirmation of edits
    always = False

    for argument in remaining:
        if argument.startswith("-always"):
            always = True
            continue
        factory.handleArg(argument)

    pages = factory.getCombinedGenerator()

    # Without any pages to work on, show the help text instead
    if not pages:
        pywikibot.showHelp()
        return

    # Wrap the generator in a PreloadingGenerator before running the bot
    bot = ChartsBot(pagegenerators.PreloadingGenerator(pages), always)
    bot.run()


if __name__ == "__main__":
    main()
|
||||
344
countrylist.py
Normal file
344
countrylist.py
Normal file
@@ -0,0 +1,344 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# countrylist.py
|
||||
#
|
||||
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
|
||||
Provides a class for handling charts list per country and year
|
||||
"""
|
||||
|
||||
import locale
|
||||
from datetime import datetime
|
||||
|
||||
from isoweek import Week
|
||||
|
||||
import pywikibot
|
||||
import mwparserfromhell as mwparser
|
||||
|
||||
|
||||
class CountryListError(Exception):
    """
    Base class for errors occurring in class CountryList
    """


class CountryListEntryError(CountryListError):
    """
    Error related to the latest entry of a CountryList, e.g. a missing
    template parameter
    """


class CountryListYearError(CountryListError):
    """
    Raised if neither the current nor the previous year is part of the
    page title of a CountryList
    """


class CountryList():
    """
    Handles charts list per country and year
    """

    def __init__(self, wikilink):
        """
        Generate new instance of class

        Checks whether the page linked by wikilink exists and determines
        the year the list belongs to.

        @param wikilink Wikilink object by mwparser linking CountryList

        @raise CountryListError if the linked page does not exist
        @raise CountryListYearError if no year could be determined
        """
        # Generate pywikibot site object
        # @TODO: Maybe store it outside???
        self.site = pywikibot.Site()

        # Set locale to 'de_DE.UTF-8'
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

        # Generate pywikibot page object
        self.page = pywikibot.Page(self.site, wikilink.title)

        # Store given wikilink for page object
        self.wikilink = wikilink

        # __init__ must not return a value, so a missing page is reported
        # with an exception
        if not self.page.exists():
            raise CountryListError(
                "Page %s does not exist" % self.page.title())

        # Initialise attributes
        for attr in ("wikicode", "entry", "chartein", "_chartein_raw",
                     "_titel_raw", "titel", "interpret", "_interpret_raw"):
            setattr(self, attr, None)

        # Try to find year
        self.find_year()

    def parsing_needed(self, revid):
        """
        Check if current revid of CountryList differs from given one

        @param revid Revid to check against

        @return True Given revid differs from current revid
                False Given revid is equal to current revid
        """
        return revid != self.page.latest_revision_id

    def find_year(self):
        """
        Try to find the year related to CountryList

        Stores the current year in self.year if it is part of the page
        title, otherwise the previous year.

        @raise CountryListYearError if neither year is part of the title
        """
        title = self.page.title()
        self.year = datetime.now().year

        # Check if year is in page title, if not try last year
        if str(self.year) not in title:
            self.year -= 1

            # If last year does not match either, give up
            if str(self.year) not in title:
                raise CountryListYearError(
                    "No year found in page title '%s'" % title)

    def parse(self):
        """
        Handles the parsing process
        """
        # Parse page with mwparser
        self.generate_wikicode()

        # Select latest entry
        self.get_latest_entry()

        # Prepare chartein, titel, interpret
        self.prepare_chartein()
        self.prepare_titel()
        self.prepare_interpret()

    def detect_belgian(self):
        """
        Detect whether the list is one of the Belgian ones

        @return "Wallonie" or "Flandern" if the region name is part of the
                link text or link title, otherwise None
        """
        link_text = str(self.wikilink.text)
        link_title = str(self.wikilink.title)

        if "Wallonien" in link_text or "Wallonien" in link_title:
            return "Wallonie"
        if "Flandern" in link_text or "Flandern" in link_title:
            return "Flandern"
        return None

    def generate_wikicode(self):
        """
        Runs mwparser on page.text to get mwparser.objects
        """
        self.wikicode = mwparser.parse(self.page.text)

    def get_latest_entry(self):
        """
        Get latest list entry template object

        @raise CountryListError if no entry template was found
        """
        # Select the section "Singles"
        # For belgian list we need to select subsection of country
        belgian = self.detect_belgian()

        if belgian:
            singles_section = self.wikicode.get_sections(
                matches=belgian)[0].get_sections(matches="Singles")[0]
        else:
            singles_section = self.wikicode.get_sections(
                matches="Singles")[0]

        # Select the last occurrence of template "Nummer-eins-Hits Zeile"
        # in "Singles"-section; the loop leaves the last one in self.entry
        for self.entry in singles_section.ifilter_templates(
                matches="Nummer-eins-Hits Zeile"):
            pass

        # Check if we have found something
        if not self.entry:
            raise CountryListError(self.page.title())

    def get_year_correction(self):
        """
        Reads value of Jahr parameter for correcting week numbers near to
        year changes

        @return 1 for "+1", -1 for "-1", otherwise 0
        """
        # If param is present return correction, otherwise zero
        if self.entry.has("Jahr"):

            # Read the value of the param, not the whole "name=value" pair
            jahr = self.entry.get("Jahr").value.strip()

            if jahr == "+1":
                return 1
            elif jahr == "-1":
                return -1

        # None or wrong parameter value
        return 0

    def prepare_chartein(self):
        """
        Checks whether self._chartein_raw is a date or a week number and
        stores the related date in self.chartein
        """
        # If self._chartein_raw is not set, get it
        if not self._chartein_raw:
            self.get_chartein_value()

        # Numeric string means week number
        if self._chartein_raw.isnumeric():
            # Use the monday of the given week in the (corrected) year
            self.chartein = Week(self.year + self.get_year_correction(),
                                 int(self._chartein_raw)).monday()
        # Complete date string present
        else:
            self.chartein = datetime.strptime(self._chartein_raw,
                                              "%Y-%m-%d")

    def get_chartein_value(self):
        """
        Reads value of chartein parameter

        @raise CountryListEntryError if param is not present
        """
        if self.entry.has("Chartein"):
            self._chartein_raw = self.entry.get("Chartein").value.strip()
        else:
            raise CountryListEntryError(
                "Template Parameter 'Chartein' is missing!")

    def prepare_titel(self):
        """
        Loads and prepares Titel of latest entry
        """
        # If self._titel_raw is not set, get it
        if not self._titel_raw:
            self.get_titel_value()

        self.titel = self._titel_raw

    def get_titel_value(self):
        """
        Reads value of Titel parameter

        @raise CountryListEntryError if param is not present
        """
        if self.entry.has("Titel"):
            self._titel_raw = self.entry.get("Titel").value.strip()
        else:
            raise CountryListEntryError(
                "Template Parameter 'Titel' is missing!")

    def prepare_interpret(self):
        """
        Loads and prepares Interpret of latest entry

        Names without a wikilink are replaced by a wikilink of the list
        page whose text or title equals the name, if there is one.
        """
        # If self._interpret_raw is not set, get it
        if not self._interpret_raw:
            self.get_interpret_value()

        # Interpret name separating words
        seps = ("feat.", "&")

        # parts alternates name, separator, name, ...; names sit on even
        # positions
        parts = [" "]
        # Positions in parts holding a name without wikilink
        indexes = []
        index = 0

        # Reconcatenate interpret names word by word
        for word in self._interpret_raw.split():

            if word in seps:
                # Count up index 2 times ( Separator + next Name )
                index += 2
                parts.append(word)
                parts.append(" ")
                continue

            # Name part: append to current name, drop unnecessary whitespace
            parts[-1] = (parts[-1] + " " + word).strip()

            # We only need to work on it, if no wikilink is present
            if index not in indexes and "[[" not in parts[-1]:
                indexes.append(index)

        # Nothing to do, just use raw
        if not indexes:
            self.interpret = self._interpret_raw
            return

        # Iterate over wikilinks of the list page and try to find related
        # links
        for wikilink in self.wikicode.ifilter_wikilinks():

            for index in indexes:

                # Check whether wikilink matches the name
                if parts[index] == wikilink.text \
                        or parts[index] == wikilink.title:

                    # Overwrite name with complete wikilink
                    parts[index] = str(wikilink)

                    # Remove index from worklist; leaving the loop right
                    # away keeps the removal during iteration safe
                    indexes.remove(index)
                    break

            # If worklist is empty, stop iterating over wikilinks
            if not indexes:
                break

        # Join the collected parts
        self.interpret = " ".join(parts)

    def get_interpret_value(self):
        """
        Reads value of Interpret parameter

        @raise CountryListEntryError if param is not present
        """
        if self.entry.has("Interpret"):
            self._interpret_raw = self.entry.get("Interpret").value.strip()
        else:
            raise CountryListEntryError(
                "Template Parameter 'Interpret' is missing!")
|
||||
346
summarypage.py
Normal file
346
summarypage.py
Normal file
@@ -0,0 +1,346 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# summarypage.py
|
||||
#
|
||||
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
|
||||
Provides classes for handling Charts summary page
|
||||
"""
|
||||
|
||||
import locale
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import pywikibot
|
||||
import mwparserfromhell as mwparser
|
||||
|
||||
from countrylist import CountryList, CountryListError
|
||||
|
||||
class SummaryPage():
    """
    Wraps the wikitext of a charts summary page and updates its entries
    """

    def __init__(self, text):
        """
        Create instance from the wikitext of the summary page

        @param text wikitext which is parsed with mwparser
        """
        self.wikicode = mwparser.parse(text)

    def treat(self):
        """
        Runs a SummaryPageEntry over every "/Eintrag" template of the page
        """
        for template in self.wikicode.filter_templates(matches="/Eintrag"):

            # Let a SummaryPageEntry do the work for this template
            worker = SummaryPageEntry(template)
            worker.treat()

            # The worker builds a new template object, so the original
            # node has to be replaced; reassigning would not be reflected
            # in the wikicode
            self.wikicode.replace(template, worker.new_entry.template)

    def get_new_text(self):
        """
        If writing page is needed, return new text, otherwise False
        """
        # Whether writing is needed is tracked on the class attribute
        if not SummaryPageEntry.write_needed:
            return False

        # Convert wikicode back to string
        return str(self.wikicode)
|
||||
|
||||
|
||||
class SummaryPageEntry():
    """
    Provides a generic wrapper for summary page entry template

    Reads one entry template (old_entry), loads the country list it links
    to and fills a freshly created template (new_entry) with current values.
    """

    # Class-wide flag: set to True as soon as one treated entry differs from
    # its rebuilt version. It is shared by all instances and never reset here.
    write_needed = False

    def __init__( self, entry ):
        """
        Constructor

        @param entry    mwparser template object of the entry to handle
        """
        self.old_entry = SummaryPageEntryTemplate( entry )
        self.new_entry = SummaryPageEntryTemplate()

    def treat( self ):
        """
        Controls parsing/update-sequence of entry
        """
        self.parse()

        # Must run before update_params(), which reads _corrected_chartein
        self.correct_chartein()

        self.update_params()

        self.is_write_needed()

    def parse( self ):
        """
        Handles parsing process of entry template

        Loads the related country list. A link to last year's list is first
        rewritten to the current year; if loading raises CountryListError the
        original link title is restored and loading is retried.

        @raises SummaryPageEntryError   if the country list is still falsy
                                        after the fallback attempt
        """

        # Get wikilink to related countrylist
        self.get_countrylist_wikilink()

        # Get saved revision of related countrylist
        self.get_countrylist_saved_revid()

        current_year = datetime.now().year
        last_year = str( current_year - 1 )

        # Snapshot the old link title as plain str. Keeping a reference to
        # the title object itself would make the snapshot change together
        # with the title, so the fallback below could never restore it.
        link_title = str( self.countrylist_wikilink.title )

        # If list is from last year, replace year
        if last_year in link_title:
            self.countrylist_wikilink.title = link_title.replace(
                last_year, str( current_year ) )

        # Try to get current years list
        try:
            self.countrylist = CountryList( self.countrylist_wikilink )

            # NOTE(review): a falsy list is silently accepted here but is an
            # error in the fallback branch below -- confirm this is intended
            if self.countrylist:
                self.countrylist.parse()

        # Maybe fallback to last years list
        except CountryListError:

            self.countrylist_wikilink.title = link_title
            self.countrylist = CountryList( self.countrylist_wikilink )

            if self.countrylist:
                self.countrylist.parse()
            else:
                raise SummaryPageEntryError( "CountryList does not exists!" )

    def get_countrylist_wikilink( self ):
        """
        Load wikilink to related countrylist

        Stores the first wikilink found in param Liste as
        self.countrylist_wikilink.

        @raises SummaryPageEntryError   if param Liste is missing/empty or
                                        does not contain a wikilink
        """
        liste = self.old_entry.Liste

        if not liste:
            raise SummaryPageEntryError( "Parameter Liste is not present!")

        try:
            self.countrylist_wikilink = next( liste.ifilter_wikilinks() )
        except StopIteration:
            # The StopIteration itself carries no useful information
            raise SummaryPageEntryError(
                "Parameter Liste does not contain valid wikilink!") from None

    def get_countrylist_saved_revid( self ):
        """
        Load saved revid of related countrylist if Param is present

        Stores the value as int in self.countrylist_revid. A missing, empty
        or non-numeric param results in 0 (same tolerance as is applied to
        param Korrektur in correct_chartein()).
        """
        revision = self.old_entry.Liste_Revision

        self.countrylist_revid = 0

        if revision:
            try:
                self.countrylist_revid = int( str( revision ).strip() )
            except ValueError:
                # Whitespace-only or otherwise not castable: treat as unset
                self.countrylist_revid = 0

    def update_params( self ):
        """
        Updates values of Parameters of template

        Values come from the loaded country list, except for Korrektur and
        Hervor which are carried over from the old entry (or set to "").
        """

        self.new_entry.Liste = self.countrylist_wikilink
        self.new_entry.Liste_Revision = self.countrylist.page.latest_revision_id
        self.new_entry.Interpret = self.countrylist.interpret
        self.new_entry.Titel = self.countrylist.titel
        self.new_entry.Chartein = self._corrected_chartein

        # Missing params are reported as False by the template wrapper, an
        # empty value is falsy as well: both end up as empty string
        self.new_entry.Korrektur = self.old_entry.Korrektur or ""
        self.new_entry.Hervor = self.old_entry.Hervor or ""

    def correct_chartein( self ):
        """
        Calculates the correct value of chartein, based on the chartein value
        from countrylist entry and param Korrektur of summarypage entry

        The result is stored as string (day without leading zero, followed by
        the month name) in self._corrected_chartein.
        """
        days = 0
        correction = self.old_entry.Korrektur

        # If param Korrektur is present extract the value
        if correction:
            # If Korrektur is (after stripping) castable to int use it,
            # otherwise ignore it
            try:
                days = int( str( correction ).strip() )
            except ValueError:
                days = 0

        # assumes countrylist.chartein supports adding a timedelta and
        # strftime (date/datetime) -- TODO confirm against CountryList
        corrected = self.countrylist.chartein + timedelta( days=days )
        self._corrected_chartein = corrected.strftime( "%d. %B" ).lstrip( "0" )

    def is_write_needed( self ):
        """
        Detects whether writing of entry is needed and stores information in
        Class-Attribute

        The flag is only ever switched on: once True it stays True.
        """
        cls = type( self )
        cls.write_needed = ( ( self.old_entry != self.new_entry )
                             or cls.write_needed )
|
||||
|
||||
|
||||
class SummaryPageEntryTemplate():
    """
    Interface class for mwparser.template to simply use template params as
    Properties

    Reading one of the names listed in ``params`` returns the value of the
    template parameter (or False if the template lacks it); assigning to one
    of them sets the template parameter.
    """

    # Names of the template parameters that are exposed as attributes
    params = ( "Liste", "Liste_Revision", "Interpret", "Titel", "Chartein",
               "Korrektur", "Hervor" )

    def __init__( self, template_obj=None ):
        """
        Creates Instance of Class for given mwparser.template object of
        SummaryPageEntry Template. If no object was given create empty one.

        @param template_obj     mwparser template object of
                                SummaryPageEntry Template

        @raises SummaryPageEntryTemplateError   if a (truthy) object of
                                                another type was given
        """

        # Check if object was given
        if template_obj:

            # Only real template nodes can be wrapped
            if not isinstance( template_obj,
                               mwparser.nodes.template.Template ):
                raise SummaryPageEntryTemplateError( "Wrong type given" )

            self.template = template_obj
            self.__initial = False

        # Otherwise initialise template
        else:
            self.__initial_template()
            self.__initial = True

    def __initial_template( self ):
        """
        Builds the initial template

        The initial template contains every param with an empty value, except
        for Titel which starts as "NN".
        """

        # Adjacent literals instead of a backslash continuation inside the
        # string, so source indentation can never leak into the wikitext
        self.template = next( mwparser.parse(
            "{{/Eintrag|Liste=|Liste_Revision=|Interpret=|Titel=NN"
            "|Chartein=|Korrektur=|Hervor=}}" ).ifilter_templates() )

    def __getattr__( self, name ):
        """
        Special getter for template params

        Only called when normal attribute lookup fails.

        @returns    value of template param ``name``, False if the template
                    does not have that param

        @raises AttributeError  if ``name`` is not one of ``params``
        """
        if name not in type( self ).params:
            raise AttributeError( "{!r} object has no attribute {!r}".format(
                type( self ).__name__, name ) )

        if self.template.has( name ):
            return self.template.get( name ).value

        return False

    def __setattr__( self, name, value ):
        """
        Special setter for template params

        Names listed in ``params`` are written to the wrapped template, every
        other name is stored as normal instance attribute.
        """
        if name in type( self ).params:
            self.__dict__[ 'template' ].add( name, value )
        else:
            object.__setattr__( self, name, value )

    def __ne__( self, other ):
        """
        Checks whether the param values of two entry templates differ

        One of the two instances must have been created without a template
        object; its params define what is compared. Param Liste_Revision is
        required to be present but its value is not compared, so a changed
        revision id alone does not count as difference.

        @returns    True if a param is missing in the other template or a
                    compared value differs, otherwise False

        @raises SummaryPageEntryTemplateError   if none of both instances
                                                was initial
        """

        # Leave comparison with foreign types to python's default handling
        if not isinstance( other, SummaryPageEntryTemplate ):
            return NotImplemented

        # Detect which of the two was initialised (without template object)
        # If none raise error
        if self.__initial:
            initial, cmpto = self, other
        elif other.__initial:
            initial, cmpto = other, self
        else:
            raise SummaryPageEntryTemplateError(
                "One of the compared instances must have been initial!" )

        # Iterate over each param
        for param in initial.template.params:

            # Take the name from the param object instead of slicing its
            # rendered text at "=" (which cuts off the last character if a
            # param has no "=")
            name = str( param.name ).strip()

            # If param is missing, writing is needed
            if not cmpto.template.has( name ):
                return True

            # Do not compare List Revisions (not just write about Revids)
            if name == "Liste_Revision":
                continue

            # Compare other param values, if one unequal write is needed
            initial_value = initial.template.get( name ).value.strip()
            cmpto_value = cmpto.template.get( name ).value.strip()

            if initial_value != cmpto_value:
                return True

        # If not returned True until now
        return False
|
||||
|
||||
|
||||
class SummaryPageError( Exception ):
    """
    Base exception for errors occurring in class SummaryPage
    """
|
||||
|
||||
class SummaryPageEntryError( SummaryPageError ):
    """
    Exception for errors occurring in class SummaryPageEntry
    """
|
||||
|
||||
class SummaryPageEntryTemplateError( SummaryPageError ):
    """
    Exception for errors occurring in class SummaryPageEntryTemplate
    """
|
||||
Reference in New Issue
Block a user