36 Commits

Author SHA1 Message Date
06ffb37d07 Merge branch 'summarypage-module' 2015-11-20 21:02:03 +01:00
f360deb1a0 SummaryPage-Modul: Use full path for embeding entry template since relative paths don't work on already embeded pages 2015-11-20 21:00:37 +01:00
03857c0236 Merge branch 'countrylist-module' 2015-11-20 14:45:08 +01:00
3864c9013c CountryList-Module: Since we have multiple categories in some countrys we need to select the first wrapping template 2015-11-20 14:44:26 +01:00
1f70c42bdc Merge branch 'chartsbot-module'
Reflect new structure
2015-11-13 13:36:52 +01:00
43668ef8b7 ChartsBot-Module: Remove old code which is replaced by SummaryPage- and CountryList-Module 2015-11-13 13:34:42 +01:00
1a06d20a75 ChartsBot-Module: Move charts.py to chartsbot.py to name like class
Several updates of code to use new modul structure
2015-11-13 13:34:22 +01:00
df67d7ca8a Merge changes from branch 'countrylist-module'
+ Fixed syntax error
2015-11-13 13:26:55 +01:00
eca0142f71 CountryList-Module: Add Exception-Handling classes 2015-11-13 12:22:17 +01:00
d98bc5e2b5 Merge branch 'summarypage-module' 2015-11-13 12:21:15 +01:00
fec2143203 SummaryPage-Module: Create Exception-Classes 2015-11-13 12:18:02 +01:00
f003b20d65 SummaryPage-Module: Implement method for returning new page text if editing is needed 2015-11-13 11:03:42 +01:00
fc96bb5b6c SummaryPage-Module: Implement class handling parsing/editing process of whole page 2015-11-13 11:02:16 +01:00
def75876b0 SummaryPage-Module: Add method for controling parsing/update-sequence of entry 2015-11-13 11:00:36 +01:00
a7b1813d24 SummaryPage-Module: Add methods for working with results from parsing in entry 2015-11-13 10:57:41 +01:00
5d43234b11 SummaryPage-Module: Add parse-method to Entry-Class which uses CountryList-Module 2015-11-13 10:54:34 +01:00
6ac8c9298f SummaryPage-Module: Add class for handling entries 2015-11-13 10:52:18 +01:00
bdb48d6e79 SummaryPage-Module: Add method for notequal-comparation to Template-Interface
to detect whether writing is needed
2015-11-13 10:49:25 +01:00
f06f04c5ef SummaryPage-Module: Hook into getattr and setattr to directly use
template params as object attributes
2015-11-13 10:47:23 +01:00
cdd0c4c666 SummaryPage-Module: Implement Class as inteface to Entry-Template 2015-11-13 10:45:59 +01:00
ce0a06ae0a SummaryPage-Module: Add new module for handling summarypage related actions 2015-11-13 10:31:42 +01:00
9225dc5f73 Merge branch 'countrylist-module'
Provides a class for handling CountryLists
2015-11-13 10:24:49 +01:00
2e8b4273e7 CountryList-Module: Implement parse-method which handles the parsing sequence 2015-11-13 10:18:09 +01:00
41d3ca95ef CountryList-Module: Implement methods for handling Interpret-Parameter
Including searching for missing links
2015-11-13 10:17:44 +01:00
87aee8c42a CountryList-Module: Implment methods for handling Titel-Parameter 2015-11-13 10:17:32 +01:00
d4ea57dae8 CountryList-Module: Implement methods for handling Chartein-Date 2015-11-13 10:17:25 +01:00
8858e81ee6 CountryList-Module: Implement methods to get the latest entry of list 2015-11-13 10:17:12 +01:00
4a790912fc CountryList-Module: Implement method for detecting year related to list 2015-11-13 10:17:07 +01:00
abc30707b5 CountryList-Module: Implement method for checking if parsing is needed 2015-11-13 10:16:56 +01:00
6ae8f4c6ad CountryList-Module: Implement basic init method 2015-11-13 10:16:49 +01:00
11bfb6807c CountryList-Module: Create new class CountryList to move code for handling country list in separate class 2015-11-13 10:16:17 +01:00
eedcefb215 Add functionality to search for links for nonlinked interprets 2015-11-08 20:48:34 +01:00
f0978d26e2 Automatic year change detection 2015-11-08 19:57:21 +01:00
0a49b27d7f Use param "-always" to run without any interactiv requests 2015-11-07 16:53:34 +01:00
f05889a15b Restucture code to make it better maintainable
Remove unnecessary whitespace
2015-11-07 16:50:09 +01:00
a69a3add73 Prepare exception handling 2015-11-05 17:33:09 +01:00
4 changed files with 901 additions and 344 deletions

344
charts.py
View File

@@ -1,344 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# charts.py
#
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides a class for handling chart lists
"""
from datetime import datetime, timedelta
import locale
from isoweek import Week
import pywikibot
from pywikibot import pagegenerators
import mwparserfromhell as mwparser
# Placeholder substitutions applied to the module help text that is printed
# when the script is started with the parameter -help.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
class Charts:
    """
    Class for handling chart lists.

    Iterates over the pages of a generator, treats each as a charts overview
    page and refreshes its "/Eintrag" templates with the data of the last
    "Nummer-eins-Hits Zeile" entry found on the linked chart list.
    """

    def __init__(self, generator, dry):
        """
        Constructor.

        @param generator: The page generator that determines on which pages
                          to work.
        @type generator: generator.
        @param dry: If True, doesn't do any real changes, but only shows
                    what would have been changed.
        @type dry: boolean.
        """
        self.generator = generator
        self.dry = dry

        # Set the edit summary message
        self.site = pywikibot.Site()
        self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits"

        # Set to True once a visible parameter (not only the stored revision
        # id) was modified; save() refuses to write otherwise
        self.changed = None

        # "%B" in update_overview() must render month names for this locale
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

    def run(self):
        """Process each page from the generator."""
        for page in self.generator:
            self.treat(page)

    def treat(self, page):
        """Load the given page, update its entries, and save it."""
        text = self.load(page)
        if not text:
            return

        # The flag describes the current page only; without the reset a
        # change on an earlier page would force saving all later pages
        self.changed = False

        text = self.parse_overview(text)

        if not self.save(text, page, self.summary, False):
            pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))

    def load(self, page):
        """
        Load the text of the given page.

        @returns The page text, or None if the page is missing or a redirect
        """
        try:
            text = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist; skipping."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        else:
            return text
        return None

    def save(self, text, page, comment=None, minorEdit=True,
             botflag=True):
        """
        Update the given page with new text.

        @returns True if the page was saved, otherwise False
        """
        old_text = page.get()

        # only save if something was changed (and not just revision)
        if text == old_text or not self.changed:
            return False

        # Show the title of the page we're working on.
        # Highlight the title in purple.
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        # show what was changed
        pywikibot.showDiff(old_text, text)
        pywikibot.output(u'Comment: %s' % comment)

        if self.dry:
            return False

        # No confirmation is requested: the interactive prompt was already
        # short-circuited ("if True or ...") before this cleanup
        try:
            page.text = text
            # Fall back to the bot's default summary; the attribute is
            # called "summary" ("self.comment" never existed)
            page.save(summary=comment or self.summary,
                      minor=minorEdit, botflag=botflag)
        except pywikibot.LockedPage:
            pywikibot.output(u"Page %s is locked; skipping."
                             % page.title(asLink=True))
        except pywikibot.EditConflict:
            pywikibot.output(
                u'Skipping %s because of edit conflict'
                % (page.title()))
        except pywikibot.SpamfilterError as error:
            pywikibot.output(
                u'Cannot change %s because of spam blacklist entry %s'
                % (page.title(), error.url))
        else:
            return True
        return False

    def parse_charts_list(self, page, belgien=False):
        """
        Handles the parsing process of one chart list

        @param page     page object of the chart list (its .text is parsed)
        @param belgien  name of the section to descend into before looking
                        for "Singles", or a false value to search the
                        whole page
        @returns tuple ( chartein, title, interpret ); chartein is a string
                 when the list stores a week number, otherwise a datetime
        @raises ValueError if no "Nummer-eins-Hits Zeile" template is found
        """
        # Parse charts list with mwparser
        wikicode = mwparser.parse(page.text)

        # Select the section "Singles"
        if belgien:
            singles_section = wikicode.get_sections(
                matches=belgien)[0].get_sections(matches="Singles")[0]
        else:
            singles_section = wikicode.get_sections(matches="Singles")[0]

        # Select the last occurence of template "Nummer-eins-Hits Zeile" in
        # "Singles"-section
        last = None
        for last in singles_section.ifilter_templates(
                matches="Nummer-eins-Hits Zeile"):
            pass

        if last is None:
            raise ValueError(
                "No template 'Nummer-eins-Hits Zeile' found in section "
                "'Singles'")

        # Detect whether we have a date or a weeknumber for Template Param
        # "Chartein"
        chartein_raw = last.get("Chartein").value.strip()
        if chartein_raw.isnumeric():
            chartein = chartein_raw
        else:
            chartein = datetime.strptime(chartein_raw, "%Y-%m-%d")

        title = last.get("Titel").value.strip()
        interpret = last.get("Interpret").value.strip()

        # Return collected data as tuple
        return (chartein, title, interpret)

    def parse_overview(self, text):
        """
        Parses the given Charts-Overview-Page and returns the updated version

        @param text  wikitext of the overview page
        @returns wikitext with refreshed "/Eintrag" templates
        """
        # Parse text with mwparser to get access to nodes
        wikicode = mwparser.parse(text)

        # Get mwparser.template objects for Template "/Eintrag"
        for country in wikicode.ifilter_templates(matches="/Eintrag"):

            # Only use first wikilink in Template Param "Liste"
            link = next(country.get("Liste").value.ifilter_wikilinks(), None)

            # Without a link there is no list to read; skipping avoids
            # reusing the list page of the previous entry
            if link is None:
                continue

            # Create Page-Object for Chartslist
            list_page = pywikibot.Page(self.site, link.title)

            # Check if we have a saved revid
            if not country.has("Liste Revision"):
                try:
                    country.add("Liste Revision", 0, before="Interpret")
                except ValueError:
                    country.add("Liste Revision", 0)

            # Check if saved revid is unequal to current revid; both sides
            # are compared as strings since latest_revision_id is an int
            saved_revid = str(country.get("Liste Revision").value).strip()
            if saved_revid != str(list_page.latest_revision_id):
                self.update_overview(country, list_page)

        # We need to convert mwparser-objects to string before saving
        return str(wikicode)

    def update_overview(self, country, list_page):  # noqa
        """
        Updates the templates given in county using data from given list_page

        @param country      wikicode-object with Template for country
        @param list_page    pywikibot-page-object for list-page

        @returns            wikicode-object with updated Template for country
        """
        # Parse linked charts list for the country
        liste = str(country.get("Liste").value)
        if "Wallonien" in liste:
            belgien = "Wallonie"
        elif "Flandern" in liste:
            belgien = "Flandern"
        else:
            belgien = None

        chartein, title, interpret = self.parse_charts_list(
            list_page, belgien)

        # Update "Liste Revision" param
        country.get("Liste Revision").value = str(
            list_page.latest_revision_id)

        # If param Korrektur is present and numeric use it as day offset
        days = 0
        if country.has("Korrektur"):
            korrektur = str(country.get("Korrektur").value)
            if korrektur.isnumeric():
                days = int(korrektur)

        # For some countries we have weeknumbers instead of dates
        if isinstance(chartein, str):
            # Slice year out of link destination
            year = int(list_page.title()[-5:-1])

            # Monday of the given week plus the offset from "Korrektur"
            date = Week(year, int(chartein)).monday() + timedelta(days=days)
        # Param Chartein contains a regular date
        else:
            date = chartein + timedelta(days=days)

        # Check if param "Chartein" is present
        if not country.has("Chartein"):
            try:
                country.add("Chartein", "", before="Wochentag")
            except ValueError:
                country.add("Chartein", "")

        # Check if date has changed
        formatted = date.strftime("%d. %B").lstrip("0")
        if formatted != country.get("Chartein").value:
            country.get("Chartein").value = formatted
            self.changed = True

        # Check if param "Titel" is present
        if not country.has("Titel"):
            country.add("Titel", "", before="Chartein")

        # Check if Titel has changed
        if title != country.get("Titel").value:
            country.get("Titel").value = title
            self.changed = True

        # Check if param "Interpret" is present
        if not country.has("Interpret"):
            country.add("Interpret", "", before="Titel")

        # Check if Interpret has changed
        if interpret != country.get("Interpret").value:
            country.get("Interpret").value = interpret
            self.changed = True

        return country
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Global arguments (e.g. the target site) are consumed by pywikibot
    remaining_args = pywikibot.handle_args(args)

    # The factory collects the page selecting arguments shared with other
    # scripts
    factory = pagegenerators.GeneratorFactory()

    # With -dry nothing is written, changes are only shown
    dry = False
    for argument in remaining_args:
        if argument.startswith("-dry"):
            dry = True
        else:
            factory.handleArg(argument)

    pages = factory.getCombinedGenerator()
    if not pages:
        # No pages selected, explain usage instead
        pywikibot.showHelp()
        return

    # The preloading generator is responsible for downloading multiple
    # pages from the wiki simultaneously.
    bot = Charts(pagegenerators.PreloadingGenerator(pages), dry)
    bot.run()


if __name__ == "__main__":
    main()

203
chartsbot.py Normal file
View File

@@ -0,0 +1,203 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# chartsbot.py
#
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Bot which automatically updates a ChartsSummaryPage like
[[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
CountryLists
The following parameters are supported:
&params;
-always If given, request for confirmation of edit is short circuited
Use for unattended run
"""
import locale
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import Bot
import mwparserfromhell as mwparser
from summarypage import SummaryPage
# Placeholder substitutions applied to the module help text that is printed
# when the script is started with the parameter -help.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
class ChartsBot():
    """
    Bot which automatically updates a ChartsSummaryPage like
    [[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
    CountryLists
    """

    def __init__(self, generator, always):
        """
        Constructor.

        @param generator: the page generator that determines on which pages
                          to work
        @type generator: generator
        @param always: if True, request for confirmation of edit is short
                       circuited. Use for unattended run
        @type always: bool
        """
        self.generator = generator
        self.always = always

        # Set the edit summary message
        self.site = pywikibot.Site()
        self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits"

        # Set locale to 'de_DE.UTF-8'
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

    def run(self):
        """Process each page from the generator."""
        for page in self.generator:
            self.treat(page)

    def treat(self, page):
        """Load the given page, let SummaryPage update it, and save it."""
        text = self.load(page)
        if not text:
            return

        # Initialise and treat SummaryPageWorker
        sumpage = SummaryPage(text)
        sumpage.treat()

        # Check if editing is needed and if so get new text; the result is
        # fetched once and reused
        new_text = sumpage.get_new_text()
        if new_text:
            text = new_text

        if not self.save(text, page, self.summary, False):
            pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))

    def load(self, page):
        """
        Load the text of the given page.

        @returns The page text, or None if the page is missing or a redirect
        """
        try:
            text = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist; skipping."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        else:
            return text
        return None

    def save(self, text, page, comment=None, minorEdit=True,
             botflag=True):
        """
        Update the given page with new text.

        Unless self.always is set the user is asked to confirm the edit.

        @returns True if the page was saved, otherwise False
        """
        old_text = page.get()

        # only save if something was changed
        if text == old_text:
            return False

        # Show the title of the page we're working on.
        # Highlight the title in purple.
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        # show what was changed
        pywikibot.showDiff(old_text, text)
        pywikibot.output(u'Comment: %s' % comment)

        if not (self.always or pywikibot.input_yn(
                u'Do you want to accept these changes?',
                default=False, automatic_quit=False)):
            return False

        try:
            page.text = text
            # Fall back to the bot's default summary; the attribute is
            # called "summary" ("self.comment" never existed)
            page.save(summary=comment or self.summary,
                      minor=minorEdit, botflag=botflag)
        except pywikibot.LockedPage:
            pywikibot.output(u"Page %s is locked; skipping."
                             % page.title(asLink=True))
        except pywikibot.EditConflict:
            pywikibot.output(
                u'Skipping %s because of edit conflict'
                % (page.title()))
        except pywikibot.SpamfilterError as error:
            pywikibot.output(
                u'Cannot change %s because of spam blacklist entry %s'
                % (page.title(), error.url))
        else:
            return True
        return False
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Global arguments (e.g. the target site) are consumed by pywikibot
    remaining_args = pywikibot.handle_args(args)

    # The factory collects the page selecting arguments shared with other
    # scripts
    factory = pagegenerators.GeneratorFactory()

    # With -always the bot won't ask for confirmation of edit (automode)
    always = False
    for argument in remaining_args:
        if argument.startswith("-always"):
            always = True
        else:
            factory.handleArg(argument)

    pages = factory.getCombinedGenerator()
    if not pages:
        # No pages selected, explain usage instead
        pywikibot.showHelp()
        return

    # The preloading generator is responsible for downloading multiple
    # pages from the wiki simultaneously.
    bot = ChartsBot(pagegenerators.PreloadingGenerator(pages), always)
    bot.run()


if __name__ == "__main__":
    main()

352
countrylist.py Normal file
View File

@@ -0,0 +1,352 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# countrylist.py
#
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides a class for handling charts list per country and year
"""
import locale
from datetime import datetime
from isoweek import Week
import pywikibot
import mwparserfromhell as mwparser
class CountryList():
    """
    Handles charts list per country and year
    """

    def __init__(self, wikilink):
        """
        Generate new instance of class

        Checks whether the page given with wikilink exists and which year
        it belongs to.

        @param wikilink     Wikilink object by mwparser linking CountryList

        @raises CountryListError    if the page does not exist or its year
                                    can not be detected
        """
        # Generate pywikibot site object
        # @TODO: Maybe store it outside???
        self.site = pywikibot.Site()

        # Set locale to 'de_DE.UTF-8'
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

        # Generate pywikibot page object
        self.page = pywikibot.Page(self.site, wikilink.title)

        # Store given wikilink for page object
        self.wikilink = wikilink

        # Initialise attributes which are filled while parsing
        for attr in ("wikicode", "entry", "chartein", "_chartein_raw",
                     "_titel_raw", "titel", "interpret", "_interpret_raw"):
            setattr(self, attr, None)

        # A constructor can not return False (Python raises TypeError for
        # that), so a missing page is reported with the module's exception
        if not self.page.exists():
            raise CountryListError(
                "Page %s does not exist!" % self.page.title())

        # Try to find year
        self.find_year()

    def parsing_needed(self, revid):
        """
        Check if current revid of CountryList differs from given one

        @param revid    Revid to check against
        @return True    Given revid differs from current revid
                False   Given revid is equal to current revid
        """
        return revid != self.page.latest_revision_id

    def find_year(self):
        """
        Try to find the year related to CountryList

        Stores the current year in self.year if it occurs in the page
        title, otherwise the last year.

        @raises CountryListError    if neither year occurs in the title
        """
        title = self.page.title()
        current_year = datetime.now().year

        # Check if year is in page.title, if not try last year
        for year in (current_year, current_year - 1):
            self.year = year
            if str(year) in title:
                return

        # Neither year matched ("CountryListYearError" was never defined,
        # so the generic module exception is used)
        raise CountryListError(
            "Neither %d nor %d found in title of %s"
            % (current_year, current_year - 1, title))

    def parse(self):
        """
        Handles the parsing process
        """
        # Parse page with mwparser
        self.generate_wikicode()

        # Select lastest entry
        self.get_latest_entry()

        # Prepare chartein, titel, interpret
        self.prepare_chartein()
        self.prepare_titel()
        self.prepare_interpret()

    def detect_belgian(self):
        """
        Detect whether current list is one of the belgian ones

        @returns "Wallonie" or "Flandern" (used to select the section), or
                 None if the link names neither province
        """
        # Check if belgian province name is in link text or title
        link_text = str(self.wikilink.text)
        link_title = str(self.wikilink.title)

        if "Wallonien" in link_text or "Wallonien" in link_title:
            return "Wallonie"
        if "Flandern" in link_text or "Flandern" in link_title:
            return "Flandern"
        return None

    def generate_wikicode(self):
        """
        Runs mwparser on page.text to get mwparser.objects
        """
        self.wikicode = mwparser.parse(self.page.text)

    def get_latest_entry(self):
        """
        Get latest list entry template object and store it in self.entry

        @raises CountryListError    if section, wrapping template, its
                                    parameter "Inhalt" or the entry itself
                                    is missing
        """
        # Select the section "Singles"
        # For belgian list we need to select subsection of country
        belgian = self.detect_belgian()
        try:
            if belgian:
                singles_section = self.wikicode.get_sections(
                    matches=belgian)[0].get_sections(matches="Singles")[0]
            else:
                singles_section = self.wikicode.get_sections(
                    matches="Singles")[0]
        except IndexError:
            raise CountryListError("Section 'Singles' is missing!") from None

        # Since we have multiple categories in some countrys we need
        # to select the first wrapping template
        wrapping_template = next(singles_section.ifilter_templates(
            matches="Nummer-eins-Hits"), None)
        if wrapping_template is None:
            raise CountryListError("Wrapping template is missing!")

        if not wrapping_template.has("Inhalt"):
            raise CountryListError(
                "Parameter 'Inhalt' of wrapping template is missing!")

        # Select the last occurence of template "Nummer-eins-Hits Zeile" in
        # Wrapper-template
        rows = wrapping_template.get("Inhalt").value.ifilter_templates(
            matches="Nummer-eins-Hits Zeile")
        for row in rows:
            self.entry = row

        # Check if we have found something
        if not self.entry:
            raise CountryListError(self.page.title())

    def get_year_correction(self):
        """
        Reads value of jahr parameter for correcting week numbers near to
        year changes

        @returns 1 for "+1", -1 for "-1", otherwise 0
        """
        if self.entry.has("Jahr"):
            # The value has to be read explicitly; stripping the parameter
            # itself would yield "Jahr=..." and never match
            jahr = self.entry.get("Jahr").value.strip()

            if jahr == "+1":
                return 1
            if jahr == "-1":
                return -1

        # None or wrong parameter value
        return 0

    def prepare_chartein(self):
        """
        Checks whether self._chartein_raw is a date or a week number and
        calculates the related date object, stored in self.chartein
        """
        # If self._chartein_raw is not set, get it
        if not self._chartein_raw:
            self.get_chartein_value()

        # Numeric string means week number
        if self._chartein_raw.isnumeric():
            # Use monday of given week; the year may be shifted by the
            # entry's parameter "Jahr"
            self.chartein = Week(self.year + self.get_year_correction(),
                                 int(self._chartein_raw)).monday()
        # Complete date string present
        else:
            self.chartein = datetime.strptime(self._chartein_raw,
                                              "%Y-%m-%d")

    def get_chartein_value(self):
        """
        Reads value of chartein parameter
        If param is not present raise Error
        """
        if not self.entry.has("Chartein"):
            raise CountryListEntryError(
                "Template Parameter 'Chartein' is missing!")
        self._chartein_raw = self.entry.get("Chartein").value.strip()

    def prepare_titel(self):
        """
        Loads and prepares Titel of latest entry
        """
        # If self._titel_raw is not set, get it
        if not self._titel_raw:
            self.get_titel_value()

        self.titel = self._titel_raw

    def get_titel_value(self):
        """
        Reads value of Titel parameter
        If param is not present raise Error
        """
        if not self.entry.has("Titel"):
            raise CountryListEntryError(
                "Template Parameter 'Titel' is missing!")
        self._titel_raw = self.entry.get("Titel").value.strip()

    def prepare_interpret(self):
        """
        Loads and prepares Interpret of latest entry

        Interpret names without wikilink are replaced by a wikilink from
        the list page whose text or title equals the name, if there is one.
        """
        # If self._interpret_raw is not set, get it
        if not self._interpret_raw:
            self.get_interpret_value()

        # Interpret name separating words
        seps = ("feat.", "&")

        # Concatenated interpret names, alternating with separators
        parts = [" "]

        # Indexes in parts of names which still need a link
        indexes = []
        index = 0

        # Reconcatenate interpret names word by word
        for word in self._interpret_raw.split():
            if word in seps:
                # Count up index 2 times ( Separator + next Name )
                index += 2
                parts.append(word)
                parts.append(" ")
                continue

            # Name parts
            parts[-1] = (parts[-1] + " " + word).strip()

            # We only need to work on it, if no wikilink is present
            if index not in indexes and "[[" not in parts[-1]:
                indexes.append(index)

        # Nothing to do, just use raw
        if not indexes:
            self.interpret = self._interpret_raw
            return

        # Iterate over wikilinks of refpage and try to find related links
        for wikilink in self.wikicode.ifilter_wikilinks():
            for index in indexes:
                if parts[index] == wikilink.text \
                        or parts[index] == wikilink.title:
                    # Overwrite name with complete wikilink
                    parts[index] = str(wikilink)
                    # Leaving the loop right after removing keeps the
                    # iteration over the modified list safe
                    indexes.remove(index)
                    break

            # If worklist is empty, stop iterating over wikilinks
            if not indexes:
                break

        # Join the collected links
        self.interpret = " ".join(parts)

    def get_interpret_value(self):
        """
        Reads value of Interpret parameter
        If param is not present raise Error
        """
        if not self.entry.has("Interpret"):
            raise CountryListEntryError(
                "Template Parameter 'Interpret' is missing!")
        self._interpret_raw = self.entry.get("Interpret").value.strip()
class CountryListError(Exception):
    """
    Base exception for errors occuring in class CountryList
    """
class CountryListEntryError(CountryListError):
    """
    Exception for errors in class CountryList which are related to entries
    """

346
summarypage.py Normal file
View File

@@ -0,0 +1,346 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# summarypage.py
#
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides classes for handling Charts summary page
"""
import locale
from datetime import datetime, timedelta
import pywikibot
import mwparserfromhell as mwparser
from countrylist import CountryList, CountryListError
class SummaryPage():
    """
    Handles summary page related actions
    """

    def __init__(self, text):
        """
        Create Instance

        @param text     Wikitext of the summary page
        """
        # Parse Text with mwparser
        self.wikicode = mwparser.parse(text)

        # Result of the last treat() run for this page
        self._write_needed = False

    def treat(self):
        """
        Handles parsing/editing of text
        """
        # SummaryPageEntry collects the result in a class attribute which
        # is shared by all pages; reset it so this page is not reported as
        # changed because of a page treated earlier
        SummaryPageEntry.write_needed = False

        # Get mwparser.template objects for Template "/Eintrag"
        for entry in self.wikicode.filter_templates(matches="/Eintrag"):

            # Instantiate and treat SummaryPageEntry-object
            summarypageentry = SummaryPageEntry(entry)
            summarypageentry.treat()

            # We need to replace original entry since objectid changes due to
            # recreation of template object and reassignment won't be
            # reflected
            self.wikicode.replace(entry, summarypageentry.new_entry.template)

        # Remember the result for this page
        self._write_needed = SummaryPageEntry.write_needed

    def get_new_text(self):
        """
        If writing page is needed, return new text, otherwise False
        """
        if self._write_needed:
            # Convert wikicode back to string and return
            return str(self.wikicode)

        return False
class SummaryPageEntry():
    """
    Provides a generic wrapper for summary page entry template
    """

    # Shared by all instances: True as soon as one treated entry differs
    # from its stored version
    write_needed = False

    def __init__(self, entry):
        """
        Constructor

        @param entry    mwparser template object of the entry
        """
        self.old_entry = SummaryPageEntryTemplate(entry)
        self.new_entry = SummaryPageEntryTemplate()

    def treat(self):
        """
        Controls parsing/update-sequence of entry
        """
        self.parse()
        self.correct_chartein()
        self.update_params()
        self.is_write_needed()

    @staticmethod
    def _title_for_year(title, current_year):
        """
        Returns title with last year's number replaced by current_year
        """
        return str(title).replace(str(current_year - 1), str(current_year))

    def _load_countrylist(self):
        """
        Creates and parses the CountryList for the current wikilink

        @returns False if the created object is falsy, otherwise True
        """
        self.countrylist = CountryList(self.countrylist_wikilink)
        if not self.countrylist:
            return False

        self.countrylist.parse()
        return True

    def parse(self):
        """
        Handles parsing process of entry template

        @raises SummaryPageEntryError   if no usable CountryList is linked
        """
        # Get wikilink to related countrylist
        self.get_countrylist_wikilink()

        # Get saved revision of related countrylist
        self.get_countrylist_saved_revid()

        # Get current year
        current_year = datetime.now().year

        # Store old link.title
        link_title = self.countrylist_wikilink.title

        # If list is from last year, point the link to this year's list.
        # The years have to be converted to strings and the result has to
        # be assigned, otherwise the title is never changed
        if str(current_year - 1) in str(link_title):
            self.countrylist_wikilink.title = self._title_for_year(
                link_title, current_year)

        # Try to get current years list
        try:
            self._load_countrylist()

        # Maybe fallback to last years list
        except CountryListError:
            self.countrylist_wikilink.title = link_title

            if not self._load_countrylist():
                raise SummaryPageEntryError("CountryList does not exists!")

    def get_countrylist_wikilink(self):
        """
        Load wikilink to related countrylist

        @raises SummaryPageEntryError   if param Liste is missing or has
                                        no wikilink
        """
        liste = self.old_entry.Liste
        if not liste:
            raise SummaryPageEntryError("Parameter Liste is not present!")

        wikilink = next(liste.ifilter_wikilinks(), None)
        if wikilink is None:
            raise SummaryPageEntryError(
                "Parameter Liste does not contain valid wikilink!")

        self.countrylist_wikilink = wikilink

    def get_countrylist_saved_revid(self):
        """
        Load saved revid of related countrylist if Param is present

        A missing, empty or non numeric value results in 0.
        """
        revision = self.old_entry.Liste_Revision
        self.countrylist_revid = 0

        if revision:
            try:
                self.countrylist_revid = int(str(revision).strip())
            except ValueError:
                # Manually damaged value, treat like no saved revision
                pass

    def update_params(self):
        """
        Updates values of Parameters of template
        """
        self.new_entry.Liste = self.countrylist_wikilink
        self.new_entry.Liste_Revision = \
            self.countrylist.page.latest_revision_id
        self.new_entry.Interpret = self.countrylist.interpret
        self.new_entry.Titel = self.countrylist.titel
        self.new_entry.Chartein = self._corrected_chartein

        # Params maintained by hand are taken over, empty if not present
        self.new_entry.Korrektur = self.old_entry.Korrektur or ""
        self.new_entry.Hervor = self.old_entry.Hervor or ""

    def correct_chartein(self):
        """
        Calulates the correct value of chartein, based on the chartein value
        from countrylist entry and param Korrektur of summarypage entry
        """
        days = 0

        # If param Korrektur is present extract the value
        if self.old_entry.Korrektur:
            # If Korrektur is (after striping) castable to int use it,
            # otherwise ignore it
            try:
                days = int(str(self.old_entry.Korrektur).strip())
            except ValueError:
                days = 0

        corrected = self.countrylist.chartein + timedelta(days=days)
        self._corrected_chartein = corrected.strftime("%d. %B").lstrip("0")

    def is_write_needed(self):
        """
        Detects whether writing of entry is needed and stores information in
        Class-Attribute
        """
        cls = type(self)
        cls.write_needed = (self.old_entry != self.new_entry) \
            or cls.write_needed
class SummaryPageEntryTemplate():
    """
    Interface class for mwparser.template to simply use template params as
    Properties
    """

    # Class attribute: names of the template params which are reachable as
    # attributes of an instance
    params = ("Liste", "Liste_Revision", "Interpret", "Titel", "Chartein",
              "Korrektur", "Hervor")

    def __init__(self, template_obj=None):
        """
        Creates Instance of Class for given mwparser.template object of
        SummaryPageEntry Template. If no object was given create empty one.

        @param template_obj     mw.parser.template Object of
                                SummaryPageEntry Template

        @raises SummaryPageEntryTemplateError   if template_obj is not a
                                                mwparser template object
        """
        # Check if object was given
        if template_obj:
            # Check if object has correct type
            if not isinstance(template_obj,
                              mwparser.nodes.template.Template):
                raise SummaryPageEntryTemplateError("Wrong type given")

            self.template = template_obj
            self.__initial = False

        # Otherwise initialise template
        else:
            self.__initial_template()
            self.__initial = True

    def __initial_template(self):
        """
        Builds the initial template
        """
        self.template = next(mwparser.parse(
            "{{Portal:Charts und Popmusik/Aktuelle Nummer-eins-Hits/Eintrag"
            "|Liste=|Liste_Revision=|Interpret=|Titel=NN"
            "|Chartein=|Korrektur=|Hervor=}}").ifilter_templates())

    def __getattr__(self, name):
        """
        Special getter for template params

        Only called for names which are no regular attributes.

        @returns value of the template param, False if the template does
                 not have it
        @raises AttributeError  if name is no known template param
        """
        if name not in type(self).params:
            raise AttributeError(
                "%r object has no attribute %r"
                % (type(self).__name__, name))

        if self.template.has(name):
            return self.template.get(name).value
        return False

    def __setattr__(self, name, value):
        """
        Special setter for template params

        Known template params are written to the template, everything else
        is stored as regular attribute.
        """
        if name in type(self).params:
            self.__dict__['template'].add(name, value)
        else:
            object.__setattr__(self, name, value)

    def __ne__(self, other):
        """
        Checks whether any Template param value except for Liste_Revision
        differs

        @returns True if a param is missing or differs, otherwise False
        @raises SummaryPageEntryTemplateError   if none of the compared
                                                instances was created
                                                without a template object
        """
        # Let Python handle comparison with foreign types
        if not isinstance(other, SummaryPageEntryTemplate):
            return NotImplemented

        # Detect which of the two was initialised (without)
        # If none raise error
        if self.__initial:
            initial, cmpto = self, other
        elif other.__initial:
            initial, cmpto = other, self
        else:
            raise SummaryPageEntryTemplateError(
                "One of the compared instances must have been initial!")

        # Iterate over each param
        for param in initial.template.params:

            # Use the name of the param object itself instead of slicing
            # its string representation at "="
            name = str(param.name).strip()

            # If param is missing, writing is needed
            if not cmpto.template.has(name):
                return True

            # Do not compare List Revisions (not just write about Revids)
            if name == "Liste_Revision":
                continue

            # Compare other param values, if one unequal write is needed
            if initial.template.get(name).value.strip() != \
                    cmpto.template.get(name).value.strip():
                return True

        # If not returned True until now
        return False
class SummaryPageError(Exception):
    """
    Base exception for errors occuring in class SummaryPage
    """
class SummaryPageEntryError(SummaryPageError):
    """
    Exception for errors occuring in class SummaryPageEntry
    """
class SummaryPageEntryTemplateError(SummaryPageError):
    """
    Exception for errors occuring in class SummaryPageEntryTemplate
    """