54 Commits

Author SHA1 Message Date
c708832515 Merge branch 'feature-force-reload' 2015-12-11 12:42:41 +01:00
18122fafe8 New feature force parsing of countrylists regardless if needed with param "-force-reload" 2015-12-11 12:41:23 +01:00
55afe94a4e Merge branch 'countrylist-linksearch' 2015-12-11 12:03:51 +01:00
e409c7a02b CountryList-module: Also search for Links in Titel 2015-12-11 00:03:53 +01:00
9d9207c175 CountryList-module: Put linksearching algorithm in separate function for simple reuse for Titel value 2015-12-10 23:13:45 +01:00
5f13da5934 Clarify licence situation of chartsbot.py 2015-11-25 17:15:55 +01:00
5b084f6fde Fix Bug: Writing is requested even when only rev_ids have changed
Introduce new attr to CountryList for simple get information wether page was parsed

The SummaryPageEntryTemplate comparation to non-equal fails when unparsed Entrys occur
--> and it with information wether CountryList was parsed
2015-11-23 19:36:19 +01:00
e3c2c1a5d9 Merge branch 'pep8-compat' 2015-11-23 19:15:37 +01:00
f819193790 pep8-compat: clean up CountryList-Modul 2015-11-23 19:11:21 +01:00
4a856b1dae pep8-compat: Replace undefined Error by Message in CountryList-Module 2015-11-23 19:04:27 +01:00
166e61aee7 pep8-compat: cleanup SummaryPage-Module 2015-11-23 19:00:07 +01:00
1ea37c0e0d pep8-compat: Remove unnecessary imports from summarypage.py 2015-11-23 18:59:16 +01:00
3e525edd2a pep8-compat: chartsbot.py remove unnecessary imports 2015-11-23 18:48:04 +01:00
3cab979662 Merge branch 'summarypage-module' 2015-11-21 11:52:21 +01:00
52f933bea7 SummaryPage-Module: Bugfix, move countrylist.parse() back in try statement since we need to make sure it is parseable due to automatic year change feature 2015-11-21 11:50:40 +01:00
e854244f0b Merge branches 'countrylist-module' and 'summarypage-module' 2015-11-21 11:33:35 +01:00
f1e0157643 CountryList-Module: Rename method parsing_needed to is_parsing_needed to make boolean character more clear 2015-11-21 11:32:00 +01:00
4987f97e91 SummaryPage-Module: Reimplement feature to prevent parsing for pages where revid haven't changed since last parsing 2015-11-21 11:30:37 +01:00
06ffb37d07 Merge branch 'summarypage-module' 2015-11-20 21:02:03 +01:00
f360deb1a0 SummaryPage-Modul: Use full path for embeding entry template since relative paths don't work on already embeded pages 2015-11-20 21:00:37 +01:00
03857c0236 Merge branch 'countrylist-module' 2015-11-20 14:45:08 +01:00
3864c9013c CountryList-Module: Since we have multiple categories in some countrys we need to select the first wrapping template 2015-11-20 14:44:26 +01:00
1f70c42bdc Merge branch 'chartsbot-module'
Reflect new structure
2015-11-13 13:36:52 +01:00
43668ef8b7 ChartsBot-Module: Remove old code which is replaced by SummaryPage- and CountryList-Module 2015-11-13 13:34:42 +01:00
1a06d20a75 ChartsBot-Module: Move charts.py to chartsbot.py to name like class
Several updates of code to use new modul structure
2015-11-13 13:34:22 +01:00
df67d7ca8a Merge changes from branch 'countrylist-module'
+ Fixed syntax error
2015-11-13 13:26:55 +01:00
eca0142f71 CountryList-Module: Add Exception-Handling classes 2015-11-13 12:22:17 +01:00
d98bc5e2b5 Merge branch 'summarypage-module' 2015-11-13 12:21:15 +01:00
fec2143203 SummaryPage-Module: Create Exception-Classes 2015-11-13 12:18:02 +01:00
f003b20d65 SummaryPage-Module: Implement method for returning new page text if editing is needed 2015-11-13 11:03:42 +01:00
fc96bb5b6c SummaryPage-Module: Implement class handling parsing/editing process of whole page 2015-11-13 11:02:16 +01:00
def75876b0 SummaryPage-Module: Add method for controling parsing/update-sequence of entry 2015-11-13 11:00:36 +01:00
a7b1813d24 SummaryPage-Module: Add methods for working with results from parsing in entry 2015-11-13 10:57:41 +01:00
5d43234b11 SummaryPage-Module: Add parse-method to Entry-Class which uses CountryList-Module 2015-11-13 10:54:34 +01:00
6ac8c9298f SummaryPage-Module: Add class for handling entries 2015-11-13 10:52:18 +01:00
bdb48d6e79 SummaryPage-Module: Add method for notequal-comparation to Template-Interface
to detect whether writing is needed
2015-11-13 10:49:25 +01:00
f06f04c5ef SummaryPage-Module: Hook into getattr and setattr to directly use
template params as object attributes
2015-11-13 10:47:23 +01:00
cdd0c4c666 SummaryPage-Module: Implement Class as inteface to Entry-Template 2015-11-13 10:45:59 +01:00
ce0a06ae0a SummaryPage-Module: Add new module for handling summarypage related actions 2015-11-13 10:31:42 +01:00
9225dc5f73 Merge branch 'countrylist-module'
Provides a class for handling CountryLists
2015-11-13 10:24:49 +01:00
2e8b4273e7 CountryList-Module: Implement parse-method which handles the parsing sequence 2015-11-13 10:18:09 +01:00
41d3ca95ef CountryList-Module: Implement methods for handling Interpret-Parameter
Including searching for missing links
2015-11-13 10:17:44 +01:00
87aee8c42a CountryList-Module: Implment methods for handling Titel-Parameter 2015-11-13 10:17:32 +01:00
d4ea57dae8 CountryList-Module: Implement methods for handling Chartein-Date 2015-11-13 10:17:25 +01:00
8858e81ee6 CountryList-Module: Implement methods to get the latest entry of list 2015-11-13 10:17:12 +01:00
4a790912fc CountryList-Module: Implement method for detecting year related to list 2015-11-13 10:17:07 +01:00
abc30707b5 CountryList-Module: Implement method for checking if parsing is needed 2015-11-13 10:16:56 +01:00
6ae8f4c6ad CountryList-Module: Implement basic init method 2015-11-13 10:16:49 +01:00
11bfb6807c CountryList-Module: Create new class CountryList to move code for handling country list in separate class 2015-11-13 10:16:17 +01:00
eedcefb215 Add functionality to search for links for nonlinked interprets 2015-11-08 20:48:34 +01:00
f0978d26e2 Automatic year change detection 2015-11-08 19:57:21 +01:00
0a49b27d7f Use param "-always" to run without any interactiv requests 2015-11-07 16:53:34 +01:00
f05889a15b Restucture code to make it better maintainable
Remove unnecessary whitespace
2015-11-07 16:50:09 +01:00
a69a3add73 Prepare exception handling 2015-11-05 17:33:09 +01:00
4 changed files with 1018 additions and 344 deletions

344
charts.py
View File

@@ -1,344 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# charts.py
#
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides a class for handling chart lists
"""
from datetime import datetime, timedelta
import locale
from isoweek import Week
import pywikibot
from pywikibot import pagegenerators
import mwparserfromhell as mwparser
# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
'&params;': pagegenerators.parameterHelp
}
class Charts:
"""
Class for handling chart lists
"""
def __init__( self, generator, dry ):
"""
Constructor.
@param generator: The page generator that determines on which pages
to work.
@type generator: generator.
@param dry: If True, doesn't do any real changes, but only shows
what would have been changed.
@type dry: boolean.
"""
self.generator = generator
self.dry = dry
# Set the edit summary message
self.site = pywikibot.Site()
self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits"
# Set attribute to detect wether there was a real change
self.changed = None
# Set locale to 'de_DE.UTF-8'
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
def run(self):
"""Process each page from the generator."""
for page in self.generator:
self.treat(page)
def treat(self, page):
"""Load the given page, does some changes, and saves it."""
text = self.load(page)
if not text:
return
################################################################
# NOTE: Here you can modify the text in whatever way you want. #
################################################################
# If you find out that you do not want to edit this page, just return.
# Example: This puts the text 'Test' at the beginning of the page.
text = self.parse_overview( text )
if not self.save(text, page, self.summary, False):
pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))
def load(self, page):
"""Load the text of the given page."""
try:
# Load the page
text = page.get()
except pywikibot.NoPage:
pywikibot.output(u"Page %s does not exist; skipping."
% page.title(asLink=True))
except pywikibot.IsRedirectPage:
pywikibot.output(u"Page %s is a redirect; skipping."
% page.title(asLink=True))
else:
return text
return None
def save(self, text, page, comment=None, minorEdit=True,
botflag=True):
"""Update the given page with new text."""
# only save if something was changed (and not just revision)
if text != page.get() and self.changed:
# Show the title of the page we're working on.
# Highlight the title in purple.
pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
% page.title())
# show what was changed
pywikibot.showDiff(page.get(), text)
pywikibot.output(u'Comment: %s' % comment)
if not self.dry:
if True or pywikibot.input_yn(
u'Do you want to accept these changes?',
default=False, automatic_quit=False):
try:
page.text = text
# Save the page
page.save(summary=comment or self.comment,
minor=minorEdit, botflag=botflag)
except pywikibot.LockedPage:
pywikibot.output(u"Page %s is locked; skipping."
% page.title(asLink=True))
except pywikibot.EditConflict:
pywikibot.output(
u'Skipping %s because of edit conflict'
% (page.title()))
except pywikibot.SpamfilterError as error:
pywikibot.output(
u'Cannot change %s because of spam blacklist \
entry %s'
% (page.title(), error.url))
else:
return True
return False
def parse_charts_list( self, page, belgien=False ):
"""
Handles the parsing process
"""
# Parse charts list with mwparser
wikicode = mwparser.parse( page.text )
# Select the section "Singles"
if belgien:
singles_section = wikicode.get_sections(
matches=belgien )[0].get_sections( matches="Singles" )[0]
else:
singles_section = wikicode.get_sections( matches="Singles" )[0]
# Select the last occurence of template "Nummer-eins-Hits Zeile" in
# "Singles"-section
last_entry = singles_section.ifilter_templates(
matches="Nummer-eins-Hits Zeile" )
for last in last_entry:
pass
# Detect weather we have a date or a weeknumber for Template Param
# "Chartein"
if( last.get("Chartein").value.strip().isnumeric() ):
chartein = last.get("Chartein").value.strip()
else:
chartein = datetime.strptime( last.get("Chartein").value.strip(),
"%Y-%m-%d" )
title = last.get("Titel").value.strip()
interpret = last.get("Interpret").value.strip()
# Return collected data as tuple
return ( chartein, title, interpret )
def parse_overview( self, text ):
"""
Parses the given Charts-Overview-Page and returns the updated version
"""
# Parse text with mwparser to get access to nodes
wikicode = mwparser.parse( text )
# Get mwparser.template objects for Template "/Eintrag"
for country in wikicode.ifilter_templates( matches="/Eintrag" ):
# Get mwparser.wikilink object
for link in country.get("Liste").value.ifilter_wikilinks():
# Create Page-Object for Chartslist
list_page = pywikibot.Page( self.site, link.title )
# Only use first wikilink in Template Param "Liste"
break
# Check if we have a saved revid
if not country.has( "Liste Revision" ):
try:
country.add( "Liste Revision", 0, before="Interpret" )
except ValueError:
country.add( "Liste Revision", 0 )
# Check if saved revid is unequal to current revid
if( str( country.get( "Liste Revision" ).value ) !=
list_page.latest_revision_id ):
country = self.update_overview( country, list_page )
# If any param of any occurence of Template "/Eintrag" has changed,
# Save new version
# We need to convert mwparser-objects to string before saving
return str( wikicode )
def update_overview( self, country, list_page ): # noqa
"""
Updates the templates given in county using data from given list_page
@param country wikicode-object with Template for country
@param list_page pywikibot-page-object for list-page
@returns wikicode-object with updated Template for country
"""
# Parse linked charts list for the country
if "Wallonien" in str( country.get( "Liste" ).value ):
belgien = "Wallonie"
elif "Flandern" in str( country.get( "Liste" ).value ):
belgien = "Flandern"
else:
belgien = None
data = self.parse_charts_list( list_page, belgien )
# Update "Liste Revision" param
country.get( "Liste Revision" ).value = str(
list_page.latest_revision_id )
# If param Korrektur is present extract the value
if( country.has( "Korrektur" ) and
str( country.get( "Korrektur" ).value ).isnumeric() ):
days = int( str( country.get( "Korrektur" ).value ) )
else:
days = 0
# For some countries we have weeknumbers instead of dates
if( isinstance( data[0], str ) ):
# Slice year out of link destination
year = int( list_page.title()[-5:-1] )
# Calculate date of monday in given week and add number of
# days given in Template parameter "Wochentag" with monday
# as day (zero)
# We need double conversion since wikicode could not be casted
# as int directly
date = ( Week( year, int( data[0] ) ).monday() +
timedelta( days=days ) )
# Param Chartein contains a regular date
else:
date = data[0] + timedelta( days=days )
# Check if param "Chartein" is present
if not country.has( "Chartein" ):
try:
country.add( "Chartein", "", before="Wochentag" )
except ValueError:
country.add( "Chartein", "" )
# Check if date has changed
if( date.strftime( "%d. %B" ).lstrip( "0" ) !=
country.get("Chartein").value ):
country.get("Chartein").value = date.strftime( "%d. %B"
).lstrip( "0" )
self.changed = True
# Check if param "Titel" is present
if not country.has( "Titel" ):
country.add( "Titel", "", before="Chartein" )
# Check if Titel has changed
if( data[1] != country.get( "Titel" ).value ):
country.get( "Titel" ).value = data[1]
self.changed = True
# Check if param "Intepret" is present
if not country.has( "Interpret" ):
country.add( "Interpret", "", before="Titel" )
# Check if Interpret has changed
if( data[2] != country.get( "Interpret" ).value ):
country.get( "Interpret" ).value = data[2]
self.changed = True
def main(*args):
"""
Process command line arguments and invoke bot.
If args is an empty list, sys.argv is used.
@param args: command line arguments
@type args: list of unicode
"""
# Process global arguments to determine desired site
local_args = pywikibot.handle_args(args)
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
# to work on.
genFactory = pagegenerators.GeneratorFactory()
# The generator gives the pages that should be worked upon.
gen = None
# If dry is True, doesn't do any real changes, but only show
# what would have been changed.
dry = False
# Parse command line arguments
for arg in local_args:
if arg.startswith("-dry"):
dry = True
else:
genFactory.handleArg(arg)
if not gen:
gen = genFactory.getCombinedGenerator()
if gen:
# The preloading generator is responsible for downloading multiple
# pages from the wiki simultaneously.
gen = pagegenerators.PreloadingGenerator(gen)
bot = Charts(gen, dry)
bot.run()
else:
pywikibot.showHelp()
if( __name__ == "__main__" ):
main()

223
chartsbot.py Normal file
View File

@@ -0,0 +1,223 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# chartsbot.py
#
# original version by:
#
# (C) Pywikibot team, 2006-2014 as basic.py
#
# Distributed under the terms of the MIT license.
#
# modified by:
#
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Bot which automatically updates a ChartsSummaryPage like
[[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
CountryLists
The following parameters are supported:
&params;
-always If given, request for confirmation of edit is short circuited
Use for unattended run
-force-reload If given, countrylists will be always parsed regardless if
needed or not
"""
import locale
import pywikibot
from pywikibot import pagegenerators
from summarypage import SummaryPage
# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
'&params;': pagegenerators.parameterHelp
}
class ChartsBot( ):
"""
Bot which automatically updates a ChartsSummaryPage like
[[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
CountryLists
"""
def __init__( self, generator, always, force_reload ):
"""
Constructor.
@param generator: the page generator that determines on which pages
to work
@type generator: generator
@param always: if True, request for confirmation of edit is short
circuited. Use for unattended run
@type always: bool
@param force-reload: If given, countrylists will be always parsed
regardless if needed or not
@type force-reload: bool
"""
self.generator = generator
self.always = always
# Force parsing of countrylist
self.force_reload = force_reload
# Set the edit summary message
self.site = pywikibot.Site()
self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits"
# Set locale to 'de_DE.UTF-8'
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
def run(self):
"""Process each page from the generator."""
for page in self.generator:
self.treat(page)
def treat(self, page):
"""Load the given page, does some changes, and saves it."""
text = self.load(page)
if not text:
return
################################################################
# NOTE: Here you can modify the text in whatever way you want. #
################################################################
# Initialise and treat SummaryPageWorker
sumpage = SummaryPage( text, self.force_reload )
sumpage.treat()
# Check if editing is needed and if so get new text
if sumpage.get_new_text():
text = sumpage.get_new_text()
if not self.save(text, page, self.summary, False):
pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))
def load(self, page):
"""Load the text of the given page."""
try:
# Load the page
text = page.get()
except pywikibot.NoPage:
pywikibot.output(u"Page %s does not exist; skipping."
% page.title(asLink=True))
except pywikibot.IsRedirectPage:
pywikibot.output(u"Page %s is a redirect; skipping."
% page.title(asLink=True))
else:
return text
return None
def save(self, text, page, comment=None, minorEdit=True,
botflag=True):
"""Update the given page with new text."""
# only save if something was changed (and not just revision)
if text != page.get():
# Show the title of the page we're working on.
# Highlight the title in purple.
pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
% page.title())
# show what was changed
pywikibot.showDiff(page.get(), text)
pywikibot.output(u'Comment: %s' % comment)
if self.always or pywikibot.input_yn(
u'Do you want to accept these changes?',
default=False, automatic_quit=False):
try:
page.text = text
# Save the page
page.save(summary=comment or self.comment,
minor=minorEdit, botflag=botflag)
except pywikibot.LockedPage:
pywikibot.output(u"Page %s is locked; skipping."
% page.title(asLink=True))
except pywikibot.EditConflict:
pywikibot.output(
u'Skipping %s because of edit conflict'
% (page.title()))
except pywikibot.SpamfilterError as error:
pywikibot.output(
u'Cannot change %s because of spam blacklist \
entry %s'
% (page.title(), error.url))
else:
return True
return False
def main(*args):
"""
Process command line arguments and invoke bot.
If args is an empty list, sys.argv is used.
@param args: command line arguments
@type args: list of unicode
"""
# Process global arguments to determine desired site
local_args = pywikibot.handle_args(args)
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
# to work on.
genFactory = pagegenerators.GeneratorFactory()
# The generator gives the pages that should be worked upon.
gen = None
# If always is True, bot won't ask for confirmation of edit (automode)
always = False
# If force_reload is True, bot will always parse Countrylist regardless of
# parsing is needed or not
force_reload = False
# Parse command line arguments
for arg in local_args:
if arg.startswith("-always"):
always = True
elif arg.startswith("-force-reload"):
force_reload = True
else:
genFactory.handleArg(arg)
if not gen:
gen = genFactory.getCombinedGenerator()
if gen:
# The preloading generator is responsible for downloading multiple
# pages from the wiki simultaneously.
gen = pagegenerators.PreloadingGenerator(gen)
bot = ChartsBot(gen, always, force_reload)
bot.run()
else:
pywikibot.showHelp()
if( __name__ == "__main__" ):
main()

394
countrylist.py Normal file
View File

@@ -0,0 +1,394 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# countrylist.py
#
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides a class for handling charts list per country and year
"""
import locale
from datetime import datetime
from isoweek import Week
import pywikibot
import mwparserfromhell as mwparser
class CountryList():
"""
Handles charts list per country and year
"""
def __init__( self, wikilink ):
"""
Generate new instance of class
Checks wether page given with country_list_link exists
@param wikilink Wikilink object by mwparser linking CountryList
@returns self Object representing CountryList
False if page does not exists
"""
# Generate pywikibot site object
# @TODO: Maybe store it outside???
self.site = pywikibot.Site()
# Set locale to 'de_DE.UTF-8'
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
# Generate pywikibot page object
self.page = pywikibot.Page( self.site, wikilink.title )
# Store given wikilink for page object
self.wikilink = wikilink
# Check if page exits
if not self.page.exists():
return False
# Initialise attributes
__attr = ( "wikicode", "entry", "chartein", "_chartein_raw",
"_titel_raw", "titel", "interpret", "_interpret_raw" )
for attr in __attr:
setattr( self, attr, None )
self.parsed = False
# Try to find year
self.find_year()
def is_parsing_needed( self, revid ):
"""
Check if current revid of CountryList differs from given one
@param int Revid to check against
@return True Given revid differs from current revid
False Given revid is equal to current revid
"""
if revid != self.page.latest_revision_id:
return True
else:
return False
def find_year( self ):
"""
Try to find the year related to CountryList
"""
self.year = datetime.now().year
# Check if year is in page.title, if not try last year
if str( self.year ) not in self.page.title():
self.year -= 1
# If last year does not match, raise YearError
if str( self.year ) not in self.page.title():
raise CountryListError( "CountryList year is errorneous!" )
def parse( self ):
"""
Handles the parsing process
"""
# Parse page with mwparser
self.generate_wikicode()
# Select lastest entry
self.get_latest_entry()
# Prepare chartein, titel, interpret
self.prepare_chartein()
self.prepare_titel()
self.prepare_interpret()
# For easy detecting wether we have parsed self
self.parsed = True
def detect_belgian( self ):
"""
Detect wether current entry is on of the belgian (Belgien/Wallonien)
"""
# Check if begian province name is in link text or title
if( "Wallonien" in str( self.wikilink.text ) or
"Wallonien" in str( self.wikilink.title) ):
return "Wallonie"
elif( "Flandern" in str( self.wikilink.text ) or
"Flandern" in str( self.wikilink.title) ):
return "Flandern"
else:
return None
def generate_wikicode( self ):
"""
Runs mwparser on page.text to get mwparser.objects
"""
self.wikicode = mwparser.parse( self.page.text )
def get_latest_entry( self ):
"""
Get latest list entry template object
"""
# Select the section "Singles"
# For belgian list we need to select subsection of country
belgian = self.detect_belgian()
if belgian:
singles_section = self.wikicode.get_sections(
matches=belgian )[0].get_sections( matches="Singles" )[0]
else:
singles_section = self.wikicode.get_sections(matches="Singles")[0]
# Since we have multiple categories in some countrys we need
# to select the first wrapping template
try:
wrapping = next( singles_section.ifilter_templates(
matches="Nummer-eins-Hits" ) )
except StopIteration:
raise CountryListError( "Wrapping template is missing!")
# Select the last occurence of template "Nummer-eins-Hits Zeile" in
# Wrapper-template
for self.entry in wrapping.get("Inhalt").value.ifilter_templates(
matches="Nummer-eins-Hits Zeile" ):
pass
# Check if we have found something
if not self.entry:
raise CountryListError( self.page.title() )
def get_year_correction( self ):
"""
Reads value of jahr parameter for correcting week numbers near to
year changes
"""
# If param is present return correction, otherwise null
if self.entry.has( "Jahr" ):
# Read value of param
jahr = self.entry.get( "Jahr" ).strip()
if jahr == "+1":
return 1
elif jahr == "-1":
return -1
# None or wrong parameter value
return 0
def prepare_chartein( self ):
"""
Checks wether self._chartein_raw is a date or a week number and
calculates related datetime object
"""
# If self._chartein_raw is not set, get it
if not self._chartein_raw:
self.get_chartein_value()
# Detect weather we have a date or a weeknumber for Template Param
# "Chartein"
# Numeric string means week number
if( self._chartein_raw.isnumeric() ):
# Calculate date of monday in given week and add number of
# days given in Template parameter "Korrektur" with monday
# as day (zero)
self.chartein = ( Week( self.year + self.get_year_correction(),
int( self._chartein_raw ) ).monday() )
# Complete date string present
else:
self.chartein = datetime.strptime( self._chartein_raw,
"%Y-%m-%d" )
def get_chartein_value( self ):
"""
Reads value of chartein parameter
If param is not present raise Error
"""
if self.entry.has( "Chartein" ):
self._chartein_raw = self.entry.get("Chartein").value.strip()
else:
raise CountryListEntryError( "Template Parameter 'Chartein' is \
missing!" )
def prepare_titel( self ):
"""
Loads and prepares Titel of latest entry
"""
# If self._titel_raw is not set, get it
if not self._titel_raw:
self.get_titel_value()
# Try to find a wikilink for Titel on countrylist
if "[[" not in self._titel_raw:
self.titel = self._search_links( str(self._titel_raw) )
else:
self.titel = self._titel_raw
def get_titel_value( self ):
"""
Reads value of Titel parameter
If param is not present raise Error
"""
if self.entry.has( "Titel" ):
self._titel_raw = self.entry.get("Titel").value.strip()
else:
raise CountryListEntryError( "Template Parameter 'Titel' is \
missing!" )
def prepare_interpret( self ):
"""
Loads and prepares Interpret of latest entry
"""
# If self._interpret_raw is not set, get it
if not self._interpret_raw:
self.get_interpret_value()
# Work with interpret value to add missing links
# Split it in words
words = self._interpret_raw.split()
# Interpret name separating words
seps = ( "feat.", "&" )
# Create empty list for concatenated interpret names
parts = [ " ", ]
# Another list for managing indexes which need to be worked on
indexes = list()
index = 0
# Reconcatenate interpret names
for word in words:
# Name parts
if word not in seps:
parts[-1] += (" " + word)
# Remove unnecessary whitespace
parts[-1] = parts[-1].strip()
# We only need to work on it, if no wikilink is present
if index not in indexes and "[[" not in parts[-1]:
indexes.append( index )
else:
# Count up index 2 times ( Separator + next Name )
index += 2
parts.append( word )
parts.append( " " )
# If we have indexes without links, search for links
if indexes:
parts = self._search_links( parts, indexes )
# Join the collected links
sep = " "
self.interpret = sep.join( parts )
# Nothing to do, just use raw
else:
self.interpret = self._interpret_raw
def get_interpret_value( self ):
"""
Reads value of Interpret parameter
If param is not present raise Error
"""
if self.entry.has( "Interpret" ):
self._interpret_raw = self.entry.get("Interpret").value.strip()
else:
raise CountryListEntryError( "Template Parameter 'Interpret' is \
missing!" )
def _search_links( self, keywords, indexes=None ):
"""
Search matching wikilinks for keyword(s) in CountryList's wikicode
@param keywords: One or more keywords to search for
@type keywords: str, list
@param indexes: List with numeric indexes for items of keywords to work
on only
@type indexes: list of ints
@return: List or String with replaced keywords
@return type: str, list
"""
# Maybe convert keywords string to list
if( isinstance( keywords, str ) ):
keywords = [ keywords, ]
string = True
else:
string = False
# If indexes worklist was not provided, work on all elements
if not indexes:
indexes = list(range( len( keywords ) ))
# Iterate over wikilinks of refpage and try to find related links
for wikilink in self.wikicode.ifilter_wikilinks():
# Iterate over interpret names
for index in indexes:
# Check wether wikilink matches
if( keywords[index] == wikilink.text or
keywords[index] == wikilink.title ):
# Overwrite name with complete wikilink
keywords[index] = str( wikilink )
# Remove index from worklist
indexes.remove( index )
# Other indexes won't also match
break
# If worklist is empty, stop iterating over wikilinks
if not indexes:
break
# Choose wether return list or string based on input type
if not string:
return keywords
else:
return str(keywords[0])
class CountryListError( Exception ):
"""
Handles errors occuring in class CountryList
"""
pass
class CountryListEntryError( CountryListError ):
"""
Handles errors occuring in class CountryList related to entrys
"""
pass

401
summarypage.py Normal file
View File

@@ -0,0 +1,401 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# summarypage.py
#
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides classes for handling Charts summary page
"""
from datetime import datetime, timedelta
# import pywikibot
import mwparserfromhell as mwparser
from countrylist import CountryList, CountryListError
class SummaryPage():
"""
Handles summary page related actions
"""
def __init__( self, text, force_reload=False ):
"""
Create Instance
@param text: Page Text of summarypage
@type text: str
@param force-reload: If given, countrylists will be always parsed
regardless if needed or not
@type force-reload: bool
"""
# Parse Text with mwparser
self.wikicode = mwparser.parse( text )
# Force parsing of countrylist
self.force_reload = force_reload
def treat( self ):
"""
Handles parsing/editing of text
"""
# Get mwparser.template objects for Template "/Eintrag"
for entry in self.wikicode.filter_templates( matches="/Eintrag" ):
# Instantiate SummaryPageEntry-object
summarypageentry = SummaryPageEntry(entry,
force_reload=self.force_reload)
# Treat SummaryPageEntry-object
summarypageentry.treat()
# Get result
# We need to replace origninal entry since objectid changes due to
# recreation of template object and reassignment won't be reflected
self.wikicode.replace(entry, summarypageentry.get_entry().template)
def get_new_text( self ):
"""
If writing page is needed, return new text, otherwise false
"""
# Get information wether writing is needed from class attribute
if SummaryPageEntry.write_needed:
# Convert wikicode back to string and return
return str( self.wikicode )
return False
class SummaryPageEntry():
"""
Provides a generic wrapper for summary page entry template
"""
write_needed = False
def __init__( self, entry, force_reload=False ):
"""
Constructor
@param entry: Entry template of summarypage entry
@type text: mwparser.template
@param force-reload: If given, countrylists will be always parsed
regardless if needed or not
@type force-reload: bool
"""
self.old_entry = SummaryPageEntryTemplate( entry )
self.new_entry = SummaryPageEntryTemplate( )
# Force parsing of countrylist
self.force_reload = force_reload
def treat( self ):
"""
Controls parsing/update-sequence of entry
"""
# Get CountryList-Object
self.get_countrylist()
# Check if parsing country list is needed
if( self.countrylist.parsed):
self.correct_chartein()
self.update_params()
self.is_write_needed()
def get_countrylist( self ):
"""
Get the CountryList-Object for current entry
"""
# Get wikilink to related countrylist
self.get_countrylist_wikilink()
# Get saved revision of related countrylist
self.get_countrylist_saved_revid()
# Get current year
current_year = datetime.now().year
# Store old link.title
link_title = self.countrylist_wikilink.title
# If list is from last year, replace year
if (current_year - 1) in link_title:
self.countrylist_wikilink.title.replace( (current_year - 1),
current_year )
# Try to get current years list
try:
self.countrylist = CountryList( self.countrylist_wikilink )
self.maybe_parse_countrylist()
# Maybe fallback to last years list
except CountryListError:
self.countrylist_wikilink.title = link_title
self.countrylist = CountryList( self.countrylist_wikilink )
self.maybe_parse_countrylist()
if not self.countrylist:
raise SummaryPageEntryError( "CountryList does not exists!" )
def maybe_parse_countrylist( self ):
"""
Parse countrylist if page-object exists and if parsing is needed or
param -force-reload is set
"""
# Fast return if no countrylist-object
if not self.countrylist:
return
# Parse if needed or forced
if( self.countrylist.is_parsing_needed( self.countrylist_revid ) or
self.force_reload ):
self.countrylist.parse()
def get_countrylist_wikilink( self ):
"""
Load wikilink to related countrylist
"""
if self.old_entry.Liste:
try:
self.countrylist_wikilink = next(
self.old_entry.Liste.ifilter_wikilinks() )
except StopIteration:
raise SummaryPageEntryError(
"Parameter Liste does not contain valid wikilink!" )
else:
raise SummaryPageEntryError( "Parameter Liste is not present!")
def get_countrylist_saved_revid( self ):
"""
Load saved revid of related countrylist if Param is present
"""
if self.old_entry.Liste_Revision:
self.countrylist_revid = int(self.old_entry.Liste_Revision.strip())
else:
self.countrylist_revid = 0
def update_params( self ):
"""
Updates values of Parameters of template
"""
self.new_entry.Liste = self.countrylist_wikilink
self.new_entry.Liste_Revision = \
self.countrylist.page.latest_revision_id
self.new_entry.Interpret = self.countrylist.interpret
self.new_entry.Titel = self.countrylist.titel
self.new_entry.Chartein = self._corrected_chartein
if self.old_entry.Korrektur:
self.new_entry.Korrektur = self.old_entry.Korrektur
else:
self.new_entry.Korrektur = ""
if self.old_entry.Hervor:
self.new_entry.Hervor = self.old_entry.Hervor
else:
self.new_entry.Hervor = ""
def correct_chartein( self ):
"""
Calulates the correct value of chartein, based on the chartein value
from countrylist entry and param Korrektur of summarypage entry
"""
# If param Korrektur is present extract the value
if self.old_entry.Korrektur:
# If Korrektur is (after striping) castable to int use it
try:
days = int( str( self.old_entry.Korrektur ).strip() )
# Otherwise, if casting fails, ignore it
except ValueError:
days = 0
else:
days = 0
corrected = self.countrylist.chartein + timedelta( days=days )
self._corrected_chartein = corrected.strftime( "%d. %B" ).lstrip( "0" )
def is_write_needed( self ):
"""
Detects wether writing of entry is needed and stores information in
Class-Attribute
"""
type( self ).write_needed = ( ( self.old_entry != self.new_entry ) and
self.countrylist.parsed or
type( self ).write_needed )
def get_entry( self ):
"""
Returns the new entry if CountryList was parsed otherwise returns the
old one
"""
if( self.countrylist.parsed):
return self.new_entry
else:
return self.old_entry
class SummaryPageEntryTemplate():
"""
Interface class for mwparser.template to simply use template params as
Properties
"""
# Classatribute
params = ( "Liste", "Liste_Revision", "Interpret", "Titel", "Chartein",
"Korrektur", "Hervor" )
def __init__( self, template_obj=None ):
"""
Creates Instance of Class for given mwparser.template object of
SummmaryPageEntry Template. If no object was given create empty one.
@param template_obj Object of SummmaryPageEntry Template
@type template_obj: mwparser.template
"""
# Check if object was given
if( template_obj ):
# Check if object has correct type
if isinstance( template_obj,
mwparser.nodes.template.Template ):
self.template = template_obj
self.__initial = False
# Otherwise raise error
else:
raise SummaryPageEntryTemplateError( "Wrong type given" )
# Otherwise initialise template
else:
self.__initial_template()
self.__initial = True
def __initial_template( self ):
"""
Builds the initial template
"""
self.template = next( mwparser.parse( "{{Portal:Charts und Popmusik/\
Aktuelle Nummer-eins-Hits/Eintrag|Liste=|Liste_Revision=|Interpret=|Titel=NN\
|Chartein=|Korrektur=|Hervor=}}" ).ifilter_templates() )
def __getattr__( self, name ):
"""
Special getter for template params
"""
if name in type(self).params:
if( self.template.has( name ) ):
return self.template.get( name ).value
else:
return False
else:
raise AttributeError
def __setattr__( self, name, value ):
"""
Special setter for template params
"""
if name in type(self).params:
self.__dict__[ 'template' ].add( name, value )
else:
object.__setattr__(self, name, value)
def __ne__( self, other ):
"""
Checks wether all Template param values except for Liste_Revision are
equal
"""
# Detect which of the two was initialised (without)
# If none raise error
if( self.__initial ):
initial = self
cmpto = other
elif( other.__initial ):
initial = other
cmpto = self
else:
raise SummaryPageEntryTemplateError(
"One of the compared instances must have been initial!" )
# Iterate over each param
for param in initial.template.params:
# Slice out only Param.name
param = param[:param.find("=")].strip()
# If param is missing, writing is needed
if not cmpto.template.has( param ):
return True
# Do not compare List Revisions (not just write about Revids)
if param == "Liste_Revision":
continue
# Compare other param values, if one unequal write is needed
if( initial.template.get( param ).value.strip() !=
cmpto.template.get( param ).value.strip() ):
return True
# If not returned True until now
return False
class SummaryPageError( Exception ):
"""
Handles errors occuring in class SummaryPage
"""
pass
class SummaryPageEntryError( SummaryPageError ):
"""
Handles errors occuring in class SummaryPageEntry
"""
pass
class SummaryPageEntryTemplateError( SummaryPageError ):
"""
Handles errors occuring in class SummaryPageEntryTemplate
"""
pass