22 Commits

Author SHA1 Message Date
c708832515 Merge branch 'feature-force-reload' 2015-12-11 12:42:41 +01:00
18122fafe8 New feature: force parsing of countrylists, regardless of whether it is needed, with param "-force-reload" 2015-12-11 12:41:23 +01:00
55afe94a4e Merge branch 'countrylist-linksearch' 2015-12-11 12:03:51 +01:00
e409c7a02b CountryList-module: Also search for Links in Titel 2015-12-11 00:03:53 +01:00
9d9207c175 CountryList-module: Put linksearching algorithm in separate function for simple reuse for Titel value 2015-12-10 23:13:45 +01:00
5f13da5934 Clarify licence situation of chartsbot.py 2015-11-25 17:15:55 +01:00
5b084f6fde Fix bug: Writing is requested even when only rev_ids have changed
Introduce new attr on CountryList to easily tell whether the page was parsed

The SummaryPageEntryTemplate non-equality comparison fails when unparsed entries occur
--> AND it with the information whether the CountryList was parsed
2015-11-23 19:36:19 +01:00
e3c2c1a5d9 Merge branch 'pep8-compat' 2015-11-23 19:15:37 +01:00
f819193790 pep8-compat: clean up CountryList-Modul 2015-11-23 19:11:21 +01:00
4a856b1dae pep8-compat: Replace undefined Error by Message in CountryList-Module 2015-11-23 19:04:27 +01:00
166e61aee7 pep8-compat: cleanup SummaryPage-Module 2015-11-23 19:00:07 +01:00
1ea37c0e0d pep8-compat: Remove unnecessary imports from summarypage.py 2015-11-23 18:59:16 +01:00
3e525edd2a pep8-compat: chartsbot.py remove unnecessary imports 2015-11-23 18:48:04 +01:00
3cab979662 Merge branch 'summarypage-module' 2015-11-21 11:52:21 +01:00
52f933bea7 SummaryPage-Module: Bugfix, move countrylist.parse() back in try statement since we need to make sure it is parseable due to automatic year change feature 2015-11-21 11:50:40 +01:00
e854244f0b Merge branches 'countrylist-module' and 'summarypage-module' 2015-11-21 11:33:35 +01:00
f1e0157643 CountryList-Module: Rename method parsing_needed to is_parsing_needed to make boolean character more clear 2015-11-21 11:32:00 +01:00
4987f97e91 SummaryPage-Module: Reimplement feature to prevent parsing for pages where revid haven't changed since last parsing 2015-11-21 11:30:37 +01:00
06ffb37d07 Merge branch 'summarypage-module' 2015-11-20 21:02:03 +01:00
f360deb1a0 SummaryPage-Modul: Use full path for embeding entry template since relative paths don't work on already embeded pages 2015-11-20 21:00:37 +01:00
03857c0236 Merge branch 'countrylist-module' 2015-11-20 14:45:08 +01:00
3864c9013c CountryList-Module: Since we have multiple categories in some countrys we need to select the first wrapping template 2015-11-20 14:44:26 +01:00
3 changed files with 203 additions and 78 deletions

View File

@@ -1,7 +1,15 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# charts.py # chartsbot.py
#
# original version by:
#
# (C) Pywikibot team, 2006-2014 as basic.py
#
# Distributed under the terms of the MIT license.
#
# modified by:
# #
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de> # Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
# #
@@ -32,6 +40,8 @@ The following parameters are supported:
-always If given, request for confirmation of edit is short circuited -always If given, request for confirmation of edit is short circuited
Use for unattended run Use for unattended run
-force-reload If given, countrylists will be always parsed regardless if
needed or not
""" """
@@ -39,8 +49,6 @@ import locale
import pywikibot import pywikibot
from pywikibot import pagegenerators from pywikibot import pagegenerators
from pywikibot.bot import Bot
import mwparserfromhell as mwparser
from summarypage import SummaryPage from summarypage import SummaryPage
@@ -55,10 +63,10 @@ class ChartsBot( ):
""" """
Bot which automatically updates a ChartsSummaryPage like Bot which automatically updates a ChartsSummaryPage like
[[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked [[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
CountryListsAn incomplete sample bot. CountryLists
""" """
def __init__( self, generator, always ): def __init__( self, generator, always, force_reload ):
""" """
Constructor. Constructor.
@@ -68,11 +76,17 @@ class ChartsBot( ):
@param always: if True, request for confirmation of edit is short @param always: if True, request for confirmation of edit is short
circuited. Use for unattended run circuited. Use for unattended run
@type always: bool @type always: bool
@param force-reload: If given, countrylists will be always parsed
regardless if needed or not
@type force-reload: bool
""" """
self.generator = generator self.generator = generator
self.always = always self.always = always
# Force parsing of countrylist
self.force_reload = force_reload
# Set the edit summary message # Set the edit summary message
self.site = pywikibot.Site() self.site = pywikibot.Site()
self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits" self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits"
@@ -96,7 +110,7 @@ class ChartsBot( ):
################################################################ ################################################################
# Initialise and treat SummaryPageWorker # Initialise and treat SummaryPageWorker
sumpage = SummaryPage( text ) sumpage = SummaryPage( text, self.force_reload )
sumpage.treat() sumpage.treat()
# Check if editing is needed and if so get new text # Check if editing is needed and if so get new text
@@ -181,10 +195,16 @@ def main(*args):
# If always is True, bot won't ask for confirmation of edit (automode) # If always is True, bot won't ask for confirmation of edit (automode)
always = False always = False
# If force_reload is True, bot will always parse Countrylist regardless of
# parsing is needed or not
force_reload = False
# Parse command line arguments # Parse command line arguments
for arg in local_args: for arg in local_args:
if arg.startswith("-always"): if arg.startswith("-always"):
always = True always = True
elif arg.startswith("-force-reload"):
force_reload = True
else: else:
genFactory.handleArg(arg) genFactory.handleArg(arg)
@@ -194,7 +214,7 @@ def main(*args):
# The preloading generator is responsible for downloading multiple # The preloading generator is responsible for downloading multiple
# pages from the wiki simultaneously. # pages from the wiki simultaneously.
gen = pagegenerators.PreloadingGenerator(gen) gen = pagegenerators.PreloadingGenerator(gen)
bot = ChartsBot(gen, always) bot = ChartsBot(gen, always, force_reload)
bot.run() bot.run()
else: else:
pywikibot.showHelp() pywikibot.showHelp()

View File

@@ -74,10 +74,12 @@ class CountryList():
for attr in __attr: for attr in __attr:
setattr( self, attr, None ) setattr( self, attr, None )
self.parsed = False
# Try to find year # Try to find year
self.find_year() self.find_year()
def parsing_needed( self, revid ): def is_parsing_needed( self, revid ):
""" """
Check if current revid of CountryList differs from given one Check if current revid of CountryList differs from given one
@@ -103,7 +105,7 @@ class CountryList():
self.year -= 1 self.year -= 1
# If last year does not match, raise YearError # If last year does not match, raise YearError
if str( self.year ) not in self.page.title(): if str( self.year ) not in self.page.title():
raise CountryListYearError raise CountryListError( "CountryList year is errorneous!" )
def parse( self ): def parse( self ):
""" """
@@ -121,16 +123,19 @@ class CountryList():
self.prepare_titel() self.prepare_titel()
self.prepare_interpret() self.prepare_interpret()
# For easy detecting wether we have parsed self
self.parsed = True
def detect_belgian( self ): def detect_belgian( self ):
""" """
Detect wether current entry is on of the belgian (Belgien/Wallonien) Detect wether current entry is on of the belgian (Belgien/Wallonien)
""" """
# Check if begian province name is in link text or title # Check if begian province name is in link text or title
if "Wallonien" in str( self.wikilink.text ) \ if( "Wallonien" in str( self.wikilink.text ) or
or "Wallonien" in str( self.wikilink.title): "Wallonien" in str( self.wikilink.title) ):
return "Wallonie" return "Wallonie"
elif "Flandern" in str( self.wikilink.text ) \ elif( "Flandern" in str( self.wikilink.text ) or
or "Flandern" in str( self.wikilink.title): "Flandern" in str( self.wikilink.title) ):
return "Flandern" return "Flandern"
else: else:
return None return None
@@ -155,11 +160,19 @@ class CountryList():
singles_section = self.wikicode.get_sections( singles_section = self.wikicode.get_sections(
matches=belgian )[0].get_sections( matches="Singles" )[0] matches=belgian )[0].get_sections( matches="Singles" )[0]
else: else:
singles_section = self.wikicode.get_sections( matches="Singles" )[0] singles_section = self.wikicode.get_sections(matches="Singles")[0]
# Since we have multiple categories in some countrys we need
# to select the first wrapping template
try:
wrapping = next( singles_section.ifilter_templates(
matches="Nummer-eins-Hits" ) )
except StopIteration:
raise CountryListError( "Wrapping template is missing!")
# Select the last occurence of template "Nummer-eins-Hits Zeile" in # Select the last occurence of template "Nummer-eins-Hits Zeile" in
# "Singles"-section # Wrapper-template
for self.entry in singles_section.ifilter_templates( for self.entry in wrapping.get("Inhalt").value.ifilter_templates(
matches="Nummer-eins-Hits Zeile" ): matches="Nummer-eins-Hits Zeile" ):
pass pass
@@ -231,6 +244,10 @@ missing!" )
if not self._titel_raw: if not self._titel_raw:
self.get_titel_value() self.get_titel_value()
# Try to find a wikilink for Titel on countrylist
if "[[" not in self._titel_raw:
self.titel = self._search_links( str(self._titel_raw) )
else:
self.titel = self._titel_raw self.titel = self._titel_raw
def get_titel_value( self ): def get_titel_value( self ):
@@ -285,31 +302,10 @@ missing!" )
parts.append( word ) parts.append( word )
parts.append( " " ) parts.append( " " )
# If we have indexes with out links, search for links # If we have indexes without links, search for links
if indexes: if indexes:
# Iterate over wikilinks of refpage and try to find related links parts = self._search_links( parts, indexes )
for wikilink in self.wikicode.ifilter_wikilinks():
# Iterate over interpret names
for index in indexes:
# Check wether wikilink matches
if parts[index] == wikilink.text \
or parts[index] == wikilink.title:
# Overwrite name with complete wikilink
parts[index] = str( wikilink )
# Remove index from worklist
indexes.remove( index )
# Other indexes won't also match
break
# If worklist is empty, stop iterating over wikilinks
if not indexes:
break
# Join the collected links # Join the collected links
sep = " " sep = " "
@@ -330,6 +326,59 @@ missing!" )
raise CountryListEntryError( "Template Parameter 'Interpret' is \ raise CountryListEntryError( "Template Parameter 'Interpret' is \
missing!" ) missing!" )
def _search_links( self, keywords, indexes=None ):
"""
Search matching wikilinks for keyword(s) in CountryList's wikicode
@param keywords: One or more keywords to search for
@type keywords: str, list
@param indexes: List with numeric indexes for items of keywords to work
on only
@type indexes: list of ints
@return: List or String with replaced keywords
@return type: str, list
"""
# Maybe convert keywords string to list
if( isinstance( keywords, str ) ):
keywords = [ keywords, ]
string = True
else:
string = False
# If indexes worklist was not provided, work on all elements
if not indexes:
indexes = list(range( len( keywords ) ))
# Iterate over wikilinks of refpage and try to find related links
for wikilink in self.wikicode.ifilter_wikilinks():
# Iterate over interpret names
for index in indexes:
# Check wether wikilink matches
if( keywords[index] == wikilink.text or
keywords[index] == wikilink.title ):
# Overwrite name with complete wikilink
keywords[index] = str( wikilink )
# Remove index from worklist
indexes.remove( index )
# Other indexes won't also match
break
# If worklist is empty, stop iterating over wikilinks
if not indexes:
break
# Choose wether return list or string based on input type
if not string:
return keywords
else:
return str(keywords[0])
class CountryListError( Exception ): class CountryListError( Exception ):
""" """
@@ -337,6 +386,7 @@ class CountryListError( Exception ):
""" """
pass pass
class CountryListEntryError( CountryListError ): class CountryListEntryError( CountryListError ):
""" """
Handles errors occuring in class CountryList related to entrys Handles errors occuring in class CountryList related to entrys

View File

@@ -25,44 +25,56 @@
Provides classes for handling Charts summary page Provides classes for handling Charts summary page
""" """
import locale
from datetime import datetime, timedelta from datetime import datetime, timedelta
import pywikibot # import pywikibot
import mwparserfromhell as mwparser import mwparserfromhell as mwparser
from countrylist import CountryList, CountryListError from countrylist import CountryList, CountryListError
class SummaryPage(): class SummaryPage():
""" """
Handles summary page related actions Handles summary page related actions
""" """
def __init__( self, text ): def __init__( self, text, force_reload=False ):
""" """
Create Instance Create Instance
@param text: Page Text of summarypage
@type text: str
@param force-reload: If given, countrylists will be always parsed
regardless if needed or not
@type force-reload: bool
""" """
# Parse Text with mwparser # Parse Text with mwparser
self.wikicode = mwparser.parse( text ) self.wikicode = mwparser.parse( text )
# Force parsing of countrylist
self.force_reload = force_reload
def treat( self ): def treat( self ):
""" """
Handles parsing/editing of text Handles parsing/editing of text
""" """
# Get mwparser.template objects for Template "/Eintrag" # Get mwparser.template objects for Template "/Eintrag"
for entry in self.wikicode.filter_templates( matches="/Eintrag" ) : for entry in self.wikicode.filter_templates( matches="/Eintrag" ):
# Instantiate SummaryPageEntry-object # Instantiate SummaryPageEntry-object
summarypageentry = SummaryPageEntry( entry ) summarypageentry = SummaryPageEntry(entry,
force_reload=self.force_reload)
# Treat SummaryPageEntry-object # Treat SummaryPageEntry-object
summarypageentry.treat() summarypageentry.treat()
# Get result # Get result
# We need to replace origninal entry since objectid changes due to # We need to replace origninal entry since objectid changes due to
# recreation of template object and reassignment won't be reflected # recreation of template object and reassignment won't be reflected
self.wikicode.replace( entry, summarypageentry.new_entry.template ) self.wikicode.replace(entry, summarypageentry.get_entry().template)
def get_new_text( self ): def get_new_text( self ):
""" """
@@ -85,18 +97,31 @@ class SummaryPageEntry():
write_needed = False write_needed = False
def __init__( self, entry ): def __init__( self, entry, force_reload=False ):
""" """
Constructor Constructor
@param entry: Entry template of summarypage entry
@type text: mwparser.template
@param force-reload: If given, countrylists will be always parsed
regardless if needed or not
@type force-reload: bool
""" """
self.old_entry = SummaryPageEntryTemplate( entry ) self.old_entry = SummaryPageEntryTemplate( entry )
self.new_entry = SummaryPageEntryTemplate( ) self.new_entry = SummaryPageEntryTemplate( )
# Force parsing of countrylist
self.force_reload = force_reload
def treat( self ): def treat( self ):
""" """
Controls parsing/update-sequence of entry Controls parsing/update-sequence of entry
""" """
self.parse() # Get CountryList-Object
self.get_countrylist()
# Check if parsing country list is needed
if( self.countrylist.parsed):
self.correct_chartein() self.correct_chartein()
@@ -104,9 +129,9 @@ class SummaryPageEntry():
self.is_write_needed() self.is_write_needed()
def parse( self ): def get_countrylist( self ):
""" """
Handles parsing process of entry template Get the CountryList-Object for current entry
""" """
# Get wikilink to related countrylist # Get wikilink to related countrylist
@@ -115,23 +140,22 @@ class SummaryPageEntry():
# Get saved revision of related countrylist # Get saved revision of related countrylist
self.get_countrylist_saved_revid() self.get_countrylist_saved_revid()
# Get current year # Get current year
current_year = datetime.now().year; current_year = datetime.now().year
# Store old link.title # Store old link.title
link_title = self.countrylist_wikilink.title link_title = self.countrylist_wikilink.title
# If list is from last year, replace year # If list is from last year, replace year
if (current_year - 1) in link_title: if (current_year - 1) in link_title:
self.countrylist_wikilink.title.replace( (current_year - 1), current_year ) self.countrylist_wikilink.title.replace( (current_year - 1),
current_year )
# Try to get current years list # Try to get current years list
try: try:
self.countrylist = CountryList( self.countrylist_wikilink ) self.countrylist = CountryList( self.countrylist_wikilink )
if self.countrylist: self.maybe_parse_countrylist()
self.countrylist.parse()
# Maybe fallback to last years list # Maybe fallback to last years list
except CountryListError: except CountryListError:
@@ -139,20 +163,37 @@ class SummaryPageEntry():
self.countrylist_wikilink.title = link_title self.countrylist_wikilink.title = link_title
self.countrylist = CountryList( self.countrylist_wikilink ) self.countrylist = CountryList( self.countrylist_wikilink )
if self.countrylist: self.maybe_parse_countrylist()
self.countrylist.parse()
else: if not self.countrylist:
raise SummaryPageEntryError( "CountryList does not exists!" ) raise SummaryPageEntryError( "CountryList does not exists!" )
def maybe_parse_countrylist( self ):
    """
    Trigger parsing of the countrylist when there is something to parse

    Does nothing if self.countrylist is falsy. Otherwise the countrylist
    is asked whether parsing is needed for the saved revision id
    (self.countrylist_revid); it gets parsed if that is the case or if
    self.force_reload (param -force-reload) is set.
    """

    countrylist = self.countrylist

    # Only continue with an existing countrylist-object
    if countrylist:

        # Ask the countrylist first, the force flag is only the fallback
        revision_changed = countrylist.is_parsing_needed(
            self.countrylist_revid )

        if revision_changed or self.force_reload:
            countrylist.parse()
def get_countrylist_wikilink( self ): def get_countrylist_wikilink( self ):
""" """
Load wikilink to related countrylist Load wikilink to related countrylist
""" """
if self.old_entry.Liste: if self.old_entry.Liste:
try: try:
self.countrylist_wikilink = next( self.old_entry.Liste.ifilter_wikilinks() ) self.countrylist_wikilink = next(
self.old_entry.Liste.ifilter_wikilinks() )
except StopIteration: except StopIteration:
raise SummaryPageEntryError( "Parameter Liste does not contain valid wikilink!") raise SummaryPageEntryError(
"Parameter Liste does not contain valid wikilink!" )
else: else:
raise SummaryPageEntryError( "Parameter Liste is not present!") raise SummaryPageEntryError( "Parameter Liste is not present!")
@@ -161,7 +202,7 @@ class SummaryPageEntry():
Load saved revid of related countrylist if Param is present Load saved revid of related countrylist if Param is present
""" """
if self.old_entry.Liste_Revision: if self.old_entry.Liste_Revision:
self.countrylist_revid = int( self.old_entry.Liste_Revision.strip()) self.countrylist_revid = int(self.old_entry.Liste_Revision.strip())
else: else:
self.countrylist_revid = 0 self.countrylist_revid = 0
@@ -171,7 +212,8 @@ class SummaryPageEntry():
""" """
self.new_entry.Liste = self.countrylist_wikilink self.new_entry.Liste = self.countrylist_wikilink
self.new_entry.Liste_Revision = self.countrylist.page.latest_revision_id self.new_entry.Liste_Revision = \
self.countrylist.page.latest_revision_id
self.new_entry.Interpret = self.countrylist.interpret self.new_entry.Interpret = self.countrylist.interpret
self.new_entry.Titel = self.countrylist.titel self.new_entry.Titel = self.countrylist.titel
self.new_entry.Chartein = self._corrected_chartein self.new_entry.Chartein = self._corrected_chartein
@@ -210,9 +252,20 @@ class SummaryPageEntry():
Detects wether writing of entry is needed and stores information in Detects wether writing of entry is needed and stores information in
Class-Attribute Class-Attribute
""" """
type( self ).write_needed = ( ( self.old_entry != self.new_entry ) or \ type( self ).write_needed = ( ( self.old_entry != self.new_entry ) and
self.countrylist.parsed or
type( self ).write_needed ) type( self ).write_needed )
def get_entry( self ):
    """
    Select the entry which represents the current state

    @return: self.new_entry if self.countrylist.parsed is true,
             otherwise self.old_entry
    """
    return self.new_entry if self.countrylist.parsed else self.old_entry
class SummaryPageEntryTemplate(): class SummaryPageEntryTemplate():
""" """
@@ -229,8 +282,8 @@ class SummaryPageEntryTemplate():
Creates Instance of Class for given mwparser.template object of Creates Instance of Class for given mwparser.template object of
SummmaryPageEntry Template. If no object was given create empty one. SummmaryPageEntry Template. If no object was given create empty one.
@param template_obj mw.parser.template Object of @param template_obj Object of SummmaryPageEntry Template
SummmaryPageEntry Template @type template_obj: mwparser.template
""" """
# Check if object was given # Check if object was given
@@ -240,25 +293,25 @@ class SummaryPageEntryTemplate():
if isinstance( template_obj, if isinstance( template_obj,
mwparser.nodes.template.Template ): mwparser.nodes.template.Template ):
self.template = template_obj; self.template = template_obj
self.__initial = False; self.__initial = False
# Otherwise raise error # Otherwise raise error
else: else:
raise SummaryPageEntryTemplateError( "Wrong type given" ); raise SummaryPageEntryTemplateError( "Wrong type given" )
# Otherwise initialise template # Otherwise initialise template
else: else:
self.__initial_template() self.__initial_template()
self.__initial = True; self.__initial = True
def __initial_template( self ): def __initial_template( self ):
""" """
Builds the initial template Builds the initial template
""" """
self.template = next( mwparser.parse( self.template = next( mwparser.parse( "{{Portal:Charts und Popmusik/\
"{{/Eintrag|Liste=|Liste_Revision=|Interpret=|Titel=NN\ Aktuelle Nummer-eins-Hits/Eintrag|Liste=|Liste_Revision=|Interpret=|Titel=NN\
|Chartein=|Korrektur=|Hervor=}}" ).ifilter_templates() ) |Chartein=|Korrektur=|Hervor=}}" ).ifilter_templates() )
def __getattr__( self, name ): def __getattr__( self, name ):
@@ -302,7 +355,7 @@ class SummaryPageEntryTemplate():
cmpto = self cmpto = self
else: else:
raise SummaryPageEntryTemplateError( raise SummaryPageEntryTemplateError(
"One of the compared instances must have been initial!" ) "One of the compared instances must have been initial!" )
# Iterate over each param # Iterate over each param
for param in initial.template.params: for param in initial.template.params:
@@ -319,8 +372,8 @@ class SummaryPageEntryTemplate():
continue continue
# Compare other param values, if one unequal write is needed # Compare other param values, if one unequal write is needed
if initial.template.get( param ).value.strip() != \ if( initial.template.get( param ).value.strip() !=
cmpto.template.get( param ).value.strip(): cmpto.template.get( param ).value.strip() ):
return True return True
# If not returned True until now # If not returned True until now
@@ -333,12 +386,14 @@ class SummaryPageError( Exception ):
""" """
pass pass
class SummaryPageEntryError( SummaryPageError ): class SummaryPageEntryError( SummaryPageError ):
""" """
Handles errors occuring in class SummaryPageEntry Handles errors occuring in class SummaryPageEntry
""" """
pass pass
class SummaryPageEntryTemplateError( SummaryPageError ): class SummaryPageEntryTemplateError( SummaryPageError ):
""" """
Handles errors occuring in class SummaryPageEntryTemplate Handles errors occuring in class SummaryPageEntryTemplate