Browse Source

ChartsBot-Module: Remove old code which is replaced by SummaryPage- and CountryList-Module

develop
Jonathan Golder 9 years ago
parent
commit
43668ef8b7
  1. 362
      chartsbot.py

362
chartsbot.py

@ -133,349 +133,31 @@ class ChartsBot( ):
# show what was changed
pywikibot.showDiff(page.get(), text)
pywikibot.output(u'Comment: %s' % comment)
if not self.dry:
if self.always or pywikibot.input_yn(
u'Do you want to accept these changes?',
default=False, automatic_quit=False):
try:
page.text = text
# Save the page
page.save(summary=comment or self.comment,
minor=minorEdit, botflag=botflag)
except pywikibot.LockedPage:
pywikibot.output(u"Page %s is locked; skipping."
% page.title(asLink=True))
except pywikibot.EditConflict:
pywikibot.output(
u'Skipping %s because of edit conflict'
% (page.title()))
except pywikibot.SpamfilterError as error:
pywikibot.output(
u'Cannot change %s because of spam blacklist \
if self.always or pywikibot.input_yn(
u'Do you want to accept these changes?',
default=False, automatic_quit=False):
try:
page.text = text
# Save the page
page.save(summary=comment or self.comment,
minor=minorEdit, botflag=botflag)
except pywikibot.LockedPage:
pywikibot.output(u"Page %s is locked; skipping."
% page.title(asLink=True))
except pywikibot.EditConflict:
pywikibot.output(
u'Skipping %s because of edit conflict'
% (page.title()))
except pywikibot.SpamfilterError as error:
pywikibot.output(
u'Cannot change %s because of spam blacklist \
entry %s'
% (page.title(), error.url))
else:
return True
% (page.title(), error.url))
else:
return True
return False
def parse_overview(self, text):
    """
    Parse the given charts overview page and return its updated wikitext.

    Every "/Eintrag" template found in the text is completed with missing
    params, its referenced charts list is parsed and, if that parsing
    delivered data, the template params are refreshed.

    @param text wikitext of the charts overview page
    @returns wikitext of the (possibly updated) overview page as str
    """
    # mwparser gives us node level access to the templates
    wikicode = mwparser.parse(text)

    for entry in wikicode.ifilter_templates(matches="/Eintrag"):
        # Make sure all params we read or write below are present
        self.entry_template_complete(entry)

        # Revision id of the charts list saved by the previous run
        saved_revid = int(str(entry.get("Liste Revision").value))

        ref_list_link = self.get_entry_ref_list(entry)
        data = self.parse_ref_list(ref_list_link, saved_revid)

        # A falsy result means parsing was short circuited, nothing to do
        if not data:
            continue

        data = self.calculate_chartein(entry, data)
        self.entry_changed(entry, data)

    # mwparser objects must be converted to str before saving
    return str(wikicode)
def parse_ref_list(self, ref_list_link, ref_list_revid):
    """
    Parse the referenced charts list and collect its latest number one entry.

    @param ref_list_link wikilink object pointing to the charts list
                         (its title and text attributes are used)
    @param ref_list_revid revision id of the charts list saved earlier
    @returns False if the latest revision of the charts list equals
             ref_list_revid, otherwise the tuple
             ( chartein, title, interpret, latest_revision_id ) where
             chartein is either a datetime or a tuple ( year, weeknumber )
    @raises ChartsListError if the "Singles" section contains no
            "Nummer-eins-Hits Zeile" template
    """
    # Create Page-Object for Chartslist
    ref_list_page = pywikibot.Page(self.site, ref_list_link.title)

    # Short circuit if current revision is the same as the saved one
    if ref_list_page.latest_revision_id == ref_list_revid:
        return False

    # Try to find the year the list is related to: use the current year
    # if it occurs in the page title, otherwise fall back to the last one
    year = datetime.now().year
    if str(year) not in ref_list_page.title():
        year = year - 1

    # Parse charts list with mwparser
    wikicode = mwparser.parse(ref_list_page.text)

    # Detect if we are on a belgian list
    belgian = self.detect_belgium(ref_list_link)

    # Select the section "Singles"
    # For belgian lists we need to select the subsection of the region first
    if belgian:
        singles_section = wikicode.get_sections(
            matches=belgian)[0].get_sections(matches="Singles")[0]
    else:
        singles_section = wikicode.get_sections(matches="Singles")[0]

    # Collect all occurrences of template "Nummer-eins-Hits Zeile" in the
    # "Singles"-section, the last one is the entry we are interested in
    entries = singles_section.filter_templates(
        matches="Nummer-eins-Hits Zeile")

    if not entries:
        # Report the title of the charts list page that was parsed
        raise ChartsListError(ref_list_page.title())
    last = entries[-1]

    # Detect whether we have a date or a weeknumber for Template Param
    # "Chartein"
    chartein_raw = last.get("Chartein").value.strip()
    if chartein_raw.isnumeric():
        # Maybe there is a year correction for the weeknumber
        if last.has("Jahr"):
            year_correction = last.get("Jahr").value.strip()
            if year_correction == "+1":
                year = year + 1
            elif year_correction == "-1":
                year = year - 1
        chartein = (year, chartein_raw)
    else:
        chartein = datetime.strptime(chartein_raw, "%Y-%m-%d")

    title = last.get("Titel").value.strip()

    # Work with interpret value to add missing links
    # Split it into words
    interpret = last.get("Interpret").value.strip()
    interpret_words = interpret.split()

    # Words which separate interpret names
    interpret_cat = ("feat.", "&")

    # List of alternating interpret names (even positions) and separator
    # words (odd positions), starting with a blank placeholder for the
    # first name
    interpreten_raw = [" "]
    # Positions of interpret names which do not contain a wikilink
    indexes = []
    index = 0

    # Reconcatenate interpret names
    for word in interpret_words:
        if word not in interpret_cat:
            interpreten_raw[-1] = (interpreten_raw[-1] + " " + word).strip()
            if index not in indexes and "[[" not in interpreten_raw[-1]:
                indexes.append(index)
        else:
            # Separator found: store it and open a new placeholder, which
            # is located two positions after the previous name
            index += 2
            interpreten_raw.append(word)
            interpreten_raw.append(" ")

    # Check if we have names without links
    if indexes:
        print(ref_list_page.title())

        # Iterate over wikilinks of refpage and try to find related links
        for wikilink in wikicode.ifilter_wikilinks():
            # Iterate over interpret names to check whether wikilink matches
            for index in indexes:
                if interpreten_raw[index] == wikilink.text \
                        or interpreten_raw[index] == wikilink.title:
                    interpreten_raw[index] = str(wikilink)
                    # Safe although we iterate over indexes, since we
                    # leave the inner loop immediately
                    indexes.remove(index)
                    break

            # Stop searching as soon as every name got its link
            if not indexes:
                break

    # Join the collected names, links and separators
    interpret = " ".join(interpreten_raw)

    # Return collected data as tuple
    return (chartein, title, interpret, ref_list_page.latest_revision_id)
def detect_belgium(self, ref_list_link):
    """
    Detect whether the given link belongs to one of the belgian lists.

    @param ref_list_link link object providing the attributes text and title
    @returns "Wallonie" if "Wallonien" occurs in the link text or title,
             "Flandern" if "Flandern" occurs in the link text or title,
             otherwise None
    """
    # Pairs of searched keyword and value to return, checked in this order
    regions = (
        ("Wallonien", "Wallonie"),
        ("Flandern", "Flandern"),
    )

    for keyword, result in regions:
        # Link text is checked first, title only if the text did not match
        if keyword in str(ref_list_link.text) \
                or keyword in str(ref_list_link.title):
            return result

    return None
def update_overview( self, country, list_page ): # noqa
    """
    Updates the templates given in country using data from given list_page

    NOTE(review): the visible body neither uses country or list_page nor
    contains a return statement, so the documented return value below is
    not produced by the code as shown. Looks like an unfinished stub,
    please confirm before relying on it.

    @param country wikicode-object with Template for country
    @param list_page pywikibot-page-object for list-page
    @returns wikicode-object with updated Template for country
    """
    # NOTE(review): ref_list_link and belgien are neither parameters nor
    # locals of this method and no parse_charts_list method is visible in
    # this part of the file; unless they are defined elsewhere this call
    # fails at runtime. TODO confirm or remove the method.
    data = self.parse_charts_list( ref_list_link, belgien )
def get_entry_ref_list(self, entry):
    """
    Return the wikilink to the charts list referenced by the given entry.

    The first wikilink of param "Liste" is used. If replacing last year by
    the current year in its title results in an existing page, the link is
    returned with that new title, otherwise with its original title.

    @param entry template object of the overview entry
    @returns wikilink object found in param "Liste"
    """
    # Get mwparser.wikilink object
    link = next(entry.get("Liste").value.ifilter_wikilinks())

    current_year = datetime.now().year
    original_title = link.title

    # Tentatively point the link to the list of the current year
    link.title = str(link.title).replace(
        str(current_year - 1), str(current_year))

    # Restore the original title if there is no such page
    if not pywikibot.Page(self.site, link.title).exists():
        link.title = original_title

    return link
def calculate_chartein(self, entry, data):
    """
    Calculate the date for param "Chartein" of the given entry.

    @param entry template object, its optional param "Korrektur" gives a
                 number of days to add
    @param data tuple whose first item is either a date object or a tuple
                ( year, weeknumber )
    @returns copy of data with the first item replaced by the calculated
             date
    """
    # Number of days to add, zero unless "Korrektur" holds an integer
    days = 0
    if entry.has("Korrektur"):
        try:
            days = int(str(entry.get("Korrektur").value).strip())
        except ValueError:
            # Value is not castable to int, ignore it
            days = 0
    correction = timedelta(days=days)

    chartein = data[0]
    if isinstance(chartein, tuple):
        # For some countries we have weeknumbers instead of dates: start
        # with the monday of the given week as day zero
        date = Week(chartein[0], int(chartein[1])).monday() + correction
    else:
        # Regular date
        date = chartein + correction

    return (date,) + data[1:]
def entry_template_complete(self, entry):
    """
    Make sure the given entry template contains all params we work with.

    Missing params "Chartein", "Titel" and "Interpret" are added with an
    empty value, missing param "Liste Revision" is added with value 0.

    @param entry template object to complete
    @returns the same template object
    """
    # "Chartein" is placed before "Korrektur"; adding it that way may raise
    # ValueError, in which case it is added without a position
    if not entry.has("Chartein"):
        try:
            entry.add("Chartein", "", before="Korrektur")
        except ValueError:
            entry.add("Chartein", "")

    # Remaining params as ( name, default value, param to insert before ),
    # order matters since each one is anchored to the previous one
    remaining = (
        ("Titel", "", "Chartein"),
        ("Interpret", "", "Titel"),
        ("Liste Revision", 0, "Interpret"),
    )
    for name, default, anchor in remaining:
        if not entry.has(name):
            entry.add(name, default, before=anchor)

    return entry
def entry_changed(self, entry, data):
    """
    Write the given data to the entry template where it differs.

    @param entry template object with params "Chartein", "Titel",
                 "Interpret" and "Liste Revision"
    @param data tuple ( date, title, interpret, revision id )
    @returns the same template object
    """
    # Date is stored as day without leading zero followed by month name
    chartein_text = data[0].strftime("%d. %B").lstrip("0")
    chartein_param = entry.get("Chartein")
    if chartein_text != chartein_param.value:
        chartein_param.value = chartein_text

    # "Titel" and "Interpret" are only written if their value has changed
    for name, new_value in (("Titel", data[1]), ("Interpret", data[2])):
        param = entry.get(name)
        if new_value != param.value:
            param.value = new_value

    # "Liste Revision" is always updated
    entry.get("Liste Revision").value = str(data[3])

    return entry
class ChartsError(Exception):
    """
    Base class for all errors of the Charts module
    """

    def __init__(self, message=None):
        """
        Store the given message, a falsy message selects a generic default

        @param message text describing the error
        """
        self.message = (
            message or "An Error occured while executing a Charts action")

    def __str__(self):
        """
        Return the stored error message
        """
        return self.message
class ChartsListError(ChartsError):
    """
    Raised when the given charts list page does not contain valid entries
    """

    def __init__(self, givenPage):
        """
        Build the error message for the given page

        @param givenPage value identifying the page, inserted into the
                         message
        """
        template = ("Given CharstListPage ('{given}') does not contain "
                    "valid entries")
        super().__init__(template.format(given=givenPage))
def main(*args):
"""

Loading…
Cancel
Save