Browse Source

Merge branch 'remove-refs' into improve-output

Get recent changes before going on
develop
Jonathan Golder 8 years ago
parent
commit
287942e174
  1. 33
      chartsbot.py
  2. 132
      countrylist.py
  3. 65
      summarypage.py

33
chartsbot.py

@ -40,6 +40,8 @@ The following parameters are supported:
-always If given, request for confirmation of edit is short circuited -always If given, request for confirmation of edit is short circuited
Use for unattended run Use for unattended run
-force-reload If given, countrylists are always parsed, regardless of
whether parsing is needed
""" """
@ -61,10 +63,10 @@ class ChartsBot( ):
""" """
Bot which automatically updates a ChartsSummaryPage like Bot which automatically updates a ChartsSummaryPage like
[[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked [[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
CountryListsAn incomplete sample bot. CountryLists
""" """
def __init__( self, generator, always ): def __init__( self, generator, always, force_reload ):
""" """
Constructor. Constructor.
@ -74,11 +76,17 @@ class ChartsBot( ):
@param always: if True, request for confirmation of edit is short @param always: if True, request for confirmation of edit is short
circuited. Use for unattended run circuited. Use for unattended run
@type always: bool @type always: bool
@param force_reload: If True, countrylists are always parsed,
regardless of whether parsing is needed
@type force_reload: bool
""" """
self.generator = generator self.generator = generator
self.always = always self.always = always
# Force parsing of countrylist
self.force_reload = force_reload
# Set the edit summary message # Set the edit summary message
self.site = pywikibot.Site() self.site = pywikibot.Site()
self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits" self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits"
@ -86,6 +94,14 @@ class ChartsBot( ):
# Set locale to 'de_DE.UTF-8' # Set locale to 'de_DE.UTF-8'
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
# provisional-onwiki-activation
page_active = pywikibot.Page( self.site, "Benutzer:JogoBot/active" )
text_active = page_active.get()
if "true" not in text_active.lower():
pywikibot.output( "Bot ist deaktiviert!" )
return False
def run(self): def run(self):
"""Process each page from the generator.""" """Process each page from the generator."""
for page in self.generator: for page in self.generator:
@ -102,7 +118,7 @@ class ChartsBot( ):
################################################################ ################################################################
# Initialise and treat SummaryPageWorker # Initialise and treat SummaryPageWorker
sumpage = SummaryPage( text ) sumpage = SummaryPage( text, self.force_reload )
sumpage.treat() sumpage.treat()
# Check if editing is needed and if so get new text # Check if editing is needed and if so get new text
@ -191,10 +207,16 @@ def main(*args):
# If always is True, bot won't ask for confirmation of edit (automode) # If always is True, bot won't ask for confirmation of edit (automode)
always = False always = False
# If force_reload is True, bot will always parse the countrylist, regardless
# of whether parsing is needed
force_reload = False
# Parse command line arguments # Parse command line arguments
for arg in local_args: for arg in local_args:
if arg.startswith("-always"): if arg.startswith("-always"):
always = True always = True
elif arg.startswith("-force-reload"):
force_reload = True
else: else:
genFactory.handleArg(arg) genFactory.handleArg(arg)
@ -204,8 +226,9 @@ def main(*args):
# The preloading generator is responsible for downloading multiple # The preloading generator is responsible for downloading multiple
# pages from the wiki simultaneously. # pages from the wiki simultaneously.
gen = pagegenerators.PreloadingGenerator(gen) gen = pagegenerators.PreloadingGenerator(gen)
bot = ChartsBot(gen, always) bot = ChartsBot(gen, always, force_reload)
bot.run() if bot:
bot.run()
else: else:
pywikibot.showHelp() pywikibot.showHelp()

132
countrylist.py

@ -66,7 +66,8 @@ class CountryList():
# Check if page exits # Check if page exits
if not self.page.exists(): if not self.page.exists():
return False raise CountryListError( "CountryList " +
str(wikilink.title) + " does not exists!" )
# Initialise attributes # Initialise attributes
__attr = ( "wikicode", "entry", "chartein", "_chartein_raw", __attr = ( "wikicode", "entry", "chartein", "_chartein_raw",
@ -156,11 +157,18 @@ class CountryList():
# For belgian list we need to select subsection of country # For belgian list we need to select subsection of country
belgian = self.detect_belgian() belgian = self.detect_belgian()
if belgian: # Select Singles-Section
singles_section = self.wikicode.get_sections( # Catch Error if we have none
matches=belgian )[0].get_sections( matches="Singles" )[0] try:
else: if belgian:
singles_section = self.wikicode.get_sections(matches="Singles")[0] singles_section = self.wikicode.get_sections(
matches=belgian )[0].get_sections( matches="Singles" )[0]
else:
singles_section = self.wikicode.get_sections(
matches="Singles" )[0]
except IndexError:
raise CountryListError( "No Singles-Section found!")
# Since we have multiple categories in some countrys we need # Since we have multiple categories in some countrys we need
# to select the first wrapping template # to select the first wrapping template
@ -230,7 +238,15 @@ class CountryList():
If param is not present raise Error If param is not present raise Error
""" """
if self.entry.has( "Chartein" ): if self.entry.has( "Chartein" ):
self._chartein_raw = self.entry.get("Chartein").value.strip() self._chartein_raw = self.entry.get("Chartein").value
# Remove possible ref-tags
for ref in self._chartein_raw.ifilter_tags(matches="ref"):
self._chartein_raw.remove( ref )
# Remove whitespace
self._chartein_raw = str(self._chartein_raw).strip()
else: else:
raise CountryListEntryError( "Template Parameter 'Chartein' is \ raise CountryListEntryError( "Template Parameter 'Chartein' is \
missing!" ) missing!" )
@ -244,7 +260,11 @@ missing!" )
if not self._titel_raw: if not self._titel_raw:
self.get_titel_value() self.get_titel_value()
self.titel = self._titel_raw # Try to find a wikilink for Titel on countrylist
if "[[" not in self._titel_raw:
self.titel = self._search_links( str(self._titel_raw) )
else:
self.titel = self._titel_raw
def get_titel_value( self ): def get_titel_value( self ):
""" """
@ -252,7 +272,14 @@ missing!" )
If param is not present raise Error If param is not present raise Error
""" """
if self.entry.has( "Titel" ): if self.entry.has( "Titel" ):
self._titel_raw = self.entry.get("Titel").value.strip() self._titel_raw = self.entry.get("Titel").value
# Remove possible ref-tags
for ref in self._titel_raw.ifilter_tags(matches="ref"):
self._titel_raw.remove( ref )
# Remove whitespace
self._titel_raw = str(self._titel_raw).strip()
else: else:
raise CountryListEntryError( "Template Parameter 'Titel' is \ raise CountryListEntryError( "Template Parameter 'Titel' is \
missing!" ) missing!" )
@ -298,31 +325,10 @@ missing!" )
parts.append( word ) parts.append( word )
parts.append( " " ) parts.append( " " )
# If we have indexes with out links, search for links # If we have indexes without links, search for links
if indexes: if indexes:
# Iterate over wikilinks of refpage and try to find related links parts = self._search_links( parts, indexes )
for wikilink in self.wikicode.ifilter_wikilinks():
# Iterate over interpret names
for index in indexes:
# Check wether wikilink matches
if( parts[index] == wikilink.text or
parts[index] == wikilink.title ):
# Overwrite name with complete wikilink
parts[index] = str( wikilink )
# Remove index from worklist
indexes.remove( index )
# Other indexes won't also match
break
# If worklist is empty, stop iterating over wikilinks
if not indexes:
break
# Join the collected links # Join the collected links
sep = " " sep = " "
@ -338,11 +344,71 @@ missing!" )
If param is not present raise Error If param is not present raise Error
""" """
if self.entry.has( "Interpret" ): if self.entry.has( "Interpret" ):
self._interpret_raw = self.entry.get("Interpret").value.strip() self._interpret_raw = self.entry.get("Interpret").value
# Remove possible ref-tags
for ref in self._interpret_raw.ifilter_tags(matches="ref"):
self._interpret_raw.remove( ref )
# Remove whitespace
self._interpret_raw = str(self._interpret_raw).strip()
else: else:
raise CountryListEntryError( "Template Parameter 'Interpret' is \ raise CountryListEntryError( "Template Parameter 'Interpret' is \
missing!" ) missing!" )
def _search_links( self, keywords, indexes=None ):
    """
    Search matching wikilinks for keyword(s) in CountryList's wikicode

    Every wikilink of self.wikicode is compared against the selected
    keywords. A keyword equal to the link's text or title is replaced by
    the string form of the complete wikilink. Each keyword is replaced at
    most once and each wikilink is used for at most one keyword.

    @param keywords: One or more keywords to search for. A list is
                     modified in place and returned.
    @type keywords: str, list
    @param indexes: Numeric indexes of the items of keywords to work on.
                    None (default) selects all items, an empty list
                    selects none. The list itself is not modified.
    @type indexes: list of ints

    @return: List or String with replaced keywords, same kind as input
    @return type: str, list
    """

    # A single string is wrapped into a list and unwrapped again on return
    is_string = isinstance( keywords, str )
    if is_string:
        keywords = [ keywords ]

    # Only a missing worklist means "all elements". Work on a private copy
    # so the caller's list is left untouched.
    if indexes is None:
        pending = list( range( len( keywords ) ) )
    else:
        pending = list( indexes )

    # Iterate over wikilinks of refpage and try to find related links
    for wikilink in self.wikicode.ifilter_wikilinks():

        # If worklist is empty, stop iterating over wikilinks
        if not pending:
            break

        # Iterate over the keywords which are still unresolved
        for index in pending:

            # Check whether wikilink matches
            if( keywords[index] == wikilink.text or
                    keywords[index] == wikilink.title ):

                # Overwrite keyword with complete wikilink
                keywords[index] = str( wikilink )

                # Removing while iterating is safe only because the loop
                # is left right away; other indexes won't also match
                pending.remove( index )
                break

    # Choose whether to return list or string based on input type
    if is_string:
        return str( keywords[0] )
    return keywords
class CountryListError( Exception ): class CountryListError( Exception ):
""" """

65
summarypage.py

@ -38,14 +38,24 @@ class SummaryPage():
Handles summary page related actions Handles summary page related actions
""" """
def __init__( self, text ): def __init__( self, text, force_reload=False ):
""" """
Create Instance Create Instance
@param text: Page Text of summarypage
@type text: str
@param force_reload: If True, countrylists are always parsed,
regardless of whether parsing is needed
@type force_reload: bool
""" """
# Parse Text with mwparser # Parse Text with mwparser
self.wikicode = mwparser.parse( text ) self.wikicode = mwparser.parse( text )
# Force parsing of countrylist
self.force_reload = force_reload
def treat( self ): def treat( self ):
""" """
Handles parsing/editing of text Handles parsing/editing of text
@ -55,7 +65,9 @@ class SummaryPage():
for entry in self.wikicode.filter_templates( matches="/Eintrag" ): for entry in self.wikicode.filter_templates( matches="/Eintrag" ):
# Instantiate SummaryPageEntry-object # Instantiate SummaryPageEntry-object
summarypageentry = SummaryPageEntry( entry ) summarypageentry = SummaryPageEntry(entry,
force_reload=self.force_reload)
# Treat SummaryPageEntry-object # Treat SummaryPageEntry-object
summarypageentry.treat() summarypageentry.treat()
@ -85,13 +97,22 @@ class SummaryPageEntry():
write_needed = False write_needed = False
def __init__( self, entry ): def __init__( self, entry, force_reload=False ):
""" """
Constructor Constructor
@param entry: Entry template of summarypage entry
@type entry: mwparser.template
@param force_reload: If True, countrylists are always parsed,
regardless of whether parsing is needed
@type force_reload: bool
""" """
self.old_entry = SummaryPageEntryTemplate( entry ) self.old_entry = SummaryPageEntryTemplate( entry )
self.new_entry = SummaryPageEntryTemplate( ) self.new_entry = SummaryPageEntryTemplate( )
# Force parsing of countrylist
self.force_reload = force_reload
def treat( self ): def treat( self ):
""" """
Controls parsing/update-sequence of entry Controls parsing/update-sequence of entry
@ -122,11 +143,8 @@ class SummaryPageEntry():
# Get current year # Get current year
current_year = datetime.now().year current_year = datetime.now().year
# Store old link.title
link_title = self.countrylist_wikilink.title
# If list is from last year, replace year # If list is from last year, replace year
if (current_year - 1) in link_title: if (current_year - 1) in self.countrylist_wikilink.title:
self.countrylist_wikilink.title.replace( (current_year - 1), self.countrylist_wikilink.title.replace( (current_year - 1),
current_year ) current_year )
@ -134,23 +152,38 @@ class SummaryPageEntry():
try: try:
self.countrylist = CountryList( self.countrylist_wikilink ) self.countrylist = CountryList( self.countrylist_wikilink )
if( self.countrylist and self.maybe_parse_countrylist()
self.countrylist.is_parsing_needed( self.countrylist_revid )):
self.countrylist.parse()
# Maybe fallback to last years list # Maybe fallback to last years list
except CountryListError: except CountryListError:
self.countrylist_wikilink.title = link_title # If list is from last year, replace year
if (current_year ) in self.countrylist_wikilink.title:
self.countrylist_wikilink.title.replace( current_year,
(current_year - 1) )
self.countrylist = CountryList( self.countrylist_wikilink ) self.countrylist = CountryList( self.countrylist_wikilink )
if( self.countrylist and self.maybe_parse_countrylist()
self.countrylist.is_parsing_needed( self.countrylist_revid )):
self.countrylist.parse()
if not self.countrylist: if not self.countrylist:
raise SummaryPageEntryError( "CountryList does not exists!" ) raise SummaryPageEntryError( "CountryList does not exists!" )
def maybe_parse_countrylist( self ):
    """
    Trigger parsing of the countrylist when there is something to parse

    Does nothing if self.countrylist is falsy. Otherwise the countrylist
    is asked whether parsing is needed for self.countrylist_revid and is
    parsed if it says so or if self.force_reload (param -force-reload)
    is set.
    """
    countrylist = self.countrylist

    # Only act if a countrylist-object is available
    if countrylist:

        # Always ask the countrylist first, even if reload is forced
        outdated = countrylist.is_parsing_needed( self.countrylist_revid )

        if outdated or self.force_reload:
            countrylist.parse()
def get_countrylist_wikilink( self ): def get_countrylist_wikilink( self ):
""" """
Load wikilink to related countrylist Load wikilink to related countrylist
@ -250,8 +283,8 @@ class SummaryPageEntryTemplate():
Creates Instance of Class for given mwparser.template object of Creates Instance of Class for given mwparser.template object of
SummmaryPageEntry Template. If no object was given create empty one. SummmaryPageEntry Template. If no object was given create empty one.
@param template_obj mw.parser.template Object of @param template_obj Object of SummmaryPageEntry Template
SummmaryPageEntry Template @type template_obj: mwparser.template
""" """
# Check if object was given # Check if object was given

Loading…
Cancel
Save