Browse Source

Merge branch 'remove-refs' into improve-output

Get recent changes before going on
develop
Jonathan Golder 8 years ago
parent
commit
287942e174
  1. 31
      chartsbot.py
  2. 114
      countrylist.py
  3. 65
      summarypage.py

31
chartsbot.py

@ -40,6 +40,8 @@ The following parameters are supported:
-always If given, request for confirmation of edit is short circuited -always If given, request for confirmation of edit is short circuited
Use for unattended run Use for unattended run
-force-reload If given, countrylists will always be parsed, regardless of
whether parsing is needed
""" """
@ -61,10 +63,10 @@ class ChartsBot( ):
""" """
Bot which automatically updates a ChartsSummaryPage like Bot which automatically updates a ChartsSummaryPage like
[[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked [[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
CountryListsAn incomplete sample bot. CountryLists
""" """
def __init__( self, generator, always ): def __init__( self, generator, always, force_reload ):
""" """
Constructor. Constructor.
@ -74,11 +76,17 @@ class ChartsBot( ):
@param always: if True, request for confirmation of edit is short @param always: if True, request for confirmation of edit is short
circuited. Use for unattended run circuited. Use for unattended run
@type always: bool @type always: bool
@param force_reload: If True, countrylists will always be parsed,
regardless of whether parsing is needed
@type force_reload: bool
""" """
self.generator = generator self.generator = generator
self.always = always self.always = always
# Force parsing of countrylist
self.force_reload = force_reload
# Set the edit summary message # Set the edit summary message
self.site = pywikibot.Site() self.site = pywikibot.Site()
self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits" self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits"
@ -86,6 +94,14 @@ class ChartsBot( ):
# Set locale to 'de_DE.UTF-8' # Set locale to 'de_DE.UTF-8'
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
# provisional-onwiki-activation
page_active = pywikibot.Page( self.site, "Benutzer:JogoBot/active" )
text_active = page_active.get()
if "true" not in text_active.lower():
pywikibot.output( "Bot ist deaktiviert!" )
return False
def run(self): def run(self):
"""Process each page from the generator.""" """Process each page from the generator."""
for page in self.generator: for page in self.generator:
@ -102,7 +118,7 @@ class ChartsBot( ):
################################################################ ################################################################
# Initialise and treat SummaryPageWorker # Initialise and treat SummaryPageWorker
sumpage = SummaryPage( text ) sumpage = SummaryPage( text, self.force_reload )
sumpage.treat() sumpage.treat()
# Check if editing is needed and if so get new text # Check if editing is needed and if so get new text
@ -191,10 +207,16 @@ def main(*args):
# If always is True, bot won't ask for confirmation of edit (automode) # If always is True, bot won't ask for confirmation of edit (automode)
always = False always = False
# If force_reload is True, bot will always parse Countrylist regardless of
# whether parsing is needed or not
force_reload = False
# Parse command line arguments # Parse command line arguments
for arg in local_args: for arg in local_args:
if arg.startswith("-always"): if arg.startswith("-always"):
always = True always = True
elif arg.startswith("-force-reload"):
force_reload = True
else: else:
genFactory.handleArg(arg) genFactory.handleArg(arg)
@ -204,7 +226,8 @@ def main(*args):
# The preloading generator is responsible for downloading multiple # The preloading generator is responsible for downloading multiple
# pages from the wiki simultaneously. # pages from the wiki simultaneously.
gen = pagegenerators.PreloadingGenerator(gen) gen = pagegenerators.PreloadingGenerator(gen)
bot = ChartsBot(gen, always) bot = ChartsBot(gen, always, force_reload)
if bot:
bot.run() bot.run()
else: else:
pywikibot.showHelp() pywikibot.showHelp()

114
countrylist.py

@ -66,7 +66,8 @@ class CountryList():
# Check if page exists # Check if page exists
if not self.page.exists(): if not self.page.exists():
return False raise CountryListError( "CountryList " +
str(wikilink.title) + " does not exists!" )
# Initialise attributes # Initialise attributes
__attr = ( "wikicode", "entry", "chartein", "_chartein_raw", __attr = ( "wikicode", "entry", "chartein", "_chartein_raw",
@ -156,11 +157,18 @@ class CountryList():
# For belgian list we need to select subsection of country # For belgian list we need to select subsection of country
belgian = self.detect_belgian() belgian = self.detect_belgian()
# Select Singles-Section
# Catch Error if we have none
try:
if belgian: if belgian:
singles_section = self.wikicode.get_sections( singles_section = self.wikicode.get_sections(
matches=belgian )[0].get_sections( matches="Singles" )[0] matches=belgian )[0].get_sections( matches="Singles" )[0]
else: else:
singles_section = self.wikicode.get_sections(matches="Singles")[0] singles_section = self.wikicode.get_sections(
matches="Singles" )[0]
except IndexError:
raise CountryListError( "No Singles-Section found!")
# Since we have multiple categories in some countries we need # Since we have multiple categories in some countries we need
# to select the first wrapping template # to select the first wrapping template
@ -230,7 +238,15 @@ class CountryList():
If param is not present raise Error If param is not present raise Error
""" """
if self.entry.has( "Chartein" ): if self.entry.has( "Chartein" ):
self._chartein_raw = self.entry.get("Chartein").value.strip() self._chartein_raw = self.entry.get("Chartein").value
# Remove possible ref-tags
for ref in self._chartein_raw.ifilter_tags(matches="ref"):
self._chartein_raw.remove( ref )
# Remove whitespace
self._chartein_raw = str(self._chartein_raw).strip()
else: else:
raise CountryListEntryError( "Template Parameter 'Chartein' is \ raise CountryListEntryError( "Template Parameter 'Chartein' is \
missing!" ) missing!" )
@ -244,6 +260,10 @@ missing!" )
if not self._titel_raw: if not self._titel_raw:
self.get_titel_value() self.get_titel_value()
# Try to find a wikilink for Titel on countrylist
if "[[" not in self._titel_raw:
self.titel = self._search_links( str(self._titel_raw) )
else:
self.titel = self._titel_raw self.titel = self._titel_raw
def get_titel_value( self ): def get_titel_value( self ):
@ -252,7 +272,14 @@ missing!" )
If param is not present raise Error If param is not present raise Error
""" """
if self.entry.has( "Titel" ): if self.entry.has( "Titel" ):
self._titel_raw = self.entry.get("Titel").value.strip() self._titel_raw = self.entry.get("Titel").value
# Remove possible ref-tags
for ref in self._titel_raw.ifilter_tags(matches="ref"):
self._titel_raw.remove( ref )
# Remove whitespace
self._titel_raw = str(self._titel_raw).strip()
else: else:
raise CountryListEntryError( "Template Parameter 'Titel' is \ raise CountryListEntryError( "Template Parameter 'Titel' is \
missing!" ) missing!" )
@ -301,6 +328,58 @@ missing!" )
# If we have indexes without links, search for links # If we have indexes without links, search for links
if indexes: if indexes:
parts = self._search_links( parts, indexes )
# Join the collected links
sep = " "
self.interpret = sep.join( parts )
# Nothing to do, just use raw
else:
self.interpret = self._interpret_raw
def get_interpret_value( self ):
    """
    Read the raw value of the entry's 'Interpret' template parameter.

    The value is stored in self._interpret_raw as a plain string, with
    ref-tags removed and surrounding whitespace stripped.

    @raise CountryListEntryError: if the entry has no 'Interpret' param
    """
    if not self.entry.has( "Interpret" ):
        raise CountryListEntryError(
            "Template Parameter 'Interpret' is missing!" )

    value = self.entry.get("Interpret").value

    # Remove possible ref-tags.
    # filter_tags() hands back a list, so the set of tags to drop is fixed
    # before the first removal; removing nodes while walking the lazy
    # ifilter_tags() iterator can skip a tag that directly follows a
    # removed one.
    for ref in value.filter_tags(matches="ref"):
        value.remove( ref )

    # Remove whitespace
    self._interpret_raw = str(value).strip()
def _search_links( self, keywords, indexes=None ):
"""
Search matching wikilinks for keyword(s) in CountryList's wikicode
@param keywords: One or more keywords to search for
@type keywords: str, list
@param indexes: List with numeric indexes for items of keywords to work
on only
@type indexes: list of ints
@return: List or String with replaced keywords
@return type: str, list
"""
# Maybe convert keywords string to list
if( isinstance( keywords, str ) ):
keywords = [ keywords, ]
string = True
else:
string = False
# If indexes worklist was not provided, work on all elements
if not indexes:
indexes = list(range( len( keywords ) ))
# Iterate over wikilinks of refpage and try to find related links # Iterate over wikilinks of refpage and try to find related links
for wikilink in self.wikicode.ifilter_wikilinks(): for wikilink in self.wikicode.ifilter_wikilinks():
@ -308,11 +387,11 @@ missing!" )
for index in indexes: for index in indexes:
# Check whether wikilink matches # Check whether wikilink matches
if( parts[index] == wikilink.text or if( keywords[index] == wikilink.text or
parts[index] == wikilink.title ): keywords[index] == wikilink.title ):
# Overwrite name with complete wikilink # Overwrite name with complete wikilink
parts[index] = str( wikilink ) keywords[index] = str( wikilink )
# Remove index from worklist # Remove index from worklist
indexes.remove( index ) indexes.remove( index )
@ -324,24 +403,11 @@ missing!" )
if not indexes: if not indexes:
break break
# Join the collected links # Choose whether to return a list or a string based on input type
sep = " " if not string:
self.interpret = sep.join( parts ) return keywords
# Nothing to do, just use raw
else: else:
self.interpret = self._interpret_raw return str(keywords[0])
def get_interpret_value( self ):
"""
Reads value of Interpret parameter
If param is not present raise Error
"""
if self.entry.has( "Interpret" ):
self._interpret_raw = self.entry.get("Interpret").value.strip()
else:
raise CountryListEntryError( "Template Parameter 'Interpret' is \
missing!" )
class CountryListError( Exception ): class CountryListError( Exception ):

65
summarypage.py

@ -38,14 +38,24 @@ class SummaryPage():
Handles summary page related actions Handles summary page related actions
""" """
def __init__( self, text ): def __init__( self, text, force_reload=False ):
""" """
Create Instance Create Instance
@param text: Page Text of summarypage
@type text: str
@param force_reload: If True, countrylists will always be parsed,
regardless of whether parsing is needed
@type force_reload: bool
""" """
# Parse Text with mwparser # Parse Text with mwparser
self.wikicode = mwparser.parse( text ) self.wikicode = mwparser.parse( text )
# Force parsing of countrylist
self.force_reload = force_reload
def treat( self ): def treat( self ):
""" """
Handles parsing/editing of text Handles parsing/editing of text
@ -55,7 +65,9 @@ class SummaryPage():
for entry in self.wikicode.filter_templates( matches="/Eintrag" ): for entry in self.wikicode.filter_templates( matches="/Eintrag" ):
# Instantiate SummaryPageEntry-object # Instantiate SummaryPageEntry-object
summarypageentry = SummaryPageEntry( entry ) summarypageentry = SummaryPageEntry(entry,
force_reload=self.force_reload)
# Treat SummaryPageEntry-object # Treat SummaryPageEntry-object
summarypageentry.treat() summarypageentry.treat()
@ -85,13 +97,22 @@ class SummaryPageEntry():
write_needed = False write_needed = False
def __init__( self, entry ): def __init__( self, entry, force_reload=False ):
""" """
Constructor Constructor
@param entry: Entry template of summarypage entry
@type entry: mwparser.template
@param force_reload: If True, countrylists will always be parsed,
regardless of whether parsing is needed
@type force_reload: bool
""" """
self.old_entry = SummaryPageEntryTemplate( entry ) self.old_entry = SummaryPageEntryTemplate( entry )
self.new_entry = SummaryPageEntryTemplate( ) self.new_entry = SummaryPageEntryTemplate( )
# Force parsing of countrylist
self.force_reload = force_reload
def treat( self ): def treat( self ):
""" """
Controls parsing/update-sequence of entry Controls parsing/update-sequence of entry
@ -122,11 +143,8 @@ class SummaryPageEntry():
# Get current year # Get current year
current_year = datetime.now().year current_year = datetime.now().year
# Store old link.title
link_title = self.countrylist_wikilink.title
# If list is from last year, replace year # If list is from last year, replace year
if (current_year - 1) in link_title: if (current_year - 1) in self.countrylist_wikilink.title:
self.countrylist_wikilink.title.replace( (current_year - 1), self.countrylist_wikilink.title.replace( (current_year - 1),
current_year ) current_year )
@ -134,23 +152,38 @@ class SummaryPageEntry():
try: try:
self.countrylist = CountryList( self.countrylist_wikilink ) self.countrylist = CountryList( self.countrylist_wikilink )
if( self.countrylist and self.maybe_parse_countrylist()
self.countrylist.is_parsing_needed( self.countrylist_revid )):
self.countrylist.parse()
# Maybe fallback to last years list # Maybe fallback to last years list
except CountryListError: except CountryListError:
self.countrylist_wikilink.title = link_title # If list is from current year, fall back to last year
if (current_year ) in self.countrylist_wikilink.title:
self.countrylist_wikilink.title.replace( current_year,
(current_year - 1) )
self.countrylist = CountryList( self.countrylist_wikilink ) self.countrylist = CountryList( self.countrylist_wikilink )
if( self.countrylist and self.maybe_parse_countrylist()
self.countrylist.is_parsing_needed( self.countrylist_revid )):
self.countrylist.parse()
if not self.countrylist: if not self.countrylist:
raise SummaryPageEntryError( "CountryList does not exists!" ) raise SummaryPageEntryError( "CountryList does not exists!" )
def maybe_parse_countrylist( self ):
    """
    Parse the countrylist when there is one and a parse is due.

    Does nothing if self.countrylist is falsy. Otherwise calls
    self.countrylist.parse() when is_parsing_needed() reports a true value
    for self.countrylist_revid or when self.force_reload is set.
    """
    countrylist = self.countrylist

    if countrylist:
        # The revision check is evaluated first; force_reload is only
        # consulted when that check reports nothing to do
        parse_due = countrylist.is_parsing_needed( self.countrylist_revid )
        if not parse_due:
            parse_due = self.force_reload

        if parse_due:
            countrylist.parse()
def get_countrylist_wikilink( self ): def get_countrylist_wikilink( self ):
""" """
Load wikilink to related countrylist Load wikilink to related countrylist
@ -250,8 +283,8 @@ class SummaryPageEntryTemplate():
Creates Instance of Class for given mwparser.template object of Creates Instance of Class for given mwparser.template object of
SummaryPageEntry Template. If no object was given create empty one. SummaryPageEntry Template. If no object was given create empty one.
@param template_obj mw.parser.template Object of @param template_obj Object of SummaryPageEntry Template
SummaryPageEntry Template @type template_obj: mwparser.template
""" """
# Check if object was given # Check if object was given

Loading…
Cancel
Save