Browse Source

Merge branch 'remove-refs' into improve-output

Get recent changes before going on
develop
Jonathan Golder 6 years ago
parent
commit
287942e174
3 changed files with 176 additions and 54 deletions
  1. +28
    -5
      chartsbot.py
  2. +99
    -33
      countrylist.py
  3. +49
    -16
      summarypage.py

+ 28
- 5
chartsbot.py View File

@ -40,6 +40,8 @@ The following parameters are supported:
-always If given, request for confirmation of edit is short circuited
Use for unattended run
-force-reload If given, countrylists will be always parsed regardless if
needed or not
"""
@ -61,10 +63,10 @@ class ChartsBot( ):
"""
Bot which automatically updates a ChartsSummaryPage like
[[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
CountryListsAn incomplete sample bot.
CountryLists
"""
def __init__( self, generator, always ):
def __init__( self, generator, always, force_reload ):
"""
Constructor.
@ -74,11 +76,17 @@ class ChartsBot( ):
@param always: if True, request for confirmation of edit is short
circuited. Use for unattended run
@type always: bool
@param force_reload: If True, countrylists will always be parsed,
regardless of whether parsing is needed or not
@type force_reload: bool
"""
self.generator = generator
self.always = always
# Force parsing of countrylist
self.force_reload = force_reload
# Set the edit summary message
self.site = pywikibot.Site()
self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits"
@ -86,6 +94,14 @@ class ChartsBot( ):
# Set locale to 'de_DE.UTF-8'
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
# provisional-onwiki-activation
page_active = pywikibot.Page( self.site, "Benutzer:JogoBot/active" )
text_active = page_active.get()
if "true" not in text_active.lower():
pywikibot.output( "Bot ist deaktiviert!" )
return False
def run(self):
"""Process each page from the generator."""
for page in self.generator:
@ -102,7 +118,7 @@ class ChartsBot( ):
################################################################
# Initialise and treat SummaryPageWorker
sumpage = SummaryPage( text )
sumpage = SummaryPage( text, self.force_reload )
sumpage.treat()
# Check if editing is needed and if so get new text
@ -191,10 +207,16 @@ def main(*args):
# If always is True, bot won't ask for confirmation of edit (automode)
always = False
# If force_reload is True, bot will always parse Countrylist regardless of
# parsing is needed or not
force_reload = False
# Parse command line arguments
for arg in local_args:
if arg.startswith("-always"):
always = True
elif arg.startswith("-force-reload"):
force_reload = True
else:
genFactory.handleArg(arg)
@ -204,8 +226,9 @@ def main(*args):
# The preloading generator is responsible for downloading multiple
# pages from the wiki simultaneously.
gen = pagegenerators.PreloadingGenerator(gen)
bot = ChartsBot(gen, always)
bot.run()
bot = ChartsBot(gen, always, force_reload)
if bot:
bot.run()
else:
pywikibot.showHelp()


+ 99
- 33
countrylist.py View File

@ -66,7 +66,8 @@ class CountryList():
# Check if page exists
if not self.page.exists():
return False
raise CountryListError( "CountryList " +
str(wikilink.title) + " does not exists!" )
# Initialise attributes
__attr = ( "wikicode", "entry", "chartein", "_chartein_raw",
@ -156,11 +157,18 @@ class CountryList():
# For belgian list we need to select subsection of country
belgian = self.detect_belgian()
if belgian:
singles_section = self.wikicode.get_sections(
matches=belgian )[0].get_sections( matches="Singles" )[0]
else:
singles_section = self.wikicode.get_sections(matches="Singles")[0]
# Select Singles-Section
# Catch Error if we have none
try:
if belgian:
singles_section = self.wikicode.get_sections(
matches=belgian )[0].get_sections( matches="Singles" )[0]
else:
singles_section = self.wikicode.get_sections(
matches="Singles" )[0]
except IndexError:
raise CountryListError( "No Singles-Section found!")
# Since we have multiple categories in some countries we need
# to select the first wrapping template
@ -230,7 +238,15 @@ class CountryList():
If param is not present raise Error
"""
if self.entry.has( "Chartein" ):
self._chartein_raw = self.entry.get("Chartein").value.strip()
self._chartein_raw = self.entry.get("Chartein").value
# Remove possible ref-tags
for ref in self._chartein_raw.ifilter_tags(matches="ref"):
self._chartein_raw.remove( ref )
# Remove whitespace
self._chartein_raw = str(self._chartein_raw).strip()
else:
raise CountryListEntryError( "Template Parameter 'Chartein' is \
missing!" )
@ -244,7 +260,11 @@ missing!" )
if not self._titel_raw:
self.get_titel_value()
self.titel = self._titel_raw
# Try to find a wikilink for Titel on countrylist
if "[[" not in self._titel_raw:
self.titel = self._search_links( str(self._titel_raw) )
else:
self.titel = self._titel_raw
def get_titel_value( self ):
"""
@ -252,7 +272,14 @@ missing!" )
If param is not present raise Error
"""
if self.entry.has( "Titel" ):
self._titel_raw = self.entry.get("Titel").value.strip()
self._titel_raw = self.entry.get("Titel").value
# Remove possible ref-tags
for ref in self._titel_raw.ifilter_tags(matches="ref"):
self._titel_raw.remove( ref )
# Remove whitespace
self._titel_raw = str(self._titel_raw).strip()
else:
raise CountryListEntryError( "Template Parameter 'Titel' is \
missing!" )
@ -298,31 +325,10 @@ missing!" )
parts.append( word )
parts.append( " " )
# If we have indexes with out links, search for links
# If we have indexes without links, search for links
if indexes:
# Iterate over wikilinks of refpage and try to find related links
for wikilink in self.wikicode.ifilter_wikilinks():
# Iterate over interpret names
for index in indexes:
# Check wether wikilink matches
if( parts[index] == wikilink.text or
parts[index] == wikilink.title ):
# Overwrite name with complete wikilink
parts[index] = str( wikilink )
# Remove index from worklist
indexes.remove( index )
# Other indexes won't also match
break
# If worklist is empty, stop iterating over wikilinks
if not indexes:
break
parts = self._search_links( parts, indexes )
# Join the collected links
sep = " "
@ -338,11 +344,71 @@ missing!" )
If param is not present raise Error
"""
if self.entry.has( "Interpret" ):
self._interpret_raw = self.entry.get("Interpret").value.strip()
self._interpret_raw = self.entry.get("Interpret").value
# Remove possible ref-tags
for ref in self._interpret_raw.ifilter_tags(matches="ref"):
self._interpret_raw.remove( ref )
# Remove whitespace
self._interpret_raw = str(self._interpret_raw).strip()
else:
raise CountryListEntryError( "Template Parameter 'Interpret' is \
missing!" )
def _search_links( self, keywords, indexes=None ):
    """
    Search matching wikilinks for keyword(s) in CountryList's wikicode

    Every wikilink found in self.wikicode is compared (by link text and by
    link title) against the keywords that are still unresolved. The first
    unresolved keyword that matches is replaced with the string form of
    the complete wikilink.

    A keyword list is updated in place and returned; a single string is
    wrapped internally and a plain string is returned. The indexes
    worklist passed by the caller is never modified.

    @param keywords: One or more keywords to search for
    @type keywords: str, list
    @param indexes: List with numeric indexes for items of keywords to work
                    on only. If empty or not given, all items are used.
    @type indexes: list of ints

    @return: List or String with replaced keywords
    @return type: str, list
    """
    # Maybe convert keywords string to list
    string = isinstance( keywords, str )
    if string:
        keywords = [ keywords, ]

    # Build a private worklist so the caller's list is not drained.
    # If indexes worklist was not provided, work on all elements
    if indexes:
        pending = list( indexes )
    else:
        pending = list( range( len( keywords ) ) )

    # Iterate over wikilinks of refpage and try to find related links
    for wikilink in self.wikicode.ifilter_wikilinks():

        # If worklist is empty, stop iterating over wikilinks
        if not pending:
            break

        # Iterate over the still unresolved keywords
        for index in pending:

            # Check whether wikilink matches
            if( keywords[index] == wikilink.text or
                    keywords[index] == wikilink.title ):

                # Overwrite name with complete wikilink
                keywords[index] = str( wikilink )

                # Remove index from worklist; leaving the loop right away
                # keeps the removal during iteration safe, and a single
                # wikilink resolves at most one keyword
                pending.remove( index )
                break

    # Choose whether to return list or string based on input type
    if string:
        return str( keywords[0] )
    return keywords
class CountryListError( Exception ):
"""


+ 49
- 16
summarypage.py View File

@ -38,14 +38,24 @@ class SummaryPage():
Handles summary page related actions
"""
def __init__( self, text ):
def __init__( self, text, force_reload=False ):
"""
Create Instance
@param text: Page Text of summarypage
@type text: str
@param force_reload: If True, countrylists will always be parsed,
regardless of whether parsing is needed or not
@type force_reload: bool
"""
# Parse Text with mwparser
self.wikicode = mwparser.parse( text )
# Force parsing of countrylist
self.force_reload = force_reload
def treat( self ):
"""
Handles parsing/editing of text
@ -55,7 +65,9 @@ class SummaryPage():
for entry in self.wikicode.filter_templates( matches="/Eintrag" ):
# Instantiate SummaryPageEntry-object
summarypageentry = SummaryPageEntry( entry )
summarypageentry = SummaryPageEntry(entry,
force_reload=self.force_reload)
# Treat SummaryPageEntry-object
summarypageentry.treat()
@ -85,13 +97,22 @@ class SummaryPageEntry():
write_needed = False
def __init__( self, entry ):
def __init__( self, entry, force_reload=False ):
"""
Constructor
@param entry: Entry template of summarypage entry
@type entry: mwparser.template
@param force_reload: If True, countrylists will always be parsed,
regardless of whether parsing is needed or not
@type force_reload: bool
"""
self.old_entry = SummaryPageEntryTemplate( entry )
self.new_entry = SummaryPageEntryTemplate( )
# Force parsing of countrylist
self.force_reload = force_reload
def treat( self ):
"""
Controls parsing/update-sequence of entry
@ -122,11 +143,8 @@ class SummaryPageEntry():
# Get current year
current_year = datetime.now().year
# Store old link.title
link_title = self.countrylist_wikilink.title
# If list is from last year, replace year
if (current_year - 1) in link_title:
if (current_year - 1) in self.countrylist_wikilink.title:
self.countrylist_wikilink.title.replace( (current_year - 1),
current_year )
@ -134,23 +152,38 @@ class SummaryPageEntry():
try:
self.countrylist = CountryList( self.countrylist_wikilink )
if( self.countrylist and
self.countrylist.is_parsing_needed( self.countrylist_revid )):
self.countrylist.parse()
self.maybe_parse_countrylist()
# Maybe fallback to last years list
except CountryListError:
self.countrylist_wikilink.title = link_title
# If list is from current year, fall back to last year's list
if (current_year ) in self.countrylist_wikilink.title:
self.countrylist_wikilink.title.replace( current_year,
(current_year - 1) )
self.countrylist = CountryList( self.countrylist_wikilink )
if( self.countrylist and
self.countrylist.is_parsing_needed( self.countrylist_revid )):
self.countrylist.parse()
self.maybe_parse_countrylist()
if not self.countrylist:
raise SummaryPageEntryError( "CountryList does not exists!" )
def maybe_parse_countrylist( self ):
    """
    Trigger parsing of the countrylist when there is something to parse.

    Nothing happens if self.countrylist is falsy. Otherwise the countrylist
    is asked whether parsing is needed for self.countrylist_revid, and it
    is parsed if that is the case or if self.force_reload (param
    -force-reload) is set.
    """
    countrylist = self.countrylist

    # Only act when a countrylist-object is available
    if countrylist:

        # Ask the list first, then consider the force flag
        outdated = countrylist.is_parsing_needed( self.countrylist_revid )

        if outdated or self.force_reload:
            countrylist.parse()
def get_countrylist_wikilink( self ):
"""
Load wikilink to related countrylist
@ -250,8 +283,8 @@ class SummaryPageEntryTemplate():
Creates Instance of Class for given mwparser.template object of
SummmaryPageEntry Template. If no object was given create empty one.
@param template_obj mw.parser.template Object of
SummmaryPageEntry Template
@param template_obj Object of SummmaryPageEntry Template
@type template_obj: mwparser.template
"""
# Check if object was given


Loading…
Cancel
Save