22 Commits

Author SHA1 Message Date
c708832515 Merge branch 'feature-force-reload' 2015-12-11 12:42:41 +01:00
18122fafe8 New feature force parsing of countrylists regardless if needed with param "-force-reload" 2015-12-11 12:41:23 +01:00
55afe94a4e Merge branch 'countrylist-linksearch' 2015-12-11 12:03:51 +01:00
e409c7a02b CountryList-module: Also search for Links in Titel 2015-12-11 00:03:53 +01:00
9d9207c175 CountryList-module: Put linksearching algorithm in separate function for simple reuse for Titel value 2015-12-10 23:13:45 +01:00
5f13da5934 Clarify licence situation of chartsbot.py 2015-11-25 17:15:55 +01:00
5b084f6fde Fix Bug: Writing is requested even when only rev_ids have changed
Introduce new attr to CountryList for simple get information wether page was parsed

The SummaryPageEntryTemplate comparation to non-equal fails when unparsed Entrys occur
--> and it with information wether CountryList was parsed
2015-11-23 19:36:19 +01:00
e3c2c1a5d9 Merge branch 'pep8-compat' 2015-11-23 19:15:37 +01:00
f819193790 pep8-compat: clean up CountryList-Modul 2015-11-23 19:11:21 +01:00
4a856b1dae pep8-compat: Replace undefined Error by Message in CountryList-Module 2015-11-23 19:04:27 +01:00
166e61aee7 pep8-compat: cleanup SummaryPage-Module 2015-11-23 19:00:07 +01:00
1ea37c0e0d pep8-compat: Remove unnecessary imports from summarypage.py 2015-11-23 18:59:16 +01:00
3e525edd2a pep8-compat: chartsbot.py remove unnecessary imports 2015-11-23 18:48:04 +01:00
3cab979662 Merge branch 'summarypage-module' 2015-11-21 11:52:21 +01:00
52f933bea7 SummaryPage-Module: Bugfix, move countrylist.parse() back in try statement since we need to make sure it is parseable due to automatic year change feature 2015-11-21 11:50:40 +01:00
e854244f0b Merge branches 'countrylist-module' and 'summarypage-module' 2015-11-21 11:33:35 +01:00
f1e0157643 CountryList-Module: Rename method parsing_needed to is_parsing_needed to make boolean character more clear 2015-11-21 11:32:00 +01:00
4987f97e91 SummaryPage-Module: Reimplement feature to prevent parsing for pages where revid haven't changed since last parsing 2015-11-21 11:30:37 +01:00
06ffb37d07 Merge branch 'summarypage-module' 2015-11-20 21:02:03 +01:00
f360deb1a0 SummaryPage-Modul: Use full path for embeding entry template since relative paths don't work on already embeded pages 2015-11-20 21:00:37 +01:00
03857c0236 Merge branch 'countrylist-module' 2015-11-20 14:45:08 +01:00
3864c9013c CountryList-Module: Since we have multiple categories in some countrys we need to select the first wrapping template 2015-11-20 14:44:26 +01:00
3 changed files with 203 additions and 78 deletions

View File

@@ -1,7 +1,15 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# charts.py
# chartsbot.py
#
# original version by:
#
# (C) Pywikibot team, 2006-2014 as basic.py
#
# Distributed under the terms of the MIT license.
#
# modified by:
#
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
@@ -32,6 +40,8 @@ The following parameters are supported:
-always If given, request for confirmation of edit is short circuited
Use for unattended run
-force-reload If given, countrylists will always be parsed, regardless of
whether parsing is needed or not
"""
@@ -39,8 +49,6 @@ import locale
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import Bot
import mwparserfromhell as mwparser
from summarypage import SummaryPage
@@ -55,10 +63,10 @@ class ChartsBot( ):
"""
Bot which automatically updates a ChartsSummaryPage like
[[Portal:Charts_und_Popmusik/Aktuelle_Nummer-eins-Hits]] by reading linked
CountryListsAn incomplete sample bot.
CountryLists
"""
def __init__( self, generator, always ):
def __init__( self, generator, always, force_reload ):
"""
Constructor.
@@ -68,11 +76,17 @@ class ChartsBot( ):
@param always: if True, request for confirmation of edit is short
circuited. Use for unattended run
@type always: bool
@param force_reload: If True, countrylists will always be parsed,
regardless of whether parsing is needed or not
@type force_reload: bool
"""
self.generator = generator
self.always = always
# Force parsing of countrylist
self.force_reload = force_reload
# Set the edit summary message
self.site = pywikibot.Site()
self.summary = "Bot: Aktualisiere Übersichtsseite Nummer-eins-Hits"
@@ -96,7 +110,7 @@ class ChartsBot( ):
################################################################
# Initialise and treat SummaryPageWorker
sumpage = SummaryPage( text )
sumpage = SummaryPage( text, self.force_reload )
sumpage.treat()
# Check if editing is needed and if so get new text
@@ -181,10 +195,16 @@ def main(*args):
# If always is True, bot won't ask for confirmation of edit (automode)
always = False
# If force_reload is True, bot will always parse the CountryList, regardless
# of whether parsing is needed or not
force_reload = False
# Parse command line arguments
for arg in local_args:
if arg.startswith("-always"):
always = True
elif arg.startswith("-force-reload"):
force_reload = True
else:
genFactory.handleArg(arg)
@@ -194,7 +214,7 @@ def main(*args):
# The preloading generator is responsible for downloading multiple
# pages from the wiki simultaneously.
gen = pagegenerators.PreloadingGenerator(gen)
bot = ChartsBot(gen, always)
bot = ChartsBot(gen, always, force_reload)
bot.run()
else:
pywikibot.showHelp()

View File

@@ -74,10 +74,12 @@ class CountryList():
for attr in __attr:
setattr( self, attr, None )
self.parsed = False
# Try to find year
self.find_year()
def parsing_needed( self, revid ):
def is_parsing_needed( self, revid ):
"""
Check if current revid of CountryList differs from given one
@@ -103,7 +105,7 @@ class CountryList():
self.year -= 1
# If last year does not match, raise YearError
if str( self.year ) not in self.page.title():
raise CountryListYearError
raise CountryListError( "CountryList year is errorneous!" )
def parse( self ):
"""
@@ -121,16 +123,19 @@ class CountryList():
self.prepare_titel()
self.prepare_interpret()
# Make it easy to detect whether this CountryList has been parsed
self.parsed = True
def detect_belgian( self ):
"""
Detect whether current entry is one of the Belgian regions (Wallonien/Flandern)
"""
# Check if Belgian region name is in link text or title
if "Wallonien" in str( self.wikilink.text ) \
or "Wallonien" in str( self.wikilink.title):
if( "Wallonien" in str( self.wikilink.text ) or
"Wallonien" in str( self.wikilink.title) ):
return "Wallonie"
elif "Flandern" in str( self.wikilink.text ) \
or "Flandern" in str( self.wikilink.title):
elif( "Flandern" in str( self.wikilink.text ) or
"Flandern" in str( self.wikilink.title) ):
return "Flandern"
else:
return None
@@ -157,9 +162,17 @@ class CountryList():
else:
singles_section = self.wikicode.get_sections(matches="Singles")[0]
# Since some countries have multiple categories, we need
# to select the first wrapping template
try:
wrapping = next( singles_section.ifilter_templates(
matches="Nummer-eins-Hits" ) )
except StopIteration:
raise CountryListError( "Wrapping template is missing!")
# Select the last occurence of template "Nummer-eins-Hits Zeile" in
# "Singles"-section
for self.entry in singles_section.ifilter_templates(
# Wrapper-template
for self.entry in wrapping.get("Inhalt").value.ifilter_templates(
matches="Nummer-eins-Hits Zeile" ):
pass
@@ -231,6 +244,10 @@ missing!" )
if not self._titel_raw:
self.get_titel_value()
# Try to find a wikilink for Titel on countrylist
if "[[" not in self._titel_raw:
self.titel = self._search_links( str(self._titel_raw) )
else:
self.titel = self._titel_raw
def get_titel_value( self ):
@@ -288,28 +305,7 @@ missing!" )
# If we have indexes without links, search for links
if indexes:
# Iterate over wikilinks of refpage and try to find related links
for wikilink in self.wikicode.ifilter_wikilinks():
# Iterate over interpret names
for index in indexes:
# Check wether wikilink matches
if parts[index] == wikilink.text \
or parts[index] == wikilink.title:
# Overwrite name with complete wikilink
parts[index] = str( wikilink )
# Remove index from worklist
indexes.remove( index )
# Other indexes won't also match
break
# If worklist is empty, stop iterating over wikilinks
if not indexes:
break
parts = self._search_links( parts, indexes )
# Join the collected links
sep = " "
@@ -330,6 +326,59 @@ missing!" )
raise CountryListEntryError( "Template Parameter 'Interpret' is \
missing!" )
def _search_links( self, keywords, indexes=None ):
"""
Search matching wikilinks for keyword(s) in CountryList's wikicode
@param keywords: One or more keywords to search for
@type keywords: str, list
@param indexes: List with numeric indexes for items of keywords to work
on only
@type indexes: list of ints
@return: List or String with replaced keywords
@return type: str, list
"""
# Maybe convert keywords string to list
if( isinstance( keywords, str ) ):
keywords = [ keywords, ]
string = True
else:
string = False
# If indexes worklist was not provided, work on all elements
if not indexes:
indexes = list(range( len( keywords ) ))
# Iterate over wikilinks of refpage and try to find related links
for wikilink in self.wikicode.ifilter_wikilinks():
# Iterate over interpret names
for index in indexes:
# Check wether wikilink matches
if( keywords[index] == wikilink.text or
keywords[index] == wikilink.title ):
# Overwrite name with complete wikilink
keywords[index] = str( wikilink )
# Remove index from worklist
indexes.remove( index )
# Other indexes won't also match
break
# If worklist is empty, stop iterating over wikilinks
if not indexes:
break
# Choose wether return list or string based on input type
if not string:
return keywords
else:
return str(keywords[0])
class CountryListError( Exception ):
"""
@@ -337,6 +386,7 @@ class CountryListError( Exception ):
"""
pass
class CountryListEntryError( CountryListError ):
"""
Handles errors occuring in class CountryList related to entrys

View File

@@ -25,27 +25,37 @@
Provides classes for handling Charts summary page
"""
import locale
from datetime import datetime, timedelta
import pywikibot
# import pywikibot
import mwparserfromhell as mwparser
from countrylist import CountryList, CountryListError
class SummaryPage():
"""
Handles summary page related actions
"""
def __init__( self, text ):
def __init__( self, text, force_reload=False ):
"""
Create Instance
@param text: Page Text of summarypage
@type text: str
@param force_reload: If True, countrylists will always be parsed,
regardless of whether parsing is needed or not
@type force_reload: bool
"""
# Parse Text with mwparser
self.wikicode = mwparser.parse( text )
# Force parsing of countrylist
self.force_reload = force_reload
def treat( self ):
"""
Handles parsing/editing of text
@@ -55,14 +65,16 @@ class SummaryPage():
for entry in self.wikicode.filter_templates( matches="/Eintrag" ):
# Instantiate SummaryPageEntry-object
summarypageentry = SummaryPageEntry( entry )
summarypageentry = SummaryPageEntry(entry,
force_reload=self.force_reload)
# Treat SummaryPageEntry-object
summarypageentry.treat()
# Get result
# We need to replace the original entry since the object id changes due to
# recreation of the template object and reassignment won't be reflected
self.wikicode.replace( entry, summarypageentry.new_entry.template )
self.wikicode.replace(entry, summarypageentry.get_entry().template)
def get_new_text( self ):
"""
@@ -85,18 +97,31 @@ class SummaryPageEntry():
write_needed = False
def __init__( self, entry ):
def __init__( self, entry, force_reload=False ):
"""
Constructor
@param entry: Entry template of summarypage entry
@type entry: mwparser.template
@param force_reload: If True, countrylists will always be parsed,
regardless of whether parsing is needed or not
@type force_reload: bool
"""
self.old_entry = SummaryPageEntryTemplate( entry )
self.new_entry = SummaryPageEntryTemplate( )
# Force parsing of countrylist
self.force_reload = force_reload
def treat( self ):
"""
Controls parsing/update-sequence of entry
"""
self.parse()
# Get CountryList-Object
self.get_countrylist()
# Only correct Chartein if the country list was actually parsed
if( self.countrylist.parsed):
self.correct_chartein()
@@ -104,9 +129,9 @@ class SummaryPageEntry():
self.is_write_needed()
def parse( self ):
def get_countrylist( self ):
"""
Handles parsing process of entry template
Get the CountryList-Object for current entry
"""
# Get wikilink to related countrylist
@@ -115,23 +140,22 @@ class SummaryPageEntry():
# Get saved revision of related countrylist
self.get_countrylist_saved_revid()
# Get current year
current_year = datetime.now().year;
current_year = datetime.now().year
# Store old link.title
link_title = self.countrylist_wikilink.title
# If list is from last year, replace year
if (current_year - 1) in link_title:
self.countrylist_wikilink.title.replace( (current_year - 1), current_year )
self.countrylist_wikilink.title.replace( (current_year - 1),
current_year )
# Try to get current years list
try:
self.countrylist = CountryList( self.countrylist_wikilink )
if self.countrylist:
self.countrylist.parse()
self.maybe_parse_countrylist()
# Maybe fallback to last years list
except CountryListError:
@@ -139,20 +163,37 @@ class SummaryPageEntry():
self.countrylist_wikilink.title = link_title
self.countrylist = CountryList( self.countrylist_wikilink )
if self.countrylist:
self.countrylist.parse()
else:
self.maybe_parse_countrylist()
if not self.countrylist:
raise SummaryPageEntryError( "CountryList does not exists!" )
def maybe_parse_countrylist( self ):
    """
    Trigger parsing of the related countrylist when appropriate

    Nothing happens if the countrylist-object is falsy. Otherwise the
    countrylist is parsed if it reports that parsing is needed for the
    saved revision id, or if force_reload is set.
    """
    # Without a usable countrylist-object there is nothing to parse
    if self.countrylist:

        # Ask the countrylist first, then fall back to the force flag
        outdated = self.countrylist.is_parsing_needed(
            self.countrylist_revid )

        if outdated or self.force_reload:
            self.countrylist.parse()
def get_countrylist_wikilink( self ):
"""
Load wikilink to related countrylist
"""
if self.old_entry.Liste:
try:
self.countrylist_wikilink = next( self.old_entry.Liste.ifilter_wikilinks() )
self.countrylist_wikilink = next(
self.old_entry.Liste.ifilter_wikilinks() )
except StopIteration:
raise SummaryPageEntryError( "Parameter Liste does not contain valid wikilink!")
raise SummaryPageEntryError(
"Parameter Liste does not contain valid wikilink!" )
else:
raise SummaryPageEntryError( "Parameter Liste is not present!")
@@ -171,7 +212,8 @@ class SummaryPageEntry():
"""
self.new_entry.Liste = self.countrylist_wikilink
self.new_entry.Liste_Revision = self.countrylist.page.latest_revision_id
self.new_entry.Liste_Revision = \
self.countrylist.page.latest_revision_id
self.new_entry.Interpret = self.countrylist.interpret
self.new_entry.Titel = self.countrylist.titel
self.new_entry.Chartein = self._corrected_chartein
@@ -210,9 +252,20 @@ class SummaryPageEntry():
Detects whether writing of the entry is needed and stores the information
in a class attribute
"""
type( self ).write_needed = ( ( self.old_entry != self.new_entry ) or \
type( self ).write_needed = ( ( self.old_entry != self.new_entry ) and
self.countrylist.parsed or
type( self ).write_needed )
def get_entry( self ):
    """
    Select the entry which should end up on the summary page

    @return: new_entry if the CountryList was parsed, otherwise old_entry
    """
    # An unparsed CountryList provides no fresh data, so keep the old entry
    return self.new_entry if self.countrylist.parsed else self.old_entry
class SummaryPageEntryTemplate():
"""
@@ -229,8 +282,8 @@ class SummaryPageEntryTemplate():
Creates Instance of Class for given mwparser.template object of
SummmaryPageEntry Template. If no object was given create empty one.
@param template_obj mw.parser.template Object of
SummmaryPageEntry Template
@param template_obj Object of SummmaryPageEntry Template
@type template_obj: mwparser.template
"""
# Check if object was given
@@ -240,25 +293,25 @@ class SummaryPageEntryTemplate():
if isinstance( template_obj,
mwparser.nodes.template.Template ):
self.template = template_obj;
self.__initial = False;
self.template = template_obj
self.__initial = False
# Otherwise raise error
else:
raise SummaryPageEntryTemplateError( "Wrong type given" );
raise SummaryPageEntryTemplateError( "Wrong type given" )
# Otherwise initialise template
else:
self.__initial_template()
self.__initial = True;
self.__initial = True
def __initial_template( self ):
"""
Builds the initial template
"""
self.template = next( mwparser.parse(
"{{/Eintrag|Liste=|Liste_Revision=|Interpret=|Titel=NN\
self.template = next( mwparser.parse( "{{Portal:Charts und Popmusik/\
Aktuelle Nummer-eins-Hits/Eintrag|Liste=|Liste_Revision=|Interpret=|Titel=NN\
|Chartein=|Korrektur=|Hervor=}}" ).ifilter_templates() )
def __getattr__( self, name ):
@@ -319,8 +372,8 @@ class SummaryPageEntryTemplate():
continue
# Compare other param values, if one unequal write is needed
if initial.template.get( param ).value.strip() != \
cmpto.template.get( param ).value.strip():
if( initial.template.get( param ).value.strip() !=
cmpto.template.get( param ).value.strip() ):
return True
# If not returned True until now
@@ -333,12 +386,14 @@ class SummaryPageError( Exception ):
"""
pass
class SummaryPageEntryError( SummaryPageError ):
"""
Handles errors occuring in class SummaryPageEntry
"""
pass
class SummaryPageEntryTemplateError( SummaryPageError ):
"""
Handles errors occuring in class SummaryPageEntryTemplate