Merge branch 'fs#89-article-status' into fs#88-mark-pages-bot

This commit is contained in:
2016-11-05 19:39:30 +01:00
3 changed files with 216 additions and 12 deletions

View File

@@ -107,7 +107,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
# We need the talkpage (and only this) of each existing page # We need the talkpage (and only this) of each existing page
for talkpage in pagegenerators.PageWithTalkPageGenerator( for talkpage in pagegenerators.PageWithTalkPageGenerator(
redfam.article_generator( filter_existing=True ), redfam.article_generator(
filter_existing=True,
exclude_article_status=["marked"] ),
return_talk_only=True ): return_talk_only=True ):
# Add reference to redfam to talkpages # Add reference to redfam to talkpages
@@ -127,7 +129,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
self.current_wikicode = mwparser.parse( self.current_page.text ) self.current_wikicode = mwparser.parse( self.current_page.text )
# Add notice # Add notice
self.add_disc_notice_template() # Returns True if added
# None if already present
add_ret = self.add_disc_notice_template()
# Convert wikicode back to string to save # Convert wikicode back to string to save
self.new_text = str( self.current_wikicode ) self.new_text = str( self.current_wikicode )
@@ -140,8 +144,24 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
if not summary[:len("Bot:")] == "Bot:": if not summary[:len("Bot:")] == "Bot:":
summary = "Bot: " + summary.strip() summary = "Bot: " + summary.strip()
# Save # will return True if saved
self.put_current( self.new_text, summary=summary ) # False if not saved because of errors
# None if change was not accepted by user
save_ret = self.put_current( self.new_text )
# Status
if add_ret is None or add_ret and save_ret:
self.current_page.redfam.article_add_status(
"marked",
title=self.current_page.title(withNamespace=False))
elif save_ret is None:
self.current_page.redfam.article_add_status(
"note_rej",
title=self.current_page.title(withNamespace=False))
else:
self.current_page.redfam.article_add_status(
"sav_err",
title=self.current_page.title(withNamespace=False))
def add_disc_notice_template( self ): def add_disc_notice_template( self ):
""" """
@@ -155,7 +175,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
# Check if it is already present in wikicode # Check if it is already present in wikicode
if self.disc_notice_present(): if self.disc_notice_present():
return False return
# Find the right place to insert notice template # Find the right place to insert notice template
# Therefore we need the first section (if there is one) # Therefore we need the first section (if there is one)
@@ -209,3 +229,33 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
# If nothing is found, loop will run till its end # If nothing is found, loop will run till its end
else: else:
return False return False
# The original pywikibot.bot.CurrentPageBot.put_current does not return the
# result of the save call, so it is overridden here to pass that result on.
def put_current(self, new_text, ignore_save_related_errors=None,
                ignore_server_errors=None, **kwargs):
    """
    Call L{Bot.userPut} for the current page and return its result.

    The text currently stored on self.current_page is handed over as the
    old text, new_text as the replacement.

    @param new_text: The new text
    @type new_text: basestring
    @param ignore_save_related_errors: Ignore save related errors and
        automatically print a message. If None, the default of this
        instance (self.ignore_save_related_errors) is used.
    @type ignore_save_related_errors: bool or None
    @param ignore_server_errors: Ignore server errors and automatically
        print a message. If None, the default of this instance
        (self.ignore_server_errors) is used.
    @type ignore_server_errors: bool or None
    @param kwargs: Additional parameters directly given to L{Bot.userPut}.
    @type kwargs: dict
    @return: Whatever L{Bot.userPut} returns, passed through unchanged
    """
    # Only fall back to the instance defaults for options left unset
    save_related = (
        self.ignore_save_related_errors
        if ignore_save_related_errors is None
        else ignore_save_related_errors )
    server = (
        self.ignore_server_errors
        if ignore_server_errors is None
        else ignore_server_errors )

    return self.userPut(
        self.current_page,
        self.current_page.text,
        new_text,
        ignore_save_related_errors=save_related,
        ignore_server_errors=server,
        **kwargs)

View File

@@ -336,8 +336,8 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor = type( self ).connection.cursor( mysqldb.DictCursor )
cursor.execute( cursor.execute(
'SELECT * FROM `{prefix}_redfams` WHERE `status` = ?;'.format( 'SELECT * FROM `{prefix}_redfams` WHERE `status` = LIKE %?%;'.
prefix=type( self ).db_table_prefix), ( status, ) ) format( prefix=type( self ).db_table_prefix), ( status, ) )
while True: while True:
res = cursor.fetchmany( 1000 ) res = cursor.fetchmany( 1000 )
@@ -358,9 +358,10 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
'FROM `{prefix}_redfams` `F` ' + 'FROM `{prefix}_redfams` `F` ' +
'INNER JOIN `{prefix}_redpages` `P` ' + 'INNER JOIN `{prefix}_redpages` `P` ' +
'ON `F`.`status` = ? ' + 'ON `F`.`status` = ? ' +
'AND `F`.`ending` >= ? ' 'AND `F`.`ending` >= ? ' +
'AND `F`.`redpageid` = `P`.`pageid`;').format( 'AND `F`.`redpageid` = `P`.`pageid`;').format(
prefix=type( self ).db_table_prefix), ( status, ending ) ) prefix=type( self ).db_table_prefix),
( status, ending ) )
while True: while True:
res = cursor.fetchmany( 1000 ) res = cursor.fetchmany( 1000 )

View File

@@ -68,6 +68,7 @@ class RedFam:
self._beginning = beginning self._beginning = beginning
self._ending = ending self._ending = ending
self._redpageid = redpageid self._redpageid = redpageid
self._status = set()
self._status = self._parse_status(status) self._status = self._parse_status(status)
self._famhash = famhash self._famhash = famhash
self._heading = heading self._heading = heading
@@ -198,6 +199,116 @@ class RedFam:
""" """
return ",".join( self._status ) return ",".join( self._status )
def article_add_status(self, status, index=None, title=None ):
    """
    Adds a status specified by status, to article (identified by title
    or index in articlesList) status set

    An explicitly given index always takes precedence; title is only
    looked up in articlesList when no index was passed.

    @param status Statusstring to add
    @type status str
    @param index Add to article with index in articlesList
    @type index int
    @param title Add to article with title in articlesList
    @type title str

    @raises IndexError if neither index nor title was given, or if index
                       is not an int below the length of articlesList
    @raises ValueError if title is not contained in articlesList
    """
    # Compare against None instead of truthiness, since 0 is a valid index
    if index is None and title:
        index = self._articlesList.index( title )

    if isinstance( index, int ) and index < len(self._articlesList):
        self._article_status[index].add(status)
    else:
        raise IndexError( "No index given or wrong format!")
def article_remove_status(self, status, index=None, title=None, weak=True):
    """
    Removes a status specified by status, from article (identified by title
    or index in articlesList) status set

    If weak is set to False it will throw a KeyError when trying to
    remove a status not set.

    An explicitly given index always takes precedence; title is only
    looked up in articlesList when no index was passed.

    @param status Statusstring to remove
    @type status str
    @param index Remove from article with index in articlesList
    @type index int
    @param title Remove from article with title in articlesList
    @type title str
    @param weak Change behavior on missing status
    @type weak bool

    @raises IndexError if neither index nor title was given, or if index
                       is not an int below the length of articlesList
    @raises ValueError if title is not contained in articlesList
    @raises KeyError if weak is False and status is not set
    """
    # Compare against None instead of truthiness, since 0 is a valid index
    if index is None and title:
        index = self._articlesList.index( title )

    if isinstance( index, int ) and index < len(self._articlesList):
        if weak:
            # discard() silently ignores a missing status
            self._article_status[index].discard(status)
        else:
            # remove() raises KeyError on a missing status
            self._article_status[index].remove(status)
    else:
        raise IndexError( "No index given or wrong format!")
def article_has_status(self, status, index=None, title=None ):
    """
    Checks whether a status specified by status is set for article
    (identified by title or index in articlesList)

    An explicitly given index always takes precedence; title is only
    looked up in articlesList when no index was passed.

    @param status Statusstring to check for
    @type status str
    @param index Check article with index in articlesList
    @type index int
    @param title Check article with title in articlesList
    @type title str

    @returns True if status is in the article's status set, else False
    @rtype bool

    @raises IndexError if neither index nor title was given, or if index
                       is not an int below the length of articlesList
    @raises ValueError if title is not contained in articlesList
    """
    # Compare against None instead of truthiness, since 0 is a valid index
    if index is None and title:
        index = self._articlesList.index( title )

    if isinstance( index, int ) and index < len(self._articlesList):
        return status in self._article_status[index]
    else:
        raise IndexError( "No index given or wrong format!")
def _article_parse_status(self, raw_status, index=None, title=None ):
    """
    Sets status based on comma separated list to articles (identified by
    title or index in articlesList) status set

    Any previously stored status set of the article is replaced.
    Surrounding whitespace of each entry is removed and empty entries are
    dropped, so an empty raw_status results in an empty set.

    An explicitly given index always takes precedence; title is only
    looked up in articlesList when no index was passed.

    @param raw_status Comma separated statusstring to set
    @type raw_status str
    @param index Set for article with index in articlesList
    @type index int
    @param title Set for article with title in articlesList
    @type title str

    @raises IndexError if neither index nor title was given, or if index
                       is not an int below the length of articlesList
    @raises ValueError if title is not contained in articlesList
    """
    # Compare against None instead of truthiness, since 0 is a valid index
    if index is None and title:
        index = self._articlesList.index( title )

    if isinstance( index, int ) and index < len(self._articlesList):
        # "".split(",") yields [""], therefore skip empty entries to not
        # end up with an empty string as status
        self._article_status[index] = {
            part.strip() for part in raw_status.split(",")
            if part.strip() }
    else:
        raise IndexError( "No index given or wrong format!")
def _article_raw_status( self, index=None, title=None ):
    """
    Returns status as commaseparated string (to save in DB) of article
    (identified by title or index in articlesList) status set

    The entries are sorted to get a reproducible string, since sets have
    no defined order. Empty entries are skipped.

    An explicitly given index always takes precedence; title is only
    looked up in articlesList when no index was passed.

    @param index Get from article with index in articlesList
    @type index int
    @param title Get from article with title in articlesList
    @type title str

    @returns Raw status string
    @rtype str

    @raises IndexError if neither index nor title was given, or if index
                       is not an int below the length of articlesList
    @raises ValueError if title is not contained in articlesList
    """
    # Compare against None instead of truthiness, since 0 is a valid index
    if index is None and title:
        index = self._articlesList.index( title )

    if isinstance( index, int ) and index < len(self._articlesList):
        return ",".join(
            sorted( entry for entry in self._article_status[index]
                    if entry ) )
    else:
        raise IndexError( "No index given or wrong format!")
class RedFamParser( RedFam ): class RedFamParser( RedFam ):
""" """
@@ -491,10 +602,14 @@ class RedFamWorker( RedFam ):
def __init__( self, mysql_data ): def __init__( self, mysql_data ):
articlesList = [] articlesList = []
for key in sorted( mysql_data.keys() ): for key in sorted( mysql_data.keys() ):
if 'article' in key and mysql_data[ key ]: if 'article' in key and 'status' not in key and mysql_data[ key ]:
articlesList.append( mysql_data[ key ] ) articlesList.append( mysql_data[ key ] )
# Preset article status list with empty sets for existing articles
self._article_status = [set() for x in range(0, len(articlesList))]
super().__init__( articlesList, mysql_data[ 'beginning' ], super().__init__( articlesList, mysql_data[ 'beginning' ],
mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], mysql_data[ 'ending' ], mysql_data[ 'redpageid' ],
mysql_data[ 'status' ], mysql_data[ 'famhash' ], mysql_data[ 'status' ], mysql_data[ 'famhash' ],
@@ -502,6 +617,15 @@ class RedFamWorker( RedFam ):
self._mysql.data = mysql_data self._mysql.data = mysql_data
# Set up article status
index = 0
for article in self._articlesList:
raw_status = mysql_data[ "article" + str(index) + "_status" ]
if not raw_status:
raw_status = str()
self._article_parse_status( raw_status, index )
index += 1
# Get related RedPage-Information # Get related RedPage-Information
self.redpageid = mysql_data[ 'pageid' ] self.redpageid = mysql_data[ 'pageid' ]
self.redpagetitle = mysql_data[ 'pagetitle' ] self.redpagetitle = mysql_data[ 'pagetitle' ]
@@ -510,7 +634,9 @@ class RedFamWorker( RedFam ):
# with wrong month abbreviations in strptime # with wrong month abbreviations in strptime
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
def article_generator(self, filter_existing=None, filter_redirects=None ): def article_generator(self, filter_existing=None, filter_redirects=None,
exclude_article_status=[],
onlyinclude_article_status=[] ):
""" """
Yields pywikibot pageobjects for articles belonging to this redfams Yields pywikibot pageobjects for articles belonging to this redfams
in a generator in a generator
@@ -524,11 +650,22 @@ class RedFamWorker( RedFam ):
set to False to get only redirectpages, set to False to get only redirectpages,
unset/None results in not filtering unset/None results in not filtering
@type filter_redirects bool/None @type filter_redirects bool/None
""" """
# Iterate over articles in redfam # Iterate over articles in redfam
for article in self._articlesList: for article in self._articlesList:
page = pywikibot.Page(pywikibot.Link(article), self.site) page = pywikibot.Page(pywikibot.Link(article), self.site)
# Exclude by article status
for status in exclude_article_status:
if self.article_has_status( status, title=article ):
continue
# Only include by article status
for status in onlyinclude_article_status:
if not self.article_has_status( status, title=article ):
continue
# Filter non existing Pages if requested with filter_existing=True # Filter non existing Pages if requested with filter_existing=True
if filter_existing and not page.exists(): if filter_existing and not page.exists():
continue continue
@@ -550,8 +687,24 @@ class RedFamWorker( RedFam ):
""" """
Sets status to 3 when worked on Sets status to 3 when worked on
""" """
for article in self._articlesList:
if self.article_has_status( "note_rej", title=article ):
self.add_status( "note_rej" )
if self.article_has_status( "sav_err", title=article ):
self.add_status( "sav_err" )
pass if not self.has_status( "sav_err" ) and \
not self.has_status( "note_rej" ):
self.add_status( "marked" )
self._mysql.data[ 'status' ] = self._raw_status()
index = 0
for article in self._articlesList:
self._mysql.data[ "article" + str(index) + 'status' ] = \
self._article_raw_status( index=index )
index += 1
print( repr(self) )
def get_disc_link( self ): def get_disc_link( self ):
""" """