diff --git a/bots/markpages.py b/bots/markpages.py
index 244ba14..7548294 100644
--- a/bots/markpages.py
+++ b/bots/markpages.py
@@ -107,7 +107,9 @@ class MarkPagesBot( CurrentPageBot ):  # sets 'current_page' on each treat()
 
             # We need the talkpage (and only this) of each existing page
             for talkpage in pagegenerators.PageWithTalkPageGenerator(
-                    redfam.article_generator( filter_existing=True ),
+                    redfam.article_generator(
+                        filter_existing=True,
+                        exclude_article_status=["marked"] ),
                     return_talk_only=True ):
 
                 # Add reference to redfam to talkpages
@@ -127,7 +129,9 @@ class MarkPagesBot( CurrentPageBot ):  # sets 'current_page' on each treat()
         self.current_wikicode = mwparser.parse( self.current_page.text )
 
         # Add notice
-        self.add_disc_notice_template()
+        # Returns True if added
+        #         None if already present
+        add_ret = self.add_disc_notice_template()
 
         # Convert wikicode back to string to save
         self.new_text = str( self.current_wikicode )
@@ -140,8 +144,24 @@ class MarkPagesBot( CurrentPageBot ):  # sets 'current_page' on each treat()
         if not summary[:len("Bot:")] == "Bot:":
             summary = "Bot: " + summary.strip()
 
-        # Save
-        self.put_current( self.new_text, summary=summary )
+        # will return True if saved
+        #             False if not saved because of errors
+        #             None if change was not accepted by user
+        save_ret = self.put_current( self.new_text, summary=summary )
+
+        # Status
+        if add_ret is None or ( add_ret and save_ret ):
+            self.current_page.redfam.article_add_status(
+                "marked",
+                title=self.current_page.title(withNamespace=False))
+        elif save_ret is None:
+            self.current_page.redfam.article_add_status(
+                "note_rej",
+                title=self.current_page.title(withNamespace=False))
+        else:
+            self.current_page.redfam.article_add_status(
+                "sav_err",
+                title=self.current_page.title(withNamespace=False))
 
     def add_disc_notice_template( self ):
         """
@@ -155,7 +175,7 @@ class MarkPagesBot( CurrentPageBot ):  # sets 'current_page' on each treat()
 
         # Check if it is already present in wikicode
         if self.disc_notice_present():
-            return False
+            return
 
         # Find the right place to insert notice template
         # Therfore we need the first section (if there is one)
@@ -209,3 +229,33 @@ class MarkPagesBot( CurrentPageBot ):  # sets 'current_page' on each treat()
         # If nothing is found, loop will run till its end
         else:
             return False
+
+    # We need to override this since original from pywikibot.bot.CurrentPageBot
+    # does not return result of self._save_page
+    def put_current(self, new_text, ignore_save_related_errors=None,
+                    ignore_server_errors=None, **kwargs):
+        """
+        Call L{Bot.userPut} but use the current page.
+
+        It compares the new_text to the current page text.
+
+        @param new_text: The new text
+        @type new_text: basestring
+        @param ignore_save_related_errors: Ignore save related errors and
+            automatically print a message. If None uses this instances default.
+        @type ignore_save_related_errors: bool or None
+        @param ignore_server_errors: Ignore server errors and automatically
+            print a message. If None uses this instances default.
+        @type ignore_server_errors: bool or None
+        @param kwargs: Additional parameters directly given to L{Bot.userPut}.
+        @type kwargs: dict
+        """
+        if ignore_save_related_errors is None:
+            ignore_save_related_errors = self.ignore_save_related_errors
+        if ignore_server_errors is None:
+            ignore_server_errors = self.ignore_server_errors
+        return self.userPut(
+            self.current_page, self.current_page.text, new_text,
+            ignore_save_related_errors=ignore_save_related_errors,
+            ignore_server_errors=ignore_server_errors,
+            **kwargs)
diff --git a/lib/mysqlred.py b/lib/mysqlred.py
index 79360a8..0bb843c 100644
--- a/lib/mysqlred.py
+++ b/lib/mysqlred.py
@@ -336,8 +336,8 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
         cursor = type( self ).connection.cursor( mysqldb.DictCursor )
 
         cursor.execute(
-            'SELECT * FROM `{prefix}_redfams` WHERE `status` = ?;'.format(
-                prefix=type( self ).db_table_prefix), ( status, ) )
+            'SELECT * FROM `{prefix}_redfams` WHERE `status` LIKE ?;'.format(
+                prefix=type( self ).db_table_prefix), ( "%" + status + "%", ) )
 
         while True:
             res = cursor.fetchmany( 1000 )
@@ -358,9 +358,10 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
                 'FROM `{prefix}_redfams` `F` ' +
                 'INNER JOIN `{prefix}_redpages` `P` ' +
                 'ON `F`.`status` = ? ' +
-                'AND `F`.`ending` >= ? '
+                'AND `F`.`ending` >= ? ' +
                 'AND `F`.`redpageid` = `P`.`pageid`;').format(
-                    prefix=type( self ).db_table_prefix), ( status, ending ) )
+                    prefix=type( self ).db_table_prefix),
+                ( status, ending ) )
 
         while True:
             res = cursor.fetchmany( 1000 )
diff --git a/lib/redfam.py b/lib/redfam.py
index 798d501..6e8b3d5 100644
--- a/lib/redfam.py
+++ b/lib/redfam.py
@@ -68,6 +68,7 @@ class RedFam:
         self._beginning = beginning
         self._ending = ending
         self._redpageid = redpageid
+        self._status = set()
         self._status = self._parse_status(status)
         self._famhash = famhash
         self._heading = heading
@@ -198,6 +199,116 @@ class RedFam:
         """
         return ",".join( self._status )
 
+    def article_add_status(self, status, index=None, title=None ):
+        """
+        Adds a status specified by status, to article (identified by title
+        or index in articlesList) status set
+
+        @param status Statusstring to add
+        @type status str
+        @param index Add to article with index in articlesList
+        @type index int
+        @param title Add to article with title in articlesList
+        @type title str
+        """
+        if title and not index:
+            index = self._articlesList.index( title )
+
+        if isinstance( index, int ) and index < len(self._articlesList):
+            self._article_status[index].add(status)
+        else:
+            raise IndexError( "No index given or wrong format!")
+
+    def article_remove_status(self, status, index=None, title=None, weak=True):
+        """
+        Removes a status specified by status, from article (identified by title
+        or index in articlesList) status set
+        If weak is set to False it will throw a KeyError when trying to
+        remove a status not set.
+
+        @param status Statusstring to remove
+        @type status str
+        @param index Remove from article with index in articlesList
+        @type index int
+        @param title Remove from article with title in articlesList
+        @type title str
+        @param weak Change behavior on missing status
+        @type weak bool
+        """
+        if title and not index:
+            index = self._articlesList.index( title )
+
+        if isinstance( index, int ) and index < len(self._articlesList):
+            if weak:
+                self._article_status[index].discard(status)
+            else:
+                self._article_status[index].remove(status)
+        else:
+            raise IndexError( "No index given or wrong format!")
+
+    def article_has_status(self, status, index=None, title=None ):
+        """
+        Checks whether status is set for article (identified by title
+        or index in articlesList); returns True or False
+
+        @param status Statusstring to check
+        @type status str
+        @param index Check article with index in articlesList
+        @type index int
+        @param title Check article with title in articlesList
+        @type title str
+        """
+        if title and not index:
+            index = self._articlesList.index( title )
+
+        if isinstance( index, int ) and index < len(self._articlesList):
+            if status in self._article_status[index]:
+                return True
+            else:
+                return False
+        else:
+            raise IndexError( "No index given or wrong format!")
+
+    def _article_parse_status(self, raw_status, index=None, title=None ):
+        """
+        Sets status based on comma separated list to articles (identified by
+        title or index in articlesList) status set
+
+        @param raw_status Comma separated statusstring to set
+        @type raw_status str
+        @param index Add to article with index in articlesList
+        @type index int
+        @param title Add to article with title in articlesList
+        @type title str
+        """
+        if title and not index:
+            index = self._articlesList.index( title )
+
+        if isinstance( index, int ) and index < len(self._articlesList):
+            self._article_status[index] = set( raw_status.strip().split(","))
+        else:
+            raise IndexError( "No index given or wrong format!")
+
+    def _article_raw_status( self, index=None, title=None ):
+        """
+        Returns status as comma separated string (to save in DB) of article
+        (identified by title or index in articlesList) status set
+
+        @param index Get from article with index in articlesList
+        @type index int
+        @param title Get from article with title in articlesList
+        @type title str
+        @returns Raw status string
+        @rtype str
+        """
+        if title and not index:
+            index = self._articlesList.index( title )
+
+        if isinstance( index, int ) and index < len(self._articlesList):
+            return ",".join( self._article_status[index] )
+        else:
+            raise IndexError( "No index given or wrong format!")
+
 
 class RedFamParser( RedFam ):
     """
@@ -491,10 +602,14 @@ class RedFamWorker( RedFam ):
     def __init__( self, mysql_data ):
 
         articlesList = []
+
         for key in sorted( mysql_data.keys() ):
-            if 'article' in key and mysql_data[ key ]:
+            if 'article' in key and 'status' not in key and mysql_data[ key ]:
                 articlesList.append( mysql_data[ key ] )
 
+        # Preset article status list with empty sets for existing articles
+        self._article_status = [set() for x in range(0, len(articlesList))]
+
         super().__init__( articlesList, mysql_data[ 'beginning' ],
                           mysql_data[ 'ending' ], mysql_data[ 'redpageid' ],
                           mysql_data[ 'status' ], mysql_data[ 'famhash' ],
@@ -502,6 +617,15 @@ class RedFamWorker( RedFam ):
 
         self._mysql.data = mysql_data
 
+        # Set up article status
+        index = 0
+        for article in self._articlesList:
+            raw_status = mysql_data[ "article" + str(index) + "_status" ]
+            # Keep preset empty set; parsing "" would yield {""}
+            if raw_status:
+                self._article_parse_status( raw_status, index )
+            index += 1
+
         # Get related RedPage-Information
         self.redpageid = mysql_data[ 'pageid' ]
         self.redpagetitle = mysql_data[ 'pagetitle' ]
@@ -510,7 +634,9 @@ class RedFamWorker( RedFam ):
         # with wrong month abreviations in strptime
         locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
 
-    def article_generator(self, filter_existing=None, filter_redirects=None ):
+    def article_generator(self, filter_existing=None, filter_redirects=None,
+                          exclude_article_status=(),
+                          onlyinclude_article_status=() ):
         """
         Yields pywikibot pageobjects for articles belonging to this redfams
         in a generator
@@ -524,11 +650,27 @@ class RedFamWorker( RedFam ):
                                  set to False to get only redirectpages,
                                  unset/None results in not filtering
         @type filter_redirects   bool/None
+        @param exclude_article_status  Skip articles having any of these
+                                       statuses
+        @type exclude_article_status   iterable of str
+        @param onlyinclude_article_status  Yield only articles having all of
+                                           these statuses
+        @type onlyinclude_article_status   iterable of str
         """
         # Iterate over articles in redfam
         for article in self._articlesList:
             page = pywikibot.Page(pywikibot.Link(article), self.site)
 
+            # Exclude by article status
+            if any( self.article_has_status( status, title=article )
+                    for status in exclude_article_status ):
+                continue
+
+            # Only include by article status
+            if not all( self.article_has_status( status, title=article )
+                        for status in onlyinclude_article_status ):
+                continue
+
             # Filter non existing Pages if requested with filter_existing=True
             if filter_existing and not page.exists():
                 continue
@@ -550,8 +692,24 @@ class RedFamWorker( RedFam ):
         """
         Sets status to 3 when worked on
         """
+        for article in self._articlesList:
+            if self.article_has_status( "note_rej", title=article ):
+                self.add_status( "note_rej" )
+            if self.article_has_status( "sav_err", title=article ):
+                self.add_status( "sav_err" )
 
-        pass
+        if not self.has_status( "sav_err" ) and \
+                not self.has_status( "note_rej" ):
+            self.add_status( "marked" )
+
+        self._mysql.data[ 'status' ] = self._raw_status()
+        index = 0
+        for article in self._articlesList:
+            self._mysql.data[ "article" + str(index) + '_status' ] = \
+                self._article_raw_status( index=index )
+            index += 1
+
+        print( repr(self) )
 
     def get_disc_link( self ):
         """