
Merge branch 'fs#25-mark-done' into test-v3

Branch: develop
Jonathan Golder, 7 years ago
commit 76666aa294
  1. bots/markpages.py (281 changes)
  2. bots/reddiscparser.py (30 changes)
  3. jogobot (2 changes)
  4. lib/mysqlred.py (469 changes)
  5. lib/redfam.py (504 changes)
  6. lib/redpage.py (90 changes)
  7. red.py (4 changes)

bots/markpages.py (281 changes)

@@ -0,0 +1,281 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# markpages.py
#
# Copyright 2016 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Bot to mark pages which were/are subjects of redundancy discussions
with templates
"""
from datetime import datetime
from pywikibot import pagegenerators
from pywikibot.bot import CurrentPageBot
import mwparserfromhell as mwparser
import jogobot
from lib.redfam import RedFamWorker
class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
"""
Bot class to mark pages which were/are subjects of redundancy discussions
with templates
"""
def __init__( self, genFactory, **kwargs ):
"""
Constructor
Parameters:
@param genFactory GenFactory with parsed pagegenerator args to
build generator
@type genFactory pagegenerators.GeneratorFactory
@param **kwargs Additional args
@type iterable
"""
# Init attribute
self.__redfams = None # Will hold a generator with our redfams
# We do not use the predefined genFactory as there is currently no
# sensible case for passing a generator via cmd-line here
self.genFactory = pagegenerators.GeneratorFactory()
# Build generator with genFactory
self.build_generator()
# Run super class init with the built generator
super( MarkPagesBot, self ).__init__(generator=self.gen)
def run(self):
"""
Controls the overall parsing process, using the super class for page switching
Needed to do things before/after the treating of pages is done
"""
super( MarkPagesBot, self ).run()
# Do redfam status updates once all pages were treated successfully
for redfam in self.redfams:
redfam.update_status()
RedFamWorker.flush_db_cache()
@property
def redfams(self):
"""
Holds redfams generator to work on in this bot
"""
# Create generator if not present
if not self.__redfams:
end_after = datetime.strptime(
jogobot.config["red.markpages"]["mark_done_after"],
"%Y-%m-%d" )
self.__redfams = list( RedFamWorker.gen_by_status_and_ending(
"archived", end_after) )
return self.__redfams
def build_generator( self ):
"""
Builds generator to pass to super class
"""
# Add Talkpages to work on to generatorFactory
self.genFactory.gens.append( self.redfam_talkpages_generator() )
# Set generator to pass to super class
self.gen = pagegenerators.PreloadingGenerator(
self.genFactory.getCombinedGenerator() )
def redfam_talkpages_generator( self ):
"""
Wraps redfam.article_generator and
passes it to pagegenerators.PageWithTalkPageGenerator().
Then it iterates over the generator and adds a reference to the
related redfam to each talkpage object.
"""
for redfam in self.redfams:
# We need the talkpage (and only this) of each existing page
for talkpage in pagegenerators.PageWithTalkPageGenerator(
redfam.article_generator(
filter_existing=True,
exclude_article_status=["marked"] ),
return_talk_only=True ):
# Add reference to redfam to talkpages
talkpage.redfam = redfam
yield talkpage
def treat_page( self ):
"""
Handles work on current page
We get a reference to related redfam in current_page.redfam
"""
# First we need to have the current text of page
# and parse it as wikicode
self.current_wikicode = mwparser.parse( self.current_page.text )
# Add notice
# Returns True if added
# None if already present
add_ret = self.add_disc_notice_template()
# Convert wikicode back to string to save
self.new_text = str( self.current_wikicode )
# Define edit summary
summary = jogobot.config["red.markpages"]["mark_done_summary"].format(
reddisc=self.current_page.redfam.get_disc_link() ).strip()
# Make sure summary starts with "Bot:"
if not summary.startswith( "Bot:" ):
summary = "Bot: " + summary.strip()
# will return True if saved
# False if not saved because of errors
# None if change was not accepted by user
save_ret = self.put_current( self.new_text, summary=summary )
# Status
if add_ret is None or ( add_ret and save_ret ):
self.current_page.redfam.article_add_status(
"marked",
title=self.current_page.title(withNamespace=False))
elif save_ret is None:
self.current_page.redfam.article_add_status(
"note_rej",
title=self.current_page.title(withNamespace=False))
else:
self.current_page.redfam.article_add_status(
"sav_err",
title=self.current_page.title(withNamespace=False))
def add_disc_notice_template( self ):
"""
Takes self.current_wikicode and adds the disc notice template after the
last template in the leading section, or as the first element if there
is no other template in the leading section
"""
# The notice to add
self.disc_notice = \
self.current_page.redfam.generate_disc_notice_template()
# Check if it is already present in wikicode
if self.disc_notice_present():
return
# Find the right place to insert notice template
# Therefore we need the first section (if there is one)
leadsec = self.current_wikicode.get_sections(
flat=False, include_lead=True )[0]
# There is none on empty pages, so we need to check
if leadsec:
# Get the templates in leadsec
# (indexing [-1] directly would raise an IndexError on empty pages)
ltemplates = leadsec.filter_templates()
# If there is at least one, add the notice after the last of them
if ltemplates:
self.current_wikicode.insert_after(ltemplates[-1], self.disc_notice)
# To have it in its own line we need to add a linebreak before
self.current_wikicode.insert_before(self.disc_notice, "\n" )
# If there is no template, add before first element on page
else:
self.current_wikicode.insert( 0, self.disc_notice )
# If there is no leadsec (and therefore no template in it), we will
# add before the first element
else:
self.current_wikicode.insert( 0, self.disc_notice )
# Notice was added
return True
def disc_notice_present(self):
"""
Checks if the disc notice which shall be added is already present.
"""
# Iterate over templates with the same name (if any) and compare
# their reddisc links to decide if they are the same
for present_notice in self.current_wikicode.ifilter_templates(
matches=self.disc_notice.name ):
# Get reddisc page title of the notice to add
add_notice_link_title = self.disc_notice.get(
"Diskussion").partition("#")[0]
# Get reddisc page title of the possibly present notice
present_notice_link_title = present_notice.get(
"Diskussion").partition("#")[0]
# If those are equal, the notice is already present
if add_notice_link_title == present_notice_link_title:
return True
# If no notice matched, the loop completes and we report absence
else:
return False
# We need to override this since the original from
# pywikibot.bot.CurrentPageBot does not return the result of
# self._save_page
def put_current(self, new_text, ignore_save_related_errors=None,
ignore_server_errors=None, **kwargs):
"""
Call L{Bot.userPut} but use the current page.
It compares the new_text to the current page text.
@param new_text: The new text
@type new_text: basestring
@param ignore_save_related_errors: Ignore save related errors and
automatically print a message. If None, uses this instance's default.
@type ignore_save_related_errors: bool or None
@param ignore_server_errors: Ignore server errors and automatically
print a message. If None, uses this instance's default.
@type ignore_server_errors: bool or None
@param kwargs: Additional parameters directly given to L{Bot.userPut}.
@type kwargs: dict
"""
if ignore_save_related_errors is None:
ignore_save_related_errors = self.ignore_save_related_errors
if ignore_server_errors is None:
ignore_server_errors = self.ignore_server_errors
return self.userPut(
self.current_page, self.current_page.text, new_text,
ignore_save_related_errors=ignore_save_related_errors,
ignore_server_errors=ignore_server_errors,
**kwargs)
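Reviewer note: the lead-section insertion technique used by add_disc_notice_template() can be exercised standalone. A minimal sketch of the same mwparserfromhell calls; the template name and page text here are made up for illustration, the real name comes from jogobot.config['redundances']['disc_notice_template_name']:

import mwparserfromhell as mwparser

text = "{{Infobox Foo}}\nLead text.\n\n== Section ==\nMore text."
wikicode = mwparser.parse( text )
# Illustrative notice template, not the configured one
notice = mwparser.parse(
    "{{Redundanzhinweis|Diskussion=...}}" ).filter_templates()[0]

# The lead section is the first element when include_lead=True
leadsec = wikicode.get_sections( flat=False, include_lead=True )[0]
templates = leadsec.filter_templates()
if templates:
    # Insert after the last template of the lead, on its own line
    wikicode.insert_after( templates[-1], notice )
    wikicode.insert_before( notice, "\n" )
else:
    # No template in the lead: prepend to the page
    wikicode.insert( 0, notice )

print( wikicode )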

bots/reddiscparser.py (30 changes)

@@ -33,8 +33,8 @@ from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
import jogobot
from lib import redpage
from lib import redfam
from lib.redpage import RedPage
from lib.redfam import RedFamParser
class DiscussionParserBot(
@@ -127,7 +127,7 @@ class DiscussionParserBot(
else:
# If successfully parsed all pages in cat, flush db write cache
redpage.RedPage.flush_db_cache()
RedPage.flush_db_cache()
def treat_page( self ):
"""
@@ -146,20 +146,23 @@ class DiscussionParserBot(
return
# Initiate RedPage object
red_page = redpage.RedPage( self.current_page )
redpage = RedPage.session.query(RedPage).filter(
RedPage.pageid == self.current_page.pageid ).one_or_none()
# Check whether parsing is needed
if red_page.is_parsing_needed():
if redpage:
redpage.update( self.current_page )
else:
redpage = RedPage( self.current_page )
# Check whether parsing is needed
if redpage.is_parsing_needed():
# Count families for failure analysis
fam_counter = 0
# Iterate over returned generator with redfam sections
for fam in red_page.parse():
for fam in redpage.parse():
# Run RedFamParser on section text
redfam.RedFamParser.parser( fam, red_page.page,
red_page.is_archive() )
RedFamParser.parser( fam, redpage, redpage.archive )
fam_counter += 1
@@ -167,12 +170,13 @@ class DiscussionParserBot(
# If successfully parsed whole page, flush
# db write cache
if( fam_counter ):
redfam.RedFamParser.flush_db_cache()
RedFamParser.flush_db_cache()
jogobot.output( "Page [[{reddisc}]] parsed".format(
reddisc=red_page.page.title() ) )
reddisc=redpage.page.title() ) )
else:
jogobot.output(
"\03{red}" + "Page [[{reddisc}]], ".format(
reddisc=red_page.page.title() ) +
reddisc=redpage.page.title() ) +
"containing no redfam, parsed!",
"WARNING" )

jogobot (2 changes)

@@ -1 +1 @@
Subproject commit 28d03f35b848a33ad45d3f5f8f3f82e8c45534ec
Subproject commit 49ada2993e345600523c161c5e2516ec65625684

lib/mysqlred.py (469 changes)

@@ -25,315 +25,300 @@
Provides interface classes for communication of redundances bot with mysql-db
"""
# Prefer using oursql over MySQLdb
try:
import oursql as mysqldb
except ImportError:
import MySQLdb as mysqldb
import atexit # noqa
import atexit
import pywikibot
import pywikibot # noqa
from pywikibot import config
import jogobot
from sqlalchemy import (
create_engine, Column, Integer, String, Text, DateTime, ForeignKey )
from sqlalchemy import text # noqa
from sqlalchemy.engine.url import URL
from sqlalchemy.ext.declarative import (
declarative_base, declared_attr, has_inherited_table )
from sqlalchemy.ext.mutable import MutableComposite, MutableSet
from sqlalchemy.orm import sessionmaker, relationship, composite
from sqlalchemy.orm.collections import attribute_mapped_collection
import sqlalchemy.types as types
class MysqlRed:
"""
Basic interface class, containing opening of connection
Specific queries should be defined in descendant classes per data type
"""
Base = declarative_base()
# Save mysqldb-connection as class attribute to use only one
# in descendant classes
connection = False
db_hostname = config.db_hostname
db_port = config.db_port
db_username = config.db_username
db_password = config.db_password
db_name = config.db_username + jogobot.config['db_suffix']
db_table_prefix = False
# Class variables for storing cached queries
_cached_update_data = []
_update_query = ''
_cached_insert_data = {}
_insert_query = ''
def __init__( self ):
"""
Opens a connection to MySQL-DB
url = URL( "mysql+oursql",
username=config.db_username,
password=config.db_password,
host=config.db_hostname,
port=config.db_port,
database=config.db_username + jogobot.config['db_suffix'] )
engine = create_engine(url, echo=True)
@returns mysql-stream MySQL Connection
"""
# Needs to be generated after Parsing of Args (not at import time)
if not type(self).db_table_prefix:
type(self).db_table_prefix = \
pywikibot.Site().family.dbName(pywikibot.Site().code)
# Now we can setup prepared queries
self._prepare_queries()
Session = sessionmaker(bind=engine)
session = Session()
# Connect to mysqldb only once
if not type( self ).connection:
family = pywikibot.Site().family.dbName(pywikibot.Site().code)
type( self ).connection = mysqldb.connect(
host=type( self ).db_hostname,
port=type( self ).db_port,
user=type( self ).db_username,
passwd=type( self ).db_password,
db=type( self ).db_name )
# Register callback to warn if exiting with cached db write queries
atexit.register( type(self).warn_if_not_flushed )
class Mysql(object):
session = session
def __del__( self ):
"""
Before deleting class, close connection to MySQL-DB
"""
@declared_attr
def _tableprefix(cls):
return family + "_"
type( self ).connection.close()
@declared_attr
def _tablesuffix(cls):
return "s"
def _prepare_queries( self ):
"""
Used to replace placeholders in prepared queries
"""
type(self)._update_query = type(self)._update_query.format(
prefix=type(self).db_table_prefix)
type(self)._insert_query = type(self)._insert_query.format(
prefix=type(self).db_table_prefix)
@declared_attr
def __tablename__(cls):
if has_inherited_table(cls):
return None
name = cls.__name__[len("Mysql"):].lower()
return cls._tableprefix + name + cls._tablesuffix
@classmethod
def flush( cls ):
"""
Run cached queries
"""
if not cls.connection:
raise MysqlRedConnectionError( "No connection exists!" )
cursor = cls.connection.cursor()
# Execute insert query
if cls._cached_insert_data:
# Since cls._cached_insert_data is a dict, we need to have a custom
# Generator to iterate over it
cursor.executemany( cls._insert_query,
( cls._cached_insert_data[ key ]
for key in cls._cached_insert_data ) )
# Reset after writing
cls._cached_insert_data = {}
# Execute update query
# Use executemany since update could not be reduced to one query
if cls._cached_update_data:
cursor.executemany( cls._update_query, cls._cached_update_data )
# Reset after writing
cls._cached_update_data = []
# Commit db changes
if cls._cached_insert_data or cls._cached_update_data:
cls.connection.commit()
@classmethod
def warn_if_not_flushed(cls):
"""
Outputs a warning if there are db write queries cached and not flushed
before exiting the program!
"""
if cls._cached_update_data or cls._cached_insert_data:
jogobot.output( "Cached Database write querys not flushed!!! " +
"Data loss is possible!", "WARNING" )
def changedp(self):
return self.session.is_modified(self)
class MysqlRedPage( MysqlRed ):
class MutableSet(MutableSet):
"""
MySQL-db interface for handling queries for RedPages
Extended version of the mutable set for our states
"""
# Class variables for storing cached queries
# '{prefix}' will be replaced during super().__init__()
_cached_update_data = []
_update_query = 'UPDATE `{prefix}_red_pages` \
SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'
_cached_insert_data = {}
_insert_query = 'INSERT INTO `{prefix}_red_pages` \
( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );'
def __init__( self, page_id ):
def has(self, item):
"""
Creates a new instance, runs __init__ of parent class
"""
super().__init__( )
self.__page_id = int( page_id )
self.data = self.get_page()
def __del__( self ):
pass
Check if item is in set
def get_page( self ):
@param item Item to check
"""
Retrieves a red page row from MySQL-Database for given page_id
return item in self
@param int page_id MediaWiki page_id for page to retrieve
@returns tuple Tuple with data for given page_id
bool FALSE if none found
def add(self, item):
"""
Extended add method, which only results in a changed object if an
item is really added.
cursor = type( self ).connection.cursor(mysqldb.DictCursor)
cursor.execute(
'SELECT * FROM `{prefix}_red_pages` WHERE `page_id` = ?;'.format(
prefix=type(self).db_table_prefix), ( self.__page_id, ) )
res = cursor.fetchone()
if res:
return res
else:
return False
def add_page( self, page_title, rev_id, status=0 ):
@param item Item to add
"""
Inserts a red page row in MySQL-Database for given page_id
if item not in self:
super().add(item)
@param int rev_id MediaWiki current rev_id
@param str page_title MediaWiki new page_title
@param int status Page parsing status
def discard(self, item):
"""
Wrapper for extended remove below
insert_data = { self.__page_id: ( self.__page_id, page_title,
rev_id, status ) }
type( self )._cached_insert_data.update( insert_data )
# Manually construct self.data dict
self.data = { 'page_id': self.__page_id, 'rev_id': rev_id,
'page_title': page_title, 'status': status }
def update_page( self, rev_id=None, page_title=None, status=0 ):
@param item Item to discard
"""
Updates the red page row in MySQL-Database for given page_id
self.remove(item)
@param int rev_id MediaWiki current rev_id
@param str page_title MediaWiki new page_title
@param int status Page parsing status
def remove(self, item, weak=True ):
"""
Extended remove method, which only results in a changed object if an
item is really removed. Additionally, combines remove and discard!
if not page_title:
page_title = self.data[ 'page_title' ]
if not rev_id:
rev_id = self.data[ 'rev_id' ]
type( self )._cached_update_data.append( ( page_title, rev_id,
status, self.__page_id ) )
@param item Item to remove/discard
@param weak Set to false to use remove, else discard behavior
"""
if item in self:
if weak:
super().discard(item)
else:
super().remove(item)
class MysqlRedFam( MysqlRed ):
class ColumnList( list, MutableComposite ):
"""
MySQL-db interface for handling queries for RedFams
Combines multiple Colums into a list like object
"""
# Class variables for storing cached queries
_cached_update_data = []
_update_query = 'UPDATE `{prefix}_red_families` \
SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \
`status`= ? WHERE `fam_hash` = ?;'
_cached_insert_data = {}
_insert_query = 'INSERT INTO `{prefix}_red_families` \
( fam_hash, red_page_id, beginning, ending, status, heading, \
article0, article1, article2, article3, article4, article5, article6, \
article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
def __init__( self ):
def __init__( self, *columns ):
"""
Wrapper around the list constructor, deciding whether we are
initialized with individual params per article or with an iterable.
"""
# Individual params per article (from db), first one is a str
if isinstance( columns[0], str ) or \
isinstance( columns[0], MutableSet ) or columns[0] is None:
super().__init__( columns )
# Iterable articles list
else:
super().__init__( columns[0] )
def __setitem__(self, key, value):
"""
Creates a new instance, runs __init__ of parent class
The MutableComposite class needs to be notified about changes in our
component, so we tweak the setitem process.
"""
super().__init__( )
# set the item
super().__setitem__( key, value)
def __del__( self ):
pass
# alert all parents to the change
self.changed()
def get_fam( self, fam_hash ):
def __composite_values__(self):
"""
Retrieves a red family row from MySQL-Database for given fam_hash
@returns dict Dictionary with data for given fam hash
False if none found
The composite machinery needs this method to get the items for the db.
"""
self.__fam_hash = fam_hash
return self
cursor = type( self ).connection.cursor( mysqldb.DictCursor )
cursor.execute(
'SELECT * FROM `{prefix}_red_families` WHERE `fam_hash` = ?;'.
format( prefix=type(self).db_table_prefix), ( fam_hash, ) )
class Status( types.TypeDecorator ):
self.data = cursor.fetchone()
impl = types.String
def add_fam( self, articlesList, heading, red_page_id,
beginning, ending=None, status=0 ):
def process_bind_param(self, value, dialect):
"""
Returns status as a comma-separated string (to save in the DB)
data = [ self.__fam_hash, red_page_id, beginning, ending,
status, heading ]
@returns Raw status string
@rtype str
"""
if isinstance(value, MutableSet):
return ",".join( value )
# Bound values arrive as plain Python strings, not sqlalchemy String
elif isinstance(value, str) or value is None:
return value
else:
raise TypeError(
"Value should be an instance of one of {0:s}, ".format(
str( [type(MutableSet()), str, type(None)] ) ) +
"given value was an instance of {0:s}".format(
str(type(value))) )
for article in articlesList:
data.append( str( article ) )
def process_result_value(self, value, dialect):
"""
Sets status based on a comma-separated list
while len( data ) < 14:
data.append( None )
@param value Comma-separated string of statuses (from DB)
@type value str
"""
if value:
return MutableSet( value.strip().split(","))
else:
return MutableSet([])
def copy(self, **kw):
return Status(self.impl.length)
class MysqlRedFam( Mysql, Base ):
famhash = Column( String(64), primary_key=True, unique=True )
__article0 = Column('article0', String(255), nullable=False )
__article1 = Column('article1', String(255), nullable=False )
__article2 = Column('article2', String(255), nullable=True )
__article3 = Column('article3', String(255), nullable=True )
__article4 = Column('article4', String(255), nullable=True )
__article5 = Column('article5', String(255), nullable=True )
__article6 = Column('article6', String(255), nullable=True )
__article7 = Column('article7', String(255), nullable=True )
__articlesList = composite(
ColumnList, __article0, __article1, __article2, __article3,
__article4, __article5, __article6, __article7 )
heading = Column( Text, nullable=False )
redpageid = Column(
Integer, ForeignKey( family + "_redpages.pageid" ), nullable=False )
beginning = Column( DateTime, nullable=False )
ending = Column( DateTime, nullable=True )
_status = Column( 'status', MutableSet.as_mutable(Status(255)),
nullable=True )
__article0_status = Column(
'article0_status', MutableSet.as_mutable(Status(64)), nullable=True )
__article1_status = Column(
'article1_status', MutableSet.as_mutable(Status(64)), nullable=True )
__article2_status = Column(
'article2_status', MutableSet.as_mutable(Status(64)), nullable=True )
__article3_status = Column(
'article3_status', MutableSet.as_mutable(Status(64)), nullable=True )
__article4_status = Column(
'article4_status', MutableSet.as_mutable(Status(64)), nullable=True )
__article5_status = Column(
'article5_status', MutableSet.as_mutable(Status(64)), nullable=True )
__article6_status = Column(
'article6_status', MutableSet.as_mutable(Status(64)), nullable=True )
__article7_status = Column(
'article7_status', MutableSet.as_mutable(Status(64)), nullable=True )
__articlesStatus = composite(
ColumnList, __article0_status, __article1_status, __article2_status,
__article3_status, __article4_status, __article5_status,
__article6_status, __article7_status )
redpage = relationship( "MysqlRedPage", enable_typechecks=False,
back_populates="redfams" )
@property
def articlesList(self):
"""
List of articles belonging to the redfam
"""
return self.__articlesList
data = tuple( data )
@articlesList.setter
def articlesList(self, articlesList):
# Make sure to always have full length for complete overwrites
while( len(articlesList) < 8 ):
articlesList.append(None)
self.__articlesList = ColumnList(articlesList)
insert_data = { self.__fam_hash: data }
type( self )._cached_insert_data.update( insert_data )
@property
def status( self ):
"""
Current fam status
"""
return self._status
# Manually construct self.data dict
data_keys = ( 'fam_hash', 'red_page_id', 'beginning', 'ending',
'status', 'heading', 'article0', 'article1', 'article2',
'article3', 'article4', 'article5', 'article6',
'article7' )
self.data = dict( zip( data_keys, data ) )
@status.setter
def status( self, status ):
if status:
self._status = MutableSet( status )
else:
self._status = MutableSet()
def update_fam( self, red_page_id, heading, beginning, ending, status ):
@property
def articlesStatus(self):
"""
Updates the red fam row in MySQL-Database for given fam_hash
@param int red_page_id MediaWiki page_id
@param datetime beginning Timestamp of beginning
@param datetime ending Timestamp of ending
@param int status red_fam status
List of status strings/sets for the articles of the redfam
"""
return self.__articlesStatus
@articlesStatus.setter
def articlesStatus(self, articlesStatus):
self.__articlesStatus = ColumnList(articlesStatus)
type( self )._cached_update_data.append( ( red_page_id, heading,
beginning, ending, status,
self.__fam_hash ) )
class MysqlRedPage( Mysql, Base ):
pageid = Column( Integer, unique=True, primary_key=True )
revid = Column( Integer, unique=True, nullable=False )
pagetitle = Column( String(255), nullable=False )
__status = Column( 'status', MutableSet.as_mutable(Status(255)),
nullable=True )
def get_by_status( self, status ):
redfams = relationship(
"MysqlRedFam", enable_typechecks=False,
back_populates="redpage", order_by=MysqlRedFam.famhash,
collection_class=attribute_mapped_collection("famhash") )
@property
def status( self ):
"""
Generator which fetches redFams with given status from DB
Current page status
"""
return self.__status
cursor = type( self ).connection.cursor( mysqldb.DictCursor )
cursor.execute(
'SELECT * FROM `{prefix}_red_families` WHERE `status` = ?;'.format(
prefix=type( self ).db_table_prefix), ( status, ) )
@status.setter
def status( self, status ):
if status:
self.__status = MutableSet( status )
else:
self.__status = MutableSet()
while True:
res = cursor.fetchmany( 1000 )
if not res:
break
for row in res:
yield row
Base.metadata.create_all(engine)
class MysqlRedError(Exception):
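Reviewer note: the new Status column type serializes a set of status strings into one comma-separated DB column, while MutableSet keeps in-place mutations visible to the session. A rough sketch of the round-trip, mirroring process_bind_param() and process_result_value() above (assumes MutableSet is importable from lib.mysqlred):

from lib.mysqlred import MutableSet

# Python side: a mutable set of status strings
status = MutableSet( { "open", "parsed" } )

# Binding to the DB column: the set becomes e.g. "open,parsed"
# (set iteration order is not guaranteed)
raw = ",".join( status )

# Loading from the DB column: the string becomes a MutableSet again
restored = MutableSet( raw.strip().split( "," ) )
assert restored.has( "open" ) and restored.has( "parsed" )

With family set to e.g. "dewiki", the declared_attr helpers above name the tables "dewiki_redfams" and "dewiki_redpages", matching the ForeignKey on MysqlRedFam.redpageid.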

lib/redfam.py (504 changes)

@@ -3,7 +3,7 @@
#
# redfam.py
#
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
# Copyright 2017 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -35,16 +35,16 @@ import pywikibot # noqa
from pywikibot.tools import deprecated # noqa
import jogobot
from lib.mysqlred import MysqlRedFam
from lib.mysqlred import MysqlRedFam, text
class RedFam:
class RedFam( MysqlRedFam ):
"""
Base class for RedFams, containing the basic data structure
"""
def __init__( self, articlesList, beginning, ending=None, red_page_id=None,
status=0, fam_hash=None, heading=None ):
def __init__( self, articlesList, beginning, ending=None, redpageid=None,
status=None, famhash=None, heading=None ):
"""
Generates a new RedFam object
@@ -52,23 +52,24 @@ class RedFam:
@param beginning datetime Beginning date
@param ending datetime Ending date
@param red_page_id int MW pageid of containing RedPage
@param status int Status of RedFam
@param status str Status of RedFam
@param fam_hash str SHA1 hash of articlesList
@param heading str Original heading of RedFam (Link)
"""
# Initial attribute values
self._articlesList = articlesList
self._beginning = beginning
self._ending = ending
self._red_page_id = red_page_id
self._status = status
self._fam_hash = fam_hash
self._heading = heading
# Having pywikibot.Site() is a good idea most of the time
self.site = pywikibot.Site()
# Calculates the sha1 hash over self._articlesList to
# rediscover known redundancy families
self.calc_fam_hash()
super().__init__(
articlesList=articlesList,
beginning=beginning,
ending=ending,
redpageid=redpageid,
famhash=famhash,
heading=heading,
status=status,
articlesStatus=None
)
def __repr__( self ):
"""
@@ -78,18 +79,20 @@ class RedFam:
"""
__repr = "RedFam( " + \
"articlesList=" + repr( self._articlesList ) + \
", heading=" + repr( self._heading ) + \
", beginning=" + repr( self._beginning ) + \
", ending=" + repr( self._ending ) + \
", red_page_id=" + repr( self._red_page_id ) + \
", status=" + repr( self._status ) + \
", fam_hash=" + repr( self._fam_hash ) + \
"articlesList=" + repr( self.articlesList ) + \
", heading=" + repr( self.heading ) + \
", beginning=" + repr( self.beginning ) + \
", ending=" + repr( self.ending ) + \
", red_page_id=" + repr( self.redpageid ) + \
", status=" + repr( self.status ) + \
", fam_hash=" + repr( self.famhash ) + \
", articlesStatus=" + repr( self.articlesStatus ) + \
" )"
return __repr
def calc_fam_hash( self ):
@classmethod
def calc_famhash(cls, articlesList ):
"""
Calculates the SHA-1 hash for the articlesList of a redundancy family.
Since we don't need security, SHA-1 is just fine.
@@ -98,22 +101,91 @@
"""
h = hashlib.sha1()
h.update( str( self._articlesList[:8] ).encode('utf-8') )
# Since the articlesList attr of RedFam will always have 8 members, we
# need to fill up smaller lists (longer ones will be cropped below).
while len( articlesList) < 8:
articlesList.append(None)
if self._fam_hash and h.hexdigest() != self._fam_hash:
raise RedFamHashError( self._fam_hash, h.hexdigest() )
h.update( str( articlesList[:8] ).encode('utf-8') )
elif self._fam_hash:
return
else:
self._fam_hash = h.hexdigest()
return h.hexdigest()
@classmethod
def flush_db_cache( cls ):
"""
Calls flush method of Mysql Interface class
"""
MysqlRedFam.flush()
cls.session.commit()
def article_add_status(self, status, index=None, title=None ):
"""
Adds the given status to the status set of the article identified
by title or index in articlesList.
@param status Status string to add
@type status str
@param index Add to article with index in articlesList
@type index int
@param title Add to article with title in articlesList
@type title str
"""
if title and not index:
index = self.articlesList.index( title )
if isinstance( index, int ) and index < len(self.articlesList):
self.articlesStatus[index].add(status)
else:
raise IndexError( "No index given or wrong format!")
def article_remove_status(self, status, index=None, title=None, weak=True):
"""
Removes the given status from the status set of the article
identified by title or index in articlesList.
If weak is set to False, it will throw a KeyError when trying to
remove a status that is not set.
@param status Status string to remove
@type status str
@param index Remove from article with index in articlesList
@type index int
@param title Remove from article with title in articlesList
@type title str
@param weak Change behavior on missing status
@type weak bool
"""
if title and not index:
index = self.articlesList.index( title )
if isinstance( index, int ) and index < len(self.articlesList):
if weak:
self.articlesStatus[index].discard(status)
else:
self.articlesStatus[index].remove(status)
else:
raise IndexError( "No index given or wrong format!")
def article_has_status(self, status, index=None, title=None ):
"""
Checks whether the given status is in the status set of the article
identified by title or index in articlesList.
@param status Status string to check
@type status str
@param index Check article with index in articlesList
@type index int
@param title Check article with title in articlesList
@type title str
"""
if title and not index:
index = self.articlesList.index( title )
if isinstance( index, int ) and index < len(self.articlesList):
return status in self.articlesStatus[index]
else:
raise IndexError( "No index given or wrong format!")
class RedFamParser( RedFam ):
@@ -137,72 +209,65 @@ class RedFamParser( RedFam ):
wurde gewünscht von:"
__done_notice2 = "{{Erledigt|"
def __init__( self, heading, red_page, red_page_archive,
def __init__( self, articlesList, heading, redpage, redpagearchive,
beginning, ending=None ):
"""
Creates a RedFam object based on data collected while parsing red_pages
combined with possibly former known data from db
@param red_fam_heading str Wikitext heading of section
@param red_page page Pywikibot.page object
@param red_page_archive bool Is red_page an archive
@param redfam_heading str Wikitext heading of section
@param redpage page Pywikibot.page object
@param redpagearchive bool Is red_page an archive
@param beginning datetime Timestamp of beginning
str as strptime parseable string
@param ending datetime Timestamp of ending
str strptime parseable string
"""
# Set object attributes:
self._red_page_id = red_page._pageid
self._red_page_archive = red_page_archive
self._fam_hash = None
# Method self.add_beginning sets self._beginning directly
self.add_beginning( beginning )
# Calculate the sha1 hash over articlesList to
# rediscover known redundancy families
famhash = type(self).calc_famhash(articlesList)
# Method self.add_ending sets self._ending directly
if( ending ):
self.add_ending( ending )
else:
# If no ending was provided set to None
self._ending = None
# Set object attributes:
self.redpage = redpage
self._status = None
# Parse Timestamps
beginning = self.__datetime(beginning)
if ending:
ending = self.__datetime(ending)
# Parse the provided heading of redundance section
# to set self._articlesList
self.heading_parser( heading )
super().__init__( articlesList,
beginning,
ending=ending,
redpageid=redpage.page._pageid,
famhash=famhash,
heading=heading )
# Calculates the sha1 hash over self._articlesList to
# rediscover known redundancy families
# Check status changes
self.check_status()
self.calc_fam_hash()
self.session.add(self)
# Open database connection, ask for data if existing,
# otherwise create entry
self.__handle_db()
def update( self, articlesList, heading, redpage, redpagearchive,
beginning, ending=None ):
# Check status changes
self.status()
self.articlesList = articlesList
self.heading = heading
self.redpage = redpage
self.redpageid = redpage.pageid
# Triggers db update if anything changed
self.changed()
self.add_beginning( beginning )
def __handle_db( self ):
"""
Handles opening of db connection
"""
if ending:
self.add_ending( ending )
# We need a connection to our mysqldb
self.__mysql = MysqlRedFam( )
self.__mysql.get_fam( self._fam_hash )
self._redpagearchive = redpagearchive
if not self.__mysql.data:
self.__mysql.add_fam( self._articlesList, self._heading,
self._red_page_id, self._beginning,
self._ending )
# Check status changes
self.check_status()
def heading_parser( self, heading ):
@classmethod
def heading_parser( cls, heading ):
"""
Parses given red_fam_heading string and saves articles list
@@ -210,33 +275,13 @@ class RedFamParser( RedFam ):
@type heading wikicode or mwparser-parseable
"""
# Save heading as string
self._heading = str( heading )
# Parse the heading string with mwparser again every time
# In some cases the given wikicode is broken due to syntax errors
# (Task FS#77)
heading = mwparser.parse( self._heading )
heading = mwparser.parse( str( heading ) )
# Save destinations of wikilinks in headings
self._articlesList = [ str( link.title ) for link
in heading.ifilter_wikilinks() ]
# Catch sections with more than 8 articles, print error
if len( self._articlesList ) > 8:
# For repression in output we need to know the fam hash
self.calc_fam_hash()
jogobot.output(
( "\03{{lightred}}" +
"Maximum number of articles in red_fam exceeded, " +
"maximum number is 8, {number:d} were given \n {repress}"
).format( datetime=datetime.now().strftime(
"%Y-%m-%d %H:%M:%S" ), number=len( self._articlesList ),
repress=repr( self ) ),
"WARNING" )
# Only save the first 8 articles
self._articlesList = self._articlesList[:8]
return [ str( link.title ) for link in heading.ifilter_wikilinks() ]
def add_beginning( self, beginning ):
"""
@@ -245,7 +290,7 @@ class RedFamParser( RedFam ):
@param datetime datetime Beginning date
"""
self._beginning = self.__datetime( beginning )
self.beginning = self.__datetime( beginning )
def add_ending( self, ending ):
"""
@@ -254,7 +299,7 @@ class RedFamParser( RedFam ):
@param datetime datetime Ending date
"""
self._ending = self.__datetime( ending )
self.ending = self.__datetime( ending )
def __datetime( self, timestamp ):
"""
@@ -278,7 +323,7 @@ class RedFamParser( RedFam ):
type( self ).__timestamp_format )
return result
def status( self ):
def check_status( self ):
"""
Handles detection of correct status
There are three possible stati:
@@ -288,42 +333,18 @@ class RedFamParser( RedFam ):
- 3 and greater status was set by worker script, do not change it
"""
# Do not change stati set by worker script etc.
if not self.__mysql.data['status'] > 2:
# No ending, discussion is running:
# Sometimes archived discussions also have no detectable ending
if not self._ending and not self._red_page_archive:
self._status = 0
else:
if not self._red_page_archive:
self._status = 1
else:
self._status = 2
# No ending, discussion is running:
# Sometimes archived discussions also have no detectable ending
if not self.ending and not self.redpage.archive:
self.status.add("open")
else:
self._status = self.__mysql.data[ 'status' ]
def changed( self ):
"""
Checks whether anything has changed and maybe triggers a db update
"""
# On archived red_fams do not delete possibly existing ending
if( not self._ending and self._status > 1 and
self.__mysql.data[ 'ending' ] ):
self._ending = self.__mysql.data[ 'ending' ]
# Since status change means something has changed, update database
if( self._status != self.__mysql.data[ 'status' ] or
self._beginning != self.__mysql.data[ 'beginning' ] or
self._ending != self.__mysql.data[ 'ending' ] or
self._red_page_id != self.__mysql.data[ 'red_page_id' ] or
self._heading != self.__mysql.data[ 'heading' ]):
self.__mysql.update_fam( self._red_page_id, self._heading,
self._beginning, self._ending,
self._status )
self.status.remove("open")
if not self.redpage.archive:
self.status.add("done")
else:
self.status.remove("done")
self.status.remove("open")
self.status.add("archived")
@classmethod
def is_section_redfam_cb( cls, heading ):
@@ -342,7 +363,7 @@ class RedFamParser( RedFam ):
return False
@classmethod
def parser( cls, text, page, isarchive=False ):
def parser( cls, text, redpage, isarchive=False ):
"""
Handles parsing of redfam section
@@ -355,7 +376,7 @@ class RedFamParser( RedFam ):
text = mwparser.parse( text )
# Extract heading text
heading = next( text.ifilter_headings() ).title
heading = next( text.ifilter_headings() ).title.strip()
# Extract beginnig and maybe ending
(beginning, ending) = RedFamParser.extract_dates( text, isarchive )
@@ -365,16 +386,37 @@ class RedFamParser( RedFam ):
if not beginning:
match = re.search(
jogobot.config["redundances"]["reddiscs_onlyinclude_re"],
page.title() )
redpage.page.title() )
if match:
beginning = datetime.strptime(
"01. {month} {year}".format(
month=match.group(1), year=match.group(2)),
"%d. %B %Y" )
articlesList = RedFamParser.heading_parser( heading )
famhash = RedFamParser.calc_famhash( articlesList )
# Check for existing objects in DB first in current redpage
redfam = redpage.redfams.get(famhash)
with RedFamParser.session.no_autoflush:
if not redfam:
# Otherwise in db table
redfam = RedFamParser.session.query(RedFamParser).filter(
RedFamParser.famhash == famhash ).one_or_none()
if redfam:
# Existing redfams need to be updated
redfam.update( articlesList, str(heading), redpage, isarchive,
beginning, ending )
else:
# Create the RedFam object
redfam = RedFamParser( articlesList, str(heading),
redpage, isarchive, beginning, ending )
# Create the RedFam object
RedFamParser( heading, page, isarchive, beginning, ending )
# Add redfam to redpage object
redpage.redfams.set( redfam )
@classmethod
def extract_dates( cls, text, isarchive=False ):
@@ -428,17 +470,164 @@ class RedFamWorker( RedFam ):
Handles working with redundancy families stored in the database
whose discussion is finished
"""
def __init__( self, mysql_data ):
def __init__( self ):
super().__init__()
# Make sure locale is set to 'de_DE.UTF-8' to prevent problems
# with wrong month abbreviations in strptime
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
def article_generator(self, # noqa
filter_existing=None, filter_redirects=None,
exclude_article_status=[],
onlyinclude_article_status=[] ):
"""
Yields pywikibot page objects for the articles belonging to this
redfam in a generator.
@param filter_existing Set to True to only get existing pages
set to False to only get nonexisting pages
unset/None results in not filtering
@type filter_existing bool/None
@param filter_redirects Set to True to get only noredirectpages,
set to False to get only redirectpages,
unset/None results in not filtering
@type filter_redirects bool/None
"""
# Iterate over articles in redfam
for article in self.articlesList:
# Not all list elements contain articles
if not article:
break
page = pywikibot.Page(pywikibot.Link(article), pywikibot.Site())
# Filter existing pages if requested with filter_existing=False
if page.exists():
self.article_remove_status( "deleted", title=article )
if filter_existing is False:
continue
# Filter non existing Pages if requested with filter_existing=True
else:
self.article_add_status( "deleted", title=article )
if filter_existing:
continue
# Filter redirects if requested with filter_redirects=True
if page.isRedirectPage():
self.article_add_status( "redirect", title=article )
if filter_redirects:
continue
# Filter noredirects if requested with filter_redirects=False
else:
self.article_remove_status("redirect", title=article )
if filter_redirects is False:
continue
# Exclude by article status: skip the article if it has any of
# the given statuses (a continue inside an inner for loop would
# only skip that inner loop, not the article)
if any( self.article_has_status( status, title=article )
for status in exclude_article_status ):
continue
# Only include by article status: skip the article unless it
# has all of the given statuses
if any( not self.article_has_status( status, title=article )
for status in onlyinclude_article_status ):
continue
# Yield filtered pages
yield page
def update_status( self ):
"""
Sets the fam status based on the article statuses once worked on
"""
for article in self.articlesList:
if not article:
break
if self.article_has_status( "note_rej", title=article ):
self.status.add( "note_rej" )
if self.article_has_status( "sav_err", title=article ):
self.status.add( "sav_err" )
if not self.status.has( "sav_err" ) and \
not self.status.has( "note_rej" ):
self.status.add( "marked" )
def get_disc_link( self ):
"""
Constructs and returns the link to Redundancy discussion
articlesList = []
for key in sorted( mysql_data.keys() ):
if 'article' in key and mysql_data[ key ]:
articlesList.append( mysql_data[ key ] )
@returns Link to discussion
@rtype str
"""
# We need to Replace Links with their linktext
anchor_code = mwparser.parse( self.heading.strip() )
for link in anchor_code.ifilter_wikilinks():
if link.text:
text = link.text
else:
text = link.title
anchor_code.replace( link, text )
# Whitespace is replaced with underscores
anchor_code.replace( " ", "_" )
# We try it without any more parsing, as MW will do that when parsing the page
return ( self.redpage.pagetitle + "#" +
str(anchor_code).strip() )
def generate_disc_notice_template( self ):
"""
Generates notice template to add on discussion Pages of Articles when
redundancy discussion is finished
@return Notice template to add on article disc
@rtype wikicode-node
"""
# Generate template boilerplate
template = mwparser.nodes.template.Template(
jogobot.config['redundances']['disc_notice_template_name'])
# Index of first article's param
param_cnt = 3
super().__init__( articlesList, mysql_data[ 'beginning' ],
mysql_data[ 'ending' ], mysql_data[ 'red_page_id' ],
mysql_data[ 'status' ], mysql_data[ 'fam_hash' ],
mysql_data[ 'heading' ] )
# Iterate over articles in redfam
for article in self.articlesList:
if not article:
break
# Make sure to only use 8 articles (max. param 10)
if param_cnt > 10:
break
# Add param for article
template.add( param_cnt, article, True )
param_cnt += 1
# Add begin
begin = self.beginning.strftime( "%B %Y" )
template.add( "Beginn", begin, True )
# Add end (if not same as begin)
end = self.ending.strftime( "%B %Y" )
if not end == begin:
template.add( "Ende", end, True )
# Add link to related reddisc
template.add( "Diskussion", self.get_disc_link(), True )
# Add signature and timestamp
# Not used atm
# template.add( 1, "-- ~~~~", True )
return template
@classmethod
def list_by_status( cls, status ):
@@ -453,6 +642,21 @@ class RedFamWorker( RedFam ):
print(fam)
raise
@classmethod
def gen_by_status_and_ending( cls, status, ending ):
"""
Yield red_fams stored in db by given status which have an ending after
given one
"""
for redfam in RedFamWorker.session.query(RedFamWorker).filter(
# NOT WORKING WITH OBJECT NOTATION
# RedFamWorker._status.like('archived'),
# RedFamWorker._status.like("%{0:s}%".format(status)),
text("status LIKE '%archived%'"),
RedFamWorker.ending >= ending ):
yield redfam
class RedFamError( Exception ):
"""

lib/redpage.py (90 changes)

@@ -34,74 +34,70 @@ from lib.mysqlred import MysqlRedPage
from lib.redfam import RedFamParser
class RedPage:
class RedPage( MysqlRedPage ):
"""
Class for handling redundancy discussion pages and archives
"""
def __init__( self, page, archive=False ):
def __init__( self, page=None, pageid=None, archive=False ):
"""
Generate a new RedPage object based on the given pywikibot page object
@param page page Pywikibot/MediaWiki page object for page
@param page Pywikibot/MediaWiki page object for page
@type page pywikibot.Page
@param pageid MW-Pageid for related page
@type pageid int
"""
# Save the pywikibot page object
self.page = page
self._archive = archive
if page:
self._page = page
self.__handle_db( )
super().__init__(
pageid=self._page.pageid,
revid=self._page._revid,
pagetitle=self._page.title(),
status=None
)
self.is_page_changed()
self.is_archive()
self._parsed = None
self.session.add(self)
def __handle_db( self ):
"""
Handles opening of db connection
"""
def update( self, page ):
self._page = page
self.revid = page._revid
self.pagetitle = page.title()
self.is_archive()
# We need a connection to our mysqldb
self.__mysql = MysqlRedPage( self.page._pageid )
@property
def page(self):
if not hasattr(self, "_page"):
self._page = pywikibot.Page( pywikibot.Site(), self.pagetitle )
if not self.__mysql.data:
self.__mysql.add_page( self.page.title(), self.page._revid )
def is_page_changed( self ):
"""
Check whether the page was changed since last run
"""
return self._page
if( self.__mysql.data != { 'page_id': self.page._pageid,
'rev_id': self.page._revid,
'page_title': self.page.title(),
'status': self.__mysql.data[ 'status' ] } ):
self._changed = True
else:
self._changed = False
@property
def archive(self):
self.is_archive()
return self.status.has("archive")
def is_archive( self ):
"""
Detects whether the current page is an archive of discussions
"""
if( self._archive or ( u"/Archiv" in self.page.title() ) or
if( ( u"/Archiv" in self.page.title() ) or
( "{{Archiv}}" in self.page.text ) or
( "{{Archiv|" in self.page.text ) ):
return True
self.status.add("archive")
else:
return False
self.status.discard("archive")
def is_parsing_needed( self ):
"""
Decides whether the current RedPage needs to be parsed or not
"""
if( self._changed or self.__mysql.data[ 'status' ] == 0 ):
return True
else:
return False
return self.changedp() or not self.status.has("parsed")
def parse( self ):
"""
@@ -126,26 +122,12 @@ class RedPage:
yield fam
else:
self.status.add("parsed")
self._parsed = True
self.__update_db()
def __update_db( self ):
"""
Updates the page meta data in mysql db
"""
if( self._parsed or not self._changed ):
status = 1
if( self.is_archive() ):
status = 2
else:
status = 0
self.__mysql.update_page( self.page._revid, self.page.title(), status )
@classmethod
def flush_db_cache( cls ):
"""
Calls flush method of Mysql Interface class
"""
MysqlRedPage.flush()
cls.session.commit()
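Reviewer note: because the status columns are declared with MutableSet.as_mutable(Status(...)), an in-place status.add() marks the mapped object as dirty; changedp() (a thin wrapper around session.is_modified()) reports exactly that, and is_parsing_needed() builds on it. A sketch of the interaction, assuming redpage is a RedPage instance already loaded in the session:

from lib.redpage import RedPage

# In-place set mutation is tracked by the MutableSet machinery
redpage.status.add( "archive" )

# changedp() wraps session.is_modified() and now reports the change
assert redpage.changedp()

# flush_db_cache() commits the session, persisting the new status
RedPage.flush_db_cache()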

red.py (4 changes)

@@ -68,6 +68,10 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ):
# Import related bot
from bots.reddiscparser import DiscussionParserBot as Bot
elif subtask == "markpages":
# Import related bot
from bots.markpages import MarkPagesBot as Bot
# Subtask error
else:
jogobot.output( (
