Files
jogobot-red/lib/mysqlred.py
Jonathan Golder 6e973369cd sqlalchemy working for parser
Needs some testing, presumably contains some bugs
2017-03-09 00:08:48 +01:00

603 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# mysqlred.py
#
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides interface classes for communication between the redundances bot and
the MySQL database
"""
# Prefer oursql over MySQLdb
try:
import oursql as mysqldb
except ImportError:
import MySQLdb as mysqldb
import atexit
import pywikibot
from pywikibot import config
import jogobot
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
# Connection URL of the bot database. The database name is the database user
# name followed by the suffix configured in jogobot.
# NOTE(review): the driver is fixed to oursql here, although the DBAPI import
# at the top of this module falls back to MySQLdb -- confirm that oursql is
# always available where this module gets imported.
url = URL(
    "mysql+oursql",
    host=config.db_hostname,
    port=config.db_port,
    username=config.db_username,
    password=config.db_password,
    database=config.db_username + jogobot.config['db_suffix'],
)

# Module-wide engine, created at import time. echo=True lets SQLAlchemy log
# the statements it emits.
engine = create_engine(url, echo=True)
from sqlalchemy.ext.declarative import (
declarative_base, declared_attr, has_inherited_table )
# Declarative base class, the mapped classes of this module derive from it
Base = declarative_base()
from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey
from sqlalchemy.orm import sessionmaker, relationship, composite
from sqlalchemy.ext.mutable import MutableComposite, MutableSet
from sqlalchemy.orm.collections import attribute_mapped_collection
import sqlalchemy.types as types
# Session factory bound to the module-level engine
Session = sessionmaker(bind=engine)
# One session instance created at import time; the Mysql mixin exposes it to
# the mapped classes as its ``session`` attribute
session = Session()
# String used as prefix of the database table names (see Mysql._tableprefix)
family = "dewpbeta"
class Mysql(object):
    """
    Mixin providing table naming and session access for the mapped classes

    The table name of a mapped class is built from its class name: the first
    len("Mysql") characters are dropped, the remainder is lower-cased and
    enclosed by the table prefix and the table suffix.
    """

    # Module-level session, shared by all classes using this mixin
    session = session

    @declared_attr
    def _tableprefix(cls):
        """
        Prefix of the table name (family followed by an underscore)

        @returns    Table name prefix
        @rtype      str
        """
        return family + "_"

    @declared_attr
    def _tablesuffix(cls):
        """
        Suffix of the table name

        @returns    Table name suffix
        @rtype      str
        """
        return "s"

    @declared_attr
    def __tablename__(cls):
        """
        Table name of the mapped class

        @returns    Table name, None if the class already inherits a table
        @rtype      str|None
        """
        # No table name of its own for classes inheriting a table
        if has_inherited_table(cls):
            return None

        # Class names start with "Mysql", which is not part of the table name
        name = cls.__name__[len("Mysql"):].lower()
        return cls._tableprefix + name + cls._tablesuffix

    def changedp(self):
        """
        Tells whether the object is marked as changed in the session

        @returns    True if the object is part of the session's dirty
                    collection
        @rtype      bool
        """
        return self in self.session.dirty
class ColumnList( list, MutableComposite ):
    """
    Combines multiple columns into a list-like object
    """

    def __init__( self, *columns ):
        """
        Wrapper to the list constructor deciding whether we have
        initialization with individual params per column or with an iterable.

        @param columns  Either one value per column (str, MutableSet or None)
                        or one single iterable containing the values
        """
        # Called without any value: nothing to inspect, create an empty list
        if not columns:
            super().__init__()

        # Individual params per column (from db), the first one is a str,
        # a status set or None
        elif columns[0] is None or \
                isinstance( columns[0], (str, MutableSet) ):
            super().__init__( columns )

        # Iterable containing the values
        else:
            super().__init__( columns[0] )

    def __setitem__(self, key, value):
        """
        The MutableComposite class needs to be notified about changes in our
        components. So we tweak the setitem process.

        @param key      Index (or slice) to assign to
        @param value    New value
        """
        # Set the item
        super().__setitem__( key, value )

        # Alert all parents to the change
        self.changed()

    def __composite_values__(self):
        """
        The composite mapping needs this method to get the items for the db.

        @returns    The list itself, one item per column
        @rtype      ColumnList
        """
        return self
class Status( types.TypeDecorator ):
    """
    Column type keeping a set of status strings as one comma-separated
    string in the database
    """

    impl = types.String

    def process_bind_param(self, value, dialect):
        """
        Returns status as comma-separated string (to save in DB)

        @param value        Status set, raw status string or None
        @type value         set|str|None
        @param dialect      Dialect in use (not evaluated)
        @returns            Raw status string, None if value is None
        @rtype              str|None
        @raises TypeError   If value has any other type
        """
        # Raw strings and NULL are passed through unchanged
        if value is None or isinstance(value, str):
            return value

        # MutableSet is a set subclass, so it is covered here as well
        if isinstance(value, (set, frozenset)):
            return ",".join( value )

        raise TypeError(
            "Status value must be a set, a str or None, got {0}".format(
                type(value).__name__ ) )

    def process_result_value(self, value, dialect):
        """
        Creates the status set based on a comma-separated list

        @param value    Comma-separated string of stati (from DB)
        @type value     str|None
        @param dialect  Dialect in use (not evaluated)
        @returns        Set of status strings, empty for empty/NULL values
        @rtype          MutableSet
        """
        if value:
            return MutableSet( value.strip().split(",") )

        return MutableSet()

    def copy(self, **kw):
        """
        Creates a copy of this type keeping the length of the string column

        @returns    New type instance
        @rtype      Status
        """
        return Status(self.impl.length)
class MysqlRedFam( Mysql, Base ):
    """
    Mapped class for red families, identified by their famhash
    """

    famhash = Column( String(64), primary_key=True, unique=True )

    # Up to eight articles per family, only the first two are mandatory
    __article0 = Column('article0', String(255), nullable=False )
    __article1 = Column('article1', String(255), nullable=False )
    __article2 = Column('article2', String(255), nullable=True )
    __article3 = Column('article3', String(255), nullable=True )
    __article4 = Column('article4', String(255), nullable=True )
    __article5 = Column('article5', String(255), nullable=True )
    __article6 = Column('article6', String(255), nullable=True )
    __article7 = Column('article7', String(255), nullable=True )

    # The article columns combined into one list-like attribute
    __articlesList = composite(
        ColumnList, __article0, __article1, __article2, __article3,
        __article4, __article5, __article6, __article7 )

    heading = Column( Text, nullable=False )

    # Name of the referenced table is composed like Mysql.__tablename__
    # composes it for MysqlRedPage (family prefix, "redpage", suffix "s")
    redpageid = Column(
        Integer, ForeignKey( family + "_redpages.pageid" ), nullable=False )

    beginning = Column( DateTime, nullable=False )
    ending = Column( DateTime, nullable=True )

    # Status of the whole family
    __status = Column(
        'status', MutableSet.as_mutable(Status(255)), nullable=True )

    # One status per article column
    __article0_status = Column(
        'article0_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article1_status = Column(
        'article1_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article2_status = Column(
        'article2_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article3_status = Column(
        'article3_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article4_status = Column(
        'article4_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article5_status = Column(
        'article5_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article6_status = Column(
        'article6_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article7_status = Column(
        'article7_status', MutableSet.as_mutable(Status(64)), nullable=True )

    # The article status columns combined into one list-like attribute
    __articlesStatus = composite(
        ColumnList, __article0_status, __article1_status, __article2_status,
        __article3_status, __article4_status, __article5_status,
        __article6_status, __article7_status )

    # NOTE(review): the target is referenced by the name "RedPage", while the
    # mapped class defined in this module is named MysqlRedPage -- confirm
    # that a class "RedPage" is registered where this relationship is used.
    redpage = relationship( "RedPage", back_populates="redfams" )

    @property
    def articlesList(self):
        """
        List of articles belonging to the redfam
        """
        return self.__articlesList

    @articlesList.setter
    def articlesList(self, articlesList):
        # Work on a copy, the object handed in by the caller stays untouched
        articles = list( articlesList )

        # Make sure to always have full length for complete overwrites
        articles.extend( [None] * ( 8 - len(articles) ) )

        self.__articlesList = ColumnList(articles)

    @property
    def status( self ):
        """
        Current fam status
        """
        return self.__status

    @status.setter
    def status( self, status ):
        # Falsy values (None, empty containers) result in an empty set
        self.__status = MutableSet( status ) if status else MutableSet()

    @property
    def articlesStatus(self):
        """
        List of status strings/sets for the articles of the redfam
        """
        return self.__articlesStatus

    @articlesStatus.setter
    def articlesStatus(self, articlesStatus):
        self.__articlesStatus = ColumnList(articlesStatus)
class MysqlRedPage( Mysql, Base ):
    """
    Mapped class for red pages, identified by their pageid
    """

    pageid = Column( Integer, primary_key=True, unique=True )
    revid = Column( Integer, nullable=False, unique=True )
    pagetitle = Column( String(255), nullable=False )
    status = Column( MutableSet.as_mutable(Status(255)), nullable=True )

    # Red families of the page as a dict-like collection keyed by famhash
    redfams = relationship(
        "MysqlRedFam",
        back_populates="redpage",
        order_by=MysqlRedFam.famhash,
        collection_class=attribute_mapped_collection("famhash") )
# Issue the table creation for the mapped classes at import time
Base.metadata.create_all(engine)
#~ class MysqlRed:
#~ """
#~ Basic interface class, containing opening of connection
#~ Specific querys should be defined in descendant classes per data type
#~ """
#~ # Save mysqldb-connection as class attribute to use only one
#~ # in descendant classes
#~ connection = False
#~ db_hostname = config.db_hostname
#~ db_port = config.db_port
#~ db_username = config.db_username
#~ db_password = config.db_password
#~ db_name = config.db_username + jogobot.config['db_suffix']
#~ db_table_prefix = False
#~ # Class variables for storing cached querys
#~ _cached_update_data = []
#~ _update_query = ''
#~ _cached_insert_data = {}
#~ _insert_query = ''
#~ def __init__( self ):
#~ """
#~ Opens a connection to MySQL-DB
#~ @returns mysql-stream MySQL Connection
#~ """
#~ # Needs to be generated after Parsing of Args (not at import time)
#~ if not type(self).db_table_prefix:
#~ type(self).db_table_prefix = \
#~ pywikibot.Site().family.dbName(pywikibot.Site().code)
#~ # Now we can setup prepared queries
#~ self._prepare_queries()
#~ # Connect to mysqldb only once
#~ if not type( self ).connection:
#~ type( self ).connection = mysqldb.connect(
#~ host=type( self ).db_hostname,
#~ port=type( self ).db_port,
#~ user=type( self ).db_username,
#~ passwd=type( self ).db_password,
#~ db=type( self ).db_name )
#~ # Register callback for warning if exiting with cached db write queries
#~ atexit.register( type(self).warn_if_not_flushed )
#~ def __del__( self ):
#~ """
#~ Before deleting class, close connection to MySQL-DB
#~ """
#~ type( self ).connection.close()
#~ def _prepare_queries( self ):
#~ """
#~ Used to replace placeholders in prepared queries
#~ """
#~ type(self)._update_query = type(self)._update_query.format(
#~ prefix=type(self).db_table_prefix)
#~ type(self)._insert_query = type(self)._insert_query.format(
#~ prefix=type(self).db_table_prefix)
#~ @classmethod
#~ def flush( cls ):
#~ """
#~ Run cached querys
#~ """
#~ if not cls.connection:
#~ raise MysqlRedConnectionError( "No connection exists!" )
#~ cursor = cls.connection.cursor()
#~ # Execute insert query
#~ if cls._cached_insert_data:
#~ # Since cls._cached_insert_data is a dict, we need to have a custom
#~ # Generator to iterate over it
#~ cursor.executemany( cls._insert_query,
#~ ( cls._cached_insert_data[ key ]
#~ for key in cls._cached_insert_data ) )
#~ # Reset after writing
#~ cls._cached_insert_data = {}
#~ # Execute update query
#~ # Use executemany since update could not be reduced to one query
#~ if cls._cached_update_data:
#~ cursor.executemany( cls._update_query, cls._cached_update_data )
#~ # Reset after writing
#~ cls._cached_update_data = []
#~ # Commit db changes
#~ if cls._cached_insert_data or cls._cached_update_data:
#~ cls.connection.commit()
#~ @classmethod
#~ def warn_if_not_flushed(cls):
#~ """
#~ Outputs a warning if there are db write queries cached and not flushed
#~ before exiting the program!
#~ """
#~ if cls._cached_update_data or cls._cached_insert_data:
#~ jogobot.output( "Cached Database write querys not flushed!!! " +
#~ "Data loss is possible!", "WARNING" )
#~ class MysqlRedPage( MysqlRed ):
#~ """
#~ MySQL-db Interface for handling querys for RedPages
#~ """
#~ # Class variables for storing cached querys
#~ # '{prefix}' will be replaced during super().__init__()
#~ _cached_update_data = []
#~ _update_query = 'UPDATE `{prefix}_redpages` \
#~ SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;'
#~ _cached_insert_data = {}
#~ _insert_query = 'INSERT INTO `{prefix}_redpages` \
#~ ( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? );'
#~ def __init__( self, pageid ):
#~ """
#~ Creates a new instance, runs __init__ of parent class
#~ """
#~ super().__init__( )
#~ self.__pageid = int( pageid )
#~ self.data = self.get_page()
#~ def __del__( self ):
#~ """
#~ Needed to prevent descendant classes of MYSQL_RED from deleting
#~ connection to db
#~ """
#~ pass
#~ def get_page( self ):
#~ """
#~ Retrieves a red page row from MySQL-Database for given page_id
#~ @param int pageid MediaWiki page_id for page to retrieve
#~ @returns tuple Tuple with data for given page_id
#~ bool FALSE if none found
#~ """
#~ cursor = type( self ).connection.cursor(mysqldb.DictCursor)
#~ cursor.execute(
#~ 'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format(
#~ prefix=type(self).db_table_prefix), ( self.__pageid, ) )
#~ res = cursor.fetchone()
#~ if res:
#~ return res
#~ else:
#~ return False
#~ def add_page( self, pagetitle, revid, status=0 ):
#~ """
#~ Inserts a red page row in MySQL-Database for given pageid
#~ @param int revid MediaWiki current revid
#~ @param str pagetitle MediaWiki new pagetitle
#~ @param int status Page parsing status
#~ """
#~ insert_data = { self.__pageid: ( self.__pageid, pagetitle,
#~ revid, status ) }
#~ type( self )._cached_insert_data.update( insert_data )
#~ # Manualy construct self.data dict
#~ self.data = { 'pageid': self.__pageid, 'revid': revid,
#~ 'pagetitle': pagetitle, 'status': status }
#~ def update_page( self, revid=None, pagetitle=None, status=0 ):
#~ """
#~ Updates the red page row in MySQL-Database for given page_id
#~ @param int revid MediaWiki current rev_id
#~ @param str pagetitle MediaWiki new page_title
#~ @param int status Page parsing status
#~ """
#~ if not pagetitle:
#~ pagetitle = self.data[ 'pagetitle' ]
#~ if not revid:
#~ revid = self.data[ 'revid' ]
#~ type( self )._cached_update_data.append( ( pagetitle, revid,
#~ status, self.__pageid ) )
#~ class MysqlRedFam( MysqlRed ):
#~ """
#~ MySQL-db Interface for handling querys for RedFams
#~ """
#~ # Class variables for storing cached querys
#~ _cached_update_data = []
#~ _update_query = 'UPDATE `{prefix}_redfams` \
#~ SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \
#~ `status`= ? WHERE `famhash` = ?;'
#~ _cached_insert_data = {}
#~ _insert_query = 'INSERT INTO `{prefix}_redfams` \
#~ ( famhash, redpageid, beginning, ending, status, heading, \
#~ article0, article1, article2, article3, article4, article5, article6, \
#~ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
#~ def __init__( self, famhash=None ):
#~ """
#~ Creates a new instance, runs __init__ of parent class
#~ """
#~ self.__famhash = famhash
#~ super().__init__( )
#~ def __del__( self ):
#~ """
#~ Needed to prevent descendant classes of MYSQL_RED from deleting
#~ connection to db
#~ """
#~ pass
#~ def get_fam( self, famhash ):
#~ """
#~ Retrieves a red family row from MySQL-Database for given fam_hash
#~ @returns dict Dictionairy with data for given fam hash
#~ False if none found
#~ """
#~ self.__famhash = famhash
#~ cursor = type( self ).connection.cursor( mysqldb.DictCursor )
#~ cursor.execute(
#~ 'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'.
#~ format( prefix=type(self).db_table_prefix), ( famhash, ) )
#~ self.data = cursor.fetchone()
#~ def add_fam( self, articlesList, heading, redpageid,
#~ beginning, ending=None, status=0 ):
#~ data = [ self.__famhash, redpageid, beginning, ending,
#~ status, heading ]
#~ for article in articlesList:
#~ data.append( str( article ) )
#~ while len( data ) < 14:
#~ data.append( None )
#~ data = tuple( data )
#~ insert_data = { self.__famhash: data }
#~ type( self )._cached_insert_data.update( insert_data )
#~ # Manualy construct self.data dict
#~ data_keys = ( 'famhash', 'redpageid', 'beginning', 'ending',
#~ 'status', 'heading', 'article0', 'article1', 'article2',
#~ 'article3', 'article4', 'article5', 'article6',
#~ 'article7' )
#~ self.data = dict( zip( data_keys, data ) )
#~ def update_fam( self, redpageid, heading, beginning, ending, status ):
#~ """
#~ Updates the red fam row in MySQL-Database for given fam_hash
#~ @param int redpageid MediaWiki page_id
#~ @param datetime beginning Timestamp of beginning
#~ @param datetime ending Timestamp of ending
#~ @param int status red_fam status
#~ """
#~ type( self )._cached_update_data.append( ( redpageid, heading,
#~ beginning, ending, status,
#~ self.__famhash ) )
#~ def get_by_status( self, status ):
#~ """
#~ Generator which fetches redFams with given status from DB
#~ """
#~ cursor = type( self ).connection.cursor( mysqldb.DictCursor )
#~ cursor.execute(
#~ 'SELECT * FROM `{prefix}_redfams` WHERE `status` = LIKE %?%;'.
#~ format( prefix=type( self ).db_table_prefix), ( status, ) )
#~ while True:
#~ res = cursor.fetchmany( 1000 )
#~ if not res:
#~ break
#~ for row in res:
#~ yield row
#~ def get_by_status_and_ending( self, status, ending ):
#~ """
#~ Generator witch fetches redFams with given status from DB
#~ """
#~ cursor = type( self ).connection.cursor( mysqldb.DictCursor )
#~ cursor.execute( (
#~ 'SELECT * ' +
#~ 'FROM `{prefix}_redfams` `F` ' +
#~ 'INNER JOIN `{prefix}_redpages` `P` ' +
#~ 'ON `F`.`status` = ? ' +
#~ 'AND `F`.`ending` >= ? ' +
#~ 'AND `F`.`redpageid` = `P`.`pageid`;').format(
#~ prefix=type( self ).db_table_prefix),
#~ ( status, ending ) )
#~ while True:
#~ res = cursor.fetchmany( 1000 )
#~ if not res:
#~ break
#~ for row in res:
#~ yield row
class MysqlRedError(Exception):
    """
    Root of the exception hierarchy of this module
    """
class MysqlRedConnectionError(MysqlRedError):
    """
    Signals problems with the connection to the MySQL database
    """