Files
jogobot-red/lib/redpage.py
Jonathan Golder 6e973369cd sqlalchemy working for parser
Needs some testing, presumably contains some bugs
2017-03-09 00:08:48 +01:00

267 lines
7.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# redpage.py
#
# Copyright 2015 GOLDERWEB Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides a class for handling redundancy discussion pages and their archives
"""
import pywikibot # noqa
import mwparserfromhell as mwparser
import jogobot # noqa
#~ from lib.mysqlred import Column, Integer, String, Text, DateTime, ForeignKey, ColumnList, Status
from lib.mysqlred import MysqlRedPage, relationship, MutableSet #MysqlRedFam, Base, composite,
from lib.redfam import RedFam, RedFamParser
from sqlalchemy.orm.collections import attribute_mapped_collection
class RedPage(MysqlRedPage):
    """
    Class for handling redundancy discussion pages and archives

    Combines the database-mapped page record (inherited from MysqlRedPage)
    with the related pywikibot page object and offers helpers to parse the
    page into RedFam sections and to manage the page's status set.
    """

    # TODO: POLYMORPHISM? of BASEClass

    # RedFams belonging to this page, stored in a dict-like collection that
    # is keyed by each RedFam's ``famhash`` attribute
    redfams = relationship(
        "RedFamParser", order_by=RedFamParser.famhash,
        back_populates="redpage",
        collection_class=attribute_mapped_collection("famhash"))

    def __init__(self, page=None, pageid=None, archive=False):
        """
        Generate a new RedPage object based on the given pywikibot page object

        @param page     Pywikibot/MediaWiki page object for page
        @type page      pywikibot.Page
        @param pageid   MW-Pageid for related page; always overridden by the
                        id read from page, only kept for compatibility
        @type pageid    int
        @param archive  If True, the status "archived" is set on the new page
        @type archive   bool
        @raises ValueError  if no page object is provided
        """
        # Revision id and title are read from the page object, so a new
        # RedPage can not be built from a pageid alone. Fail early with a
        # clear message instead of failing later inside the page lookup.
        if not page:
            raise ValueError("Page object is required to create a RedPage!")

        # Save the pywikibot page object
        self._page = page

        # NOTE(review): ``_revid`` is a private pywikibot attribute and is
        # presumably only available once the page has been loaded - confirm,
        # or consider the public ``latest_revision_id`` instead.
        super().__init__(
            pageid=page.pageid,
            revid=page._revid,
            pagetitle=page.title(),
            status=MutableSet())  # TODO: EMPTY MutableSet() necessary?

        if archive:
            self.status.add("archived")

        # Register the new object with the db session (``session`` is not
        # defined in this class, it is expected to come from MysqlRedPage)
        self.session.add(self)

    def update(self, page):
        """
        Replaces the stored page object and refreshes revid and pagetitle

        @param page     Pywikibot/MediaWiki page object for page
        @type page      pywikibot.Page
        """
        self._page = page
        self.revid = page._revid
        self.pagetitle = page.title()

    @property
    def page(self):
        """
        Related pywikibot page object

        If no page object was stored yet (__init__ or update did not run
        for this object), it is created on first access from the stored
        pagetitle.
        """
        if not hasattr(self, "_page"):
            self._page = pywikibot.Page(pywikibot.Site(), self.pagetitle)

        return self._page

    @property
    def archive(self):
        """
        True if the status "archived" is set for this page
        """
        return self.has_status("archived")

    def is_page_changed(self):
        """
        Check whether the page was changed since last run

        Stores the result of changedp() (not defined in this class) in
        self._changed; nothing is returned.
        """
        self._changed = self.changedp()

    def is_archive(self) -> bool:
        """
        Detects whether current page is an archive of discussions

        A page counts as archive if the status "archived" is set, its title
        contains "/Archiv" or its text contains the Archiv template.

        @returns    True if page is an archive else False
        """
        # Cheap checks first, so the page text is only touched if necessary
        if self.archive or "/Archiv" in self.page.title():
            return True

        text = self.page.text
        return "{{Archiv}}" in text or "{{Archiv|" in text

    def is_parsing_needed(self) -> bool:
        """
        Decides whether current RedPage needs to be parsed or not

        @returns    True if changedp() reports a change or the status
                    "parsed" is not set, else False
        """
        return bool(self.changedp() or not self.has_status("parsed"))

    def parse(self):
        """
        Handles the parsing process

        Generator yielding each RedFam section (heading included, lead
        section excluded) of the page as wikicode. The status "parsed" is
        only set after all sections have been consumed by the caller.
        """
        # Generate Wikicode object
        self.wikicode = mwparser.parse(self.page.text)

        # Select RedFam-sections
        # matches=Regexp or
        #   function( gets heading content as wikicode as param 1)
        # include_lead = if true include first section (intro)
        # include_heading = if true include heading
        fams = self.wikicode.get_sections(
            matches=RedFamParser.is_section_redfam_cb,
            include_lead=False, include_headings=True)

        # Iterate over RedFam
        for fam in fams:
            yield fam

        # Only reached when the caller exhausted the generator
        self.status.add("parsed")
        self._parsed = True

    @classmethod
    def flush_db_cache(cls):
        """
        Commits the db session (``session`` is not defined in this class,
        it is expected to come from MysqlRedPage)
        """
        cls.session.commit()

    def add_status(self, status):
        """
        Adds a status specified by status, to status set

        @param status   Statusstring to add
        @type status    str
        """
        self.status.add(status)

    def remove_status(self, status, weak=True):
        """
        Removes a status, specified by status from set. If weak is set to
        False it will throw a KeyError when trying to remove a status not set.

        @param status   Statusstring to remove
        @type status    str
        @param weak     Change behavior on missing status
        @type weak      bool
        """
        if weak:
            self.status.discard(status)
        else:
            self.status.remove(status)

    def has_status(self, status) -> bool:
        """
        Returns True, if redpage has given status

        @param status   Statusstring to check
        @type status    str
        @returns        True if status is present else False
        """
        return status in self.status
#~ def _parse_status(self, raw_status ):
#~ """
#~ Sets status based on comma separated list
#~ @param raw_status Commaseparated string of stati (from DB)
#~ @type raw_status str
#~ """
#~ self._status = set( raw_status.strip().split(","))
#~ def _raw_status( self ):
#~ """
#~ Returns status as commaseparated string (to save in DB)
#~ @returns Raw status string
#~ @rtype str
#~ """
#~ return ",".join( self._status )