Merge branch 'fs#70-refactoring' into test-v3
This commit is contained in:
2
bots/__init__.py
Normal file
2
bots/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
178
bots/reddiscparser.py
Normal file
178
bots/reddiscparser.py
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# reddiscparser.py
|
||||||
|
#
|
||||||
|
# Copyright 2016 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||||
|
# MA 02110-1301, USA.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
"""
|
||||||
|
Bot to parse all reddisc pages in given Generator or configured categories
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
import pywikibot # noqa
|
||||||
|
from pywikibot import pagegenerators # noqa
|
||||||
|
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
|
||||||
|
|
||||||
|
import jogobot
|
||||||
|
|
||||||
|
from lib import redpage
|
||||||
|
from lib import redfam
|
||||||
|
|
||||||
|
|
||||||
|
class DiscussionParserBot(
        # CurrentPageBot,  # via next two sets 'current_page' on each treat()
        ExistingPageBot,   # CurrentPageBot only treats existing pages
        NoRedirectPageBot ):  # class which only treats non-redirects
    """
    Bot class which initialises the parsing process of redundancy discussions

    The pages to work on are taken from the given GeneratorFactory; if it
    holds no generators, the categories configured in
    jogobot.config["redundances"]["redpage_cats"] are used instead.
    """

    # RegEx to filter wrong pages (compiled once when the class is created)
    onlyinclude_re = re.compile(
        jogobot.config["redundances"]["reddiscs_onlyinclude_re"] )

    def __init__( self, genFactory, **kwargs ):
        """
        Constructor

        Parameters:
            @param genFactory   GenFactory with parsed pagegenerator args to
                                build generator
            @type  genFactory   pagegenerators.GeneratorFactory
            @param **kwargs     Additional args (accepted, but not evaluated
                                by this class)
            @type  **kwargs     dict
        """

        # Copy needed args
        self.genFactory = genFactory

        # Make sure the attribute exists even if build_generator() is not
        # able to build a generator (it only shows the help in that case)
        self.gen = None

        # Build generator with genFactory
        self.build_generator()

        # Run super class init with built generator
        super( DiscussionParserBot, self ).__init__( generator=self.gen )

    def build_generator( self ):
        """
        Builds generator to work on, based on self.genFactory

        Stores the result in self.gen. If no generator could be built,
        the pywikibot help is shown and self.gen is left untouched.
        """
        # Check whether there are generators waiting for factoring, if not
        # use configured categories
        if not self.genFactory.gens:
            self.apply_conf_cat_generators()

        # Create combined Generator (Union of all Generators)
        gen = self.genFactory.getCombinedGenerator()

        if gen:
            # The preloading generator is responsible for downloading multiple
            # pages from the wiki simultaneously.
            self.gen = pagegenerators.PreloadingGenerator( gen )
        else:
            pywikibot.showHelp()

    def apply_conf_cat_generators( self ):
        """
        Builds generators for categories which are read from jogobot.config

        Each generator which could be built is appended to
        self.genFactory.gens.
        """
        # Create Generators for configured Categories
        for category in jogobot.config["redundances"]["redpage_cats"]:
            gen = self.genFactory.getCategoryGen(
                category, gen_func=pagegenerators.CategorizedPageGenerator )

            # If there is one, append to genFactory
            if gen:
                self.genFactory.gens.append( gen )

    def run( self ):
        """
        Controls the overall parsing process, using super class for page switch

        Needed to do things before/after treating pages is done
        """
        super( DiscussionParserBot, self ).run()

        # Only reached if run() of the super class did not raise:
        # all pages were parsed, so flush db write cache
        redpage.RedPage.flush_db_cache()

    def treat_page( self ):
        """
        Handles work on current page (self.current_page)
        """
        title = self.current_page.title()

        # Short circuit excluded pages
        if title in jogobot.config["redundances"]["redpage_exclude"]:
            return

        # Exclude pages which do not match pattern
        if not type( self ).onlyinclude_re.search( title ):
            return

        # Initiate RedPage object
        red_page = redpage.RedPage( self.current_page )

        # Check whether parsing is needed
        if not red_page.is_parsing_needed():
            return

        # Count families for failure analysis
        fam_counter = 0

        # Iterate over returned generator with redfam sections
        for fam in red_page.parse():

            # Run RedFamParser on section text
            redfam.RedFamParser.parser( fam, red_page.page,
                                        red_page.is_archive() )

            fam_counter += 1

        # Whole page parsed without an exception
        if fam_counter:
            # Flush db write cache
            redfam.RedFamParser.flush_db_cache()
            jogobot.output( "Page [[{reddisc}]] parsed".format(
                reddisc=red_page.page.title() ) )
        else:
            # A reddisc page without any redfam is suspicious, warn about it
            jogobot.output(
                "\03{red}" + "Page [[{reddisc}]], ".format(
                    reddisc=red_page.page.title() ) +
                "containing no redfam, parsed!",
                "WARNING" )
|
||||||
2
jogobot
2
jogobot
Submodule jogobot updated: 2173f2984f...28d03f35b8
@@ -33,6 +33,7 @@ except ImportError:
|
|||||||
|
|
||||||
import atexit
|
import atexit
|
||||||
|
|
||||||
|
import pywikibot
|
||||||
from pywikibot import config
|
from pywikibot import config
|
||||||
|
|
||||||
import jogobot
|
import jogobot
|
||||||
@@ -53,6 +54,7 @@ class MysqlRed:
|
|||||||
db_username = config.db_username
|
db_username = config.db_username
|
||||||
db_password = config.db_password
|
db_password = config.db_password
|
||||||
db_name = config.db_username + jogobot.config['db_suffix']
|
db_name = config.db_username + jogobot.config['db_suffix']
|
||||||
|
db_table_prefix = False
|
||||||
|
|
||||||
# Class variables for storing cached querys
|
# Class variables for storing cached querys
|
||||||
_cached_update_data = []
|
_cached_update_data = []
|
||||||
@@ -67,6 +69,14 @@ class MysqlRed:
|
|||||||
@returns mysql-stream MySQL Connection
|
@returns mysql-stream MySQL Connection
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Needs to be generated after Parsing of Args (not at import time)
|
||||||
|
if not type(self).db_table_prefix:
|
||||||
|
type(self).db_table_prefix = \
|
||||||
|
pywikibot.Site().family.dbName(pywikibot.Site().code)
|
||||||
|
|
||||||
|
# Now we can setup prepared queries
|
||||||
|
self._prepare_queries()
|
||||||
|
|
||||||
# Connect to mysqldb only once
|
# Connect to mysqldb only once
|
||||||
if not type( self ).connection:
|
if not type( self ).connection:
|
||||||
|
|
||||||
@@ -87,11 +97,23 @@ class MysqlRed:
|
|||||||
|
|
||||||
type( self ).connection.close()
|
type( self ).connection.close()
|
||||||
|
|
||||||
|
def _prepare_queries( self ):
|
||||||
|
"""
|
||||||
|
Used to replace placeholders in prepared queries
|
||||||
|
"""
|
||||||
|
type(self)._update_query = type(self)._update_query.format(
|
||||||
|
prefix=type(self).db_table_prefix)
|
||||||
|
type(self)._insert_query = type(self)._insert_query.format(
|
||||||
|
prefix=type(self).db_table_prefix)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def flush( cls ):
|
def flush( cls ):
|
||||||
"""
|
"""
|
||||||
Run cached querys
|
Run cached querys
|
||||||
"""
|
"""
|
||||||
|
if not cls.connection:
|
||||||
|
raise MysqlRedConnectionError( "No connection exists!" )
|
||||||
|
|
||||||
cursor = cls.connection.cursor()
|
cursor = cls.connection.cursor()
|
||||||
|
|
||||||
# Execute insert query
|
# Execute insert query
|
||||||
@@ -132,12 +154,13 @@ class MysqlRedPage( MysqlRed ):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# Class variables for storing cached querys
|
# Class variables for storing cached querys
|
||||||
|
# '{prefix}' will be replaced during super().__init__()
|
||||||
_cached_update_data = []
|
_cached_update_data = []
|
||||||
_update_query = 'UPDATE `red_pages` \
|
_update_query = 'UPDATE `{prefix}_red_pages` \
|
||||||
SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'
|
SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'
|
||||||
|
|
||||||
_cached_insert_data = {}
|
_cached_insert_data = {}
|
||||||
_insert_query = 'INSERT INTO `red_pages` \
|
_insert_query = 'INSERT INTO `{prefix}_red_pages` \
|
||||||
( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );'
|
( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );'
|
||||||
|
|
||||||
def __init__( self, page_id ):
|
def __init__( self, page_id ):
|
||||||
@@ -166,8 +189,10 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'
|
|||||||
|
|
||||||
cursor = type( self ).connection.cursor(mysqldb.DictCursor)
|
cursor = type( self ).connection.cursor(mysqldb.DictCursor)
|
||||||
|
|
||||||
cursor.execute( 'SELECT * FROM `red_pages` WHERE `page_id` = ?;',
|
cursor.execute(
|
||||||
( self.__page_id, ) )
|
'SELECT * FROM `{prefix}_red_pages` WHERE `page_id` = ?;'.format(
|
||||||
|
prefix=type(self).db_table_prefix), ( self.__page_id, ) )
|
||||||
|
|
||||||
res = cursor.fetchone()
|
res = cursor.fetchone()
|
||||||
|
|
||||||
if res:
|
if res:
|
||||||
@@ -218,12 +243,11 @@ class MysqlRedFam( MysqlRed ):
|
|||||||
|
|
||||||
# Class variables for storing cached querys
|
# Class variables for storing cached querys
|
||||||
_cached_update_data = []
|
_cached_update_data = []
|
||||||
_update_query = 'UPDATE `red_families` \
|
_update_query = 'UPDATE `{prefix}_red_families` \
|
||||||
SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \
|
SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \
|
||||||
`status`= ? WHERE `fam_hash` = ?;'
|
`status`= ? WHERE `fam_hash` = ?;'
|
||||||
|
|
||||||
_cached_insert_data = {}
|
_cached_insert_data = {}
|
||||||
_insert_query = 'INSERT INTO `red_families` \
|
_insert_query = 'INSERT INTO `{prefix}_red_families` \
|
||||||
( fam_hash, red_page_id, beginning, ending, status, heading, \
|
( fam_hash, red_page_id, beginning, ending, status, heading, \
|
||||||
article0, article1, article2, article3, article4, article5, article6, \
|
article0, article1, article2, article3, article4, article5, article6, \
|
||||||
article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
|
article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
|
||||||
@@ -249,8 +273,10 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
|
|||||||
|
|
||||||
cursor = type( self ).connection.cursor( mysqldb.DictCursor )
|
cursor = type( self ).connection.cursor( mysqldb.DictCursor )
|
||||||
|
|
||||||
cursor.execute( 'SELECT * FROM `red_families` WHERE `fam_hash` = ?;',
|
cursor.execute(
|
||||||
( fam_hash, ) )
|
'SELECT * FROM `{prefix}_red_families` WHERE `fam_hash` = ?;'.
|
||||||
|
format( prefix=type(self).db_table_prefix), ( fam_hash, ) )
|
||||||
|
|
||||||
self.data = cursor.fetchone()
|
self.data = cursor.fetchone()
|
||||||
|
|
||||||
def add_fam( self, articlesList, heading, red_page_id,
|
def add_fam( self, articlesList, heading, red_page_id,
|
||||||
@@ -298,8 +324,9 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
|
|||||||
|
|
||||||
cursor = type( self ).connection.cursor( mysqldb.DictCursor )
|
cursor = type( self ).connection.cursor( mysqldb.DictCursor )
|
||||||
|
|
||||||
cursor.execute( 'SELECT * FROM `red_families` WHERE `status` = ?;',
|
cursor.execute(
|
||||||
( status, ) )
|
'SELECT * FROM `{prefix}_red_families` WHERE `status` = ?;'.format(
|
||||||
|
prefix=type( self ).db_table_prefix), ( status, ) )
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
res = cursor.fetchmany( 1000 )
|
res = cursor.fetchmany( 1000 )
|
||||||
@@ -307,3 +334,17 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
|
|||||||
break
|
break
|
||||||
for row in res:
|
for row in res:
|
||||||
yield row
|
yield row
|
||||||
|
|
||||||
|
|
||||||
|
class MysqlRedError( Exception ):
    """
    Base class of all exceptions raised by this module
    """
|
||||||
|
|
||||||
|
|
||||||
|
class MysqlRedConnectionError( MysqlRedError ):
    """
    Signals problems with the MySQL connection
    """
|
||||||
@@ -35,7 +35,7 @@ import pywikibot # noqa
|
|||||||
from pywikibot.tools import deprecated # noqa
|
from pywikibot.tools import deprecated # noqa
|
||||||
|
|
||||||
import jogobot
|
import jogobot
|
||||||
from mysqlred import MysqlRedFam
|
from lib.mysqlred import MysqlRedFam
|
||||||
|
|
||||||
|
|
||||||
class RedFam:
|
class RedFam:
|
||||||
@@ -137,14 +137,14 @@ class RedFamParser( RedFam ):
|
|||||||
wurde gewünscht von:"
|
wurde gewünscht von:"
|
||||||
__done_notice2 = "{{Erledigt|"
|
__done_notice2 = "{{Erledigt|"
|
||||||
|
|
||||||
def __init__( self, heading, red_page_id, red_page_archive,
|
def __init__( self, heading, red_page, red_page_archive,
|
||||||
beginning, ending=None ):
|
beginning, ending=None ):
|
||||||
"""
|
"""
|
||||||
Creates a RedFam object based on data collected while parsing red_pages
|
Creates a RedFam object based on data collected while parsing red_pages
|
||||||
combined with possibly former known data from db
|
combined with possibly former known data from db
|
||||||
|
|
||||||
@param red_fam_heading str Wikitext heading of section
|
@param red_fam_heading str Wikitext heading of section
|
||||||
@param red_page_id int MediaWiki page_id
|
@param red_page page Pywikibot.page object
|
||||||
@param red_page_archive bool Is red_page an archive
|
@param red_page_archive bool Is red_page an archive
|
||||||
@param beginning datetime Timestamp of beginning
|
@param beginning datetime Timestamp of beginning
|
||||||
str as strptime parseable string
|
str as strptime parseable string
|
||||||
@@ -153,7 +153,7 @@ class RedFamParser( RedFam ):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# Set object attributes:
|
# Set object attributes:
|
||||||
self._red_page_id = red_page_id
|
self._red_page_id = red_page._pageid
|
||||||
self._red_page_archive = red_page_archive
|
self._red_page_archive = red_page_archive
|
||||||
self._fam_hash = None
|
self._fam_hash = None
|
||||||
|
|
||||||
@@ -210,13 +210,14 @@ class RedFamParser( RedFam ):
|
|||||||
@type heading wikicode or mwparser-parseable
|
@type heading wikicode or mwparser-parseable
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Parse heading with mwparse if needed
|
|
||||||
if not isinstance( heading, mwparser.wikicode.Wikicode ):
|
|
||||||
heading = mwparser.parse( heading )
|
|
||||||
|
|
||||||
# Save heading as string
|
# Save heading as string
|
||||||
self._heading = str( heading )
|
self._heading = str( heading )
|
||||||
|
|
||||||
|
# Parse string heading with mwparse again everytime
|
||||||
|
# In some cases the given wikicode is broken due to syntax errors
|
||||||
|
# (Task FS#77)
|
||||||
|
heading = mwparser.parse( self._heading )
|
||||||
|
|
||||||
# Save destinations of wikilinks in headings
|
# Save destinations of wikilinks in headings
|
||||||
self._articlesList = [ str( link.title ) for link
|
self._articlesList = [ str( link.title ) for link
|
||||||
in heading.ifilter_wikilinks() ]
|
in heading.ifilter_wikilinks() ]
|
||||||
@@ -325,23 +326,23 @@ class RedFamParser( RedFam ):
|
|||||||
self._status )
|
self._status )
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@deprecated
|
def is_section_redfam_cb( cls, heading ):
|
||||||
def is_sectionheading( cls, line ):
|
|
||||||
"""
|
"""
|
||||||
Checks wether given line is a red_fam section heading
|
Used as callback for wikicode.get_sections in redpage.parse to
|
||||||
|
select sections which are redfams
|
||||||
@param str line String to check
|
|
||||||
|
|
||||||
@returns bool Returns True if it is a section heading
|
|
||||||
"""
|
"""
|
||||||
|
# Because of strange behavior in some cases, parse heading again
|
||||||
|
# (Task FS#77)
|
||||||
|
heading = mwparser.parse( str( heading ) )
|
||||||
|
|
||||||
if cls.__sectionhead_pat.search( str(line) ):
|
# Make sure we have min. two wikilinks in heading to assume a redfam
|
||||||
|
if len( heading.filter_wikilinks() ) >= 2:
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def parser( cls, text, pageid, isarchive=False ):
|
def parser( cls, text, page, isarchive=False ):
|
||||||
"""
|
"""
|
||||||
Handles parsing of redfam section
|
Handles parsing of redfam section
|
||||||
|
|
||||||
@@ -359,8 +360,21 @@ class RedFamParser( RedFam ):
|
|||||||
# Extract beginnig and maybe ending
|
# Extract beginnig and maybe ending
|
||||||
(beginning, ending) = RedFamParser.extract_dates( text, isarchive )
|
(beginning, ending) = RedFamParser.extract_dates( text, isarchive )
|
||||||
|
|
||||||
|
# Missing beginning (Task: FS#76)
|
||||||
|
# Use first day of month of reddisc
|
||||||
|
if not beginning:
|
||||||
|
match = re.search(
|
||||||
|
jogobot.config["redundances"]["reddiscs_onlyinclude_re"],
|
||||||
|
page.title() )
|
||||||
|
|
||||||
|
if match:
|
||||||
|
beginning = datetime.strptime(
|
||||||
|
"01. {month} {year}".format(
|
||||||
|
month=match.group(1), year=match.group(2)),
|
||||||
|
"%d. %B %Y" )
|
||||||
|
|
||||||
# Create the RedFam object
|
# Create the RedFam object
|
||||||
RedFamParser( heading, pageid, isarchive, beginning, ending )
|
RedFamParser( heading, page, isarchive, beginning, ending )
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def extract_dates( cls, text, isarchive=False ):
|
def extract_dates( cls, text, isarchive=False ):
|
||||||
@@ -401,51 +415,13 @@ class RedFamParser( RedFam ):
|
|||||||
|
|
||||||
else:
|
else:
|
||||||
ending = None
|
ending = None
|
||||||
|
# Missing dates (Task: FS#76)
|
||||||
|
else:
|
||||||
|
beginning = None
|
||||||
|
ending = None
|
||||||
|
|
||||||
return (beginning, ending)
|
return (beginning, ending)
|
||||||
|
|
||||||
@classmethod
|
|
||||||
@deprecated( 'extract_dates' )
|
|
||||||
def is_beginning( cls, line ):
|
|
||||||
"""
|
|
||||||
Returns the first timestamp found in line, otherwise None
|
|
||||||
|
|
||||||
@param str line String to search in
|
|
||||||
|
|
||||||
@returns str Timestamp, otherwise None
|
|
||||||
"""
|
|
||||||
|
|
||||||
return cls.extract_dates( line )[0]
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
@deprecated( 'extract_dates' )
|
|
||||||
def is_ending( cls, line, isarchive=False ):
|
|
||||||
"""
|
|
||||||
Returns the timestamp of done notice ( if one ), otherwise None
|
|
||||||
|
|
||||||
@param line String to search in
|
|
||||||
@type line str
|
|
||||||
@param isarchive If true skip searching done notice (on archivepages)
|
|
||||||
@type isarchive bool
|
|
||||||
|
|
||||||
@returns Timestamp, otherwise None
|
|
||||||
@returntype str
|
|
||||||
"""
|
|
||||||
|
|
||||||
return cls.extract_dates( line )[1]
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
@deprecated( 'extract_dates' )
|
|
||||||
def is_ending2( cls, line ):
|
|
||||||
"""
|
|
||||||
Returns the last timestamp found in line, otherwise None
|
|
||||||
@param str line String to search in
|
|
||||||
|
|
||||||
@returns str Timestamp, otherwise None
|
|
||||||
"""
|
|
||||||
|
|
||||||
return cls.extract_dates( line, True )[1]
|
|
||||||
|
|
||||||
|
|
||||||
class RedFamWorker( RedFam ):
|
class RedFamWorker( RedFam ):
|
||||||
"""
|
"""
|
||||||
@@ -28,9 +28,10 @@ Provides a class for handling redundance discussion pages and archives
|
|||||||
import pywikibot # noqa
|
import pywikibot # noqa
|
||||||
import mwparserfromhell as mwparser
|
import mwparserfromhell as mwparser
|
||||||
|
|
||||||
import jogobot
|
import jogobot # noqa
|
||||||
|
|
||||||
from mysqlred import MysqlRedPage
|
from lib.mysqlred import MysqlRedPage
|
||||||
|
from lib.redfam import RedFamParser
|
||||||
|
|
||||||
|
|
||||||
class RedPage:
|
class RedPage:
|
||||||
@@ -116,7 +117,7 @@ class RedPage:
|
|||||||
# include_lead = if true include first section (intro)
|
# include_lead = if true include first section (intro)
|
||||||
# include_heading = if true include heading
|
# include_heading = if true include heading
|
||||||
fams = self.wikicode.get_sections(
|
fams = self.wikicode.get_sections(
|
||||||
matches=jogobot.config["redundances"]["section_heading_regex"],
|
matches=RedFamParser.is_section_redfam_cb,
|
||||||
include_lead=False, include_headings=True )
|
include_lead=False, include_headings=True )
|
||||||
|
|
||||||
# Iterate over RedFam
|
# Iterate over RedFam
|
||||||
107
parse-pages.py
107
parse-pages.py
@@ -1,107 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# parse-pages.py
|
|
||||||
#
|
|
||||||
# Copyright 2016 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
|
||||||
#
|
|
||||||
# This program is free software; you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU General Public License as published by
|
|
||||||
# the Free Software Foundation; either version 2 of the License, or
|
|
||||||
# (at your option) any later version.
|
|
||||||
#
|
|
||||||
# This program is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# GNU General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU General Public License
|
|
||||||
# along with this program; if not, write to the Free Software
|
|
||||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
||||||
# MA 02110-1301, USA.
|
|
||||||
#
|
|
||||||
#
|
|
||||||
"""
|
|
||||||
Script to parse all redpages in configured categories
|
|
||||||
"""
|
|
||||||
|
|
||||||
import pywikibot
|
|
||||||
from pywikibot import pagegenerators
|
|
||||||
|
|
||||||
import jogobot
|
|
||||||
|
|
||||||
import redpage
|
|
||||||
import redfam
|
|
||||||
|
|
||||||
|
|
||||||
def get_cat_pages( cat ):
    """
    Build a generator yielding all pages listed in the given category

    @param cat          Category to request
    @type  cat          str

    @returns generator  Iterable object with pages of given category
    """
    # The site to work on comes from the pywikibot config; the category
    # object is handed straight to the page generator
    return pagegenerators.CategorizedPageGenerator(
        pywikibot.Category( pywikibot.Site(), cat ) )
|
|
||||||
|
|
||||||
|
|
||||||
def main(*args):
    """
    Parses all redpages of the configured categories

    Logs begin and end of the run and always calls pywikibot.stopme()
    when finished, even if an exception is raised.
    """
    try:
        jogobot.output( "BEGINN – parser-pages.py" )

        # Walk through the configured categories and their pages
        for cat in jogobot.config["redundances"]["redpage_cats"]:

            for page in get_cat_pages( cat ):

                # Pages configured to exclude are skipped
                page_title = page.title()
                if page_title in (
                        jogobot.config["redundances"]["redpage_exclude"] ):
                    continue

                red_page = redpage.RedPage( page )

                # Nothing to do for pages which do not need parsing
                if not red_page.is_parsing_needed():
                    continue

                # Run RedFamParser on the text of each redfam section
                for fam in red_page.parse():
                    redfam.RedFamParser.parser( fam, red_page.page._pageid,
                                                red_page.is_archive() )

                # Whole page parsed without an exception:
                # flush db write cache
                redfam.RedFamParser.flush_db_cache()
                jogobot.output( "Page '%s' parsed" %
                                red_page.page.title() )

            # All pages of the category parsed without an exception:
            # flush db write cache
            redpage.RedPage.flush_db_cache()

    finally:
        jogobot.output( "END – parser-pages.py" )
        pywikibot.stopme()
|
|
||||||
|
|
||||||
# Only start processing when executed as a script
if __name__ == "__main__":
    main()
|
|
||||||
118
red.py
Normal file
118
red.py
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# red.py
|
||||||
|
#
|
||||||
|
# Copyright 2016 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||||
|
# MA 02110-1301, USA.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
"""
|
||||||
|
Wrapper script to invoke all redundances bot tasks
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import pywikibot
|
||||||
|
|
||||||
|
import jogobot
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_bot( task_slug, subtask, genFactory, subtask_args ):
    """
    Handles importing subtask Bot class and prepares specific args

    Raises an exception if the requested subtask does not exist.

    @param task_slug    Task slug, needed for logging
                        (currently not evaluated here)
    @type  task_slug    str
    @param subtask      Slug of given subtask
    @type  subtask      str
    @param genFactory   GenFactory with parsed pagegenerator args
    @type  genFactory   pagegenerators.GeneratorFactory
    @param subtask_args Additional args for subtasks
                        (currently not evaluated here)
    @type  subtask_args dict

    @returns            The following tuple
    @return 1           Subtask slug (replaced None for default)
    @rtype              str
    @return 2           Botclass of given subtask (Arg "-task")
    @rtype              Class
    @return 3           GenFactory with parsed pagegenerator args
    @rtype              pagegenerators.GeneratorFactory
    @return 4           Additional args for subtasks
    @rtype              dict
    @rtype              tuple

    @raises ValueError  If subtask is neither empty nor a known subtask
    """
    # kwargs are passed to selected bot as **kwargs
    kwargs = dict()

    if not subtask or subtask == "discparser":
        # Default case: discparser
        subtask = "discparser"

        # Import related bot (done here, so only the selected bot is loaded)
        from bots.reddiscparser import DiscussionParserBot as Bot

    # Subtask error
    else:
        # '{{red}}' is escaped for str.format and results in the literal
        # '{red}' in the emitted message
        jogobot.output( (
            "\03{{red}} Given subtask \"{subtask}\" " +
            "is not existing!" ).format( subtask=subtask ), "ERROR" )
        raise ValueError(
            "Given subtask \"{subtask}\" is not existing!".format(
                subtask=subtask ) )

    return ( subtask, Bot, genFactory, kwargs )
|
||||||
|
|
||||||
|
|
||||||
|
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """

    # Process global arguments to determine desired site
    local_args = pywikibot.handle_args( args )

    # Get the jogobot-task_slug (basename of current file without ending)
    # splitext only removes a real extension instead of blindly cutting
    # off the last three characters
    task_slug = os.path.splitext( os.path.basename( __file__ ) )[0]

    # Disabled until [FS#86] is done
    # Before run, we need to check whether we are currently active or not
    # if not jogobot.bot.active( task_slug ):
    #     return

    # Parse local Args to get information about subtask
    ( subtask, genFactory, subtask_args ) = jogobot.bot.parse_local_args(
        local_args )

    # select subtask and prepare args
    ( subtask, Bot, genFactory, kwargs ) = prepare_bot(
        task_slug, subtask, genFactory, subtask_args )

    # Init Bot
    bot = jogobot.bot.init_bot( task_slug, subtask, Bot, genFactory, **kwargs )

    # Run bot
    jogobot.bot.run_bot( task_slug, subtask, bot )
|
||||||
|
|
||||||
|
|
||||||
|
# Only invoke the bot when executed as a script
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user