Merge branch 'fs#70-refactoring' into test-v3
This commit is contained in:
2
bots/__init__.py
Normal file
2
bots/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
178
bots/reddiscparser.py
Normal file
178
bots/reddiscparser.py
Normal file
@@ -0,0 +1,178 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# reddiscparser.py
|
||||
#
|
||||
# Copyright 2016 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
|
||||
Bot to parse all reddisc pages in given Generator or configured categories
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
import pywikibot # noqa
|
||||
from pywikibot import pagegenerators # noqa
|
||||
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
|
||||
|
||||
import jogobot
|
||||
|
||||
from lib import redpage
|
||||
from lib import redfam
|
||||
|
||||
|
||||
class DiscussionParserBot(
        # CurrentPageBot, # via next two sets 'current_page' on each treat()
        ExistingPageBot,      # CurrentPageBot only treats existing pages
        NoRedirectPageBot ):  # class which only treats non-redirects
    """
    Bot class which initialises the parsing process of redundancy
    discussions (reddiscs).

    Pages are taken from the given GeneratorFactory or, when none was
    supplied on the command line, from the categories configured in
    jogobot.config.
    """

    # RegEx to filter wrong pages: only titles matching this pattern
    # are treated (pattern text comes from the bot configuration)
    onlyinclude_re = re.compile(
        jogobot.config["redundances"]["reddiscs_onlyinclude_re"] )

    def __init__( self, genFactory, **kwargs ):
        """
        Constructor

        Parameters:
        @param genFactory GenFactory with parsed pagegenerator args to
                          build generator
        @type genFactory pagegenerators.GeneratorFactory
        @param **kwargs Additional args
        @type iterable
        """
        # Copy needed args
        self.genFactory = genFactory

        # Build generator with genFactory (sets self.gen)
        self.build_generator()

        # Run super class init with built generator
        super( DiscussionParserBot, self ).__init__( generator=self.gen )

    def build_generator( self ):
        """
        Builds the generator to work on, based on self.genFactory.

        Falls back to the configured categories when no generator args
        were given on the command line; shows pywikibot help when no
        generator could be built at all.
        """
        # Check whether there are generators waiting for factoring;
        # if not, use configured categories
        if not self.genFactory.gens:
            self.apply_conf_cat_generators()

        # Create combined generator (union of all generators)
        gen = self.genFactory.getCombinedGenerator()

        if gen:
            # The preloading generator is responsible for downloading
            # multiple pages from the wiki simultaneously.
            self.gen = pagegenerators.PreloadingGenerator( gen )
        else:
            pywikibot.showHelp()

    def apply_conf_cat_generators( self ):
        """
        Builds generators for the categories read from jogobot.config
        and appends them to self.genFactory.
        """
        # Create generators for configured categories
        for category in jogobot.config["redundances"]["redpage_cats"]:

            gen = self.genFactory.getCategoryGen(
                category, gen_func=pagegenerators.CategorizedPageGenerator )

            # If there is one, append to genFactory
            if gen:
                self.genFactory.gens.append( gen )

    def run( self ):
        """
        Controls the overall parsing process, using the super class for
        the page switching.

        Needed to do things after treating all pages is done.
        """
        # BUGFIX: former code wrapped this in ``try: ... except: raise``,
        # a no-op bare except; run the super class directly instead.
        super( DiscussionParserBot, self ).run()

        # If successfully parsed all pages, flush db write cache
        redpage.RedPage.flush_db_cache()

    def treat_page( self ):
        """
        Handles work on the current page.

        Skips excluded pages and pages whose title does not match the
        configured onlyinclude pattern, then parses all redfam sections
        of the page.
        """
        # Short circuit excluded pages
        if self.current_page.title() in (
                jogobot.config["redundances"]["redpage_exclude"] ):
            return

        # Exclude pages which do not match the onlyinclude pattern
        if not type( self ).onlyinclude_re.search(
                self.current_page.title() ):
            return

        # Initiate RedPage object
        red_page = redpage.RedPage( self.current_page )

        # Check whether parsing is needed
        if not red_page.is_parsing_needed():
            return

        # Count families for failure analysis
        fam_counter = 0

        # Iterate over returned generator with redfam sections
        for fam in red_page.parse():

            # Run RedFamParser on section text
            redfam.RedFamParser.parser( fam, red_page.page,
                                        red_page.is_archive() )

            fam_counter += 1

        # NOTE: the former ``for ... else`` always ran (no break in the
        # loop body), so plain post-loop code is equivalent.
        if fam_counter:
            # If successfully parsed whole page, flush db write cache
            redfam.RedFamParser.flush_db_cache()
            jogobot.output( "Page [[{reddisc}]] parsed".format(
                reddisc=red_page.page.title() ) )
        else:
            # Page matched but contained no redfam section — warn
            jogobot.output(
                "\03{red}" + "Page [[{reddisc}]], ".format(
                    reddisc=red_page.page.title() ) +
                "containing no redfam, parsed!",
                "WARNING" )
|
||||
2
jogobot
2
jogobot
Submodule jogobot updated: 2173f2984f...28d03f35b8
@@ -33,6 +33,7 @@ except ImportError:
|
||||
|
||||
import atexit
|
||||
|
||||
import pywikibot
|
||||
from pywikibot import config
|
||||
|
||||
import jogobot
|
||||
@@ -53,6 +54,7 @@ class MysqlRed:
|
||||
db_username = config.db_username
|
||||
db_password = config.db_password
|
||||
db_name = config.db_username + jogobot.config['db_suffix']
|
||||
db_table_prefix = False
|
||||
|
||||
# Class variables for storing cached querys
|
||||
_cached_update_data = []
|
||||
@@ -67,6 +69,14 @@ class MysqlRed:
|
||||
@returns mysql-stream MySQL Connection
|
||||
"""
|
||||
|
||||
# Needs to be generated after Parsing of Args (not at import time)
|
||||
if not type(self).db_table_prefix:
|
||||
type(self).db_table_prefix = \
|
||||
pywikibot.Site().family.dbName(pywikibot.Site().code)
|
||||
|
||||
# Now we can setup prepared queries
|
||||
self._prepare_queries()
|
||||
|
||||
# Connect to mysqldb only once
|
||||
if not type( self ).connection:
|
||||
|
||||
@@ -87,11 +97,23 @@ class MysqlRed:
|
||||
|
||||
type( self ).connection.close()
|
||||
|
||||
def _prepare_queries( self ):
|
||||
"""
|
||||
Used to replace placeholders in prepared queries
|
||||
"""
|
||||
type(self)._update_query = type(self)._update_query.format(
|
||||
prefix=type(self).db_table_prefix)
|
||||
type(self)._insert_query = type(self)._insert_query.format(
|
||||
prefix=type(self).db_table_prefix)
|
||||
|
||||
@classmethod
|
||||
def flush( cls ):
|
||||
"""
|
||||
Run cached querys
|
||||
"""
|
||||
if not cls.connection:
|
||||
raise MysqlRedConnectionError( "No connection exists!" )
|
||||
|
||||
cursor = cls.connection.cursor()
|
||||
|
||||
# Execute insert query
|
||||
@@ -132,12 +154,13 @@ class MysqlRedPage( MysqlRed ):
|
||||
"""
|
||||
|
||||
# Class variables for storing cached querys
|
||||
# '{prefix}' will be replaced during super().__init__()
|
||||
_cached_update_data = []
|
||||
_update_query = 'UPDATE `red_pages` \
|
||||
_update_query = 'UPDATE `{prefix}_red_pages` \
|
||||
SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'
|
||||
|
||||
_cached_insert_data = {}
|
||||
_insert_query = 'INSERT INTO `red_pages` \
|
||||
_insert_query = 'INSERT INTO `{prefix}_red_pages` \
|
||||
( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );'
|
||||
|
||||
def __init__( self, page_id ):
|
||||
@@ -166,8 +189,10 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'
|
||||
|
||||
cursor = type( self ).connection.cursor(mysqldb.DictCursor)
|
||||
|
||||
cursor.execute( 'SELECT * FROM `red_pages` WHERE `page_id` = ?;',
|
||||
( self.__page_id, ) )
|
||||
cursor.execute(
|
||||
'SELECT * FROM `{prefix}_red_pages` WHERE `page_id` = ?;'.format(
|
||||
prefix=type(self).db_table_prefix), ( self.__page_id, ) )
|
||||
|
||||
res = cursor.fetchone()
|
||||
|
||||
if res:
|
||||
@@ -218,12 +243,11 @@ class MysqlRedFam( MysqlRed ):
|
||||
|
||||
# Class variables for storing cached querys
|
||||
_cached_update_data = []
|
||||
_update_query = 'UPDATE `red_families` \
|
||||
_update_query = 'UPDATE `{prefix}_red_families` \
|
||||
SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \
|
||||
`status`= ? WHERE `fam_hash` = ?;'
|
||||
|
||||
_cached_insert_data = {}
|
||||
_insert_query = 'INSERT INTO `red_families` \
|
||||
_insert_query = 'INSERT INTO `{prefix}_red_families` \
|
||||
( fam_hash, red_page_id, beginning, ending, status, heading, \
|
||||
article0, article1, article2, article3, article4, article5, article6, \
|
||||
article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
|
||||
@@ -249,8 +273,10 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
|
||||
|
||||
cursor = type( self ).connection.cursor( mysqldb.DictCursor )
|
||||
|
||||
cursor.execute( 'SELECT * FROM `red_families` WHERE `fam_hash` = ?;',
|
||||
( fam_hash, ) )
|
||||
cursor.execute(
|
||||
'SELECT * FROM `{prefix}_red_families` WHERE `fam_hash` = ?;'.
|
||||
format( prefix=type(self).db_table_prefix), ( fam_hash, ) )
|
||||
|
||||
self.data = cursor.fetchone()
|
||||
|
||||
def add_fam( self, articlesList, heading, red_page_id,
|
||||
@@ -298,8 +324,9 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
|
||||
|
||||
cursor = type( self ).connection.cursor( mysqldb.DictCursor )
|
||||
|
||||
cursor.execute( 'SELECT * FROM `red_families` WHERE `status` = ?;',
|
||||
( status, ) )
|
||||
cursor.execute(
|
||||
'SELECT * FROM `{prefix}_red_families` WHERE `status` = ?;'.format(
|
||||
prefix=type( self ).db_table_prefix), ( status, ) )
|
||||
|
||||
while True:
|
||||
res = cursor.fetchmany( 1000 )
|
||||
@@ -307,3 +334,17 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
|
||||
break
|
||||
for row in res:
|
||||
yield row
|
||||
|
||||
|
||||
class MysqlRedError(Exception):
|
||||
"""
|
||||
Basic Exception class for this module
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class MysqlRedConnectionError(MysqlRedError):
|
||||
"""
|
||||
Raised if there are Errors with Mysql-Connections
|
||||
"""
|
||||
pass
|
||||
@@ -35,7 +35,7 @@ import pywikibot # noqa
|
||||
from pywikibot.tools import deprecated # noqa
|
||||
|
||||
import jogobot
|
||||
from mysqlred import MysqlRedFam
|
||||
from lib.mysqlred import MysqlRedFam
|
||||
|
||||
|
||||
class RedFam:
|
||||
@@ -137,14 +137,14 @@ class RedFamParser( RedFam ):
|
||||
wurde gewünscht von:"
|
||||
__done_notice2 = "{{Erledigt|"
|
||||
|
||||
def __init__( self, heading, red_page_id, red_page_archive,
|
||||
def __init__( self, heading, red_page, red_page_archive,
|
||||
beginning, ending=None ):
|
||||
"""
|
||||
Creates a RedFam object based on data collected while parsing red_pages
|
||||
combined with possibly former known data from db
|
||||
|
||||
@param red_fam_heading str Wikitext heading of section
|
||||
@param red_page_id int MediaWiki page_id
|
||||
@param red_page page Pywikibot.page object
|
||||
@param red_page_archive bool Is red_page an archive
|
||||
@param beginning datetime Timestamp of beginning
|
||||
str as strptime parseable string
|
||||
@@ -153,7 +153,7 @@ class RedFamParser( RedFam ):
|
||||
"""
|
||||
|
||||
# Set object attributes:
|
||||
self._red_page_id = red_page_id
|
||||
self._red_page_id = red_page._pageid
|
||||
self._red_page_archive = red_page_archive
|
||||
self._fam_hash = None
|
||||
|
||||
@@ -210,13 +210,14 @@ class RedFamParser( RedFam ):
|
||||
@type heading wikicode or mwparser-parseable
|
||||
"""
|
||||
|
||||
# Parse heading with mwparse if needed
|
||||
if not isinstance( heading, mwparser.wikicode.Wikicode ):
|
||||
heading = mwparser.parse( heading )
|
||||
|
||||
# Save heading as string
|
||||
self._heading = str( heading )
|
||||
|
||||
# Parse string heading with mwparse again everytime
|
||||
# In some cases the given wikicode is broken due to syntax errors
|
||||
# (Task FS#77)
|
||||
heading = mwparser.parse( self._heading )
|
||||
|
||||
# Save destinations of wikilinks in headings
|
||||
self._articlesList = [ str( link.title ) for link
|
||||
in heading.ifilter_wikilinks() ]
|
||||
@@ -325,23 +326,23 @@ class RedFamParser( RedFam ):
|
||||
self._status )
|
||||
|
||||
@classmethod
|
||||
@deprecated
|
||||
def is_sectionheading( cls, line ):
|
||||
def is_section_redfam_cb( cls, heading ):
|
||||
"""
|
||||
Checks wether given line is a red_fam section heading
|
||||
|
||||
@param str line String to check
|
||||
|
||||
@returns bool Returns True if it is a section heading
|
||||
Used as callback for wikicode.get_sections in redpage.parse to
|
||||
select sections which are redfams
|
||||
"""
|
||||
# Because of strange behavior in some cases, parse heading again
|
||||
# (Task FS#77)
|
||||
heading = mwparser.parse( str( heading ) )
|
||||
|
||||
if cls.__sectionhead_pat.search( str(line) ):
|
||||
# Make sure we have min. two wikilinks in heading to assume a redfam
|
||||
if len( heading.filter_wikilinks() ) >= 2:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def parser( cls, text, pageid, isarchive=False ):
|
||||
def parser( cls, text, page, isarchive=False ):
|
||||
"""
|
||||
Handles parsing of redfam section
|
||||
|
||||
@@ -359,8 +360,21 @@ class RedFamParser( RedFam ):
|
||||
# Extract beginnig and maybe ending
|
||||
(beginning, ending) = RedFamParser.extract_dates( text, isarchive )
|
||||
|
||||
# Missing beginning (Task: FS#76)
|
||||
# Use first day of month of reddisc
|
||||
if not beginning:
|
||||
match = re.search(
|
||||
jogobot.config["redundances"]["reddiscs_onlyinclude_re"],
|
||||
page.title() )
|
||||
|
||||
if match:
|
||||
beginning = datetime.strptime(
|
||||
"01. {month} {year}".format(
|
||||
month=match.group(1), year=match.group(2)),
|
||||
"%d. %B %Y" )
|
||||
|
||||
# Create the RedFam object
|
||||
RedFamParser( heading, pageid, isarchive, beginning, ending )
|
||||
RedFamParser( heading, page, isarchive, beginning, ending )
|
||||
|
||||
@classmethod
|
||||
def extract_dates( cls, text, isarchive=False ):
|
||||
@@ -401,51 +415,13 @@ class RedFamParser( RedFam ):
|
||||
|
||||
else:
|
||||
ending = None
|
||||
# Missing dates (Task: FS#76)
|
||||
else:
|
||||
beginning = None
|
||||
ending = None
|
||||
|
||||
return (beginning, ending)
|
||||
|
||||
@classmethod
|
||||
@deprecated( 'extract_dates' )
|
||||
def is_beginning( cls, line ):
|
||||
"""
|
||||
Returns the first timestamp found in line, otherwise None
|
||||
|
||||
@param str line String to search in
|
||||
|
||||
@returns str Timestamp, otherwise None
|
||||
"""
|
||||
|
||||
return cls.extract_dates( line )[0]
|
||||
|
||||
@classmethod
|
||||
@deprecated( 'extract_dates' )
|
||||
def is_ending( cls, line, isarchive=False ):
|
||||
"""
|
||||
Returns the timestamp of done notice ( if one ), otherwise None
|
||||
|
||||
@param line String to search in
|
||||
@type line str
|
||||
@param isarchive If true skip searching done notice (on archivepages)
|
||||
@type isarchive bool
|
||||
|
||||
@returns Timestamp, otherwise None
|
||||
@returntype str
|
||||
"""
|
||||
|
||||
return cls.extract_dates( line )[1]
|
||||
|
||||
@classmethod
|
||||
@deprecated( 'extract_dates' )
|
||||
def is_ending2( cls, line ):
|
||||
"""
|
||||
Returns the last timestamp found in line, otherwise None
|
||||
@param str line String to search in
|
||||
|
||||
@returns str Timestamp, otherwise None
|
||||
"""
|
||||
|
||||
return cls.extract_dates( line, True )[1]
|
||||
|
||||
|
||||
class RedFamWorker( RedFam ):
|
||||
"""
|
||||
@@ -28,9 +28,10 @@ Provides a class for handling redundance discussion pages and archives
|
||||
import pywikibot # noqa
|
||||
import mwparserfromhell as mwparser
|
||||
|
||||
import jogobot
|
||||
import jogobot # noqa
|
||||
|
||||
from mysqlred import MysqlRedPage
|
||||
from lib.mysqlred import MysqlRedPage
|
||||
from lib.redfam import RedFamParser
|
||||
|
||||
|
||||
class RedPage:
|
||||
@@ -116,7 +117,7 @@ class RedPage:
|
||||
# include_lead = if true include first section (intro)
|
||||
# include_heading = if true include heading
|
||||
fams = self.wikicode.get_sections(
|
||||
matches=jogobot.config["redundances"]["section_heading_regex"],
|
||||
matches=RedFamParser.is_section_redfam_cb,
|
||||
include_lead=False, include_headings=True )
|
||||
|
||||
# Iterate over RedFam
|
||||
107
parse-pages.py
107
parse-pages.py
@@ -1,107 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# parse-pages.py
|
||||
#
|
||||
# Copyright 2016 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
|
||||
Script to parse all redpages in configured categories
|
||||
"""
|
||||
|
||||
import pywikibot
|
||||
from pywikibot import pagegenerators
|
||||
|
||||
import jogobot
|
||||
|
||||
import redpage
|
||||
import redfam
|
||||
|
||||
|
||||
def get_cat_pages( cat ):
    """
    Generates an iterable generator object with all pages listed in the
    given category.

    @param cat Category to request
    @type cat str

    @returns generator Iterable object with pages of the given category
    """
    # The site to work on comes from the pywikibot config
    wiki = pywikibot.Site()

    # Wrap the requested category and hand it to the page generator
    return pagegenerators.CategorizedPageGenerator(
        pywikibot.Category( wiki, cat ) )
|
||||
|
||||
|
||||
def main(*args):
    """
    Handles the whole parsing process over all configured categories.
    """
    try:
        jogobot.output( "BEGINN – parser-pages.py" )

        # Iterate over configured categories
        for cat in jogobot.config["redundances"]["redpage_cats"]:

            # Iterate over pages in current cat
            for page in get_cat_pages( cat ):

                # For pages configured to exclude, go on with next page
                if page.title() in (
                        jogobot.config["redundances"]["redpage_exclude"] ):
                    continue

                # Initiate RedPage object
                red_page = redpage.RedPage( page )

                # Check whether parsing is needed
                if red_page.is_parsing_needed():

                    # Iterate over returned generator with redfam sections
                    for fam in red_page.parse():

                        # Run RedFamParser on section text
                        redfam.RedFamParser.parser(
                            fam, red_page.page._pageid,
                            red_page.is_archive() )

                    # If successfully parsed whole page, flush
                    # db write cache
                    redfam.RedFamParser.flush_db_cache()
                    jogobot.output( "Page '%s' parsed" %
                                    red_page.page.title() )

            # If successfully parsed all pages in cat, flush db write cache
            redpage.RedPage.flush_db_cache()

    finally:
        # Always announce the end and release the bot, even on errors
        jogobot.output( "END – parser-pages.py" )
        pywikibot.stopme()
|
||||
|
||||
# Script entry point
if __name__ == "__main__":
    main()
|
||||
118
red.py
Normal file
118
red.py
Normal file
@@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# reddiscparser.py
|
||||
#
|
||||
# Copyright 2016 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
|
||||
Wrapper script to invoke all redundances bot tasks
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
import pywikibot
|
||||
|
||||
import jogobot
|
||||
|
||||
|
||||
def prepare_bot( task_slug, subtask, genFactory, subtask_args ):
    """
    Handles importing the subtask Bot class and prepares specific args.

    Throws an exception if the bot does not exist.

    @param task_slug Task slug, needed for logging
    @type task_slug str
    @param subtask Slug of given subtask
    @type subtask str
    @param genFactory GenFactory with parsed pagegenerator args
    @type genFactory pagegenerators.GeneratorFactory
    @param subtask_args Additional args for subtasks
    @type subtask_args dict

    @returns The following tuple
    @return 1 Subtask slug (replaced None for default)
    @rtype str
    @return 2 Botclass of given subtask (Arg "-task")
    @rtype Class
    @return 3 GenFactory with parsed pagegenerator args
    @rtype pagegenerators.GeneratorFactory
    @return 4 Additional args for subtasks
    @rtype dict

    @raises ValueError If the given subtask slug is unknown
    """
    # kwargs are passed to the selected bot as **kwargs
    kwargs = dict()

    if not subtask or subtask == "discparser":
        # Default case: discparser
        subtask = "discparser"

        # Import related bot
        from bots.reddiscparser import DiscussionParserBot as Bot

    # Subtask error
    else:
        # BUGFIX: the former string concatenation rendered as
        # `Given subtask "X "is not existing!` (space inside the quotes,
        # none before "is"); also raise a specific, messaged exception
        # instead of a bare ``raise Exception`` (ValueError is still an
        # Exception, so existing handlers keep working).
        jogobot.output( (
            "\03{{red}} Given subtask \"{subtask}\" " +
            "is not existing!" ).format( subtask=subtask ), "ERROR" )
        raise ValueError( "Unknown subtask: {subtask}".format(
            subtask=subtask ) )

    return ( subtask, Bot, genFactory, kwargs )
|
||||
|
||||
|
||||
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Process global arguments to determine desired site
    parsed_args = pywikibot.handle_args(args)

    # The jogobot-task_slug is the basename of this file without its ending
    task_slug = os.path.splitext( os.path.basename( __file__ ) )[0]

    # Disabled until [FS#86] is done
    # Before run, we need to check wether we are currently active or not
    # if not jogobot.bot.active( task_slug ):
    #     return

    # Parse local args to get information about the subtask
    ( subtask, genFactory, subtask_args ) = jogobot.bot.parse_local_args(
        parsed_args )

    # Select subtask, resolve its Bot class and prepare its args
    ( subtask, Bot, genFactory, kwargs ) = prepare_bot(
        task_slug, subtask, genFactory, subtask_args )

    # Init bot and run it
    bot = jogobot.bot.init_bot( task_slug, subtask, Bot, genFactory, **kwargs )
    jogobot.bot.run_bot( task_slug, subtask, bot )
|
||||
|
||||
|
||||
# Script entry point
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user