Compare commits
247 Commits
| SHA1 | Author | Date | |
|---|---|---|---|
| d4a3dee037 | |||
| a35546e53d | |||
| 661a7f6b4b | |||
| e6ffd7d14a | |||
| 1b82de1eab | |||
| ace17777f5 | |||
| 466b9da886 | |||
| b80f5bd2c9 | |||
| ff2421b63e | |||
| 236ba6a870 | |||
| 0df2017387 | |||
| b9faed8847 | |||
| 5cdccaeec6 | |||
| 54d8b8ea3b | |||
| dea5a393ad | |||
| f021a13202 | |||
| 4c8ba95534 | |||
| 9804db212f | |||
| 68b81b1111 | |||
| 389c48605e | |||
| 95af95aca6 | |||
| 99adad873e | |||
| dbcc2717d7 | |||
| e5a45fa692 | |||
| 63d3f837e9 | |||
| cfb3e8e37c | |||
| dfffe97200 | |||
| 246e94c228 | |||
| 181486c718 | |||
| 4f31b1a792 | |||
| 3fbfd4ccd7 | |||
| 50b0e142ec | |||
| 14db996a43 | |||
| 110589cb5b | |||
| 5c277495a3 | |||
| a466ab4e74 | |||
| 860a285ab0 | |||
| 2c105336b0 | |||
| ea85ca731f | |||
| 6e119ea98f | |||
| 67aaf3cbbe | |||
| fa13e2a5cf | |||
| 562e689418 | |||
| ae1ee7d6a5 | |||
| 93447d8dc6 | |||
| 1b6faf9e53 | |||
| b4c193eedc | |||
| 788a3df0cd | |||
| 04f591b466 | |||
| 9640467f69 | |||
| bfec2abf98 | |||
| 20103d589d | |||
| e18aa96a84 | |||
| 1dd4c7f87e | |||
| 33b2e47312 | |||
| 3bd17ce692 | |||
| 5f4640d5ff | |||
| 7e0456ae4f | |||
| 108b7aa331 | |||
| a3adf31b89 | |||
| 614f288bb9 | |||
| c450a045bf | |||
| 84802cf521 | |||
| 5f6c443ba8 | |||
| 0c135ef1bb | |||
| 8b8221cfcd | |||
| bdccc8417c | |||
| a70835c58a | |||
| ec2b84df2a | |||
| 88848cb084 | |||
| 5057aed0d3 | |||
| 02e53475f1 | |||
| d6f9b460c9 | |||
| ff03ca8f13 | |||
| 88692ca678 | |||
| d9b4fcc0bd | |||
| 22ff78ea98 | |||
| b3cfcdc259 | |||
| b3e0ace2f4 | |||
| f8002c85da | |||
| 49bc05d29b | |||
| 8a26b6d92a | |||
| 49a8230d76 | |||
| 31c10073a2 | |||
| 642a29b022 | |||
| 2f90751dc2 | |||
| 024be69fe1 | |||
| b6d7268a7f | |||
| 526184c1e1 | |||
| 3aa6c5fb1c | |||
| ec8f459db5 | |||
| 3b2cb95f36 | |||
| 41e5cc1a9d | |||
| 9b9d50c4d2 | |||
| a755288700 | |||
| 14ec71dd09 | |||
| e283eb78ac | |||
| cc02006fd2 | |||
| 37b0cbef08 | |||
| 4137d72468 | |||
| cd87d1c2bb | |||
| 456b2ba3d4 | |||
| 47b85a0b5e | |||
| a6fdc974bd | |||
| 30de2a2e12 | |||
| 4a6855cf7b | |||
| 8422d08cb6 | |||
| ed78501821 | |||
| 34e7e0d3be | |||
| f9f081d072 | |||
| 0f930082b4 | |||
| 80c94ccf4f | |||
| 37704c6661 | |||
| 4e4be1c6d0 | |||
| 3e69a1c77e | |||
| 56f326b568 | |||
| 868894a38b | |||
| 65de6decb2 | |||
| 889be30a47 | |||
| 147e96d388 | |||
| 76666aa294 | |||
| db39bb5ff4 | |||
| ec7880207b | |||
| 4aaacf1443 | |||
| 281f1c49a8 | |||
| 3fe47e666f | |||
| e16925197c | |||
| 9ba7d2e517 | |||
| 844fee52ae | |||
| 43e31c108a | |||
| 89b50e3312 | |||
| bf8e47f916 | |||
| 467f829af2 | |||
| 6e973369cd | |||
| 0ebf307bb8 | |||
| 4e4d5005fd | |||
| 65fb2ecb28 | |||
| d55c81c97b | |||
| 870ed4bf25 | |||
| e13320820c | |||
| 4ae562590e | |||
| 6149dcdb8b | |||
| f021f2ea60 | |||
| 8c56125a7b | |||
| c19f642d11 | |||
| 20b811bc2a | |||
| 59d4d23c83 | |||
| 2b93e4cf16 | |||
| 9beca7f6c9 | |||
| c4d8a95672 | |||
| da4f9b5d6b | |||
| ecc78bef96 | |||
| efa919ff27 | |||
| 72c6165de8 | |||
| c0b18f88e5 | |||
| e5989305a4 | |||
| 8ce6f03641 | |||
| 6717fa4fba | |||
| 8acba7d0f9 | |||
| 3723aba578 | |||
| 9d3bc74c80 | |||
| b36dc250d2 | |||
| 4055dc52d8 | |||
| 594130c8a6 | |||
| b271a0b0b1 | |||
| ad088126e7 | |||
| 151c22a735 | |||
| a97d8c722e | |||
| 58dfd8c86a | |||
| 9481116777 | |||
| eaa7596a8f | |||
| 449d83d7b5 | |||
| 4ac9b305f5 | |||
| 604b7bd8b7 | |||
| d0fa15d0ed | |||
| 71e41bfed3 | |||
| 2be0a8903d | |||
| 0ceb2e6e83 | |||
| 3540cc2a7d | |||
| 460d2db183 | |||
| 156f117b18 | |||
| 1679e2ad6a | |||
| b88efb6bdd | |||
| 177a8f920f | |||
| 0549cbd2c2 | |||
| 78eda10562 | |||
| 510771509b | |||
| 71b99b5f58 | |||
| 77d1de4473 | |||
| cac04f344f | |||
| e28acf88d1 | |||
| af48888535 | |||
| ac54aea698 | |||
| 2deb02fe47 | |||
| 1e4c8646bf | |||
| fe2810f07c | |||
| ab430e0085 | |||
| 95be313859 | |||
| 0bb0b2d957 | |||
| db32c9e8f6 | |||
| bd2d221c48 | |||
| ee8ebbc8bc | |||
| dcc4851513 | |||
| 0ea1b0039d | |||
| 2f878ee901 | |||
| 17bfb32ded | |||
| 6cb92c1da7 | |||
| a8605bcee6 | |||
| 5d31bdd7eb | |||
| 7f8ab1897e | |||
| 79dbde2413 | |||
| 36a480a042 | |||
| bd9dbdfa17 | |||
| 944bea488a | |||
| 7cac294181 | |||
| a24f208449 | |||
| 0af7eb11d6 | |||
| 7422307985 | |||
| b81694c6d3 | |||
| a2dfffc74b | |||
| 163972c924 | |||
| baf4ae2a07 | |||
| 10f64199ab | |||
| 24f1a7f516 | |||
| 9113a40704 | |||
| f53a5b3745 | |||
| 673e49c55a | |||
| 24adafeee7 | |||
| b26f04db8c | |||
| f29dfd5003 | |||
| ef9c13324a | |||
| e186f2f22b | |||
| 7d6cd8bb30 | |||
| 4e21b6696a | |||
| 6992f82f02 | |||
| dbcfe8f106 | |||
| 8059bb9992 | |||
| b5ca69077c | |||
| 523d029fdc | |||
| 4518efc504 | |||
| b1b37f9b9e | |||
| 8dc7fe678d | |||
| 53f53ddb8b | |||
| 26f5912f88 | |||
| 1dea5d7e84 | |||
| b514eb5c42 | |||
| db5bb7401e | |||
90 README.md Normal file
@@ -0,0 +1,90 @@
jogobot-red
===========

Dependencies
------------

* pywikibot-core
* mwparserfromhell

The libraries above need to be installed and configured manually, following the [documentation of pywikibot-core](https://www.mediawiki.org/wiki/Manual:Pywikibot).

* SQLAlchemy
* PyMySQL
* [jogobot-core module](https://git.golderweb.de/wiki/jogobot)

Those can be installed using pip and the _requirements.txt_ file provided with this package:

    pip install -r requirements.txt
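For reference, a minimal _requirements.txt_ consistent with the list above could contain just the two PyPI packages shown below (unpinned here; the exact file shipped with the package may differ, and the jogobot-core module is fetched from its own repository):

    SQLAlchemy
    PyMySQL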

Versions
--------

* v1.2.2
    - Fix removed pywikibot config property db_hostname

* v1.2.1
    - Fix [#72](https://git.golderweb.de/wiki/jogobot-red/issues/72)

* v1.2
    - Create a list of redfams/articles missing the reddisc notice

            python red.py -task:missingnotice -family:wikipedia

    - jogobot module no longer included

* v1.1.1
    - Check if moved page exists

* v1.1
    - Improved page filter

* v1.0
    - first stable release
    - less debug output
    - fixed problems with article titles

* test-v7
    - Fixed problem with URL-encoded characters in article titles

* test-v6
    - jogobot status API enabled (bot can be disabled on-wiki)
    - Fixed problem with space between article title and anchor

* test-v5
    - Feature _markpages_ working in full-automatic mode with the _always_ flag

            python red.py -task:markpages -family:wikipedia -always

* test-v4
    - Feature _markpages_ working in semi-automatic mode using the command

            python red.py -task:markpages -family:wikipedia

    - Work on a specific redfam using the param

            -famhash:[sha1-famhash]

    - Use _PyMySQL_ instead of _OurSQL_
    - Correctly parse redfams whose articles have a leading lowercase character or spaces in the wikilink

* test-v3

* test-v2

* test-v1

Bugs
----
[jogobot-red Issues](https://git.golderweb.de/wiki/jogobot-red/issues)

License
-------
GPLv3

Author Information
------------------

Copyright 2018 Jonathan Golder jonathan@golderweb.de https://golderweb.de/

alias Wikipedia.org user _Jogo.obb_ (https://de.wikipedia.org/Benutzer:Jogo.obb)
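The task, family, famhash and always parameters documented above can be combined; for example, a fully automatic run of _markpages_ restricted to a single redundancy family would look like this (the famhash placeholder stands for a real SHA1 family hash):

    python red.py -task:markpages -family:wikipedia -famhash:[sha1-famhash] -always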
2 bots/__init__.py Normal file
@@ -0,0 +1,2 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
357 bots/markpages.py Normal file
@@ -0,0 +1,357 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# markpages.py
#
# Copyright 2017 Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Bot to mark pages which were/are subjects of redundancy discussions
with templates
"""

import re
from datetime import datetime

import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import CurrentPageBot
from pywikibot.diff import PatchManager

import mwparserfromhell as mwparser

import jogobot

from lib.redfam import RedFamWorker


class MarkPagesBot( CurrentPageBot ):  # sets 'current_page' on each treat()
    """
    Bot class to mark pages which were/are subjects of redundancy discussions
    with templates
    """

    def __init__( self, genFactory, **kwargs ):
        """
        Constructor

        Parameters:
        @param genFactory GenFactory with parsed pagegenerator args to
                          build generator
        @type genFactory pagegenerators.GeneratorFactory
        @param **kwargs Additional args
        @type iterable
        """

        # Init attribute
        self.__redfams = None  # Will hold a generator with our redfams

        if "famhash" in kwargs:
            self.famhash = kwargs["famhash"]

        # We do not use the predefined genFactory as there is no sensible
        # case to give a generator via cmd-line for this right now
        self.genFactory = pagegenerators.GeneratorFactory()

        # Build generator with genFactory
        self.build_generator()

        # Run super class init with built generator
        super( MarkPagesBot, self ).__init__(
            generator=self.gen,
            always=True if "always" in kwargs else False )

    def run(self):
        """
        Controls the overall parsing process, using super class for page
        switching

        Needed to do things before/after treating pages is done
        """
        try:

            super( MarkPagesBot, self ).run()

        except:
            raise

        else:
            # Do redfam status updates
            for redfam in self.redfams:
                redfam.update_status()

            RedFamWorker.flush_db_cache()

    @property
    def redfams(self):
        """
        Holds redfams generator to work on in this bot
        """
        # Create generator if not present
        if not self.__redfams:
            end_after = datetime.strptime(
                jogobot.config["red.markpages"]["mark_done_after"],
                "%Y-%m-%d" )

            if hasattr(self, "famhash"):
                self.__redfams = list(
                    RedFamWorker.session.query(RedFamWorker).filter(
                        RedFamWorker.famhash == self.famhash ) )

            else:
                self.__redfams = list( RedFamWorker.gen_by_status_and_ending(
                    "archived", end_after) )

        return self.__redfams

    def build_generator( self ):
        """
        Builds generator to pass to super class
        """
        # Add talkpages to work on to generatorFactory
        self.genFactory.gens.append( self.redfam_talkpages_generator() )

        # Set generator to pass to super class
        # Since PreloadingGenerator mixes up the pages, do not use it right
        # now (FS#148)
        # We can do so for automatic runs (FS#150)
        # self.gen = pagegenerators.PreloadingGenerator(
        #     self.genFactory.getCombinedGenerator() )
        self.gen = self.genFactory.getCombinedGenerator()

    def redfam_talkpages_generator( self ):
        """
        Wraps the redfam.article_generator and
        passes it to pagegenerators.PageWithTalkPageGenerator().
        Then it iterates over the generator and adds a reference to the
        related redfam to each talkpage-object.
        """

        for redfam in self.redfams:

            # We need the talkpage (and only this) of each existing page
            for talkpage in redfam.article_generator(
                    filter_existing=True,
                    exclude_article_status=["marked"],
                    talkpages=True ):

                yield talkpage

    def treat_page( self ):
        """
        Handles work on current page

        We get a reference to the related redfam in current_page.redfam
        """

        # First we need to have the current text of the page
        # and parse it as wikicode
        self.current_wikicode = mwparser.parse( self.current_page.text )

        # Add notice
        # Returns True if added
        # None if already present
        add_ret = self.add_disc_notice_template()

        # Convert wikicode back to string to save
        self.new_text = str( self.current_wikicode )

        # Define edit summary
        summary = jogobot.config["red.markpages"]["mark_done_summary"].format(
            reddisc=self.current_page.redfam.get_disc_link() ).strip()

        # Make sure summary starts with "Bot:"
        if not summary[:len("Bot:")] == "Bot:":
            summary = "Bot: " + summary.strip()

        # will return True if saved
        # False if not saved because of errors
        # None if change was not accepted by user
        save_ret = self.put_current( self.new_text, summary=summary )

        # Get article as named in db
        article = self.current_page.redarticle

        # Status
        if add_ret is None or ( add_ret and save_ret ):
            self.current_page.redfam.article_remove_status(
                "note_rej",
                title=article)
            self.current_page.redfam.article_remove_status(
                "sav_err",
                title=article)
            self.current_page.redfam.article_add_status(
                "marked",
                title=article)
        elif save_ret is None:
            self.current_page.redfam.article_add_status(
                "note_rej",
                title=article)
        else:
            self.current_page.redfam.article_add_status(
                "sav_err",
                title=article)

    def add_disc_notice_template( self ):
        """
        Takes self.current_wikicode and adds the disc notice template after
        the last template in the leading section, or as first element if
        there is no other template in the leading section
        """
        # The notice to add
        self.disc_notice = \
            self.current_page.redfam.generate_disc_notice_template()

        # Check if it is already present in wikicode
        if self.disc_notice_present():
            return

        # Find the right place to insert the notice template
        # Therefore we need the first section (if there is one)
        leadsec = self.current_wikicode.get_sections(
            flat=False, include_lead=True )[0]

        # There is none on empty pages, so we need to check
        if leadsec:
            # Get the last template in leadsec
            ltemplates = leadsec.filter_templates(recursive=False)

            # If there is one, add notice after this
            if ltemplates:

                # Make sure not to separate template and a following comment
                insert_after_index = self.current_wikicode.index(
                    ltemplates[-1] )

                # If there is more content
                if len(self.current_wikicode.nodes) > (insert_after_index + 1):
                    # Filter one linebreak
                    if isinstance( self.current_wikicode.get(
                            insert_after_index + 1),
                            mwparser.nodes.text.Text) and \
                        re.search( r"^\n[^\n\S]+$", self.current_wikicode.get(
                            insert_after_index + 1 ).value ):

                        insert_after_index += 1

                    while len(self.current_wikicode.nodes) > \
                            (insert_after_index + 1) and \
                            isinstance(
                                self.current_wikicode.get(
                                    insert_after_index + 1),
                                mwparser.nodes.comment.Comment ):

                        insert_after_index += 1

                self.current_wikicode.insert_after(
                    self.current_wikicode.get(insert_after_index),
                    self.disc_notice )

                # To have it in its own line we need to add a linebreak
                # before it
                self.current_wikicode.insert_before(self.disc_notice, "\n" )

            # If there is no template, add before first element on page
            else:
                self.current_wikicode.insert( 0, self.disc_notice )

                # To have it in its own line we need to add a linebreak
                # after it
                self.current_wikicode.insert_after(self.disc_notice, "\n" )

        # If there is no leadsec (and therefore no template in it), we will
        # add before the first element
        else:
            self.current_wikicode.insert( 0, self.disc_notice )

            # To have it in its own line we need to add a linebreak after it
            self.current_wikicode.insert_after(self.disc_notice, "\n" )

        # Notice was added
        return True

    def disc_notice_present(self):
        """
        Checks if the disc notice which shall be added is already present.
        """

        if self.disc_notice in self.current_wikicode:
            return True

        # Iterate over templates with the same name (if any) and compare the
        # link to decide if they are the same
        for present_notice in self.current_wikicode.ifilter_templates(
                matches=str(self.disc_notice.name) ):

            # Get reddisc page.title of notice to add
            add_notice_link_tile = self.disc_notice.get(
                "Diskussion").partition("#")[0]
            # Get reddisc page.title of possibly present notice
            present_notice_link_tile = present_notice.get(
                "Diskussion").partition("#")[0]

            # If those are equal, notice is already present
            if add_notice_link_tile == present_notice_link_tile:
                return True

        # If nothing is found, the loop will run till its end
        else:
            return False

    # We need to override this since the original from pywikibot's
    # CurrentPageBot does not return the result of self._save_page
    def put_current(self, new_text, ignore_save_related_errors=None,
                    ignore_server_errors=None, **kwargs):
        """
        Call L{Bot.userPut} but use the current page.

        It compares the new_text to the current page text.

        @param new_text: The new text
        @type new_text: basestring
        @param ignore_save_related_errors: Ignore save related errors and
            automatically print a message. If None uses this instance's
            default.
        @type ignore_save_related_errors: bool or None
        @param ignore_server_errors: Ignore server errors and automatically
            print a message. If None uses this instance's default.
        @type ignore_server_errors: bool or None
        @param kwargs: Additional parameters directly given to L{Bot.userPut}.
        @type kwargs: dict
        """

        # Monkey patch pywikibot.showDiff
        pywikibot.showDiff = showDiff

        if ignore_save_related_errors is None:
            ignore_save_related_errors = self.ignore_save_related_errors
        if ignore_server_errors is None:
            ignore_server_errors = self.ignore_server_errors
        return self.userPut(
            self.current_page, self.current_page.text, new_text,
            ignore_save_related_errors=ignore_save_related_errors,
            ignore_server_errors=ignore_server_errors,
            **kwargs)


# We need a patched version to set the context param to a value greater
# than 0, as pywikibot.bot.userPut() currently does not support this value
def showDiff(oldtext, newtext, context=3):
    """
    Output a string showing the differences between oldtext and newtext.

    The differences are highlighted (only on compatible systems) to show
    which changes were made.
    """
    PatchManager(oldtext, newtext, context=context).print_hunks()
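A minimal driver sketch for the bot above (how red.py actually wires this up is an assumption; MarkPagesBot ignores the passed GeneratorFactory and builds its own talk-page generator from the archived redfams in the database):

    from pywikibot import pagegenerators

    from bots.markpages import MarkPagesBot

    genFactory = pagegenerators.GeneratorFactory()

    # Semi-automatic run over all archived redfams past the configured date
    MarkPagesBot( genFactory ).run()

    # Fully automatic run restricted to one family; the hash is a placeholder
    MarkPagesBot( genFactory, famhash="0123456789abcdef...", always=True ).run()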
201 bots/missingnotice.py Normal file
@@ -0,0 +1,201 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# missingnotice.py
#
# Copyright 2018 Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#

from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL

import pywikibot

import jogobot

from lib.redfam import RedFamWorker


class MissingNoticeBot(pywikibot.bot.Bot):
    """
    Bot to create a list of redfams/articles missing the reddisc notice
    """

    # MySQL-query to get articles with notice
    cat_article_query = """
        SELECT `page_title`
          FROM `categorylinks`
          JOIN `category`
            ON `cl_to` = `cat_title`
           AND `cat_title` LIKE "{cat}\_%%"
          JOIN `page`
            ON `cl_from` = `page_id`
    """.format(cat=jogobot.config["red.missingnotice"]["article_category"])

    def __init__( self, genFactory, **kwargs ):

        self.categorized_articles = list()
        self.page_content = list()

        super(type(self), self).__init__(**kwargs)

    def run( self ):
        # query articles containing the notice
        self.categorized_articles = type(self).get_categorized_articles()

        fam_counter = 0

        # iterate over open redfams
        for redfam in RedFamWorker.gen_open():
            fam_counter += 1
            links = self.treat_open_redfam(redfam)

            if links:
                self.page_content.append( self.format_row( links ) )

            if (fam_counter % 50) == 0:
                jogobot.output( "Processed {n:d} open RedFams".format(
                    n=fam_counter))

        else:
            # To write "absent" states to db
            RedFamWorker.flush_db_cache()

            # Update page content
            self.update_page()

    def treat_open_redfam( self, redfam ):
        """
        Works on the current open redfam

        @param redfam Redfam to work on
        @type redfam redfam.RedFamWorker

        @returns Tuple of disc link and list of articles missing the notice,
                 or None
        @rtype ( str, list(str*) ) or None
        """

        # Check if the related disc section exists
        if not redfam.disc_section_exists():
            return None

        # Get links for articles without notice
        links = self.treat_articles( redfam.article_generator(
            filter_existing=True, filter_redirects=True ) )

        # No articles without notice
        if not links:
            return None

        return ( redfam.get_disc_link(as_link=True), links )

    def treat_articles(self, articles):
        """
        Iterates over the given articles and checks whether they are included
        in self.categorized_articles (contain the notice)

        @param articles Articles to check
        @type articles iterable of pywikibot.page() objects

        @returns Possibly empty list of wikitext links ("[[article]]")
        @rtype list
        """
        links = list()

        for article in articles:

            if article.title(underscore=True, with_section=False ) not in \
                    self.categorized_articles:

                links.append( article.title(as_link=True, textlink=True) )

        return links

    def format_row( self, links ):
        """
        Formats a row for output on the wikipage

        @param links Tuple of disc link and list of articles as returned by
                     self.treat_open_redfam()
        @type links ( str, list(str*) )

        @returns Formatted row text to add to page_content
        @rtype str
        """

        return jogobot.config["red.missingnotice"]["row_format"].format(
            disc=links[0],
            links=jogobot.config["red.missingnotice"]["link_sep"].join(
                links[1] ) )

    def update_page( self, wikipage=None):
        """
        Handles the updating process of the wikipage

        @param wikipage Wikipage to put text on, otherwise use configured page
        @type wikipage str
        """

        # if not given, get wikipage from config
        if not wikipage:
            wikipage = jogobot.config["red.missingnotice"]["wikipage"]

        # Create page object for wikipage
        page = pywikibot.Page(pywikibot.Site(), wikipage)

        # Define edit summary
        summary = jogobot.config["red.missingnotice"]["edit_summary"]

        # Make sure summary starts with "Bot:"
        if not summary[:len("Bot:")] == "Bot:":
            summary = "Bot: " + summary.strip()

        # Concatenate new text
        new_text = "\n".join(self.page_content)

        # Save new text
        self.userPut( page, page.text, new_text, summary=summary )

    @classmethod
    def get_categorized_articles( cls ):
        """
        Queries all articles containing the notice based on the category set
        by the notice template. The category can be configured in
        jogobot.config["red.missingnotice"]["article_category"]

        @returns List of all articles containing the notice
        @rtype list
        """

        # construct connection url for sqlalchemy
        url = URL( "mysql+pymysql",
                   username=pywikibot.config.db_username,
                   password=pywikibot.config.db_password,
                   host=jogobot.config["red.missingnotice"]["wikidb_host"],
                   port=jogobot.config["red.missingnotice"]["wikidb_port"],
                   database=jogobot.config["red.missingnotice"]["wikidb_name"],
                   query={'charset': 'utf8'} )

        # create sqlalchemy engine
        engine = create_engine(url, echo=False)

        # fire the query to get articles with notice
        result = engine.execute(cls.cat_article_query)

        # return list with articles with notice
        return [ row['page_title'].decode("utf-8") for row in result ]
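To illustrate format_row() above without a wiki or database, the snippet below mimics it with hard-coded stand-ins; the real row_format and link_sep values live in jogobot's config under red.missingnotice and are assumptions here:

    row_format = "* {disc}: {links}"   # assumed config value
    link_sep = ", "                    # assumed config value

    links = ( "[[Wikipedia:Redundanz/Januar 2018#Foo - Bar]]",
              [ "[[Foo]]", "[[Bar]]" ] )

    row = row_format.format( disc=links[0],
                             links=link_sep.join( links[1] ) )
    print( row )
    # * [[Wikipedia:Redundanz/Januar 2018#Foo - Bar]]: [[Foo]], [[Bar]]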
182 bots/reddiscparser.py Normal file
@@ -0,0 +1,182 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# reddiscparser.py
#
# Copyright 2017 Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Bot to parse all reddisc pages in a given generator or configured categories
"""

import re

import pywikibot  # noqa
from pywikibot import pagegenerators  # noqa
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot

import jogobot

from lib.redpage import RedPageParser
from lib.redfam import RedFamParser


class DiscussionParserBot(
        # CurrentPageBot,  # via next two sets 'current_page' on each treat()
        ExistingPageBot,  # CurrentPageBot only treats existing pages
        NoRedirectPageBot ):  # class which only treats non-redirects
    """
    Bot class which initialises the parsing process of redundancy discussions
    """

    # RegEx to filter wrong pages
    onlyinclude_re = re.compile(
        jogobot.config["redundances"]["reddiscs_onlyinclude_re"] )

    def __init__( self, genFactory, **kwargs ):
        """
        Constructor

        Parameters:
        @param genFactory GenFactory with parsed pagegenerator args to
                          build generator
        @type genFactory pagegenerators.GeneratorFactory
        @param **kwargs Additional args
        @type iterable
        """

        # Copy needed args
        self.genFactory = genFactory

        # Build generator with genFactory
        self.build_generator()

        # Run super class init with built generator
        super( DiscussionParserBot, self ).__init__(generator=self.gen)

    def build_generator(self):
        """
        Builds generator to work on, based on self.genFactory
        """
        # Check whether there are generators waiting for factoring; if not,
        # use configured categories
        if not self.genFactory.gens:
            self.apply_conf_cat_generators()

        # Create combined generator (union of all generators)
        gen = self.genFactory.getCombinedGenerator()

        if gen:
            # The preloading generator is responsible for downloading multiple
            # pages from the wiki simultaneously.
            self.gen = pagegenerators.PreloadingGenerator(gen)

        else:
            pywikibot.showHelp()

    def apply_conf_cat_generators( self ):
        """
        Builds generators for categories which are read from jogobot.config

        Parameters:
        @param genFactory: The GeneratorFactory to which the built
            generators should be added.
        @type genFactory: pagegenerators.GeneratorFactory
        """
        # Create generators for configured categories
        for category in jogobot.config["redundances"]["redpage_cats"]:
            gen = self.genFactory.getCategoryGen(
                category, gen_func=pagegenerators.CategorizedPageGenerator)

            # If there is one, append to genFactory
            if gen:
                self.genFactory.gens.append(gen)

            # Reset gen for next iteration
            gen = None

    def run( self ):
        """
        Controls the overall parsing process, using super class for page
        switching

        Needed to do things before/after treating pages is done
        """
        try:

            super( DiscussionParserBot, self ).run()

        except:
            raise

        else:

            # If successfully parsed all pages in cat, flush db write cache
            RedPageParser.flush_db_cache()

    def treat_page( self ):
        """
        Handles work on current page
        """

        # Short circuit excluded pages
        if self.current_page.title() in (
                jogobot.config["redundances"]["redpage_exclude"] ):

            return

        # Exclude pages which do not match the pattern
        if not type(self).onlyinclude_re.search( self.current_page.title() ):

            return

        # Initiate RedPage object
        redpage = RedPageParser.session.query(RedPageParser).filter(
            RedPageParser.pageid == self.current_page.pageid ).one_or_none()

        if redpage:
            redpage.update( self.current_page )
        else:
            redpage = RedPageParser( self.current_page )

        # Check whether parsing is needed
        if redpage.is_parsing_needed():
            # Count families for failure analysis
            fam_counter = 0

            # Iterate over returned generator with redfam sections
            for fam in redpage.parse():
                # Run RedFamParser on section text
                RedFamParser.parser( fam, redpage, redpage.archive )

                fam_counter += 1

            else:
                # If successfully parsed whole page, flush
                # db write cache
                if( fam_counter ):

                    RedFamParser.flush_db_cache()
                    jogobot.output( "Page [[{reddisc}]] parsed".format(
                        reddisc=redpage.page.title() ) )
                else:
                    jogobot.output(
                        "\03{red}" + "Page [[{reddisc}]], ".format(
                            reddisc=redpage.page.title() ) +
                        "containing no redfam, parsed!",
                        "WARNING" )
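The onlyinclude_re pattern itself comes from jogobot's config (reddiscs_onlyinclude_re) and is not part of this changeset; the stand-in below only illustrates how such a title filter behaves, mirroring the way lib/redfam.py later reads a month name from group 1 and a year from group 2:

    import re

    # Assumed stand-in for jogobot.config["redundances"]["reddiscs_onlyinclude_re"]
    onlyinclude_re = re.compile( r"Wikipedia:Redundanz/(\w+) (\d{4})" )

    match = onlyinclude_re.search( "Wikipedia:Redundanz/Januar 2018" )
    if match:
        print( match.group(1), match.group(2) )  # Januar 2018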
338 lib/mysqlred.py Normal file
@@ -0,0 +1,338 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# mysqlred.py
#
# Copyright 2017 Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Provides interface classes for communication of the redundancy bot with the
MySQL database
"""

import atexit  # noqa

import pywikibot  # noqa
from pywikibot import config

import jogobot

from sqlalchemy import (
    create_engine, Column, Integer, String, Text, DateTime, ForeignKey )
from sqlalchemy import text  # noqa
from sqlalchemy.engine.url import URL
from sqlalchemy.ext.declarative import (
    declarative_base, declared_attr, has_inherited_table )
from sqlalchemy.ext.mutable import MutableComposite, MutableSet
from sqlalchemy.orm import sessionmaker, relationship, composite
from sqlalchemy.orm.collections import attribute_mapped_collection
import sqlalchemy.types as types


Base = declarative_base()

url = URL( "mysql+pymysql",
           username=config.db_username,
           password=config.db_password,
           host=config.db_hostname_format.format('tools'),
           port=config.db_port,
           database=( config.db_username +
                      jogobot.config['redundances']['db_suffix'] ),
           query={'charset': 'utf8'} )

engine = create_engine(url, echo=False)


Session = sessionmaker(bind=engine)
session = Session()

family = pywikibot.Site().family.dbName(pywikibot.Site().code)


class Mysql(object):
    session = session

    @declared_attr
    def _tableprefix(cls):
        return family + "_"

    @declared_attr
    def _tablesuffix(cls):
        return "s"

    @declared_attr
    def __tablename__(cls):
        if has_inherited_table(cls):
            return None
        name = cls.__name__[len("Mysql"):].lower()
        return cls._tableprefix + name + cls._tablesuffix

    def changedp(self):
        return self.session.is_modified(self)


class MutableSet(MutableSet):
    """
    Extended version of the mutable set for our states
    """

    def has(self, item):
        """
        Check if item is in set

        @param item Item to check
        """
        return item in self

    def add(self, item):
        """
        Extended add method, which only results in a changed object if there
        is really an item added.

        @param item Item to add
        """
        if item not in self:
            super().add(item)

    def discard(self, item):
        """
        Wrapper for extended remove below

        @param item Item to discard
        """
        self.remove(item)

    def remove(self, item, weak=True ):
        """
        Extended remove method, which only results in a changed object if
        there is really an item removed. Additionally, combines remove and
        discard!

        @param item Item to remove/discard
        @param weak Set to False to use remove, else discard behavior
        """
        if item in self:
            if weak:
                super().discard(item)
            else:
                super().remove(item)


class ColumnList( list, MutableComposite ):
    """
    Combines multiple columns into a list-like object
    """

    def __init__( self, *columns ):
        """
        Wrapper to the list constructor deciding whether we have
        initialization with individual params per article or with an
        iterable.
        """
        # Individual params per article (from db), first one is a str
        if isinstance( columns[0], str ) or \
           isinstance( columns[0], MutableSet ) or columns[0] is None:
            super().__init__( columns )
        # Iterable articles list
        else:
            super().__init__( columns[0] )

    def __setitem__(self, key, value):
        """
        The MutableComposite class needs to be notified about changes in our
        component. So we tweak the setitem process.
        """

        # set the item
        super().__setitem__( key, value)

        # alert all parents to the change
        self.changed()

    def __composite_values__(self):
        """
        The composite machinery needs this method to get the items for the db.
        """
        return self


class Status( types.TypeDecorator ):

    impl = types.String

    def process_bind_param(self, value, dialect):
        """
        Returns status as comma-separated string (to save in DB)

        @returns Raw status string
        @rtype str
        """
        if isinstance(value, MutableSet):
            return ",".join( value )
        elif isinstance(value, String ) or value is None:
            return value
        else:
            raise TypeError(
                "Value should be an instance of one of {0:s}, ".format(
                    str( [type(MutableSet()), type(String()), type(None)] ) ) +
                "given value was an instance of {0:s}".format(
                    str(type(value))) )

    def process_result_value(self, value, dialect):
        """
        Sets status based on comma-separated list

        @param value Comma-separated string of statuses (from DB)
        @type value str
        """
        if value:
            return MutableSet( value.strip().split(","))
        else:
            return MutableSet([])

    def copy(self, **kw):
        return Status(self.impl.length)


class MysqlRedFam( Mysql, Base ):

    famhash = Column( String(64), primary_key=True, unique=True )

    __article0 = Column('article0', String(255), nullable=False )
    __article1 = Column('article1', String(255), nullable=False )
    __article2 = Column('article2', String(255), nullable=True )
    __article3 = Column('article3', String(255), nullable=True )
    __article4 = Column('article4', String(255), nullable=True )
    __article5 = Column('article5', String(255), nullable=True )
    __article6 = Column('article6', String(255), nullable=True )
    __article7 = Column('article7', String(255), nullable=True )
    __articlesList = composite(
        ColumnList, __article0, __article1, __article2, __article3,
        __article4, __article5, __article6, __article7 )

    heading = Column( Text, nullable=False )
    redpageid = Column(
        Integer, ForeignKey( family + "_redpages.pageid" ), nullable=False )
    beginning = Column( DateTime, nullable=False )
    ending = Column( DateTime, nullable=True )
    _status = Column( 'status', MutableSet.as_mutable(Status(255)),
                      nullable=True )

    __article0_status = Column(
        'article0_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article1_status = Column(
        'article1_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article2_status = Column(
        'article2_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article3_status = Column(
        'article3_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article4_status = Column(
        'article4_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article5_status = Column(
        'article5_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article6_status = Column(
        'article6_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __article7_status = Column(
        'article7_status', MutableSet.as_mutable(Status(64)), nullable=True )
    __articlesStatus = composite(
        ColumnList, __article0_status, __article1_status, __article2_status,
        __article3_status, __article4_status, __article5_status,
        __article6_status, __article7_status )

    redpage = relationship( "MysqlRedPage", enable_typechecks=False,
                            back_populates="redfams" )

    @property
    def articlesList(self):
        """
        List of articles belonging to the redfam
        """
        return self.__articlesList

    @articlesList.setter
    def articlesList(self, articlesList):
        # Make sure to always have full length for complete overwrites
        while( len(articlesList) < 8 ):
            articlesList.append(None)
        self.__articlesList = ColumnList(articlesList)

    @property
    def status( self ):
        """
        Current fam status
        """
        return self._status

    @status.setter
    def status( self, status ):
        if status:
            self._status = MutableSet( status )
        else:
            self._status = MutableSet()

    @property
    def articlesStatus(self):
        """
        List of status strings/sets for the articles of the redfam
        """
        return self.__articlesStatus

    @articlesStatus.setter
    def articlesStatus(self, articlesStatus):
        self.__articlesStatus = ColumnList(articlesStatus)


class MysqlRedPage( Mysql, Base ):
    pageid = Column( Integer, unique=True, primary_key=True )
    revid = Column( Integer, unique=True, nullable=False )
    pagetitle = Column( String(255), nullable=False )
    __status = Column( 'status', MutableSet.as_mutable(Status(255)),
                       nullable=True )

    redfams = relationship(
        "MysqlRedFam", enable_typechecks=False,
        back_populates="redpage", order_by=MysqlRedFam.famhash,
        collection_class=attribute_mapped_collection("famhash") )

    @property
    def status( self ):
        """
        Current page status
        """
        return self.__status

    @status.setter
    def status( self, status ):
        if status:
            self.__status = MutableSet( status )
        else:
            self.__status = MutableSet()


Base.metadata.create_all(engine)


class MysqlRedError(Exception):
    """
    Basic Exception class for this module
    """
    pass


class MysqlRedConnectionError(MysqlRedError):
    """
    Raised if there are errors with MySQL connections
    """
    pass
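Two small sketches of the behaviour defined above, runnable with plain Python (importing lib.mysqlred itself would already connect to the database); the family value "dewiki" is only an example of what pywikibot's dbName() may return:

    # 1) Table naming: class MysqlRedFam -> "<family>_redfams"
    family = "dewiki"  # example value
    name = "MysqlRedFam"[len("Mysql"):].lower()
    print( family + "_" + name + "s" )  # -> dewiki_redfams

    # 2) Status columns: a set is stored as a comma-separated string and read
    #    back into a set, mirroring Status.process_bind_param/_result_value
    status = {"open", "marked"}
    raw = ",".join( status )
    print( sorted( raw.split(",") ) )  # -> ['marked', 'open']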
851 lib/redfam.py Normal file
@@ -0,0 +1,851 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# redfam.py
|
||||
#
|
||||
# Copyright 2018 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
|
||||
Provides classes for working with RedFams
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import locale
|
||||
import re
|
||||
import urllib.parse
|
||||
from datetime import datetime
|
||||
|
||||
import mwparserfromhell as mwparser # noqa
|
||||
import pywikibot # noqa
|
||||
from pywikibot.tools import deprecated # noqa
|
||||
|
||||
import jogobot
|
||||
from lib.mysqlred import MysqlRedFam, text
|
||||
|
||||
|
||||
class RedFam( MysqlRedFam ):
|
||||
"""
|
||||
Basic class for RedFams, containing the basic data structure
|
||||
"""
|
||||
|
||||
def __init__( self, articlesList, beginning, ending=None, redpageid=None,
|
||||
status=None, famhash=None, heading=None ):
|
||||
"""
|
||||
Generates a new RedFam object
|
||||
|
||||
@param articlesList list List of articles
|
||||
@param beginning datetime Beginning date
|
||||
@param ending datetime Ending date
|
||||
@param red_page_id int MW pageid of containing RedPage
|
||||
@param status str Status of RedFam
|
||||
@param fam_hash str SHA1 hash of articlesList
|
||||
@param heading str Original heading of RedFam (Link)
|
||||
"""
|
||||
|
||||
# Having pywikibot.Site() is a good idea most of the time
|
||||
self.site = pywikibot.Site()
|
||||
|
||||
super().__init__(
|
||||
articlesList=articlesList,
|
||||
beginning=beginning,
|
||||
ending=ending,
|
||||
redpageid=redpageid,
|
||||
famhash=famhash,
|
||||
heading=heading,
|
||||
status=status,
|
||||
articlesStatus=None
|
||||
)
|
||||
|
||||
def __repr__( self ):
|
||||
"""
|
||||
Returns repression str of RedFam object
|
||||
|
||||
@returns str repr() string
|
||||
"""
|
||||
|
||||
__repr = "RedFam( " + \
|
||||
"articlesList=" + repr( self.articlesList ) + \
|
||||
", heading=" + repr( self.heading ) + \
|
||||
", beginning=" + repr( self.beginning ) + \
|
||||
", ending=" + repr( self.ending ) + \
|
||||
", red_page_id=" + repr( self.redpageid ) + \
|
||||
", status=" + repr( self.status ) + \
|
||||
", fam_hash=" + repr( self.famhash ) + \
|
||||
", articlesStatus=" + repr( self.articlesStatus ) + \
|
||||
" )"
|
||||
|
||||
return __repr
|
||||
|
||||
@classmethod
|
||||
def calc_famhash(cls, articlesList ):
|
||||
"""
|
||||
Calculates the SHA-1 hash for the articlesList of redundance family.
|
||||
Since we don't need security SHA-1 is just fine.
|
||||
|
||||
@returns str String with the hexadecimal hash digest
|
||||
"""
|
||||
|
||||
h = hashlib.sha1()
|
||||
# Since articlesList attr of RedFam will have always 8 Members we
|
||||
# need to fill up smaller lists (longers will be cropped below).
|
||||
while len( articlesList) < 8:
|
||||
articlesList.append(None)
|
||||
|
||||
h.update( str( articlesList[:8] ).encode('utf-8') )
|
||||
|
||||
return h.hexdigest()
|
||||
|
||||
@classmethod
|
||||
def flush_db_cache( cls ):
|
||||
"""
|
||||
Calls flush method of Mysql Interface class
|
||||
"""
|
||||
cls.session.commit()
|
||||
|
||||
def article_add_status(self, status, index=None, title=None ):
|
||||
"""
|
||||
Adds a status specified by status, to article (identified by title
|
||||
or index in articlesList) status set
|
||||
|
||||
@param status Statusstring to add
|
||||
@type status str
|
||||
@param index Add to article with index in articlesList
|
||||
@type index int
|
||||
@param title Add to article with title in articlesList
|
||||
@type title str
|
||||
"""
|
||||
if title and not index:
|
||||
index = self.articlesList.index( title )
|
||||
|
||||
if isinstance( index, int ) and index < len(self.articlesList):
|
||||
self.articlesStatus[index].add(status)
|
||||
else:
|
||||
raise IndexError( "No index given or wrong format!")
|
||||
|
||||
def article_remove_status(self, status, index=None, title=None, weak=True):
|
||||
"""
|
||||
Removes a status specified by status, from article (identified by title
|
||||
or index in articlesList) status set
|
||||
If weak is set to False it will throw a KeyError when trying to
|
||||
remove a status not set.
|
||||
|
||||
@param status Statusstring to add
|
||||
@type status str
|
||||
@param index Remove from article with index in articlesList
|
||||
@type index int
|
||||
@param title Remove from article with title in articlesList
|
||||
@type title str
|
||||
@param weak Change behavior on missing status
|
||||
@type bool
|
||||
"""
|
||||
if title and not index:
|
||||
index = self.articlesList.index( title )
|
||||
|
||||
if isinstance( index, int ) and index < len(self.articlesList):
|
||||
if weak:
|
||||
self.articlesStatus[index].discard(status)
|
||||
else:
|
||||
self.articlesStatus[index].remove(status)
|
||||
else:
|
||||
raise IndexError( "No index given or wrong format!")
|
||||
|
||||
def article_has_status(self, status, index=None, title=None ):
|
||||
"""
|
||||
Adds a status specified by status, to articles (identified by title
|
||||
or index in articlesList) status set
|
||||
|
||||
@param status Statusstring to add
|
||||
@type status str
|
||||
@param index Check article with index in articlesList
|
||||
@type index int
|
||||
@param title Check article with title in articlesList
|
||||
@type title str
|
||||
"""
|
||||
if title and not index:
|
||||
index = self.articlesList.index( title )
|
||||
|
||||
if isinstance( index, int ) and index < len(self.articlesList):
|
||||
if status in self.articlesStatus[index]:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
else:
|
||||
raise IndexError( "No index given or wrong format!")
|
||||
|
||||
|
||||
class RedFamParser( RedFam ):
|
||||
"""
|
||||
Provides an interface to RedFam for adding/updating redundance families
|
||||
while parsig redundance pages
|
||||
"""
|
||||
|
||||
# Define the timestamp format
|
||||
__timestamp_format = jogobot.config['redundances']['timestamp_format']
|
||||
|
||||
# Define section heading re.pattern
|
||||
__sectionhead_pat = re.compile( r"^(.*\[\[.+\]\].*\[\[.+\]\].*)" )
|
||||
|
||||
# Define timestamp re.pattern
|
||||
__timestamp_pat = re.compile( jogobot.config['redundances']
|
||||
['timestamp_regex'] )
|
||||
|
||||
# Textpattern for recognisation of done-notices
|
||||
__done_notice = ":<small>Archivierung dieses Abschnittes \
|
||||
wurde gewünscht von:"
|
||||
__done_notice2 = "{{Erledigt|"
|
||||
|
||||
def __init__( self, articlesList, heading, redpage, redpagearchive,
|
||||
beginning, ending=None ):
|
||||
"""
|
||||
Creates a RedFam object based on data collected while parsing red_pages
|
||||
combined with possibly former known data from db
|
||||
|
||||
@param redfam_heading str Wikitext heading of section
|
||||
@param redpage page Pywikibot.page object
|
||||
@param redpagearchive bool Is red_page an archive
|
||||
@param beginning datetime Timestamp of beginning
|
||||
str as strptime parseable string
|
||||
@param ending datetime Timestamp of ending
|
||||
str strptime parseable string
|
||||
"""
|
||||
|
||||
# Calculates the sha1 hash over self._articlesList to
|
||||
# rediscover known redundance families
|
||||
famhash = type(self).calc_famhash(articlesList)
|
||||
|
||||
# Set object attributes:
|
||||
self.redpage = redpage
|
||||
|
||||
# Parse Timestamps
|
||||
beginning = self.__datetime(beginning)
|
||||
if ending:
|
||||
ending = self.__datetime(ending)
|
||||
|
||||
super().__init__( articlesList,
|
||||
beginning,
|
||||
ending=ending,
|
||||
redpageid=redpage.page._pageid,
|
||||
famhash=famhash,
|
||||
heading=heading )
|
||||
|
||||
# Check status changes
|
||||
self.check_status()
|
||||
|
||||
self.session.add(self)
|
||||
|
||||
def update( self, articlesList, heading, redpage, redpagearchive,
|
||||
beginning, ending=None ):
|
||||
|
||||
self.articlesList = articlesList
|
||||
self.heading = heading
|
||||
self.redpage = redpage
|
||||
self.redpageid = redpage.pageid
|
||||
|
||||
self.add_beginning( beginning )
|
||||
|
||||
if ending:
|
||||
self.add_ending( ending )
|
||||
|
||||
self._redpagearchive = redpagearchive
|
||||
|
||||
# Check status changes
|
||||
self.check_status()
|
||||
|
||||
@classmethod
|
||||
def heading_parser( cls, heading ):
|
||||
"""
|
||||
Parses given red_fam_heading string and saves articles list
|
||||
|
||||
@param heading Heading of RedFam-Section
|
||||
@type heading wikicode or mwparser-parseable
|
||||
"""
|
||||
|
||||
# Parse string heading with mwparse again everytime
|
||||
# In some cases the given wikicode is broken due to syntax errors
|
||||
# (Task FS#77)
|
||||
heading = mwparser.parse( str( heading ) )
|
||||
|
||||
articlesList = []
|
||||
for link in heading.ifilter_wikilinks():
|
||||
article = str( link.title ).strip()
|
||||
|
||||
# Short circuit empty links
|
||||
if not article:
|
||||
continue
|
||||
|
||||
# Make sure first letter is uppercase
|
||||
article = article[0].upper() + article[1:]
|
||||
|
||||
# Unquote possible url encoded special chars
|
||||
article = urllib.parse.unquote( article )
|
||||
|
||||
# Split in title and anchor part
|
||||
article = article.split("#", 1)
|
||||
# Replace underscores in title with spaces
|
||||
article[0] = article[0].replace("_", " ")
|
||||
|
||||
if len(article) > 1:
|
||||
# Strip both parts to prevent leading/trailing spaces
|
||||
article[0] = article[0].strip()
|
||||
article[1] = article[1].strip()
|
||||
|
||||
# other way round, replace spaces with underscores in anchors
|
||||
article[1] = article[1].replace(" ", "_")
|
||||
|
||||
# Rejoin title and anchor
|
||||
article = "#".join(article)
|
||||
|
||||
# Add to list
|
||||
articlesList.append(article)
|
||||
|
||||
return articlesList
|
||||
|
||||
def add_beginning( self, beginning ):
|
||||
"""
|
||||
Adds the beginning date of a redundance diskussion to the object
|
||||
|
||||
@param datetime datetime Beginning date
|
||||
"""
|
||||
|
||||
self.beginning = self.__datetime( beginning )
|
||||
|
||||
def add_ending( self, ending ):
|
||||
"""
|
||||
Adds the ending date of a redundance diskussion to the object.
|
||||
|
||||
@param datetime datetime Ending date
|
||||
"""
|
||||
|
||||
self.ending = self.__datetime( ending )
|
||||
|
||||
def __datetime( self, timestamp ):
|
||||
"""
|
||||
Decides wether given timestamp is a parseable string or a
|
||||
datetime object and returns a datetime object in both cases
|
||||
|
||||
@param datetime timestamp Datetime object
|
||||
str timestamp Parseable string with timestamp
|
||||
|
||||
@returns datetime Datetime object
|
||||
"""
|
||||
|
||||
# Make sure locale is set to 'de_DE.UTF-8' to prevent problems
|
||||
# with wrong month abreviations in strptime
|
||||
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
|
||||
|
||||
if( isinstance( timestamp, datetime ) ):
|
||||
return timestamp
|
||||
else:
|
||||
result = datetime.strptime( timestamp,
|
||||
type( self ).__timestamp_format )
|
||||
return result
|
||||
|
||||
def check_status( self ):
|
||||
"""
|
||||
Handles detection of correct status
|
||||
There are three possible stati:
|
||||
- 0 Discussion running --> no ending, page is not an archive
|
||||
- 1 Discussion over --> ending present, page is not an archive
|
||||
- 2 Discussion archived --> ending (normaly) present, page is archive
|
||||
- 3 and greater status was set by worker script, do not change it
|
||||
"""
|
||||
|
||||
# Since we have parsed it, the section can never be absent
|
||||
self.status.remove("absent")
|
||||
|
||||
# No ending, discussion is running:
|
||||
# Sometimes archived discussions also have no detectable ending
|
||||
if not self.ending and not self.redpage.archive:
|
||||
self.status.add("open")
|
||||
else:
|
||||
self.status.remove("open")
|
||||
if not self.redpage.archive:
|
||||
self.status.add("done")
|
||||
else:
|
||||
self.status.remove("done")
|
||||
self.status.remove("open")
|
||||
self.status.add("archived")
|
||||
|
||||
@classmethod
|
||||
def is_section_redfam_cb( cls, heading ):
|
||||
"""
|
||||
Used as callback for wikicode.get_sections in redpage.parse to
|
||||
select sections which are redfams
|
||||
"""
|
||||
# Because of strange behavior in some cases, parse heading again
|
||||
# (Task FS#77)
|
||||
heading = mwparser.parse( str( heading ) )
|
||||
|
||||
# Make sure we have at least two wikilinks in the heading to assume a redfam
|
||||
if len( heading.filter_wikilinks() ) >= 2:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def parser( cls, text, redpage, isarchive=False ):
|
||||
"""
|
||||
Handles parsing of redfam section
|
||||
|
||||
@param text Text of RedFam-Section
|
||||
@type text wikicode or mwparser-parseable
|
||||
"""
|
||||
|
||||
# Parse heading with mwparse if needed
|
||||
if not isinstance( text, mwparser.wikicode.Wikicode ):
|
||||
text = mwparser.parse( text )
|
||||
|
||||
# Extract heading text
|
||||
heading = next( text.ifilter_headings() ).title.strip()
|
||||
|
||||
# Extract beginning and maybe ending
|
||||
(beginning, ending) = RedFamParser.extract_dates( text, isarchive )
|
||||
|
||||
# Missing beginning (Task: FS#76)
|
||||
# Use first day of month of reddisc
|
||||
if not beginning:
|
||||
match = re.search(
|
||||
jogobot.config["redundances"]["reddiscs_onlyinclude_re"],
|
||||
redpage.page.title() )
|
||||
|
||||
if match:
|
||||
beginning = datetime.strptime(
|
||||
"01. {month} {year}".format(
|
||||
month=match.group(1), year=match.group(2)),
|
||||
"%d. %B %Y" )
|
||||
articlesList = RedFamParser.heading_parser( heading )
|
||||
famhash = RedFamParser.calc_famhash( articlesList )
|
||||
|
||||
# Check for existing objects in DB first in current redpage
|
||||
redfam = redpage.redfams.get(famhash)
|
||||
|
||||
with RedFamParser.session.no_autoflush:
|
||||
if not redfam:
|
||||
# Otherwise in db table
|
||||
redfam = RedFamParser.session.query(RedFamParser).filter(
|
||||
RedFamParser.famhash == famhash ).one_or_none()
|
||||
|
||||
if redfam:
|
||||
# Existing redfams need to be updated
|
||||
redfam.update( articlesList, str(heading), redpage, isarchive,
|
||||
beginning, ending )
|
||||
|
||||
else:
|
||||
# Create the RedFam object
|
||||
redfam = RedFamParser( articlesList, str(heading),
|
||||
redpage, isarchive, beginning, ending )
|
||||
|
||||
# Add redfam to redpage object
|
||||
redpage.redfams.set( redfam )
|
||||
|
||||
@classmethod
|
||||
def extract_dates( cls, text, isarchive=False ):
|
||||
"""
|
||||
Returns tuple of the first and maybe last timestamp of a section.
|
||||
Last timestamp is only returned if there is a done notice or param
|
||||
*isarchive* is set to 'True'
|
||||
|
||||
@param text Text to search in
|
||||
@type text Any type castable to str
|
||||
@param isarchive If true skip searching done notice (on archivepages)
|
||||
@type isarchive bool
|
||||
|
||||
@returns Timestamps, otherwise None
|
||||
@returntype tuple of strs
|
||||
"""
|
||||
|
||||
# Match all timestamps
|
||||
matches = cls.__timestamp_pat.findall( str( text ) )
|
||||
if matches:
|
||||
|
||||
# First one is beginning
|
||||
# Since some timestamps are broken we need to reconstruct them
|
||||
# by regex match groups
|
||||
beginning = ( matches[0][0] + ", " + matches[0][1] + ". " +
|
||||
matches[0][2] + ". " + matches[0][3] )
|
||||
|
||||
# Last one maybe is ending
|
||||
# Done notice format 1
|
||||
# Done notice format 2
|
||||
# Or on archivepages
|
||||
if ( cls.__done_notice in text or
|
||||
cls.__done_notice2 in text or
|
||||
isarchive ):
|
||||
|
||||
ending = ( matches[-1][0] + ", " + matches[-1][1] + ". " +
|
||||
matches[-1][2] + ". " + matches[-1][3] )
|
||||
|
||||
else:
|
||||
ending = None
|
||||
# Missing dates (Task: FS#76)
|
||||
else:
|
||||
beginning = None
|
||||
ending = None
|
||||
|
||||
return (beginning, ending)
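A rough illustration of the reconstruction done above, assuming the class timestamp pattern has four capture groups (time, day, German month abbreviation, year) like the pattern used by the earlier line-based parser; the sample text is invented:

import re

timestamp_pat = re.compile(
    r"(\d{2}:\d{2}), (\d{1,2})\. (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez)\.? (\d{4})")

text = "... 13:37, 5. Mär. 2017 (CET) ... {{Erledigt|1=--X 09:00, 7. Mär. 2017 (CET)}}"
matches = timestamp_pat.findall(text)

beginning = "{0}, {1}. {2}. {3}".format(*matches[0])
ending = "{0}, {1}. {2}. {3}".format(*matches[-1])
# beginning == '13:37, 5. Mär. 2017', ending == '09:00, 7. Mär. 2017'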
|
||||
|
||||
|
||||
class RedFamWorker( RedFam ):
|
||||
"""
|
||||
Handles working with redundancy families stored in the database
whose discussion is finished
|
||||
"""
|
||||
def __init__( self ):
|
||||
|
||||
super().__init__()
|
||||
|
||||
# Make sure locale is set to 'de_DE.UTF-8' to prevent problems
|
||||
# with wrong month abbreviations in strptime
|
||||
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
|
||||
|
||||
def article_generator(self, # noqa
|
||||
filter_existing=None, filter_redirects=None,
|
||||
exclude_article_status=[],
|
||||
onlyinclude_article_status=[],
|
||||
talkpages=None ):
|
||||
"""
|
||||
Yields pywikibot page objects for articles belonging to this redfam
|
||||
in a generator
|
||||
|
||||
|
||||
@param filter_existing Set to True to only get existing pages
|
||||
set to False to only get nonexisting pages
|
||||
unset/None results in not filtering
|
||||
@type filter_existing bool/None
|
||||
@param filter_redirects Set to True to get only non-redirect pages,
set to False to get only redirect pages,
|
||||
unset/None results in not filtering
|
||||
@type filter_redirects bool/None
|
||||
@param talkpages Set to True to get Talkpages instead of article page
|
||||
@type talkpages bool/None
|
||||
|
||||
"""
|
||||
|
||||
# Helper to leave multidimensional loop
|
||||
# https://docs.python.org/3/faq/design.html#why-is-there-no-goto
|
||||
class Continue(Exception):
|
||||
pass
|
||||
|
||||
class Break(Exception):
|
||||
pass
|
||||
|
||||
# Iterate over articles in redfam
|
||||
for article in self.articlesList:
|
||||
|
||||
# To be able to control outer loop from inside child loops
|
||||
try:
|
||||
|
||||
# Not all list elements contain articles
|
||||
if not article:
|
||||
raise Break()
|
||||
|
||||
page = pywikibot.Page( pywikibot.Link(article),
|
||||
pywikibot.Site() )
|
||||
|
||||
# Filter existing pages if requested with filter_existing=False
|
||||
if page.exists():
|
||||
self.article_remove_status( "deleted", title=article )
|
||||
if filter_existing is False:
|
||||
raise Continue()
|
||||
# Filter non existing Pages if requested with
|
||||
# filter_existing=True
|
||||
else:
|
||||
self.article_add_status( "deleted", title=article )
|
||||
if filter_existing:
|
||||
raise Continue()
|
||||
|
||||
# Filter redirects if requested with filter_redirects=True
|
||||
if page.isRedirectPage():
|
||||
self.article_add_status( "redirect", title=article )
|
||||
if filter_redirects:
|
||||
raise Continue()
|
||||
# Filter noredirects if requested with filter_redirects=False
|
||||
else:
|
||||
self.article_remove_status("redirect", title=article )
|
||||
if filter_redirects is False:
|
||||
raise Continue()
|
||||
|
||||
# Exclude by article status
|
||||
for status in exclude_article_status:
|
||||
if self.article_has_status( status, title=article ):
|
||||
raise Continue()
|
||||
|
||||
# Only include by article status
|
||||
for status in onlyinclude_article_status:
|
||||
if not self.article_has_status( status, title=article ):
|
||||
raise Continue()
|
||||
|
||||
# Proxy loop control to outer loop
|
||||
except Continue:
|
||||
continue
|
||||
except Break:
|
||||
break
|
||||
|
||||
# Follow moved pages
|
||||
if self.article_has_status( "redirect", title=article ):
|
||||
try:
|
||||
page = page.moved_target()
|
||||
|
||||
# Short circuit if move destination does not exist
|
||||
if not page.exists():
|
||||
continue
|
||||
|
||||
except pywikibot.exceptions.NoMoveTarget:
|
||||
pass
|
||||
|
||||
# Exclude user pages and user talk pages
|
||||
if page.namespace() == 2 or page.namespace() == 3:
|
||||
self.article_add_status( "user", title=article )
|
||||
continue
|
||||
|
||||
# Toggle talkpage
|
||||
if talkpages and not page.isTalkPage() or\
|
||||
not talkpages and page.isTalkPage():
|
||||
page = page.toggleTalkPage()
|
||||
|
||||
# Add reference to redfam to pages
|
||||
page.redfam = self
|
||||
|
||||
# Keep article title from db with page object
|
||||
page.redarticle = article
|
||||
|
||||
# Yield filtered pages
|
||||
yield page
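The Continue/Break helper exceptions above only exist to steer the outer article loop from inside the nested filter checks (see the linked Python FAQ entry); a stripped-down sketch of the same pattern:

class Continue(Exception):
    pass

class Break(Exception):
    pass

for item in ["a", "skip-me", "b", "", "c"]:
    try:
        if not item:
            raise Break()        # abort the whole outer loop
        if item == "skip-me":
            raise Continue()     # skip just this item
    except Continue:
        continue
    except Break:
        break
    print(item)
# prints 'a' and 'b', then stops at the empty entry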
|
||||
|
||||
def update_status( self ):
|
||||
"""
|
||||
Sets the family status to 'marked' once all articles have been worked on
|
||||
"""
|
||||
for article in self.articlesList:
|
||||
if not article:
|
||||
break
|
||||
|
||||
if self.article_has_status( "sav_err", title=article ):
|
||||
self.status.add( "sav_err" )
|
||||
return
|
||||
elif self.article_has_status( "note_rej", title=article ):
|
||||
self.status.add( "note_rej" )
|
||||
return
|
||||
|
||||
elif not self.article_has_status("deleted", title=article ) and \
|
||||
not self.article_has_status("redirect", title=article) and\
|
||||
not self.article_has_status("marked", title=article):
|
||||
return
|
||||
|
||||
self.status.remove("sav_err")
|
||||
self.status.remove("note_rej")
|
||||
self.status.add( "marked" )
|
||||
|
||||
def get_disc_link( self, as_link=False ):
|
||||
"""
|
||||
Constructs and returns the link to the redundancy discussion
|
||||
|
||||
@param as_link If true, wrap link in double square brackets (wikilink)
|
||||
@type as_link bool
|
||||
|
||||
@returns Link to discussion
|
||||
@rtype str
|
||||
"""
|
||||
|
||||
# Expand templates using pwb site object
|
||||
site = pywikibot.Site()
|
||||
anchor_code = site.expand_text(self.heading.strip())
|
||||
|
||||
# Remove possibly embedded files
|
||||
anchor_code = re.sub( r"\[\[\w+:[^\|]+(?:\|.+){2,}\]\]", "",
|
||||
anchor_code )
|
||||
|
||||
# Replace non-breaking-space by correct urlencoded value
|
||||
anchor_code = anchor_code.replace( " ", ".C2.A0" )
|
||||
|
||||
# Use mwparser to strip and normalize
|
||||
anchor_code = mwparser.parse( anchor_code ).strip_code()
|
||||
|
||||
# We try it without any more parsing; MediaWiki will do the rest when parsing the page
|
||||
link = self.redpage.pagetitle + "#" + anchor_code.strip()
|
||||
|
||||
if as_link:
|
||||
return "[[{0}]]".format(link)
|
||||
else:
|
||||
return link
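A hedged sketch of the anchor normalisation above, with an invented reddisc page title and heading (the real heading comes from the database, and templates are expanded via the site object, which is skipped here):

import re
import mwparserfromhell as mwparser

pagetitle = "Wikipedia:Redundanz/März 2017"     # invented example
heading = "[[Foo]] und [[Bar]]"                 # already template-expanded

# Drop embedded files, replace non-breaking spaces, strip markup
anchor = re.sub(r"\[\[\w+:[^\|]+(?:\|.+){2,}\]\]", "", heading)
anchor = anchor.replace("\u00a0", ".C2.A0")
anchor = mwparser.parse(anchor).strip_code().strip()

link = pagetitle + "#" + anchor
# link == 'Wikipedia:Redundanz/März 2017#Foo und Bar'
# as_link=True would wrap it: '[[Wikipedia:Redundanz/März 2017#Foo und Bar]]'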
|
||||
|
||||
def disc_section_exists( self ):
|
||||
"""
|
||||
Checks whether the redundancy discussion section still exists. Sometimes
it is absent because the heading was changed, which yields a
different famhash and therefore a new redfam.
|
||||
As a side effect, the method sets status "absent" for missing sections.
|
||||
|
||||
@returns True if it exists otherwise False
|
||||
@rtype bool
|
||||
"""
|
||||
# The redpage
|
||||
discpage = pywikibot.Page(pywikibot.Site(), self.get_disc_link() )
|
||||
|
||||
# Parse redpage content
|
||||
wikicode = mwparser.parse( discpage.get() )
|
||||
|
||||
# List fams
|
||||
fams = wikicode.filter_headings(
|
||||
matches=RedFamParser.is_section_redfam_cb )
|
||||
|
||||
# Check if current fam is in list of fams
|
||||
# If not, set status absent and return False
|
||||
if self.heading not in [ fam.title.strip() for fam in fams]:
|
||||
self.status.remove("open")
|
||||
self.status.add("absent")
|
||||
return False
|
||||
|
||||
# The section exists
|
||||
return True
|
||||
|
||||
def generate_disc_notice_template( self ):
|
||||
"""
|
||||
Generates the notice template to add to article discussion pages when
the redundancy discussion is finished
|
||||
|
||||
@return Notice template to add on article disc
|
||||
@rtype wikicode-node
|
||||
"""
|
||||
|
||||
# Generate template boilerplate
|
||||
template = mwparser.nodes.template.Template(
|
||||
jogobot.config['redundances']['disc_notice_template_name'])
|
||||
|
||||
# Index of first article's param
|
||||
param_cnt = 3
|
||||
|
||||
# Iterate over articles in redfam
|
||||
for article in self.articlesList:
|
||||
if not article:
|
||||
break
|
||||
# Make sure to only use 8 articles (max. param 10)
|
||||
if param_cnt > 10:
|
||||
break
|
||||
|
||||
# Add param for article
|
||||
template.add( param_cnt, article, True )
|
||||
|
||||
param_cnt += 1
|
||||
|
||||
# Add begin
|
||||
begin = self.beginning.strftime( "%B %Y" )
|
||||
template.add( "Beginn", begin, True )
|
||||
|
||||
# Add end (if not same as begin)
|
||||
end = self.ending.strftime( "%B %Y" )
|
||||
if not end == begin:
|
||||
template.add( "Ende", end, True )
|
||||
|
||||
# Add link to related reddisc
|
||||
template.add( "Diskussion", self.get_disc_link(), True )
|
||||
|
||||
# Add signature and timestamp
|
||||
# Not used atm
|
||||
# template.add( 1, "-- ~~~~", True )
|
||||
|
||||
return template
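For illustration, assuming the configured template name were 'Redundanztext' (the real name comes from jogobot.config and is not part of this diff), the generated node would serialize roughly like this:

import mwparserfromhell as mwparser

template = mwparser.nodes.template.Template("Redundanztext")
template.add(3, "Artikel A", True)
template.add(4, "Artikel B", True)
template.add("Beginn", "März 2017", True)
template.add("Ende", "April 2017", True)
template.add("Diskussion",
             "Wikipedia:Redundanz/März 2017#Artikel A und Artikel B", True)

print(template)
# {{Redundanztext|3=Artikel A|4=Artikel B|Beginn=März 2017|Ende=April 2017|Diskussion=Wikipedia:Redundanz/März 2017#Artikel A und Artikel B}}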
|
||||
|
||||
@classmethod
|
||||
def list_by_status( cls, status ):
|
||||
"""
|
||||
Lists red_fams stored in db by given status
|
||||
"""
|
||||
mysql = MysqlRedFam()
|
||||
for fam in mysql.get_by_status( status ):
|
||||
try:
|
||||
print( cls( fam ) )
|
||||
except RedFamHashError:
|
||||
print(fam)
|
||||
raise
|
||||
|
||||
@classmethod
|
||||
def gen_by_status_and_ending( cls, status, ending ):
|
||||
"""
|
||||
Yield red_fams stored in db by given status which have an ending after
|
||||
given one
|
||||
"""
|
||||
for redfam in RedFamWorker.session.query(RedFamWorker).filter(
|
||||
# NOT WORKING WITH OBJECT NOTATION
|
||||
# RedFamWorker._status.like('archived'),
|
||||
# RedFamWorker._status.like("%{0:s}%".format(status)),
|
||||
text("status LIKE '%archived%'"),
|
||||
text("status NOT LIKE '%marked%'"),
|
||||
RedFamWorker.ending >= ending ):
|
||||
|
||||
yield redfam
|
||||
|
||||
@classmethod
|
||||
def gen_open( cls ):
|
||||
"""
|
||||
Yield red_fams stored in db which still have the status 'open'
|
||||
"""
|
||||
for redfam in RedFamWorker.session.query(RedFamWorker).filter(
|
||||
# NOT WORKING WITH OBJECT NOTATION
|
||||
text("status LIKE '%open%'") ):
|
||||
|
||||
yield redfam
|
||||
|
||||
|
||||
class RedFamError( Exception ):
|
||||
"""
|
||||
Base class for all errors of the RedFam module
|
||||
"""
|
||||
|
||||
def __init__( self, message=None ):
|
||||
"""
|
||||
Handles instantiation of RedFamError
|
||||
"""
|
||||
if not message:
|
||||
self.message = "An Error occured while executing a RedFam action"
|
||||
else:
|
||||
self.message = message
|
||||
|
||||
def __str__( self ):
|
||||
"""
|
||||
Output of error message
|
||||
"""
|
||||
|
||||
return self.message
|
||||
|
||||
|
||||
class RedFamHashError( RedFamError ):
|
||||
"""
|
||||
Raised when the given RedFamHash does not match the calculated one
|
||||
"""
|
||||
|
||||
def __init__( self, givenHash, calculatedHash ):
|
||||
|
||||
message = "Given fam_hash ('{given}') does not match with \
|
||||
calculated ('{calc}'".format( given=givenHash, calc=calculatedHash )
|
||||
|
||||
super().__init__( message )
|
||||
|
||||
|
||||
class RedFamHeadingError ( RedFamError ):
|
||||
"""
|
||||
Raised when the given RedFamHeading does not match the __sectionhead_pat regex
|
||||
"""
|
||||
def __init__( self, heading ):
|
||||
|
||||
message = "Error while trying to parse section heading. Given heading \
|
||||
'{heading}' does not match RegEx".format( heading=heading )
|
||||
|
||||
super().__init__( message )
|
||||
lib/redpage.py (new file, 143 lines)
@@ -0,0 +1,143 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# redpage.py
|
||||
#
|
||||
# Copyright 2017 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
|
||||
Provides a class for handling redundancy discussion pages and archives
|
||||
"""
|
||||
|
||||
import pywikibot # noqa
|
||||
import mwparserfromhell as mwparser
|
||||
|
||||
import jogobot # noqa
|
||||
|
||||
from lib.mysqlred import (
|
||||
MysqlRedPage, relationship, attribute_mapped_collection )
|
||||
from lib.redfam import RedFamParser
|
||||
|
||||
|
||||
class RedPage( MysqlRedPage ):
|
||||
"""
|
||||
Class for handling redundancy discussion pages and archives
|
||||
"""
|
||||
|
||||
def __init__( self, page=None, pageid=None, archive=False ):
|
||||
"""
|
||||
Generate a new RedPage object based on the given pywikibot page object
|
||||
|
||||
@param page Pywikibot/MediaWiki page object for page
|
||||
@type page pywikibot.Page
|
||||
@param pageid MW-Pageid for related page
|
||||
@type pageid int
|
||||
"""
|
||||
|
||||
# Save the pywikibot page object
|
||||
if page:
|
||||
self._page = page
|
||||
|
||||
super().__init__(
|
||||
pageid=self._page.pageid,
|
||||
revid=self._page._revid,
|
||||
pagetitle=self._page.title(),
|
||||
status=None
|
||||
)
|
||||
|
||||
self.is_archive()
|
||||
|
||||
self.session.add(self)
|
||||
|
||||
def update( self, page ):
|
||||
self._page = page
|
||||
self.revid = page._revid
|
||||
self.pagetitle = page.title()
|
||||
self.is_archive()
|
||||
|
||||
@property
|
||||
def page(self):
|
||||
if not hasattr(self, "_page"):
|
||||
self._page = pywikibot.Page( pywikibot.Site(), self.pagetitle )
|
||||
|
||||
return self._page
|
||||
|
||||
@property
|
||||
def archive(self):
|
||||
self.is_archive()
|
||||
return self.status.has("archive")
|
||||
|
||||
def is_archive( self ):
|
||||
"""
|
||||
Detects whether the current page is an archive of discussions
|
||||
"""
|
||||
if( ( u"/Archiv" in self.page.title() ) or
|
||||
( "{{Archiv}}" in self.page.text ) or
|
||||
( "{{Archiv|" in self.page.text ) ):
|
||||
self.status.add("archive")
|
||||
else:
|
||||
self.status.discard("archive")
|
||||
|
||||
def is_parsing_needed( self ):
|
||||
"""
|
||||
Decides whether the current RedPage needs to be parsed or not
|
||||
"""
|
||||
return self.changedp() or not self.status.has("parsed")
|
||||
|
||||
def parse( self ):
|
||||
"""
|
||||
Handles the parsing process
|
||||
"""
|
||||
|
||||
# Generate Wikicode object
|
||||
self.wikicode = mwparser.parse( self.page.text )
|
||||
|
||||
# Select RedFam-sections
|
||||
# matches=Regexp or
|
||||
# a function (receives the heading content as wikicode as its first param)
|
||||
# include_lead = if true include first section (intro)
|
||||
# include_heading = if true include heading
|
||||
fams = self.wikicode.get_sections(
|
||||
matches=RedFamParser.is_section_redfam_cb,
|
||||
include_lead=False, include_headings=True )
|
||||
|
||||
# Iterate over RedFam
|
||||
for fam in fams:
|
||||
|
||||
yield fam
|
||||
|
||||
else:
|
||||
self.status.add("parsed")
|
||||
self._parsed = True
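The else clause on the for loop above belongs to the loop, not to an if: it runs once the loop finishes without a break, i.e. after the caller has consumed all yielded sections. A tiny sketch of that control flow:

def sections():
    for fam in ["fam 1", "fam 2"]:
        yield fam
    else:
        print("all sections handed out -> mark page as parsed")

for fam in sections():
    print("treat", fam)
# treat fam 1
# treat fam 2
# all sections handed out -> mark page as parsed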
|
||||
|
||||
@classmethod
|
||||
def flush_db_cache( cls ):
|
||||
"""
|
||||
Calls flush method of Mysql Interface class
|
||||
"""
|
||||
cls.session.commit()
|
||||
|
||||
|
||||
class RedPageParser( RedPage ):
|
||||
"""
|
||||
Wrapper class to change the type of redfams collection elements in parser
|
||||
"""
|
||||
redfams = relationship(
|
||||
"RedFamParser", enable_typechecks=False, back_populates="redpage",
|
||||
collection_class=attribute_mapped_collection("famhash") )
|
||||
mysqlred.py (deleted, 255 lines)
@@ -1,255 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# mysqlred.py
|
||||
#
|
||||
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
|
||||
Provides interface classes for communication of the redundancy bot with the MySQL db
|
||||
"""
|
||||
|
||||
# Prefer using oursql over MySQLdb
|
||||
try:
|
||||
import oursql as mysqldb
|
||||
except ImportError:
|
||||
import MySQLdb as mysqldb
|
||||
|
||||
from pywikibot import config
|
||||
import jogobot
|
||||
|
||||
|
||||
class MysqlRed:
|
||||
"""
|
||||
Basic interface class, containing opening of connection
|
||||
|
||||
Specific queries should be defined in descendant classes per data type
|
||||
"""
|
||||
|
||||
# Save mysqldb-connection as class attribute to use only one
|
||||
# in descendant classes
|
||||
connection = False
|
||||
db_hostname = config.db_hostname
|
||||
db_username = config.db_username
|
||||
db_password = config.db_password
|
||||
db_name = config.db_username + jogobot.db_namesuffix
|
||||
|
||||
def __init__( self ):
|
||||
"""
|
||||
Opens a connection to MySQL-DB
|
||||
|
||||
@returns mysql-stream MySQL Connection
|
||||
"""
|
||||
|
||||
# Connect to mysqldb only once
|
||||
if not type( self ).connection:
|
||||
|
||||
type( self ).connection = mysqldb.connect(
|
||||
host=type( self ).db_hostname,
|
||||
user=type( self ).db_username,
|
||||
passwd=type( self ).db_password,
|
||||
db=type( self ).db_name )
|
||||
|
||||
def __del__( self ):
|
||||
"""
|
||||
Before deleting class, close connection to MySQL-DB
|
||||
"""
|
||||
|
||||
type( self ).connection.close()
|
||||
|
||||
|
||||
class MysqlRedPage( MysqlRed ):
|
||||
"""
|
||||
MySQL-db interface for handling queries for RedPages
|
||||
"""
|
||||
|
||||
def __init__( self, page_id ):
|
||||
"""
|
||||
Creates a new instance, runs __init__ of parent class
|
||||
"""
|
||||
|
||||
super().__init__( )
|
||||
|
||||
self.__page_id = int( page_id )
|
||||
|
||||
self.data = self.get_page()
|
||||
|
||||
def __del__( self ):
|
||||
pass
|
||||
|
||||
def get_page( self ):
|
||||
"""
|
||||
Retrieves a red page row from MySQL-Database for given page_id
|
||||
|
||||
@param int page_id MediaWiki page_id for page to retrieve
|
||||
|
||||
@returns tuple Tuple with data for given page_id
|
||||
bool FALSE if none found
|
||||
"""
|
||||
|
||||
cursor = type( self ).connection.cursor(mysqldb.DictCursor)
|
||||
|
||||
cursor.execute( 'SELECT * FROM `red_pages` WHERE `page_id` = ?;',
|
||||
( self.__page_id, ) )
|
||||
res = cursor.fetchone()
|
||||
|
||||
if res:
|
||||
return res
|
||||
else:
|
||||
return False
|
||||
|
||||
def add_page( self, page_title, rev_id, status=0 ):
|
||||
"""
|
||||
Inserts a red page row in MySQL-Database for given page_id
|
||||
|
||||
@param int rev_id MediaWiki current rev_id
|
||||
@param str page_title MediaWiki new page_title
|
||||
@param int status Page parsing status
|
||||
"""
|
||||
|
||||
cursor = type( self ).connection.cursor()
|
||||
|
||||
if not page_title:
|
||||
page_title = self.data[ 'page_title' ]
|
||||
if not rev_id:
|
||||
rev_id = self.data[ 'rev_id' ]
|
||||
|
||||
query = 'INSERT INTO `red_pages` \
|
||||
( page_id, page_title, rev_id, status ) \
|
||||
VALUES ( ?, ?, ?, ? );'
|
||||
data = ( self.__page_id, page_title, rev_id, status )
|
||||
|
||||
cursor.execute( query, data)
|
||||
|
||||
type( self ).connection.commit()
|
||||
|
||||
self.data = self.get_page()
|
||||
|
||||
def update_page( self, rev_id=None, page_title=None, status=0 ):
|
||||
"""
|
||||
Updates the red page row in MySQL-Database for given page_id
|
||||
|
||||
@param int rev_id MediaWiki current rev_id
|
||||
@param str page_title MediaWiki new page_title
|
||||
@param int status Page parsing status
|
||||
"""
|
||||
|
||||
cursor = type( self ).connection.cursor()
|
||||
|
||||
if not page_title:
|
||||
page_title = self.data[ 'page_title' ]
|
||||
if not rev_id:
|
||||
rev_id = self.data[ 'rev_id' ]
|
||||
|
||||
query = 'UPDATE `red_pages` \
|
||||
SET `page_title` = ?, `rev_id` = ?, `status`= ? \
|
||||
WHERE `page_id` = ?;'
|
||||
data = ( page_title, rev_id, status, self.__page_id )
|
||||
|
||||
cursor.execute( query, data)
|
||||
|
||||
type( self ).connection.commit()
|
||||
|
||||
|
||||
class MysqlRedFam( MysqlRed ):
|
||||
"""
|
||||
MySQL-db interface for handling queries for RedFams
|
||||
"""
|
||||
|
||||
def __init__( self, fam_hash ):
|
||||
"""
|
||||
Creates a new instance, runs __init__ of parent class
|
||||
"""
|
||||
|
||||
super().__init__( )
|
||||
|
||||
self.__fam_hash = fam_hash
|
||||
|
||||
self.data = self.get_fam()
|
||||
|
||||
def __del__( self ):
|
||||
pass
|
||||
|
||||
def get_fam( self ):
|
||||
"""
|
||||
Retrieves a red family row from MySQL-Database for given fam_hash
|
||||
|
||||
@returns dict Dictionary with data for given fam hash
|
||||
False if none found
|
||||
"""
|
||||
|
||||
cursor = type( self ).connection.cursor( mysqldb.DictCursor )
|
||||
|
||||
cursor.execute( 'SELECT * FROM `red_families` WHERE `fam_hash` = ?;',
|
||||
( self.__fam_hash, ) )
|
||||
res = cursor.fetchone()
|
||||
|
||||
if res:
|
||||
return res
|
||||
else:
|
||||
return False
|
||||
|
||||
def add_fam( self, articlesList, heading, red_page_id,
|
||||
beginning, ending=None, status=0 ):
|
||||
|
||||
cursor = type( self ).connection.cursor()
|
||||
|
||||
query = 'INSERT INTO `red_families` \
|
||||
( fam_hash, red_page_id, beginning, ending, status, heading, \
|
||||
article0, article1, article2, article3, \
|
||||
article4, article5, article6, article7 ) \
|
||||
VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'
|
||||
data = [ self.__fam_hash, red_page_id, beginning, ending,
|
||||
status, heading ]
|
||||
|
||||
for article in articlesList:
|
||||
data.append( str( article ) )
|
||||
|
||||
while len( data ) < 14:
|
||||
data.append( None )
|
||||
|
||||
data = tuple( data )
|
||||
|
||||
cursor.execute( query, data)
|
||||
|
||||
type( self ).connection.commit()
|
||||
|
||||
self.data = self.get_fam()
|
||||
|
||||
def update_fam( self, red_page_id, heading, beginning, ending, status ):
|
||||
"""
|
||||
Updates the red fam row in MySQL-Database for given fam_hash
|
||||
|
||||
@param int red_page_id MediaWiki page_id
|
||||
@param datetime beginning Timestamp of beginning
|
||||
@param datetime ending Timestamp of ending
|
||||
@param int status red_fam status
|
||||
"""
|
||||
|
||||
cursor = type( self ).connection.cursor()
|
||||
|
||||
query = 'UPDATE `red_families` \
|
||||
SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, \
|
||||
`ending` = ?, `status`= ? WHERE `fam_hash` = ?;'
|
||||
data = ( red_page_id, heading, beginning,
|
||||
ending, status, self.__fam_hash )
|
||||
|
||||
cursor.execute( query, data)
|
||||
|
||||
type( self ).connection.commit()
|
||||
red.py (new file, 150 lines)
@@ -0,0 +1,150 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# red.py
|
||||
#
|
||||
# Copyright 2017 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
|
||||
Wrapper script to invoke all redundancy bot tasks
|
||||
"""
|
||||
|
||||
import os
|
||||
import locale
|
||||
|
||||
import pywikibot
|
||||
|
||||
import jogobot
|
||||
|
||||
|
||||
def prepare_bot( task_slug, subtask, genFactory, subtask_args ):
|
||||
"""
|
||||
Handles importing subtask Bot class and prepares specific args
|
||||
|
||||
Throws an exception if the bot does not exist
|
||||
|
||||
@param task_slug Task slug, needed for logging
|
||||
@type task_slug str
|
||||
@param subtask Slug of given subtask
|
||||
@type subtask str
|
||||
@param genFactory GenFactory with parsed pagegenerator args
|
||||
@type genFactory pagegenerators.GeneratorFactory
|
||||
@param subtask_args Additional args for subtasks
|
||||
@type subtask_args dict
|
||||
|
||||
@returns The following tuple
|
||||
@return 1 Subtask slug (replaced None for default)
|
||||
@rtype str
|
||||
@return 2 Botclass of given subtask (Arg "-task")
|
||||
@rtype Class
|
||||
@return 3 GenFactory with parsed pagegenerator args
|
||||
@rtype pagegenerators.GeneratorFactory
|
||||
@return 4 Additional args for subtasks
|
||||
@rtype dict
|
||||
@rtype tuple
|
||||
"""
|
||||
# kwargs are passed to selected bot as **kwargs
|
||||
kwargs = subtask_args
|
||||
|
||||
if not subtask or subtask == "discparser":
|
||||
# Default case: discparser
|
||||
subtask = "discparser"
|
||||
|
||||
# Import related bot
|
||||
from bots.reddiscparser import DiscussionParserBot as Bot
|
||||
|
||||
elif subtask == "markpages":
|
||||
# Import related bot
|
||||
from bots.markpages import MarkPagesBot as Bot
|
||||
|
||||
elif subtask == "missingnotice":
|
||||
# Import related bot
|
||||
from bots.missingnotice import MissingNoticeBot as Bot
|
||||
|
||||
# Subtask error
|
||||
else:
|
||||
jogobot.output( (
|
||||
"\03{{red}} Given subtask \"{subtask} \"" +
|
||||
"is not existing!" ).format( subtask=subtask ), "ERROR" )
|
||||
raise Exception
|
||||
|
||||
return ( subtask, Bot, genFactory, kwargs )
|
||||
|
||||
|
||||
def parse_red_args( argkey, value ):
|
||||
"""
|
||||
Process additional args for red.py
|
||||
|
||||
@param argkey The arguments key
|
||||
@type argkey str
|
||||
@param value The arguments value
|
||||
@type value str
|
||||
|
||||
@return Tuple with (key, value) if given pair is relevant, else None
|
||||
@rtype tuple or None
|
||||
"""
|
||||
|
||||
if argkey.startswith("-famhash"):
|
||||
return ( "famhash", value )
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def main(*args):
|
||||
"""
|
||||
Process command line arguments and invoke bot.
|
||||
|
||||
If args is an empty list, sys.argv is used.
|
||||
|
||||
@param args: command line arguments
|
||||
@type args: list of unicode
|
||||
"""
|
||||
|
||||
# Make sure locale is set to 'de_DE.UTF-8' to prevent problems
|
||||
# with wrong month abbreviations in strptime
|
||||
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
|
||||
|
||||
# Process global arguments to determine desired site
|
||||
local_args = pywikibot.handle_args(args)
|
||||
|
||||
# Get the jogobot-task_slug (basename of current file without ending)
|
||||
task_slug = os.path.basename(__file__)[:-len(".py")]
|
||||
|
||||
# Disabled until [FS#86] is done
|
||||
# Before run, we need to check whether we are currently active or not
|
||||
if not jogobot.bot.active( task_slug ):
|
||||
return
|
||||
|
||||
# Parse local Args to get information about subtask
|
||||
( subtask, genFactory, subtask_args ) = jogobot.bot.parse_local_args(
|
||||
local_args, parse_red_args )
|
||||
|
||||
# select subtask and prepare args
|
||||
( subtask, Bot, genFactory, kwargs ) = prepare_bot(
|
||||
task_slug, subtask, genFactory, subtask_args )
|
||||
|
||||
# Init Bot
|
||||
bot = jogobot.bot.init_bot( task_slug, subtask, Bot, genFactory, **kwargs)
|
||||
|
||||
# Run bot
|
||||
jogobot.bot.run_bot( task_slug, subtask, bot )
|
||||
|
||||
|
||||
if( __name__ == "__main__" ):
|
||||
main()
|
||||
redfam.py (deleted, 364 lines)
@@ -1,364 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# redfam.py
|
||||
#
|
||||
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
|
||||
Provides classes for working with RedFams
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import locale
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
import pywikibot
|
||||
|
||||
from .mysqlred import MysqlRedFam
|
||||
|
||||
|
||||
class RedFam:
|
||||
"""
|
||||
Basic class for RedFams, containing the basic data structure
|
||||
"""
|
||||
|
||||
def __init__( self, fam_hash=None, articlesList=None, red_page_id=None,
|
||||
beginning=None, ending=None, status=0 ):
|
||||
"""
|
||||
Generates a new RedFam object
|
||||
|
||||
@param articlesList list List of articles
|
||||
@param beginning datetime Beginning date
|
||||
@param ending datetime Ending date
|
||||
"""
|
||||
pass
|
||||
|
||||
def __repr__( self ):
|
||||
|
||||
if( self._beginning ):
|
||||
beginning = ", beginning=" + repr( self._beginning )
|
||||
else:
|
||||
beginning = ""
|
||||
|
||||
if( self._ending ):
|
||||
ending = ", ending=" + repr( self._ending )
|
||||
else:
|
||||
ending = ""
|
||||
|
||||
__repr = "RedFam( " + repr( self._articlesList ) + beginning +\
|
||||
ending + ", status=" + repr( self._status ) + " )"
|
||||
|
||||
return __repr
|
||||
|
||||
|
||||
class RedFamParser( RedFam ):
|
||||
"""
|
||||
Provides an interface to RedFam for adding/updating redundancy families
while parsing redundancy pages
|
||||
"""
|
||||
|
||||
# Define the timestamp format
|
||||
__timestamp_format = "%H:%M, %d. %b. %Y"
|
||||
|
||||
# Define section heading re.pattern
|
||||
__sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" )
|
||||
|
||||
# Define timestamp re.pattern
|
||||
__timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? (\d{4})" ) # noqa
|
||||
|
||||
# Text pattern for recognition of done notices
|
||||
__done_notice = ":<small>Archivierung dieses Abschnittes \
|
||||
wurde gewünscht von:"
|
||||
__done_notice2 = "{{Erledigt|"
|
||||
|
||||
def __init__( self, heading, red_page_id, red_page_archive,
|
||||
beginning, ending=None ):
|
||||
"""
|
||||
Creates a RedFam object based on data collected while parsing red_pages
|
||||
combined with possibly former known data from db
|
||||
|
||||
@param red_fam_heading str Wikitext heading of section
|
||||
@param red_page_id int MediaWiki page_id
|
||||
@param red_page_archive bool Is red_page an archive
|
||||
@param beginning datetime Timestamp of beginning
|
||||
str as strptime parseable string
|
||||
@param ending datetime Timestamp of ending
|
||||
str strptime parseable string
|
||||
"""
|
||||
|
||||
# Set object attributes:
|
||||
self._red_page_id = red_page_id
|
||||
self._red_page_archive = red_page_archive
|
||||
|
||||
# Method self.add_beginning sets self._beginning directly
|
||||
self.add_beginning( beginning )
|
||||
|
||||
# Method self.add_ending sets self._ending directly
|
||||
if( ending ):
|
||||
self.add_ending( ending )
|
||||
else:
|
||||
# If no ending was provided set to None
|
||||
self._ending = None
|
||||
|
||||
self._status = None
|
||||
|
||||
# Parse the provided heading of redundance section
|
||||
# to set self._articlesList
|
||||
self.heading_parser( heading )
|
||||
|
||||
# Calculates the sha1 hash over self._articlesList to
|
||||
# rediscover known redundance families
|
||||
self.fam_hash()
|
||||
|
||||
# Open database connection, ask for data if existing,
|
||||
# otherwise create entry
|
||||
self.__handle_db()
|
||||
|
||||
# Check status changes
|
||||
self.status()
|
||||
|
||||
# Triggers db update if anything changed
|
||||
self.changed()
|
||||
|
||||
def __handle_db( self ):
|
||||
"""
|
||||
Handles opening of db connection
|
||||
"""
|
||||
|
||||
# We need a connection to our mysqldb
|
||||
self.__mysql = MysqlRedFam( self._fam_hash )
|
||||
|
||||
if not self.__mysql.data:
|
||||
self.__mysql.add_fam( self._articlesList, self._heading,
|
||||
self._red_page_id, self._beginning,
|
||||
self._ending )
|
||||
|
||||
def heading_parser( self, heading ):
|
||||
"""
|
||||
Parses given red_fam_heading string and saves articles list
|
||||
"""
|
||||
|
||||
# Predefine a pattern for wikilinks' destination
|
||||
wikilink_pat = re.compile( r"\[\[([^\[\]\|]*)(\]\]|\|)" )
|
||||
|
||||
# Parse content of heading for generating section links later
|
||||
match = self.__sectionhead_pat.search( heading )
|
||||
if match:
|
||||
self._heading = match.group(2).lstrip()
|
||||
else:
|
||||
raise ValueError( "Heading is not valid" )
|
||||
|
||||
# We get the pages in first [0] element iterating over
|
||||
# wikilink_pat.findall( line )
|
||||
self._articlesList = [ link[0] for link
|
||||
in wikilink_pat.findall( self._heading ) ]
|
||||
|
||||
# Catch sections with more than 8 articles, print error
|
||||
if len( self._articlesList ) > 8:
|
||||
pywikibot.output( "{datetime} – \03{{lightred}}[WARNING] – \
|
||||
Maximum number of articles in red_fam exceeded, \
|
||||
maximum number is 8, {number:d} were given\n\
|
||||
{repress}".format(
|
||||
datetime=datetime.now().strftime( "%Y-%m-%d %H:%M:%S" ),
|
||||
number=len( self._articlesList ), repress=repr( self ) ) )
|
||||
|
||||
self._articlesList = self._articlesList[:8]
|
||||
|
||||
def fam_hash( self ):
|
||||
"""
|
||||
Calculates the SHA-1 hash for the articlesList of redundance family.
|
||||
Since we don't need security SHA-1 is just fine.
|
||||
|
||||
@returns str String with the hexadecimal hash digest
|
||||
"""
|
||||
|
||||
h = hashlib.sha1()
|
||||
h.update( str( self._articlesList ).encode('utf-8') )
|
||||
|
||||
self._fam_hash = h.hexdigest()
|
||||
|
||||
def add_beginning( self, beginning ):
|
||||
"""
|
||||
Adds the beginning date of a redundancy discussion to the object
|
||||
|
||||
@param datetime datetime Beginning date
|
||||
"""
|
||||
|
||||
self._beginning = self.__datetime( beginning )
|
||||
|
||||
def add_ending( self, ending ):
|
||||
"""
|
||||
Adds the ending date of a redundancy discussion to the object.
|
||||
|
||||
@param datetime datetime Ending date
|
||||
"""
|
||||
|
||||
self._ending = self.__datetime( ending )
|
||||
|
||||
def __datetime( self, timestamp ):
|
||||
"""
|
||||
Decides whether the given timestamp is a parseable string or a
|
||||
datetime object and returns a datetime object in both cases
|
||||
|
||||
@param datetime timestamp Datetime object
|
||||
str timestamp Parseable string with timestamp
|
||||
|
||||
@returns datetime Datetime object
|
||||
"""
|
||||
|
||||
# Make sure locale is set to 'de_DE.UTF-8' to prevent problems
|
||||
# with wrong month abbreviations in strptime
|
||||
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
|
||||
|
||||
if( isinstance( timestamp, datetime ) ):
|
||||
return timestamp
|
||||
else:
|
||||
result = datetime.strptime( timestamp,
|
||||
type( self ).__timestamp_format )
|
||||
return result
|
||||
|
||||
def status( self ):
|
||||
"""
|
||||
Handles detection of correct status
|
||||
There are the following possible statuses:
|
||||
- 0 Discussion running --> no ending, page is not an archive
|
||||
- 1 Discussion over --> ending present, page is not an archive
|
||||
- 2 Discussion archived --> ending (normally) present, page is archive
|
||||
- 3 and greater status was set by worker script, do not change it
|
||||
"""
|
||||
|
||||
# Do not change statuses set by worker script etc.
|
||||
if not self.__mysql.data['status'] > 2:
|
||||
|
||||
# No ending, discussion is running:
|
||||
# Sometimes archived discussions also have no detectable ending
|
||||
if not self._ending and not self._red_page_archive:
|
||||
self._status = 0
|
||||
else:
|
||||
if not self._red_page_archive:
|
||||
self._status = 1
|
||||
else:
|
||||
self._status = 2
|
||||
else:
|
||||
self._status = self.__mysql.data[ 'status' ]
|
||||
|
||||
def changed( self ):
|
||||
"""
|
||||
Checks whether anything has changed and triggers a db update if needed
|
||||
"""
|
||||
|
||||
# On archived red_fams do not delete possibly existing ending
|
||||
if( not self._ending and self._status > 1
|
||||
and self.__mysql.data[ 'ending' ] ):
|
||||
|
||||
self._ending = self.__mysql.data[ 'ending' ]
|
||||
|
||||
# Since status change means something has changed, update database
|
||||
if( self._status != self.__mysql.data[ 'status' ] or
|
||||
self._beginning != self.__mysql.data[ 'beginning' ] or
|
||||
self._ending != self.__mysql.data[ 'ending' ] or
|
||||
self._red_page_id != self.__mysql.data[ 'red_page_id' ] or
|
||||
self._heading != self.__mysql.data[ 'heading' ]):
|
||||
|
||||
self.__mysql.update_fam( self._red_page_id, self._heading,
|
||||
self._beginning, self._ending,
|
||||
self._status )
|
||||
|
||||
@classmethod
|
||||
def is_sectionheading( cls, line ):
|
||||
"""
|
||||
Checks whether the given line is a red_fam section heading
|
||||
|
||||
@param str line String to check
|
||||
|
||||
@returns bool Returns True if it is a section heading
|
||||
"""
|
||||
|
||||
if cls.__sectionhead_pat.search( line ):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def is_beginning( cls, line ):
|
||||
"""
|
||||
Returns the first timestamp found in line, otherwise None
|
||||
|
||||
@param str line String to search in
|
||||
|
||||
@returns str Timestamp, otherwise None
|
||||
"""
|
||||
|
||||
match = cls.__timestamp_pat.search( line )
|
||||
if match:
|
||||
# Since some timestamps are broken we need to reconstruct them
|
||||
# by regex match groups
|
||||
result = match.group(1) + ", " + match.group(2) + ". " +\
|
||||
match.group(3) + ". " + match.group(4)
|
||||
return result
|
||||
else:
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def is_ending( cls, line ):
|
||||
"""
|
||||
Returns the timestamp of the done notice (if there is one), otherwise None
|
||||
@param str line String to search in
|
||||
|
||||
@returns str Timestamp, otherwise None
|
||||
"""
|
||||
|
||||
if ( cls.__done_notice in line ) or ( cls.__done_notice2 in line ):
|
||||
match = cls.__timestamp_pat.search( line )
|
||||
if match:
|
||||
# Since some timestamps are broken we need to reconstruct them
|
||||
# by regex match groups
|
||||
result = match.group(1) + ", " + match.group(2) + ". " +\
|
||||
match.group(3) + ". " + match.group(4)
|
||||
return result
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def is_ending2( cls, line ):
|
||||
"""
|
||||
Returns the last timestamp found in line, otherwise None
|
||||
@param str line String to search in
|
||||
|
||||
@returns str Timestamp, otherwise None
|
||||
"""
|
||||
|
||||
matches = cls.__timestamp_pat.findall( line )
|
||||
if matches:
|
||||
# Since some timestamps are broken we need to reconstruct them
|
||||
# by regex match groups
|
||||
result = matches[-1][0] + ", " + matches[-1][1] + ". " +\
|
||||
matches[-1][2] + ". " + matches[-1][3]
|
||||
return result
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
class RedFamWorker( RedFam ):
|
||||
"""
|
||||
Handles working with redundancy families stored in the database
whose discussion is finished
|
||||
"""
|
||||
pass
|
||||
redpage.py (deleted, 182 lines)
@@ -1,182 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# redpage.py
|
||||
#
|
||||
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
|
||||
Provides a class for handling redundance discussion pages and archives
|
||||
"""
|
||||
|
||||
import pywikibot # noqa
|
||||
|
||||
from .mysqlred import MysqlRedPage
|
||||
from .redfam import RedFamParser
|
||||
|
||||
|
||||
class RedPage:
|
||||
"""
|
||||
Class for handling redundance discussion pages and archives
|
||||
"""
|
||||
|
||||
def __init__( self, page, archive=False ):
|
||||
"""
|
||||
Generate a new RedPage object based on the given pywikibot page object
|
||||
|
||||
@param page page Pywikibot/MediaWiki page object for page
|
||||
"""
|
||||
|
||||
# Save the pywikibot page object
|
||||
self.page = page
|
||||
self._archive = archive
|
||||
|
||||
self.__handle_db( )
|
||||
|
||||
self.is_page_changed()
|
||||
|
||||
self._parsed = None
|
||||
if( self._changed or self.__mysql.data[ 'status' ] == 0 ):
|
||||
self.parse()
|
||||
|
||||
self.__update_db()
|
||||
|
||||
def __handle_db( self ):
|
||||
"""
|
||||
Handles opening of db connection
|
||||
"""
|
||||
|
||||
# We need a connection to our mysqldb
|
||||
self.__mysql = MysqlRedPage( self.page._pageid )
|
||||
|
||||
if not self.__mysql.data:
|
||||
self.__mysql.add_page( self.page.title(), self.page._revid )
|
||||
|
||||
def is_page_changed( self ):
|
||||
"""
|
||||
Check whether the page was changed since the last run
|
||||
"""
|
||||
|
||||
if( self.__mysql.data != { 'page_id': self.page._pageid,
|
||||
'rev_id': self.page._revid,
|
||||
'page_title': self.page.title(),
|
||||
'status': self.__mysql.data[ 'status' ] } ):
|
||||
self._changed = True
|
||||
else:
|
||||
self._changed = False
|
||||
|
||||
def is_archive( self ):
|
||||
"""
|
||||
Detects whether the current page is an archive of discussions
|
||||
"""
|
||||
|
||||
if( self._archive or ( u"/Archiv" in self.page.title() ) or
|
||||
( "{{Archiv}}" in self.page.text ) or
|
||||
( "{{Archiv|" in self.page.text ) ):
|
||||
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def parse( self ):
|
||||
"""
|
||||
Handles the parsing process
|
||||
"""
|
||||
|
||||
# Since @param text is a string we need to split it in lines
|
||||
text_lines = self.page.text.split( "\n" )
|
||||
length = len( text_lines )
|
||||
|
||||
# Initialise line counter
|
||||
i = 0
|
||||
fam_heading = None
|
||||
beginning = None
|
||||
ending = None
|
||||
|
||||
# Set line for last detected Redundance-Family to 0
|
||||
last_fam = 0
|
||||
|
||||
# Iterate over the lines of the page
|
||||
for line in text_lines:
|
||||
|
||||
# Check whether we have a "Redundance-Family" section heading
|
||||
if RedFamParser.is_sectionheading( line ):
|
||||
|
||||
# Save line number for last detected Redundance-Family
|
||||
last_fam = i
|
||||
# Save heading
|
||||
fam_heading = line
|
||||
|
||||
# Defined (re)initialisation of dates
|
||||
beginning = None
|
||||
ending = None
|
||||
|
||||
# Check whether we are currently in a "Redundance-Family" section
|
||||
if i > last_fam and last_fam > 0:
|
||||
|
||||
# Check if we have already recognized the beginning date of the
# discussion (in a former iteration) or if we have a done-notice
|
||||
if not beginning:
|
||||
beginning = RedFamParser.is_beginning( line )
|
||||
elif not ending:
|
||||
ending = RedFamParser.is_ending( line )
|
||||
|
||||
# Detect end of red_fam section (next line is new sectionheading)
|
||||
# or end of file
|
||||
# Prevent from running out of index
|
||||
if i < (length - 1):
|
||||
test = RedFamParser.is_sectionheading( text_lines[ i + 1 ] )
|
||||
else:
|
||||
test = False
|
||||
if ( test or ( length == ( i + 1 ) ) ):
|
||||
|
||||
# Create the red_fam object
|
||||
if( fam_heading and beginning ):
|
||||
|
||||
# Maybe we can find an ending by searching backwards if we have none yet
# (no done notice on archive pages)
|
||||
if not ending and self.is_archive():
|
||||
j = i
|
||||
while (j > last_fam) and not ending:
|
||||
j -= 1
|
||||
ending = RedFamParser.is_ending2( text_lines[ j ] )
|
||||
|
||||
# Create the RedFam object
|
||||
red_fam = RedFamParser( fam_heading, self.page._pageid,
|
||||
self.is_archive(), beginning,
|
||||
ending )
|
||||
|
||||
# Increment line counter
|
||||
i += 1
|
||||
else:
|
||||
self._parsed = True
|
||||
|
||||
def __update_db( self ):
|
||||
"""
|
||||
Updates the page meta data in mysql db
|
||||
"""
|
||||
if( self._parsed or not self._changed ):
|
||||
status = 1
|
||||
|
||||
if( self.is_archive() ):
|
||||
status = 2
|
||||
else:
|
||||
status = 0
|
||||
|
||||
self.__mysql.update_page( self.page._revid, self.page.title(), status )
|
||||
requirements.txt (new file, 26 lines)
@@ -0,0 +1,26 @@
|
||||
# This is a PIP 6+ requirements file for using jogobot-red
|
||||
#
|
||||
# All dependencies can be installed using:
|
||||
# $ sudo pip install -r requirements.txt
|
||||
#
|
||||
# It is good practice to install packages using the system
|
||||
# package manager if it has a packaged version. If you are
|
||||
# unsure, please use pip as described at the top of the file.
|
||||
#
|
||||
# To get a list of potential matches, use
|
||||
#
|
||||
# $ awk -F '[#>=]' '{print $1}' requirements.txt | xargs yum search
|
||||
# or
|
||||
# $ awk -F '[#>=]' '{print $1}' requirements.txt | xargs apt-cache search
|
||||
|
||||
# Needed for Database-Connection
|
||||
# SQLAlchemy Python ORM-Framework
|
||||
SQLAlchemy>=1.1
|
||||
# PyMySQL DB-Connector
|
||||
PyMySQL>=0.7
|
||||
|
||||
# Also needed, but not covered here, is a working copy of pywikibot-core
|
||||
# which also brings mwparserfromhell
|
||||
|
||||
# jogobot
|
||||
git+https://git.golderweb.de/wiki/jogobot.git#egg=jogobot
|
||||
@@ -1,9 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# __init__.py
|
||||
# missingnotice_tests.py
|
||||
#
|
||||
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
# Copyright 2018 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@@ -21,6 +21,8 @@
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
"""
|
||||
Scripts for our redundances bot
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(
|
||||
0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
tests/missingnotice_tests.py (new file, 94 lines)
@@ -0,0 +1,94 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# missingnotice_tests.py
|
||||
#
|
||||
# Copyright 2018 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
|
||||
"""
|
||||
Test module bots/missingnotice.py
|
||||
"""
|
||||
|
||||
import unittest
|
||||
from unittest import mock # noqa
|
||||
|
||||
import pywikibot
|
||||
|
||||
import context # noqa
|
||||
from bots.missingnotice import MissingNoticeBot # noqa
|
||||
|
||||
|
||||
class TestMissingNoticeBot(unittest.TestCase):
|
||||
"""
|
||||
Test class MissingNoticeBot
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
genFactory = pywikibot.pagegenerators.GeneratorFactory()
|
||||
self.MissingNoticeBot = MissingNoticeBot(genFactory)
|
||||
self.MissingNoticeBot.categorized_articles = [ "Deutschland",
|
||||
"Max_Schlee",
|
||||
"Hodeng-Hodenger" ]
|
||||
|
||||
@mock.patch( 'sqlalchemy.engine.Engine.execute',
|
||||
return_value=( { "page_title": b"a", },
|
||||
{ "page_title": b"b", },
|
||||
{ "page_title": b"c", },
|
||||
{ "page_title": b"d", }, ) )
|
||||
def test_get_categorized_articles(self, execute_mock):
|
||||
"""
|
||||
Test method get_categorized_articles()
|
||||
"""
|
||||
self.assertFalse(execute_mock.called)
|
||||
|
||||
result = MissingNoticeBot.get_categorized_articles()
|
||||
|
||||
self.assertTrue(execute_mock.called)
|
||||
self.assertEqual(result, ["a", "b", "c", "d"] )
|
||||
|
||||
def test_treat_articles( self ):
|
||||
"""
|
||||
Test method treat_articles()
|
||||
"""
|
||||
|
||||
# articles with notice
|
||||
a = pywikibot.Page(pywikibot.Site(), "Deutschland" )
|
||||
b = pywikibot.Page(pywikibot.Site(), "Max_Schlee" )
|
||||
c = pywikibot.Page(pywikibot.Site(), "Hodeng-Hodenger#Test" )
|
||||
# articles without notice
|
||||
x = pywikibot.Page(pywikibot.Site(), "Quodvultdeus" )
|
||||
y = pywikibot.Page(pywikibot.Site(), "Zoo_Bremen" )
|
||||
z = pywikibot.Page(pywikibot.Site(), "Nulka#Test" )
|
||||
|
||||
cases = ( ( ( a, b, c ), list() ),
|
||||
( ( x, y, z ), [ "[[Quodvultdeus]]",
|
||||
"[[Zoo Bremen]]",
|
||||
"[[Nulka#Test]]" ]),
|
||||
( ( a, b, y, z ), [ "[[Zoo Bremen]]",
|
||||
"[[Nulka#Test]]" ]), )
|
||||
|
||||
for case in cases:
|
||||
res = self.MissingNoticeBot.treat_articles( case[0] )
|
||||
|
||||
self.assertEqual( res, case[1] )
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||