Compare commits
101 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 466b9da886 | |||
| b80f5bd2c9 | |||
| ff2421b63e | |||
| 236ba6a870 | |||
| 0df2017387 | |||
| b9faed8847 | |||
| 5cdccaeec6 | |||
| 54d8b8ea3b | |||
| dea5a393ad | |||
| f021a13202 | |||
| 4c8ba95534 | |||
| 9804db212f | |||
| 68b81b1111 | |||
| 389c48605e | |||
| 95af95aca6 | |||
| 99adad873e | |||
| dbcc2717d7 | |||
| e5a45fa692 | |||
| 63d3f837e9 | |||
| cfb3e8e37c | |||
| dfffe97200 | |||
| 246e94c228 | |||
| 181486c718 | |||
| 4f31b1a792 | |||
| 3fbfd4ccd7 | |||
| 50b0e142ec | |||
| 14db996a43 | |||
| 110589cb5b | |||
| 5c277495a3 | |||
| a466ab4e74 | |||
| 860a285ab0 | |||
| 2c105336b0 | |||
| ea85ca731f | |||
| 6e119ea98f | |||
| 67aaf3cbbe | |||
| fa13e2a5cf | |||
| 562e689418 | |||
| ae1ee7d6a5 | |||
| 93447d8dc6 | |||
| 1b6faf9e53 | |||
| b4c193eedc | |||
| 788a3df0cd | |||
| 04f591b466 | |||
| 9640467f69 | |||
| bfec2abf98 | |||
| 20103d589d | |||
| e18aa96a84 | |||
| 1dd4c7f87e | |||
| 33b2e47312 | |||
| 3bd17ce692 | |||
| 5f4640d5ff | |||
| 7e0456ae4f | |||
| 108b7aa331 | |||
| a3adf31b89 | |||
| 614f288bb9 | |||
| c450a045bf | |||
| 84802cf521 | |||
| 5f6c443ba8 | |||
| 0c135ef1bb | |||
| 8b8221cfcd | |||
| bdccc8417c | |||
| a70835c58a | |||
| ec2b84df2a | |||
| 88848cb084 | |||
| 5057aed0d3 | |||
| 02e53475f1 | |||
| d6f9b460c9 | |||
| ff03ca8f13 | |||
| 88692ca678 | |||
| d9b4fcc0bd | |||
| 22ff78ea98 | |||
| b3cfcdc259 | |||
| b3e0ace2f4 | |||
| f8002c85da | |||
| 49bc05d29b | |||
| 8a26b6d92a | |||
| 49a8230d76 | |||
| 31c10073a2 | |||
| 642a29b022 | |||
| 2f90751dc2 | |||
| 024be69fe1 | |||
| b6d7268a7f | |||
| 526184c1e1 | |||
| 3aa6c5fb1c | |||
| ec8f459db5 | |||
| 3b2cb95f36 | |||
| 41e5cc1a9d | |||
| 9b9d50c4d2 | |||
| a755288700 | |||
| 14ec71dd09 | |||
| e283eb78ac | |||
| cc02006fd2 | |||
| 37b0cbef08 | |||
| 4137d72468 | |||
| cd87d1c2bb | |||
| 456b2ba3d4 | |||
| 47b85a0b5e | |||
| a6fdc974bd | |||
| 30de2a2e12 | |||
| 4a6855cf7b | |||
| 8422d08cb6 |
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -1,3 +0,0 @@
|
||||
[submodule "jogobot"]
|
||||
path = jogobot
|
||||
url = ../jogobot
|
||||
84
README.md
Normal file
84
README.md
Normal file
@@ -0,0 +1,84 @@
|
||||
jogobot-red
|
||||
===========
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
|
||||
* pywikibot-core
|
||||
* mwparserfromhell
|
||||
|
||||
The libraries above need to be installed and configured manually, following the [documentation of pywikibot-core](https://www.mediawiki.org/wiki/Manual:Pywikibot).
|
||||
|
||||
* SQLAlchemy
|
||||
* PyMySQL
|
||||
* [jogobot-core module](https://git.golderweb.de/wiki/jogobot)
|
||||
|
||||
These can be installed using pip and the _requirements.txt_ file provided with this package:
|
||||
|
||||
pip install -r requirements.txt
|
||||
|
||||
Versions
|
||||
--------
|
||||
* v1.2
|
||||
- Create a list of redfams/articles missing reddisc notice
|
||||
|
||||
python red.py -task:missingnotice -family:wikipedia
|
||||
|
||||
- jogobot module no longer included
|
||||
|
||||
* v1.1.1
|
||||
- Check if moved page exists
|
||||
|
||||
* v1.1
|
||||
- Improved page filter
|
||||
|
||||
* v1.0
|
||||
- first stable release
|
||||
- less debug output
|
||||
- fixed problems with article title
|
||||
* test-v7
|
||||
- Fixed problem with url encoded chars in article title
|
||||
|
||||
* test-v6
|
||||
- jogobot status API enabled (Bot can be disabled onwiki)
|
||||
- Fixed problem with space between article title and anchor
|
||||
|
||||
* test-v5
|
||||
- Feature _markpages_ working in full-automatic mode with _always_-flag
|
||||
|
||||
python red.py -task:markpages -family:wikipedia -always
|
||||
|
||||
* test-v4
|
||||
|
||||
- Feature _markpages_ working in semi-automatic mode using command
|
||||
|
||||
python red.py -task:markpages -family:wikipedia
|
||||
|
||||
- Work on specific redfam using param
|
||||
|
||||
-famhash:[sha1-famhash]
|
||||
|
||||
- Use _PyMySQL_ instead of _OurSQL_
|
||||
|
||||
- Correctly parse redfams whose articles have a leading lowercase character or spaces in the wikilink
|
||||
|
||||
* test-v3
|
||||
|
||||
* test-v2
|
||||
|
||||
* test-v1
|
||||
|
||||
Bugs
|
||||
----
|
||||
[jogobot-red Issues](https://git.golderweb.de/wiki/jogobot-red/issues)
|
||||
|
||||
License
|
||||
-------
|
||||
GPLv3
|
||||
|
||||
Author Information
|
||||
------------------
|
||||
|
||||
Copyright 2018 Jonathan Golder jonathan@golderweb.de https://golderweb.de/
|
||||
|
||||
alias Wikipedia.org-User _Jogo.obb_ (https://de.wikipedia.org/Benutzer:Jogo.obb)
|
||||
@@ -3,7 +3,7 @@
|
||||
#
|
||||
# markpages.py
|
||||
#
|
||||
# Copyright 2016 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
# Copyright 2017 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@@ -26,6 +26,7 @@ Bot to mark pages which were/are subjects of redundance discussions
|
||||
with templates
|
||||
"""
|
||||
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
import pywikibot
|
||||
@@ -61,6 +62,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
# Init attribute
|
||||
self.__redfams = None # Will hold a generator with our redfams
|
||||
|
||||
if "famhash" in kwargs:
|
||||
self.famhash = kwargs["famhash"]
|
||||
|
||||
# We do not use predefined genFactory as there is no sensible case to
|
||||
# give a generator via cmd-line for this right now
|
||||
self.genFactory = pagegenerators.GeneratorFactory()
|
||||
@@ -69,7 +73,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
self.build_generator()
|
||||
|
||||
# Run super class init with built generator
|
||||
super( MarkPagesBot, self ).__init__(generator=self.gen)
|
||||
super( MarkPagesBot, self ).__init__(
|
||||
generator=self.gen,
|
||||
always=True if "always" in kwargs else False )
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
@@ -101,8 +107,15 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
end_after = datetime.strptime(
|
||||
jogobot.config["red.markpages"]["mark_done_after"],
|
||||
"%Y-%m-%d" )
|
||||
self.__redfams = list( RedFamWorker.gen_by_status_and_ending(
|
||||
"archived", end_after) )
|
||||
|
||||
if hasattr(self, "famhash"):
|
||||
self.__redfams = list(
|
||||
RedFamWorker.session.query(RedFamWorker).filter(
|
||||
RedFamWorker.famhash == self.famhash ) )
|
||||
|
||||
else:
|
||||
self.__redfams = list( RedFamWorker.gen_by_status_and_ending(
|
||||
"archived", end_after) )
|
||||
|
||||
return self.__redfams
|
||||
|
||||
@@ -114,8 +127,12 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
self.genFactory.gens.append( self.redfam_talkpages_generator() )
|
||||
|
||||
# Set generator to pass to super class
|
||||
self.gen = pagegenerators.PreloadingGenerator(
|
||||
self.genFactory.getCombinedGenerator() )
|
||||
# Since PreloadingGenerator mixes up the Pages, do not use it right now
|
||||
# (FS#148)
|
||||
# We can do so for automatic runs (FS#150)
|
||||
# self.gen = pagegenerators.PreloadingGenerator(
|
||||
# self.genFactory.getCombinedGenerator() )
|
||||
self.gen = self.genFactory.getCombinedGenerator()
|
||||
|
||||
def redfam_talkpages_generator( self ):
|
||||
"""
|
||||
@@ -128,15 +145,10 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
for redfam in self.redfams:
|
||||
|
||||
# We need the talkpage (and only this) of each existing page
|
||||
for talkpage in pagegenerators.PageWithTalkPageGenerator(
|
||||
redfam.article_generator(
|
||||
filter_existing=True,
|
||||
filter_redirects=True,
|
||||
exclude_article_status=["marked"] ),
|
||||
return_talk_only=True ):
|
||||
|
||||
# Add reference to redfam to talkpages
|
||||
talkpage.redfam = redfam
|
||||
for talkpage in redfam.article_generator(
|
||||
filter_existing=True,
|
||||
exclude_article_status=["marked"],
|
||||
talkpages=True ):
|
||||
|
||||
yield talkpage
|
||||
|
||||
@@ -172,25 +184,28 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
# None if change was not accepted by user
|
||||
save_ret = self.put_current( self.new_text, summary=summary )
|
||||
|
||||
# Get article as named in db
|
||||
article = self.current_page.redarticle
|
||||
|
||||
# Status
|
||||
if add_ret is None or ( add_ret and save_ret ):
|
||||
self.current_page.redfam.article_remove_status(
|
||||
"note_rej",
|
||||
title=self.current_page.title(withNamespace=False))
|
||||
title=article)
|
||||
self.current_page.redfam.article_remove_status(
|
||||
"sav_err",
|
||||
title=self.current_page.title(withNamespace=False))
|
||||
title=article)
|
||||
self.current_page.redfam.article_add_status(
|
||||
"marked",
|
||||
title=self.current_page.title(withNamespace=False))
|
||||
title=article)
|
||||
elif save_ret is None:
|
||||
self.current_page.redfam.article_add_status(
|
||||
"note_rej",
|
||||
title=self.current_page.title(withNamespace=False))
|
||||
title=article)
|
||||
else:
|
||||
self.current_page.redfam.article_add_status(
|
||||
"sav_err",
|
||||
title=self.current_page.title(withNamespace=False))
|
||||
title=article)
|
||||
|
||||
def add_disc_notice_template( self ):
|
||||
"""
|
||||
@@ -214,12 +229,37 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
# There is none on empty pages, so we need to check
|
||||
if leadsec:
|
||||
# Get the last template in leadsec
|
||||
ltemplates = leadsec.filter_templates()
|
||||
ltemplates = leadsec.filter_templates(recursive=False)
|
||||
|
||||
# If there is one, add notice after this
|
||||
if ltemplates:
|
||||
self.current_wikicode.insert_after( ltemplates[-1],
|
||||
self.disc_notice )
|
||||
|
||||
# Make sure not to separate the template from a possibly following comment
|
||||
insert_after_index = self.current_wikicode.index(
|
||||
ltemplates[-1] )
|
||||
|
||||
# If there is more content
|
||||
if len(self.current_wikicode.nodes) > (insert_after_index + 1):
|
||||
# Filter one linebreak
|
||||
if isinstance( self.current_wikicode.get(
|
||||
insert_after_index + 1),
|
||||
mwparser.nodes.text.Text) and \
|
||||
re.search( r"^\n[^\n\S]+$", self.current_wikicode.get(
|
||||
insert_after_index + 1 ).value ):
|
||||
|
||||
insert_after_index += 1
|
||||
|
||||
while len(self.current_wikicode.nodes) > \
|
||||
(insert_after_index + 1) and \
|
||||
isinstance(
|
||||
self.current_wikicode.get(insert_after_index + 1),
|
||||
mwparser.nodes.comment.Comment ):
|
||||
|
||||
insert_after_index += 1
|
||||
|
||||
self.current_wikicode.insert_after(
|
||||
self.current_wikicode.get(insert_after_index),
|
||||
self.disc_notice )
|
||||
|
||||
# To have it in its own line we need to add a linebreak before
|
||||
self.current_wikicode.insert_before(self.disc_notice, "\n" )
|
||||
@@ -228,13 +268,16 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
else:
|
||||
self.current_wikicode.insert( 0, self.disc_notice )
|
||||
|
||||
# To have it in its own line we need to add a linebreak after it
|
||||
self.current_wikicode.insert_after(self.disc_notice, "\n" )
|
||||
|
||||
# If there is no leadsec (and therefore no template in it), we will add
|
||||
# before the first element
|
||||
else:
|
||||
self.current_wikicode.insert( 0, self.disc_notice )
|
||||
|
||||
# To have it in its own line we need to add a linebreak after it
|
||||
self.current_wikicode.insert_after(self.disc_notice, "\n" )
|
||||
# To have it in its own line we need to add a linebreak after it
|
||||
self.current_wikicode.insert_after(self.disc_notice, "\n" )
|
||||
|
||||
# Notice was added
|
||||
return True
|
||||
@@ -243,6 +286,10 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
"""
|
||||
Checks if disc notice which shall be added is already present.
|
||||
"""
|
||||
|
||||
if self.disc_notice in self.current_wikicode:
|
||||
return True
|
||||
|
||||
# Iterate over Templates with same name (if any) to search equal
|
||||
# Link to decide if they are the same
|
||||
for present_notice in self.current_wikicode.ifilter_templates(
|
||||
|
||||
201
bots/missingnotice.py
Normal file
201
bots/missingnotice.py
Normal file
@@ -0,0 +1,201 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# missingnotice.py
|
||||
#
|
||||
# Copyright 2018 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.engine.url import URL
|
||||
|
||||
import pywikibot
|
||||
|
||||
import jogobot
|
||||
|
||||
from lib.redfam import RedFamWorker
|
||||
|
||||
|
||||
class MissingNoticeBot(pywikibot.bot.Bot):
    """
    Bot listing articles of open redfams which lack the reddisc notice

    For each open redfam whose discussion section still exists, the articles
    which are not contained in the result of cat_article_query are collected.
    The resulting rows are written to a wikipage.
    """

    # MySQL-query to get articles with notice
    # The backslash is doubled so that python keeps it literally (a single
    # "\_" is an invalid escape sequence); the query text itself is unchanged
    cat_article_query = """
SELECT `page_title`
FROM `categorylinks`
JOIN `category`
ON `cl_to` = `cat_title`
AND `cat_title` LIKE "{cat}\\_%%"
JOIN `page`
ON `cl_from` = `page_id`
""".format(cat=jogobot.config["red.missingnotice"]["article_category"])

    def __init__( self, genFactory, **kwargs ):
        """
        Prepares the bot

        @param genFactory Accepted for a uniform bot interface, not used here
        @param kwargs     Passed through to the super class constructor
        """

        self.categorized_articles = list()
        self.page_content = list()

        # Name the class explicitly: super(type(self), self) would recurse
        # endlessly as soon as this class gets subclassed
        super(MissingNoticeBot, self).__init__(**kwargs)

    def run( self ):
        """
        Collects the rows for all open redfams and updates the wikipage
        """
        # query articles containing notice
        # Stored as set since treat_articles() does one membership test per
        # article
        self.categorized_articles = set(
            type(self).get_categorized_articles() )

        # iterate open redfams
        for fam_counter, redfam in enumerate( RedFamWorker.gen_open(),
                                              start=1 ):
            links = self.treat_open_redfam(redfam)

            if links:
                self.page_content.append( self.format_row( links ) )

            if (fam_counter % 50) == 0:
                jogobot.output( "Processed {n:d} open RedFams".format(
                    n=fam_counter))

        # To write "absent" states to db
        RedFamWorker.flush_db_cache()

        # Update page content
        self.update_page()

    def treat_open_redfam( self, redfam ):
        """
        Works on current open redfam

        @param redfam Redfam to work on
        @type redfam.RedFamWorker

        @returns Tuple of disclink and list of articles missing notice or None
        @rtype ( str, list(str*) ) or None
        """

        # Check if related disc section exist
        if not redfam.disc_section_exists():
            return None

        # Get links for articles without notice
        links = self.treat_articles( redfam.article_generator(
            filter_existing=True, filter_redirects=True ) )

        # No articles without notice
        if not links:
            return None

        return ( redfam.get_disc_link(as_link=True), links )

    def treat_articles(self, articles):
        """
        Iterates over given articles and checks whether they are included in
        self.categorized_articles (contain the notice)

        @param articles Articles to check
        @type articles iterable of pywikibot.page() objects

        @returns Possibly empty list of wikitext links ("[[article]]")
        @rtype list
        """
        return [
            article.title(as_link=True, textlink=True)
            for article in articles
            if article.title(underscore=True, with_section=False )
            not in self.categorized_articles ]

    def format_row( self, links ):
        """
        Formats row for output on wikipage

        @param links Tuple of disc link and list of articles as returned by
                     self.treat_open_redfam()
        @type links  ( str, list(str*) )

        @returns Formatted row text to add to page_content
        @rtype str
        """

        return jogobot.config["red.missingnotice"]["row_format"].format(
            disc=links[0],
            links=jogobot.config["red.missingnotice"]["link_sep"].join(
                links[1] ) )

    def update_page( self, wikipage=None):
        """
        Handles the updating process of the wikipage

        @param wikipage Wikipage to put text on, otherwise use configured page
        @type wikipage str
        """

        # if not given get wikipage from config
        if not wikipage:
            wikipage = jogobot.config["red.missingnotice"]["wikipage"]

        # Create page object for wikipage
        page = pywikibot.Page(pywikibot.Site(), wikipage)

        # Define edit summary
        summary = jogobot.config["red.missingnotice"]["edit_summary"]

        # Make sure summary starts with "Bot:"
        if not summary.startswith("Bot:"):
            summary = "Bot: " + summary.strip()

        # Concatenate new text
        new_text = "\n".join(self.page_content)

        # Save new text
        self.userPut( page, page.text, new_text, summary=summary )

    @classmethod
    def get_categorized_articles( cls ):
        """
        Queries all articles containing the notice based on category set by
        notice template. Category can be configured in
        jogobot.config["red.missingnotice"]["article_category"]

        @returns List of all articles containing notice
        @rtype list
        """

        # construct connection url for sqlalchemy
        url = URL( "mysql+pymysql",
                   username=pywikibot.config.db_username,
                   password=pywikibot.config.db_password,
                   host=jogobot.config["red.missingnotice"]["wikidb_host"],
                   port=jogobot.config["red.missingnotice"]["wikidb_port"],
                   database=jogobot.config["red.missingnotice"]["wikidb_name"],
                   query={'charset': 'utf8'} )

        # create sqlalchemy engine
        engine = create_engine(url, echo=False)

        try:
            # fire the query to get articles with notice
            result = engine.execute(cls.cat_article_query)

            # return list with articles with notice
            return [ row['page_title'].decode("utf-8") for row in result ]
        finally:
            # The engine is only used for this single query, so release its
            # pooled connections instead of leaving them to the garbage
            # collector
            engine.dispose()
|
||||
@@ -3,7 +3,7 @@
|
||||
#
|
||||
# reddiscparser.py
|
||||
#
|
||||
# Copyright 2016 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
# Copyright 2017 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
1
jogobot
1
jogobot
Submodule jogobot deleted from 49ada2993e
@@ -3,7 +3,7 @@
|
||||
#
|
||||
# mysqlred.py
|
||||
#
|
||||
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
# Copyright 2017 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@@ -46,13 +46,16 @@ import sqlalchemy.types as types
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
url = URL( "mysql+oursql",
|
||||
url = URL( "mysql+pymysql",
|
||||
username=config.db_username,
|
||||
password=config.db_password,
|
||||
host=config.db_hostname,
|
||||
port=config.db_port,
|
||||
database=config.db_username + jogobot.config['db_suffix'] )
|
||||
engine = create_engine(url, echo=True)
|
||||
database=( config.db_username +
|
||||
jogobot.config['redundances']['db_suffix'] ),
|
||||
query={'charset': 'utf8'} )
|
||||
|
||||
engine = create_engine(url, echo=False)
|
||||
|
||||
|
||||
Session = sessionmaker(bind=engine)
|
||||
|
||||
217
lib/redfam.py
217
lib/redfam.py
@@ -3,7 +3,7 @@
|
||||
#
|
||||
# redfam.py
|
||||
#
|
||||
# Copyright 2017 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
# Copyright 2018 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@@ -28,6 +28,7 @@ Provides classes for working with RedFams
|
||||
import hashlib
|
||||
import locale
|
||||
import re
|
||||
import urllib.parse
|
||||
from datetime import datetime
|
||||
|
||||
import mwparserfromhell as mwparser # noqa
|
||||
@@ -282,7 +283,17 @@ class RedFamParser( RedFam ):
|
||||
|
||||
articlesList = []
|
||||
for link in heading.ifilter_wikilinks():
|
||||
article = str( link.title )
|
||||
article = str( link.title ).strip()
|
||||
|
||||
# Short circuit empty links
|
||||
if not article:
|
||||
continue
|
||||
|
||||
# Make sure first letter is uppercase
|
||||
article = article[0].upper() + article[1:]
|
||||
|
||||
# Unquote possible url encoded special chars
|
||||
article = urllib.parse.unquote( article )
|
||||
|
||||
# Split in title and anchor part
|
||||
article = article.split("#", 1)
|
||||
@@ -290,6 +301,10 @@ class RedFamParser( RedFam ):
|
||||
article[0] = article[0].replace("_", " ")
|
||||
|
||||
if len(article) > 1:
|
||||
# Strip both parts to prevent leading/trailing spaces
|
||||
article[0] = article[0].strip()
|
||||
article[1] = article[1].strip()
|
||||
|
||||
# other way round, replace spaces with underscores in anchors
|
||||
article[1] = article[1].replace(" ", "_")
|
||||
|
||||
@@ -351,6 +366,9 @@ class RedFamParser( RedFam ):
|
||||
- 3 and greater status was set by worker script, do not change it
|
||||
"""
|
||||
|
||||
# Since we have parsed it, the section can never be absent
|
||||
self.status.remove("absent")
|
||||
|
||||
# No ending, discussion is running:
|
||||
# Sometimes archived discussions also have no detectable ending
|
||||
if not self.ending and not self.redpage.archive:
|
||||
@@ -499,7 +517,8 @@ class RedFamWorker( RedFam ):
|
||||
def article_generator(self, # noqa
|
||||
filter_existing=None, filter_redirects=None,
|
||||
exclude_article_status=[],
|
||||
onlyinclude_article_status=[] ):
|
||||
onlyinclude_article_status=[],
|
||||
talkpages=None ):
|
||||
"""
|
||||
Yields pywikibot pageobjects for articles belonging to this redfams
|
||||
in a generator
|
||||
@@ -513,47 +532,98 @@ class RedFamWorker( RedFam ):
|
||||
set to False to get only redirectpages,
|
||||
unset/None results in not filtering
|
||||
@type filter_redirects bool/None
|
||||
@param talkpages Set to True to get Talkpages instead of article page
|
||||
@type talkpages bool/None
|
||||
|
||||
"""
|
||||
|
||||
# Helper to leave multidimensional loop
|
||||
# https://docs.python.org/3/faq/design.html#why-is-there-no-goto
|
||||
class Continue(Exception):
|
||||
pass
|
||||
|
||||
class Break(Exception):
|
||||
pass
|
||||
|
||||
# Iterate over articles in redfam
|
||||
for article in self.articlesList:
|
||||
# Not all list elements contain articles
|
||||
if not article:
|
||||
|
||||
# To be able to control outer loop from inside child loops
|
||||
try:
|
||||
|
||||
# Not all list elements contain articles
|
||||
if not article:
|
||||
raise Break()
|
||||
|
||||
page = pywikibot.Page( pywikibot.Link(article),
|
||||
pywikibot.Site() )
|
||||
|
||||
# Filter existing pages if requested with filter_existing=False
|
||||
if page.exists():
|
||||
self.article_remove_status( "deleted", title=article )
|
||||
if filter_existing is False:
|
||||
raise Continue()
|
||||
# Filter non existing Pages if requested with
|
||||
# filter_existing=True
|
||||
else:
|
||||
self.article_add_status( "deleted", title=article )
|
||||
if filter_existing:
|
||||
raise Continue()
|
||||
|
||||
# Filter redirects if requested with filter_redirects=True
|
||||
if page.isRedirectPage():
|
||||
self.article_add_status( "redirect", title=article )
|
||||
if filter_redirects:
|
||||
raise Continue()
|
||||
# Filter noredirects if requested with filter_redirects=False
|
||||
else:
|
||||
self.article_remove_status("redirect", title=article )
|
||||
if filter_redirects is False:
|
||||
raise Continue()
|
||||
|
||||
# Exclude by article status
|
||||
for status in exclude_article_status:
|
||||
if self.article_has_status( status, title=article ):
|
||||
raise Continue()
|
||||
|
||||
# Only include by article status
|
||||
for status in onlyinclude_article_status:
|
||||
if not self.article_has_status( status, title=article ):
|
||||
raise Continue()
|
||||
|
||||
# Proxy loop control to outer loop
|
||||
except Continue:
|
||||
continue
|
||||
except Break:
|
||||
break
|
||||
|
||||
page = pywikibot.Page(pywikibot.Link(article), pywikibot.Site())
|
||||
# Follow moved pages
|
||||
if self.article_has_status( "redirect", title=article ):
|
||||
try:
|
||||
page = page.moved_target()
|
||||
|
||||
# Filter existing pages if requested with filter_existing=False
|
||||
if page.exists():
|
||||
self.article_remove_status( "deleted", title=article )
|
||||
if filter_existing is False:
|
||||
continue
|
||||
# Filter non existing Pages if requested with filter_existing=True
|
||||
else:
|
||||
self.article_add_status( "deleted", title=article )
|
||||
if filter_existing:
|
||||
continue
|
||||
# Short circuit if movement destination does not exist
|
||||
if not page.exists():
|
||||
continue
|
||||
|
||||
# Filter redirects if requested with filter_redirects=True
|
||||
if page.isRedirectPage():
|
||||
self.article_add_status( "redirect", title=article )
|
||||
if filter_redirects:
|
||||
continue
|
||||
# Filter noredirects if requested with filter_redirects=False
|
||||
else:
|
||||
self.article_remove_status("redirect", title=article )
|
||||
if filter_redirects is False:
|
||||
continue
|
||||
except pywikibot.exceptions.NoMoveTarget:
|
||||
pass
|
||||
|
||||
# Exclude by article status
|
||||
for status in exclude_article_status:
|
||||
if self.article_has_status( status, title=article ):
|
||||
continue
|
||||
# Exclude Users & User Talkpage
|
||||
if page.namespace() == 2 or page.namespace() == 3:
|
||||
self.article_add_status( "user", title=article )
|
||||
continue
|
||||
|
||||
# Only include by article status
|
||||
for status in onlyinclude_article_status:
|
||||
if not self.article_has_status( status, title=article ):
|
||||
continue
|
||||
# Toggle talkpage
|
||||
if talkpages and not page.isTalkPage() or\
|
||||
not talkpages and page.isTalkPage():
|
||||
page = page.toggleTalkPage()
|
||||
|
||||
# Add reference to redfam to pages
|
||||
page.redfam = self
|
||||
|
||||
# Keep article title from db with page object
|
||||
page.redarticle = article
|
||||
|
||||
# Yield filtered pages
|
||||
yield page
|
||||
@@ -582,30 +652,68 @@ class RedFamWorker( RedFam ):
|
||||
self.status.remove("note_rej")
|
||||
self.status.add( "marked" )
|
||||
|
||||
def get_disc_link( self ):
|
||||
def get_disc_link( self, as_link=False ):
|
||||
"""
|
||||
Constructs and returns the link to Redundancy discussion
|
||||
|
||||
@param as_link If true, wrap link in double square brackets (wikilink)
|
||||
@type as_link bool
|
||||
|
||||
@returns Link to discussion
|
||||
@rtype str
|
||||
"""
|
||||
|
||||
# We need to Replace Links with their linktext
|
||||
anchor_code = mwparser.parse( self.heading.strip() )
|
||||
for link in anchor_code.ifilter_wikilinks():
|
||||
if link.text:
|
||||
text = link.text
|
||||
else:
|
||||
text = link.title
|
||||
# Expand templates using pwb site object
|
||||
site = pywikibot.Site()
|
||||
anchor_code = site.expand_text(self.heading.strip())
|
||||
|
||||
anchor_code.replace( link, text )
|
||||
# Remove possibly embedded files
|
||||
anchor_code = re.sub( r"\[\[\w+:[^\|]+(?:\|.+){2,}\]\]", "",
|
||||
anchor_code )
|
||||
|
||||
# Whitespace is replaced with underscores
|
||||
anchor_code.replace( " ", "_" )
|
||||
# Replace non-breaking-space by correct urlencoded value
|
||||
anchor_code = anchor_code.replace( " ", ".C2.A0" )
|
||||
|
||||
# We try it with out any more parsing as mw will do while parsing page
|
||||
return ( self.redpage.pagetitle + "#" +
|
||||
str(anchor_code).strip() )
|
||||
# Use mwparser to strip and normalize
|
||||
anchor_code = mwparser.parse( anchor_code ).strip_code()
|
||||
|
||||
# We try it without any more parsing as mw will do while parsing page
|
||||
link = self.redpage.pagetitle + "#" + anchor_code.strip()
|
||||
|
||||
if as_link:
|
||||
return "[[{0}]]".format(link)
|
||||
else:
|
||||
return link
|
||||
|
||||
def disc_section_exists( self ):
    """
    Tells whether the section of this redundance discussion still exists.

    A section can be absent when its heading was changed, since that
    results in a different famhash and therefore in a new redfam.
    As a side effect, for a missing section status "open" is removed and
    status "absent" is set.

    @returns True if the section exists, otherwise False
    @rtype bool
    """
    # Load the page the discussion link points to and parse its content
    site = pywikibot.Site()
    redpage = pywikibot.Page( site, self.get_disc_link() )
    parsed = mwparser.parse( redpage.get() )

    # Stripped titles of all headings accepted as redfam sections
    headings = parsed.filter_headings(
        matches=RedFamParser.is_section_redfam_cb )
    known_titles = [ heading.title.strip() for heading in headings ]

    # The section exists
    if self.heading in known_titles:
        return True

    # Our heading is gone: mark the redfam as absent
    self.status.remove("open")
    self.status.add("absent")
    return False
|
||||
|
||||
def generate_disc_notice_template( self ):
|
||||
"""
|
||||
@@ -678,10 +786,23 @@ class RedFamWorker( RedFam ):
|
||||
# RedFamWorker._status.like('archived'),
|
||||
# RedFamWorker._status.like("%{0:s}%".format(status)),
|
||||
text("status LIKE '%archived%'"),
|
||||
text("status NOT LIKE '%marked%'"),
|
||||
RedFamWorker.ending >= ending ):
|
||||
|
||||
yield redfam
|
||||
|
||||
@classmethod
def gen_open( cls ):
    """
    Yield all redfams stored in db whose status contains "open"

    Unlike gen_by_status_and_ending() neither a status nor an ending is
    taken as parameter; the only filter is the status text match below.

    @returns Generator of matching redfams
    @rtype generator of RedFamWorker
    """
    # NOT WORKING WITH OBJECT NOTATION
    yield from RedFamWorker.session.query(RedFamWorker).filter(
        text("status LIKE '%open%'") )
|
||||
|
||||
|
||||
class RedFamError( Exception ):
|
||||
"""
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#
|
||||
# redpage.py
|
||||
#
|
||||
# Copyright 2015 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
# Copyright 2017 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
33
red.py
33
red.py
@@ -3,7 +3,7 @@
|
||||
#
|
||||
# reddiscparser.py
|
||||
#
|
||||
# Copyright 2016 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
|
||||
# Copyright 2017 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
@@ -60,7 +60,7 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ):
|
||||
@rtype tuple
|
||||
"""
|
||||
# kwargs are passed to selected bot as **kwargs
|
||||
kwargs = dict()
|
||||
kwargs = subtask_args
|
||||
|
||||
if not subtask or subtask == "discparser":
|
||||
# Default case: discparser
|
||||
@@ -73,6 +73,10 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ):
|
||||
# Import related bot
|
||||
from bots.markpages import MarkPagesBot as Bot
|
||||
|
||||
elif subtask == "missingnotice":
|
||||
# Import related bot
|
||||
from bots.missingnotice import MissingNoticeBot as Bot
|
||||
|
||||
# Subtask error
|
||||
else:
|
||||
jogobot.output( (
|
||||
@@ -83,6 +87,25 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ):
|
||||
return ( subtask, Bot, genFactory, kwargs )
|
||||
|
||||
|
||||
def parse_red_args( argkey, value ):
    """
    Handle the additional command line arguments specific to red.py

    @param argkey  Key of the argument to inspect
    @type argkey   str
    @param value   Value belonging to that key
    @type value    str

    @return        ( "famhash", value ) if argkey starts with "-famhash",
                   None for every other key
    @rtype         tuple or None
    """

    # Only the famhash option is recognised here
    is_famhash = argkey.startswith("-famhash")

    return ( "famhash", value ) if is_famhash else None
|
||||
|
||||
|
||||
def main(*args):
|
||||
"""
|
||||
Process command line arguments and invoke bot.
|
||||
@@ -105,12 +128,12 @@ def main(*args):
|
||||
|
||||
# Disabled until [FS#86] is done
|
||||
# Before run, we need to check whether we are currently active or not
|
||||
# if not jogobot.bot.active( task_slug ):
|
||||
# return
|
||||
if not jogobot.bot.active( task_slug ):
|
||||
return
|
||||
|
||||
# Parse local Args to get information about subtask
|
||||
( subtask, genFactory, subtask_args ) = jogobot.bot.parse_local_args(
|
||||
local_args )
|
||||
local_args, parse_red_args )
|
||||
|
||||
# select subtask and prepare args
|
||||
( subtask, Bot, genFactory, kwargs ) = prepare_bot(
|
||||
|
||||
26
requirements.txt
Normal file
26
requirements.txt
Normal file
@@ -0,0 +1,26 @@
|
||||
# This is a PIP 6+ requirements file for using jogobot-red
|
||||
#
|
||||
# All dependencies can be installed using:
|
||||
# $ sudo pip install -r requirements.txt
|
||||
#
|
||||
# It is good practice to install packages using the system
|
||||
# package manager if it has a packaged version. If you are
|
||||
# unsure, please use pip as described at the top of the file.
|
||||
#
|
||||
# To get a list of potential matches, use
|
||||
#
|
||||
# $ awk -F '[#>=]' '{print $1}' requirements.txt | xargs yum search
|
||||
# or
|
||||
# $ awk -F '[#>=]' '{print $1}' requirements.txt | xargs apt-cache search
|
||||
|
||||
# Needed for Database-Connection
|
||||
# SQLAlchemy Python ORM-Framework
|
||||
SQLAlchemy>=1.1
|
||||
# PyMySQL DB-Connector
|
||||
PyMySQL>=0.7
|
||||
|
||||
# Also needed, but not covered here, is a working copy of pywikibot-core
|
||||
# which also brings mwparserfromhell
|
||||
|
||||
# jogobot
|
||||
git+https://git.golderweb.de/wiki/jogobot.git#egg=jogobot
|
||||
28
tests/context.py
Normal file
28
tests/context.py
Normal file
@@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# context.py
|
||||
#
|
||||
# Copyright 2018 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(
|
||||
0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
94
tests/missingnotice_tests.py
Normal file
94
tests/missingnotice_tests.py
Normal file
@@ -0,0 +1,94 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# missingnotice_tests.py
|
||||
#
|
||||
# Copyright 2018 Jonathan Golder <jonathan@golderweb.de>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
# MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
|
||||
"""
|
||||
Test module bots/missingnotice.py
|
||||
"""
|
||||
|
||||
import unittest
|
||||
from unittest import mock # noqa
|
||||
|
||||
import pywikibot
|
||||
|
||||
import context # noqa
|
||||
from bots.missingnotice import MissingNoticeBot # noqa
|
||||
|
||||
|
||||
class TestMissingNoticeBot(unittest.TestCase):
    """
    Test cases covering class MissingNoticeBot
    """

    def setUp(self):
        # Fresh bot for every test, preloaded with the titles the tests
        # below treat as "articles with notice"
        self.MissingNoticeBot = MissingNoticeBot(
            pywikibot.pagegenerators.GeneratorFactory() )
        self.MissingNoticeBot.categorized_articles = [
            "Deutschland", "Max_Schlee", "Hodeng-Hodenger" ]

    @mock.patch(
        'sqlalchemy.engine.Engine.execute',
        return_value=tuple(
            { "page_title": title }
            for title in ( b"a", b"b", b"c", b"d" ) ) )
    def test_get_categorized_articles(self, execute_mock):
        """
        Check get_categorized_articles() against a mocked db result

        The patched Engine.execute returns four rows with bytes titles;
        the method is expected to hand them back as a list of str.
        """
        # The db must not have been touched before the call ...
        self.assertFalse(execute_mock.called)

        titles = MissingNoticeBot.get_categorized_articles()

        # ... and must have been queried afterwards
        self.assertTrue(execute_mock.called)
        self.assertEqual(titles, ["a", "b", "c", "d"])

    def test_treat_articles( self ):
        """
        Check treat_articles() with noticed, unnoticed and mixed articles
        """

        def page( title ):
            # Build a page object on the default site
            return pywikibot.Page( pywikibot.Site(), title )

        # articles with notice
        noticed_a = page( "Deutschland" )
        noticed_b = page( "Max_Schlee" )
        noticed_c = page( "Hodeng-Hodenger#Test" )
        # articles without notice
        plain_x = page( "Quodvultdeus" )
        plain_y = page( "Zoo_Bremen" )
        plain_z = page( "Nulka#Test" )

        # Pairs of ( input articles, expected list of wikilinks )
        cases = (
            ( ( noticed_a, noticed_b, noticed_c ), [] ),
            ( ( plain_x, plain_y, plain_z ),
              [ "[[Quodvultdeus]]", "[[Zoo Bremen]]", "[[Nulka#Test]]" ] ),
            ( ( noticed_a, noticed_b, plain_y, plain_z ),
              [ "[[Zoo Bremen]]", "[[Nulka#Test]]" ] ),
        )

        for articles, expected in cases:
            self.assertEqual(
                self.MissingNoticeBot.treat_articles( articles ), expected )
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user