Compare commits
43 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0c135ef1bb | |||
| 8b8221cfcd | |||
| bdccc8417c | |||
| a70835c58a | |||
| ec2b84df2a | |||
| 88848cb084 | |||
| 5057aed0d3 | |||
| 02e53475f1 | |||
| d6f9b460c9 | |||
| ff03ca8f13 | |||
| 88692ca678 | |||
| d9b4fcc0bd | |||
| 22ff78ea98 | |||
| b3cfcdc259 | |||
| b3e0ace2f4 | |||
| f8002c85da | |||
| 49bc05d29b | |||
| 8a26b6d92a | |||
| 49a8230d76 | |||
| 31c10073a2 | |||
| 642a29b022 | |||
| 2f90751dc2 | |||
| 024be69fe1 | |||
| b6d7268a7f | |||
| 526184c1e1 | |||
| 3aa6c5fb1c | |||
| ec8f459db5 | |||
| 3b2cb95f36 | |||
| 41e5cc1a9d | |||
| 9b9d50c4d2 | |||
| a755288700 | |||
| 14ec71dd09 | |||
| e283eb78ac | |||
| cc02006fd2 | |||
| 37b0cbef08 | |||
| 4137d72468 | |||
| cd87d1c2bb | |||
| 456b2ba3d4 | |||
| 47b85a0b5e | |||
| a6fdc974bd | |||
| 30de2a2e12 | |||
| 4a6855cf7b | |||
| 8422d08cb6 |
56
README.md
Normal file
56
README.md
Normal file
@@ -0,0 +1,56 @@
|
||||
jogobot-red
|
||||
===========
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
|
||||
* pywikibot-core
|
||||
* mwparserfromhell
|
||||
|
||||
The libraries above need to be installed and configured manually considering [documentation of pywikibot-core](https://www.mediawiki.org/wiki/Manual:Pywikibot).
|
||||
|
||||
* SQLAlchemy
|
||||
* PyMySQL
|
||||
|
||||
Those can be installed using pip and the _requirements.txt_ file provided with this package
|
||||
|
||||
pip install -r requirements.txt
|
||||
|
||||
Versions
|
||||
--------
|
||||
|
||||
* test-v5
|
||||
- Feature _markpages_ working in full-automatic mode with _always_-flag
|
||||
|
||||
python red.py -task:markpages -family:wikipedia -always
|
||||
|
||||
* test-v4
|
||||
|
||||
- Feature _markpages_ working in semi-automatic mode using command
|
||||
|
||||
python red.py -task:markpages -family:wikipedia
|
||||
|
||||
- Work on specific redfam using param
|
||||
|
||||
-famhash:[sha1-famhash]
|
||||
|
||||
- Use _PyMySQL_ instead of _OurSQL_
|
||||
|
||||
- Correctly parse redfams with articles with leading lowercase character or spaces in wikilink
|
||||
|
||||
* test-v3
|
||||
|
||||
* test-v2
|
||||
|
||||
* test-v1
|
||||
|
||||
License
|
||||
-------
|
||||
GPLv3
|
||||
|
||||
Author Information
|
||||
------------------
|
||||
|
||||
Copyright 2017 Jonathan Golder jonathan@golderweb.de https://golderweb.de/
|
||||
|
||||
alias Wikipedia.org-User _Jogo.obb_ (https://de.wikipedia.org/Benutzer:Jogo.obb)
|
||||
@@ -26,6 +26,7 @@ Bot to mark pages which were/are subjects of redundance discussions
|
||||
with templates
|
||||
"""
|
||||
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
import pywikibot
|
||||
@@ -61,6 +62,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
# Init attribute
|
||||
self.__redfams = None # Will hold a generator with our redfams
|
||||
|
||||
if "famhash" in kwargs:
|
||||
self.famhash = kwargs["famhash"]
|
||||
|
||||
# We do not use predefined genFactory as there is no sensible case to
|
||||
# give a generator via cmd-line for this right now
|
||||
self.genFactory = pagegenerators.GeneratorFactory()
|
||||
@@ -69,7 +73,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
self.build_generator()
|
||||
|
||||
# Run super class init with built generator
|
||||
super( MarkPagesBot, self ).__init__(generator=self.gen)
|
||||
super( MarkPagesBot, self ).__init__(
|
||||
generator=self.gen,
|
||||
always=True if "always" in kwargs else False )
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
@@ -101,6 +107,13 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
end_after = datetime.strptime(
|
||||
jogobot.config["red.markpages"]["mark_done_after"],
|
||||
"%Y-%m-%d" )
|
||||
|
||||
if hasattr(self, "famhash"):
|
||||
self.__redfams = list(
|
||||
RedFamWorker.session.query(RedFamWorker).filter(
|
||||
RedFamWorker.famhash == self.famhash ) )
|
||||
|
||||
else:
|
||||
self.__redfams = list( RedFamWorker.gen_by_status_and_ending(
|
||||
"archived", end_after) )
|
||||
|
||||
@@ -114,8 +127,12 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
self.genFactory.gens.append( self.redfam_talkpages_generator() )
|
||||
|
||||
# Set generator to pass to super class
|
||||
self.gen = pagegenerators.PreloadingGenerator(
|
||||
self.genFactory.getCombinedGenerator() )
|
||||
# Since PreloadingGenerator mixes up the Pages, do not use it right now
|
||||
# (FS#148)
|
||||
# We can do so for automatic runs (FS#150)
|
||||
# self.gen = pagegenerators.PreloadingGenerator(
|
||||
# self.genFactory.getCombinedGenerator() )
|
||||
self.gen = self.genFactory.getCombinedGenerator()
|
||||
|
||||
def redfam_talkpages_generator( self ):
|
||||
"""
|
||||
@@ -131,7 +148,6 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
for talkpage in pagegenerators.PageWithTalkPageGenerator(
|
||||
redfam.article_generator(
|
||||
filter_existing=True,
|
||||
filter_redirects=True,
|
||||
exclude_article_status=["marked"] ),
|
||||
return_talk_only=True ):
|
||||
|
||||
@@ -172,25 +188,34 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
# None if change was not accepted by user
|
||||
save_ret = self.put_current( self.new_text, summary=summary )
|
||||
|
||||
# Normalize title with anchor (replace spaces in anchor)
|
||||
article = self.current_page.toggleTalkPage().title(
|
||||
asLink=True, textlink=True)
|
||||
article = article.strip("[]")
|
||||
article_parts = article.split("#", 1)
|
||||
if len(article_parts) == 2:
|
||||
article_parts[1] = article_parts[1].replace(" ", "_")
|
||||
article = "#".join(article_parts)
|
||||
|
||||
# Status
|
||||
if add_ret is None or ( add_ret and save_ret ):
|
||||
self.current_page.redfam.article_remove_status(
|
||||
"note_rej",
|
||||
title=self.current_page.title(withNamespace=False))
|
||||
title=article)
|
||||
self.current_page.redfam.article_remove_status(
|
||||
"sav_err",
|
||||
title=self.current_page.title(withNamespace=False))
|
||||
title=article)
|
||||
self.current_page.redfam.article_add_status(
|
||||
"marked",
|
||||
title=self.current_page.title(withNamespace=False))
|
||||
title=article)
|
||||
elif save_ret is None:
|
||||
self.current_page.redfam.article_add_status(
|
||||
"note_rej",
|
||||
title=self.current_page.title(withNamespace=False))
|
||||
title=article)
|
||||
else:
|
||||
self.current_page.redfam.article_add_status(
|
||||
"sav_err",
|
||||
title=self.current_page.title(withNamespace=False))
|
||||
title=article)
|
||||
|
||||
def add_disc_notice_template( self ):
|
||||
"""
|
||||
@@ -214,11 +239,36 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
# There is none on empty pages, so we need to check
|
||||
if leadsec:
|
||||
# Get the last template in leadsec
|
||||
ltemplates = leadsec.filter_templates()
|
||||
ltemplates = leadsec.filter_templates(recursive=False)
|
||||
|
||||
# If there is one, add notice after this
|
||||
if ltemplates:
|
||||
self.current_wikicode.insert_after( ltemplates[-1],
|
||||
|
||||
# Make sure not to separate template and maybe following comment
|
||||
insert_after_index = self.current_wikicode.index(
|
||||
ltemplates[-1] )
|
||||
|
||||
# If there is more content
|
||||
if len(self.current_wikicode.nodes) > (insert_after_index + 1):
|
||||
# Filter one linebreak
|
||||
if isinstance( self.current_wikicode.get(
|
||||
insert_after_index + 1),
|
||||
mwparser.nodes.text.Text) and \
|
||||
re.search( r"^\n[^\n\S]+$", self.current_wikicode.get(
|
||||
insert_after_index + 1 ).value ):
|
||||
|
||||
insert_after_index += 1
|
||||
|
||||
while len(self.current_wikicode.nodes) > \
|
||||
(insert_after_index + 1) and \
|
||||
isinstance(
|
||||
self.current_wikicode.get(insert_after_index + 1),
|
||||
mwparser.nodes.comment.Comment ):
|
||||
|
||||
insert_after_index += 1
|
||||
|
||||
self.current_wikicode.insert_after(
|
||||
self.current_wikicode.get(insert_after_index),
|
||||
self.disc_notice )
|
||||
|
||||
# To have it in its own line we need to add a linebreak before
|
||||
@@ -228,6 +278,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
else:
|
||||
self.current_wikicode.insert( 0, self.disc_notice )
|
||||
|
||||
# To have it in its own line we need to add a linebreak after it
|
||||
self.current_wikicode.insert_after(self.disc_notice, "\n" )
|
||||
|
||||
# If there is no leadsec (and therefore no template in it), we will add
|
||||
# before the first element
|
||||
else:
|
||||
@@ -243,6 +296,10 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat()
|
||||
"""
|
||||
Checks if disc notice which shall be added is already present.
|
||||
"""
|
||||
|
||||
if self.disc_notice in self.current_wikicode:
|
||||
return True
|
||||
|
||||
# Iterate over Templates with same name (if any) to search equal
|
||||
# Link to decide if they are the same
|
||||
for present_notice in self.current_wikicode.ifilter_templates(
|
||||
|
||||
@@ -46,12 +46,14 @@ import sqlalchemy.types as types
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
url = URL( "mysql+oursql",
|
||||
url = URL( "mysql+pymysql",
|
||||
username=config.db_username,
|
||||
password=config.db_password,
|
||||
host=config.db_hostname,
|
||||
port=config.db_port,
|
||||
database=config.db_username + jogobot.config['db_suffix'] )
|
||||
database=config.db_username + jogobot.config['db_suffix'],
|
||||
query={'charset': 'utf8'} )
|
||||
|
||||
engine = create_engine(url, echo=True)
|
||||
|
||||
|
||||
|
||||
@@ -282,7 +282,14 @@ class RedFamParser( RedFam ):
|
||||
|
||||
articlesList = []
|
||||
for link in heading.ifilter_wikilinks():
|
||||
article = str( link.title )
|
||||
article = str( link.title ).strip()
|
||||
|
||||
# Short circuit empty links
|
||||
if not article:
|
||||
continue
|
||||
|
||||
# Make sure first letter is uppercase
|
||||
article = article[0].upper() + article[1:]
|
||||
|
||||
# Split in title and anchor part
|
||||
article = article.split("#", 1)
|
||||
@@ -515,45 +522,66 @@ class RedFamWorker( RedFam ):
|
||||
@type filter_redirects bool/None
|
||||
|
||||
"""
|
||||
|
||||
# Helper to leave multidimensional loop
|
||||
# https://docs.python.org/3/faq/design.html#why-is-there-no-goto
|
||||
class Continue(Exception):
|
||||
pass
|
||||
|
||||
class Break(Exception):
|
||||
pass
|
||||
|
||||
# Iterate over articles in redfam
|
||||
for article in self.articlesList:
|
||||
|
||||
# To be able to control outer loop from inside child loops
|
||||
try:
|
||||
|
||||
# Not all list elements contain articles
|
||||
if not article:
|
||||
break
|
||||
raise Break()
|
||||
|
||||
page = pywikibot.Page(pywikibot.Link(article), pywikibot.Site())
|
||||
page = pywikibot.Page( pywikibot.Link(article),
|
||||
pywikibot.Site() )
|
||||
|
||||
# Filter existing pages if requested with filter_existing=False
|
||||
if page.exists():
|
||||
self.article_remove_status( "deleted", title=article )
|
||||
if filter_existing is False:
|
||||
continue
|
||||
# Filter non existing Pages if requested with filter_existing=True
|
||||
raise Continue()
|
||||
# Filter non existing Pages if requested with
|
||||
# filter_existing=True
|
||||
else:
|
||||
self.article_add_status( "deleted", title=article )
|
||||
if filter_existing:
|
||||
continue
|
||||
raise Continue()
|
||||
|
||||
# Filter redirects if requested with filter_redirects=True
|
||||
if page.isRedirectPage():
|
||||
self.article_add_status( "redirect", title=article )
|
||||
if filter_redirects:
|
||||
continue
|
||||
raise Continue()
|
||||
# Filter noredirects if requested with filter_redirects=False
|
||||
else:
|
||||
self.article_remove_status("redirect", title=article )
|
||||
if filter_redirects is False:
|
||||
continue
|
||||
raise Continue()
|
||||
|
||||
# Exclude by article status
|
||||
for status in exclude_article_status:
|
||||
if self.article_has_status( status, title=article ):
|
||||
continue
|
||||
raise Continue()
|
||||
|
||||
# Only include by article status
|
||||
for status in onlyinclude_article_status:
|
||||
if not self.article_has_status( status, title=article ):
|
||||
raise Continue()
|
||||
|
||||
# Proxy loop control to outer loop
|
||||
except Continue:
|
||||
continue
|
||||
except Break:
|
||||
break
|
||||
|
||||
# Yield filtered pages
|
||||
yield page
|
||||
@@ -590,22 +618,22 @@ class RedFamWorker( RedFam ):
|
||||
@rtype str
|
||||
"""
|
||||
|
||||
# We need to Replace Links with their linktext
|
||||
anchor_code = mwparser.parse( self.heading.strip() )
|
||||
for link in anchor_code.ifilter_wikilinks():
|
||||
if link.text:
|
||||
text = link.text
|
||||
else:
|
||||
text = link.title
|
||||
# Expand templates using pwb site object
|
||||
site = pywikibot.Site()
|
||||
anchor_code = site.expand_text(self.heading.strip())
|
||||
|
||||
anchor_code.replace( link, text )
|
||||
# Remove possibly embedded files
|
||||
anchor_code = re.sub( r"\[\[\w+:[^\|]+(?:\|.+){2,}\]\]", "",
|
||||
anchor_code )
|
||||
|
||||
# Whitespace is replaced with underscores
|
||||
anchor_code.replace( " ", "_" )
|
||||
# Replace non-breaking-space by correct urlencoded value
|
||||
anchor_code = anchor_code.replace( " ", ".C2.A0" )
|
||||
|
||||
# We try it with out any more parsing as mw will do while parsing page
|
||||
return ( self.redpage.pagetitle + "#" +
|
||||
str(anchor_code).strip() )
|
||||
# Use mwparser to strip and normalize
|
||||
anchor_code = mwparser.parse( anchor_code ).strip_code()
|
||||
|
||||
# We try it without any more parsing as mw will do while parsing page
|
||||
return ( self.redpage.pagetitle + "#" + anchor_code.strip() )
|
||||
|
||||
def generate_disc_notice_template( self ):
|
||||
"""
|
||||
@@ -678,6 +706,7 @@ class RedFamWorker( RedFam ):
|
||||
# RedFamWorker._status.like('archived'),
|
||||
# RedFamWorker._status.like("%{0:s}%".format(status)),
|
||||
text("status LIKE '%archived%'"),
|
||||
text("status NOT LIKE '%marked%'"),
|
||||
RedFamWorker.ending >= ending ):
|
||||
|
||||
yield redfam
|
||||
|
||||
23
red.py
23
red.py
@@ -60,7 +60,7 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ):
|
||||
@rtype tuple
|
||||
"""
|
||||
# kwargs are passed to selected bot as **kwargs
|
||||
kwargs = dict()
|
||||
kwargs = subtask_args
|
||||
|
||||
if not subtask or subtask == "discparser":
|
||||
# Default case: discparser
|
||||
@@ -83,6 +83,25 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ):
|
||||
return ( subtask, Bot, genFactory, kwargs )
|
||||
|
||||
|
||||
def parse_red_args( argkey, value ):
|
||||
"""
|
||||
Process additional args for red.py
|
||||
|
||||
@param argkey The arguments key
|
||||
@type argkey str
|
||||
@param value The arguments value
|
||||
@type value str
|
||||
|
||||
@return Tuple with (key, value) if given pair is relevant, else None
|
||||
@rtype tuple or None
|
||||
"""
|
||||
|
||||
if argkey.startswith("-famhash"):
|
||||
return ( "famhash", value )
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def main(*args):
|
||||
"""
|
||||
Process command line arguments and invoke bot.
|
||||
@@ -110,7 +129,7 @@ def main(*args):
|
||||
|
||||
# Parse local Args to get information about subtask
|
||||
( subtask, genFactory, subtask_args ) = jogobot.bot.parse_local_args(
|
||||
local_args )
|
||||
local_args, parse_red_args )
|
||||
|
||||
# select subtask and prepare args
|
||||
( subtask, Bot, genFactory, kwargs ) = prepare_bot(
|
||||
|
||||
23
requirements.txt
Normal file
23
requirements.txt
Normal file
@@ -0,0 +1,23 @@
|
||||
# This is a PIP 6+ requirements file for using jogobot-red
|
||||
#
|
||||
# All dependencies can be installed using:
|
||||
# $ sudo pip install -r requirements.txt
|
||||
#
|
||||
# It is good practice to install packages using the system
|
||||
# package manager if it has a packaged version. If you are
|
||||
# unsure, please use pip as described at the top of the file.
|
||||
#
|
||||
# To get a list of potential matches, use
|
||||
#
|
||||
# $ awk -F '[#>=]' '{print $1}' requirements.txt | xargs yum search
|
||||
# or
|
||||
# $ awk -F '[#>=]' '{print $1}' requirements.txt | xargs apt-cache search
|
||||
|
||||
# Needed for Database-Connection
|
||||
# SQLAlchemy Python ORM-Framework
|
||||
SQLAlchemy>=1.1
|
||||
# PyMySQL DB-Connector
|
||||
PyMySQL>=0.7
|
||||
|
||||
# Also needed, but not covered here, is a working copy of pywikibot-core
|
||||
# which also brings mwparserfromhell
|
||||
Reference in New Issue
Block a user