Browse Source

Merge branch 'release-1.2'

Jonathan Golder 8 months ago
parent
commit
466b9da886
9 changed files with 398 additions and 7 deletions
  1. 0
    3
      .gitmodules
  2. 13
    1
      README.md
  3. 201
    0
      bots/missingnotice.py
  4. 0
    1
      jogobot
  5. 55
    2
      lib/redfam.py
  6. 4
    0
      red.py
  7. 3
    0
      requirements.txt
  8. 28
    0
      tests/context.py
  9. 94
    0
      tests/missingnotice_tests.py

+ 0
- 3
.gitmodules View File

@@ -1,3 +0,0 @@
1
-[submodule "jogobot"]
2
-	path = jogobot
3
-	url = ../jogobot

+ 13
- 1
README.md View File

@@ -11,6 +11,7 @@ The libraries above need to be installed and configured manualy considering [doc
11 11
 
12 12
 * SQLAlchemy
13 13
 * PyMySQL
14
+* [jogobot-core module](https://git.golderweb.de/wiki/jogobot)
14 15
 
15 16
 Those can be installed using pip and the _requirements.txt_ file provided with this packet
16 17
 
@@ -18,6 +19,13 @@ Those can be installed using pip and the _requirements.txt_ file provided with t
18 19
 
19 20
 Versions
20 21
 --------
22
+* v1.2
23
+  - Create a list of redfams/articles missing reddisc notice
24
+
25
+        python red.py -task:missingnotice -family:wikipedia
26
+
27
+  - jogobot module no longer included
28
+
21 29
 * v1.1.1
22 30
   - Check if moved page exists
23 31
 
@@ -60,6 +68,10 @@ Versions
60 68
 
61 69
 * test-v1
62 70
 
71
+Bugs
72
+----
73
+[jogobot-red Issues](https://git.golderweb.de/wiki/jogobot-red/issues)
74
+
63 75
 License
64 76
 -------
65 77
 GPLv3
@@ -67,6 +79,6 @@ GPLv3
67 79
 Author Information
68 80
 ------------------
69 81
 
70
-Copyright 2017 Jonathan Golder jonathan@golderweb.de https://golderweb.de/
82
+Copyright 2018 Jonathan Golder jonathan@golderweb.de https://golderweb.de/
71 83
 
72 84
 alias Wikipedia.org-User _Jogo.obb_ (https://de.wikipedia.org/Benutzer:Jogo.obb)

+ 201
- 0
bots/missingnotice.py View File

@@ -0,0 +1,201 @@
1
+#!/usr/bin/env python
2
+# -*- coding: utf-8 -*-
3
+#
4
+#  missingnotice.py
5
+#
6
+#  Copyright 2018 Jonathan Golder <jonathan@golderweb.de>
7
+#
8
+#  This program is free software; you can redistribute it and/or modify
9
+#  it under the terms of the GNU General Public License as published by
10
+#  the Free Software Foundation; either version 2 of the License, or
11
+#  (at your option) any later version.
12
+#
13
+#  This program is distributed in the hope that it will be useful,
14
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
+#  GNU General Public License for more details.
17
+#
18
+#  You should have received a copy of the GNU General Public License
19
+#  along with this program; if not, write to the Free Software
20
+#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21
+#  MA 02110-1301, USA.
22
+#
23
+#
24
+
25
+from sqlalchemy import create_engine
26
+from sqlalchemy.engine.url import URL
27
+
28
+import pywikibot
29
+
30
+import jogobot
31
+
32
+from lib.redfam import RedFamWorker
33
+
34
+
35
+class MissingNoticeBot(pywikibot.bot.Bot):
36
+    """
37
+    """
38
+
39
+    # MySQL-query to get articles with notice
40
+    cat_article_query = """
41
+SELECT `page_title`
42
+FROM `categorylinks`
43
+JOIN `category`
44
+ON `cl_to` = `cat_title`
45
+AND `cat_title` LIKE "{cat}\_%%"
46
+JOIN `page`
47
+ON `cl_from` = `page_id`
48
+""".format(cat=jogobot.config["red.missingnotice"]["article_category"])
49
+
50
+    def __init__( self, genFactory, **kwargs ):
51
+
52
+        self.categorized_articles = list()
53
+        self.page_content = list()
54
+
55
+        super(type(self), self).__init__(**kwargs)
56
+
57
+    def run( self ):
58
+        # query articles containing notice
59
+        self.categorized_articles = type(self).get_categorized_articles()
60
+
61
+        fam_counter = 0
62
+
63
+        # iterate open redfams
64
+        for redfam in RedFamWorker.gen_open():
65
+            fam_counter += 1
66
+            links = self.treat_open_redfam(redfam)
67
+
68
+            if links:
69
+                self.page_content.append( self.format_row( links ) )
70
+
71
+            if (fam_counter % 50) == 0:
72
+                jogobot.output( "Processed {n:d} open RedFams".format(
73
+                    n=fam_counter))
74
+
75
+        else:
76
+            # To write "absent" states to db
77
+            RedFamWorker.flush_db_cache()
78
+
79
+        # Update page content
80
+        self.update_page()
81
+
82
+    def treat_open_redfam( self, redfam ):
83
+        """
84
+        Works on current open redfam
85
+
86
+        @param redfam Redfam to work on
87
+        @type redfam.RedFamWorker
88
+
89
+        @returns Tuple of disclink and list of articles missing notice or None
90
+        @rtype ( str, list(str*) ) or None
91
+        """
92
+
93
+        # Check if related disc section exist
94
+        if not redfam.disc_section_exists():
95
+            return None
96
+
97
+        # Get links for articles without notice
98
+        links = self.treat_articles( redfam.article_generator(
99
+            filter_existing=True, filter_redirects=True ) )
100
+
101
+        # No articles without notice
102
+        if not links:
103
+            return None
104
+
105
+        return ( redfam.get_disc_link(as_link=True), links )
106
+
107
+    def treat_articles(self, articles):
108
+        """
109
+        Iterates over given articles and checks whether they are included in
110
+        self.categorized_articles (contain the notice)
111
+
112
+        @param articles Articles to check
113
+        @type articles iterable of pywikibot.page() objects
114
+
115
+        @returns Possibly empty list of wikitext links ("[[article]]")
116
+        @rtype list
117
+        """
118
+        links = list()
119
+
120
+        for article in articles:
121
+
122
+            if article.title(underscore=True, with_section=False ) not in \
123
+                    self.categorized_articles:
124
+
125
+                links.append( article.title(as_link=True, textlink=True) )
126
+
127
+        return links
128
+
129
+    def format_row( self, links ):
130
+        """
131
+        Formats row for output on wikipage
132
+
133
+        @param links Tuple of disc link and list of articles as returned by
134
+                     self.treat_open_redfam()
135
+        @type links ( str, list(str*) )
136
+
137
+        @returns Formatted row text to add to page_content
138
+        @rtype str
139
+        """
140
+
141
+        return jogobot.config["red.missingnotice"]["row_format"].format(
142
+            disc=links[0],
143
+            links=jogobot.config["red.missingnotice"]["link_sep"].join(
144
+                links[1] ) )
145
+
146
+    def update_page( self, wikipage=None):
147
+        """
148
+        Handles the updating process of the wikipage
149
+
150
+        @param wikipage Wikipage to put text on, otherwise use configured page
151
+        @type wikipage str
152
+        """
153
+
154
+        # if not given get wikipage from config
155
+        if not wikipage:
156
+            wikipage = jogobot.config["red.missingnotice"]["wikipage"]
157
+
158
+        # Create page object for wikipage
159
+        page = pywikibot.Page(pywikibot.Site(), wikipage)
160
+
161
+        # Define edit summary
162
+        summary = jogobot.config["red.missingnotice"]["edit_summary"]
163
+
164
+        # Make sure summary starts with "Bot:"
165
+        if not summary[:len("Bot:")] == "Bot:":
166
+            summary = "Bot: " + summary.strip()
167
+
168
+        # Concatenate new text
169
+        new_text = "\n".join(self.page_content)
170
+
171
+        # Save new text
172
+        self.userPut( page, page.text, new_text, summary=summary )
173
+
174
+    @classmethod
175
+    def get_categorized_articles( cls ):
176
+        """
177
+        Queries all articles containing the notice based on category set by
178
+        notice template. Category can be configured in
179
+        jogobot.config["red.missingnotice"]["article_category"]
180
+
181
+        @returns List of all articles containing notice
182
+        @rtype list
183
+        """
184
+
185
+        # construct connection url for sqlalchemy
186
+        url = URL( "mysql+pymysql",
187
+                   username=pywikibot.config.db_username,
188
+                   password=pywikibot.config.db_password,
189
+                   host=jogobot.config["red.missingnotice"]["wikidb_host"],
190
+                   port=jogobot.config["red.missingnotice"]["wikidb_port"],
191
+                   database=jogobot.config["red.missingnotice"]["wikidb_name"],
192
+                   query={'charset': 'utf8'} )
193
+
194
+        # create sqlalchemy engine
195
+        engine = create_engine(url, echo=False)
196
+
197
+        # fire the query to get articles with notice
198
+        result = engine.execute(cls.cat_article_query)
199
+
200
+        # return list with articles with notice
201
+        return [ row['page_title'].decode("utf-8") for row in result ]

+ 0
- 1
jogobot

@@ -1 +0,0 @@
1
-Subproject commit d69d873624abb70a25a0aef711a635cfc88aa7e9

+ 55
- 2
lib/redfam.py View File

@@ -366,6 +366,9 @@ class RedFamParser( RedFam ):
366 366
         - 3 and greater status was set by worker script, do not change it
367 367
         """
368 368
 
369
+        # Since we have parsed it, the section can never be absent
370
+        self.status.remove("absent")
371
+
369 372
         # No ending, discussion is running:
370 373
         # Sometimes archived discussions also have no detectable ending
371 374
         if not self.ending and not self.redpage.archive:
@@ -649,10 +652,13 @@ class RedFamWorker( RedFam ):
649 652
         self.status.remove("note_rej")
650 653
         self.status.add( "marked" )
651 654
 
652
-    def get_disc_link( self ):
655
+    def get_disc_link( self, as_link=False ):
653 656
         """
654 657
         Constructs and returns the link to Redundancy discussion
655 658
 
659
+        @param as_link If true, wrap link in double square brackets (wikilink)
660
+        @type as_link bool
661
+
656 662
         @returns  Link to discussion
657 663
         @rtype  str
658 664
         """
@@ -672,7 +678,42 @@ class RedFamWorker( RedFam ):
672 678
         anchor_code = mwparser.parse( anchor_code ).strip_code()
673 679
 
674 680
         # We try it without any more parsing as mw will do while parsing page
675
-        return ( self.redpage.pagetitle + "#" + anchor_code.strip() )
681
+        link = self.redpage.pagetitle + "#" + anchor_code.strip()
682
+
683
+        if as_link:
684
+            return "[[{0}]]".format(link)
685
+        else:
686
+            return link
687
+
688
+    def disc_section_exists( self ):
689
+        """
690
+        Checks whether the redundancy discussion still exists. Sometimes
691
+        it is absent, since heading was changed and therefore we get a
692
+        different famhash ergo new redfam.
693
+        As a side effect, the method sets status "absent" for missing sections.
694
+
695
+        @returns True if it exists otherwise False
696
+        @rtype bool
697
+        """
698
+        # The redpage
699
+        discpage = pywikibot.Page(pywikibot.Site(), self.get_disc_link() )
700
+
701
+        # Parse redpage content
702
+        wikicode = mwparser.parse( discpage.get() )
703
+
704
+        # List fams
705
+        fams = wikicode.filter_headings(
706
+            matches=RedFamParser.is_section_redfam_cb )
707
+
708
+        # Check if current fam is in list of fams
709
+        # If not, set status absent and return False
710
+        if self.heading not in [ fam.title.strip() for fam in fams]:
711
+            self.status.remove("open")
712
+            self.status.add("absent")
713
+            return False
714
+
715
+        # The section exists
716
+        return True
676 717
 
677 718
     def generate_disc_notice_template( self ):
678 719
         """
@@ -750,6 +791,18 @@ class RedFamWorker( RedFam ):
750 791
 
751 792
             yield redfam
752 793
 
794
+    @classmethod
795
+    def gen_open( cls ):
796
+        """
797
+        Yield red_fams stored in db by given status which have an ending after
798
+        given one
799
+        """
800
+        for redfam in RedFamWorker.session.query(RedFamWorker).filter(
801
+                # NOT WORKING WITH OBJECT NOTATION
802
+                text("status LIKE '%open%'") ):
803
+
804
+            yield redfam
805
+
753 806
 
754 807
 class RedFamError( Exception ):
755 808
     """

+ 4
- 0
red.py View File

@@ -73,6 +73,10 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ):
73 73
         # Import related bot
74 74
         from bots.markpages import MarkPagesBot as Bot
75 75
 
76
+    elif subtask == "missingnotice":
77
+        # Import related bot
78
+        from bots.missingnotice import MissingNoticeBot as Bot
79
+
76 80
     # Subtask error
77 81
     else:
78 82
         jogobot.output( (

+ 3
- 0
requirements.txt View File

@@ -21,3 +21,6 @@ PyMySQL>=0.7
21 21
 
22 22
 # Also needed, but not covered here, is a working copy of pywikibot-core
23 23
 # which also brings mwparserfromhell
24
+
25
+# jogobot
26
+git+https://git.golderweb.de/wiki/jogobot.git#egg=jogobot

+ 28
- 0
tests/context.py View File

@@ -0,0 +1,28 @@
1
+#!/usr/bin/env python
2
+# -*- coding: utf-8 -*-
3
+#
4
+#  context.py
5
+#
6
+#  Copyright 2018 Jonathan Golder <jonathan@golderweb.de>
7
+#
8
+#  This program is free software; you can redistribute it and/or modify
9
+#  it under the terms of the GNU General Public License as published by
10
+#  the Free Software Foundation; either version 2 of the License, or
11
+#  (at your option) any later version.
12
+#
13
+#  This program is distributed in the hope that it will be useful,
14
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
+#  GNU General Public License for more details.
17
+#
18
+#  You should have received a copy of the GNU General Public License
19
+#  along with this program; if not, write to the Free Software
20
+#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21
+#  MA 02110-1301, USA.
22
+#
23
+#
24
+
25
+import os
26
+import sys
27
+sys.path.insert(
28
+    0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

+ 94
- 0
tests/missingnotice_tests.py View File

@@ -0,0 +1,94 @@
1
+#!/usr/bin/env python
2
+# -*- coding: utf-8 -*-
3
+#
4
+#  missingnotice_tests.py
5
+#
6
+#  Copyright 2018 Jonathan Golder <jonathan@golderweb.de>
7
+#
8
+#  This program is free software; you can redistribute it and/or modify
9
+#  it under the terms of the GNU General Public License as published by
10
+#  the Free Software Foundation; either version 2 of the License, or
11
+#  (at your option) any later version.
12
+#
13
+#  This program is distributed in the hope that it will be useful,
14
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
+#  GNU General Public License for more details.
17
+#
18
+#  You should have received a copy of the GNU General Public License
19
+#  along with this program; if not, write to the Free Software
20
+#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21
+#  MA 02110-1301, USA.
22
+#
23
+#
24
+
25
+"""
26
+Test module bots/missingnotice.py
27
+"""
28
+
29
+import unittest
30
+from unittest import mock  # noqa
31
+
32
+import pywikibot
33
+
34
+import context  # noqa
35
+from bots.missingnotice import MissingNoticeBot  # noqa
36
+
37
+
38
+class TestMissingNoticeBot(unittest.TestCase):
39
+    """
40
+    Test class MissingNoticeBot
41
+    """
42
+
43
+    def setUp(self):
44
+        genFactory = pywikibot.pagegenerators.GeneratorFactory()
45
+        self.MissingNoticeBot = MissingNoticeBot(genFactory)
46
+        self.MissingNoticeBot.categorized_articles = [ "Deutschland",
47
+                                                       "Max_Schlee",
48
+                                                       "Hodeng-Hodenger" ]
49
+
50
+    @mock.patch( 'sqlalchemy.engine.Engine.execute',
51
+                 return_value=( { "page_title": b"a", },
52
+                                { "page_title": b"b", },
53
+                                { "page_title": b"c", },
54
+                                { "page_title": b"d", }, ) )
55
+    def test_get_categorized_articles(self, execute_mock):
56
+        """
57
+        Test method get_categorized_articles()
58
+        """
59
+        self.assertFalse(execute_mock.called)
60
+
61
+        result = MissingNoticeBot.get_categorized_articles()
62
+
63
+        self.assertTrue(execute_mock.called)
64
+        self.assertEqual(result, ["a", "b", "c", "d"] )
65
+
66
+    def test_treat_articles( self ):
67
+        """
68
+        Test method treat_articles()
69
+        """
70
+
71
+        # articles with notice
72
+        a = pywikibot.Page(pywikibot.Site(), "Deutschland" )
73
+        b = pywikibot.Page(pywikibot.Site(), "Max_Schlee" )
74
+        c = pywikibot.Page(pywikibot.Site(), "Hodeng-Hodenger#Test" )
75
+        # articles without notice
76
+        x = pywikibot.Page(pywikibot.Site(), "Quodvultdeus" )
77
+        y = pywikibot.Page(pywikibot.Site(), "Zoo_Bremen" )
78
+        z = pywikibot.Page(pywikibot.Site(), "Nulka#Test" )
79
+
80
+        cases = ( ( ( a, b, c ), list() ),
81
+                  ( ( x, y, z ), [ "[[Quodvultdeus]]",
82
+                                   "[[Zoo Bremen]]",
83
+                                   "[[Nulka#Test]]" ]),
84
+                  ( ( a, b, y, z ), [ "[[Zoo Bremen]]",
85
+                                      "[[Nulka#Test]]" ]), )
86
+
87
+        for case in cases:
88
+            res = self.MissingNoticeBot.treat_articles( case[0] )
89
+
90
+            self.assertEqual( res, case[1] )
91
+
92
+
93
+if __name__ == '__main__':
94
+    unittest.main()