From 597df69275d7eccf05a73258fc906071dc7a62d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 5 Sep 2015 17:09:01 +0200 Subject: [PATCH 001/192] Save mysqldb-connection as class atribute to make it usable for all decandant classes of MYSQL_RED --- mysql_red.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/mysql_red.py b/mysql_red.py index acc6161..35af34c 100644 --- a/mysql_red.py +++ b/mysql_red.py @@ -6,6 +6,9 @@ except ImportError: class MYSQL_RED: + #Save mysqldb-connection as class attribute to use only one in descendant classes + connection = False + def __init__( self, db_hostname, db_username, db_password, db_name ): """ Opens a connection to MySQL-DB @@ -13,11 +16,14 @@ class MYSQL_RED: @returns mysql-stream MySQL Connection """ - self.__connection = mysqldb.connect( host=db_hostname, user=db_username, passwd=db_password, db=db_name ) + # Connect to mysqldb only once + if( type( self ).connection == False ): + + type( self ).connection = mysqldb.connect( host=db_hostname, user=db_username, passwd=db_password, db=db_name ) - def close( self ): + def __del__( self ): """ - Before deleting instance, close connection to MySQL-DB + Before deleting class, close connection to MySQL-DB """ - self.__connection.close() + type( self ).connection.close() From f5ac6a06d30b27bb052516902fe65dc8eb6d8c45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 5 Sep 2015 17:10:55 +0200 Subject: [PATCH 002/192] Implement a class MYSQL_RED_PAGE as desecendant of MYSQL_RED for handling querys about red_pages --- mysql_red.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/mysql_red.py b/mysql_red.py index 35af34c..df10232 100644 --- a/mysql_red.py +++ b/mysql_red.py @@ -27,3 +27,34 @@ class MYSQL_RED: """ type( self ).connection.close() + +class MYSQL_RED_PAGE( MYSQL_RED ): + + def __init__( self, db_hostname, 
db_username, db_password, db_name ): + """ + Creates a new instance, runs __init__ of parent class + """ + super().__init__( db_hostname, db_username, db_password, db_name ) + + + def get_page( self, page_id ): + """ + Retrieves a red page row from MySQL-Database for given page_id + + @param int page_id MediaWiki page_id for page to retrieve + + @returns tuple Tuple with data for given page_id otherwise if none found + bool FALSE + """ + cursor = type( self ).connection.cursor() + + format_str = """SELECT * FROM `red_pages` WHERE page_id={page_id};""" + query = format_str.format( page_id=int( page_id ) ) + + cursor.execute( query ) + res = cursor.fetchone() + + if res: + return res + else: + return False From 49def6f0f0a965732abbda1e63f91f956f34d0bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 5 Sep 2015 17:13:09 +0200 Subject: [PATCH 003/192] Implement a class MYSQL_RED_FAM as desecendant of MYSQL_RED for handling querys about red_families --- mysql_red.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mysql_red.py b/mysql_red.py index df10232..d6456a8 100644 --- a/mysql_red.py +++ b/mysql_red.py @@ -58,3 +58,12 @@ class MYSQL_RED_PAGE( MYSQL_RED ): return res else: return False + +class MYSQL_RED_FAM( MYSQL_RED ): + + def __init__( self, db_hostname, db_username, db_password, db_name ): + """ + Creates a new instance, runs __init__ of parent class + """ + super().__init__( db_hostname, db_username, db_password, db_name ) + From 10cfa79ee02030c3be9b4d9414bf7dd845d8cd34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 5 Sep 2015 18:06:17 +0200 Subject: [PATCH 004/192] Implement a class/modul RED_FAM for handling redundance families --- red_fam.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 red_fam.py diff --git a/red_fam.py b/red_fam.py new file mode 100644 index 0000000..c0b4848 --- /dev/null +++ 
b/red_fam.py @@ -0,0 +1,36 @@ + +import hashlib + +class RED_FAM: + + def __init__( self, articlesList ): + """ + Generates a new RED_FAM object + + @param articlesList list List of articles of redundance family + """ + + self.__articlesList = articlesList + + # Make sure we have 8 entrys for pages, if not fill with empty list items + while len( self.__articlesList ) < 8: + self.__articlesList.append( "" ) + + self.__hash = self.__get_fam_hash( ) + + print( self.__hash ) + + + def __get_fam_hash( self ): + """ + Calculates the SHA-1 hash for the articlesList of redundance family. + Since we don't need security SHA-1 is just fine. + + @returns str String with the hexadecimal hash digest + """ + + h = hashlib.sha1() + h.update( str( self.__articlesList ).encode('utf-8') ) + + return h.hexdigest() + From 72410d17b409de3c6973400c36ea5b7ea4e6a7e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 5 Sep 2015 18:36:30 +0200 Subject: [PATCH 005/192] Expand __init__ Method of RED_FAM to catch beginning, end and status Add a __repr__ Method to RED_FAM --- red_fam.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/red_fam.py b/red_fam.py index c0b4848..d302599 100644 --- a/red_fam.py +++ b/red_fam.py @@ -3,11 +3,13 @@ import hashlib class RED_FAM: - def __init__( self, articlesList ): + def __init__( self, articlesList, beginning=None, ending=None, status=0 ): """ Generates a new RED_FAM object - @param articlesList list List of articles of redundance family + @param articlesList list List of articles of redundance family + @param beginning datetime Beginning date of redundance diskussion + @param ending datetime Ending date of redundance diskussion """ self.__articlesList = articlesList @@ -18,8 +20,13 @@ class RED_FAM: self.__hash = self.__get_fam_hash( ) - print( self.__hash ) + if( beginning ): + self.__beginning = beginning + if( ending ): + self.__ending 
= ending + + self.__status = status # __TODO__ STATUS CODE def __get_fam_hash( self ): """ @@ -33,4 +40,44 @@ class RED_FAM: h.update( str( self.__articlesList ).encode('utf-8') ) return h.hexdigest() + + def add_beginning( self, datetime ): + """ + Adds the beginning date of a redundance diskussion to the object and sets changed to True + @param datetime datetime Beginning date of redundance diskussion + """ + + self.__beginning = datetime + self.__changed = True + + def add_ending( self, datetime ): + """ + Adds the ending date of a redundance diskussion to the object. Also sets the status to __TODO__ STATUS NUMBER and changed to True + + @param datetime datetime Ending date of redundance diskussion + """ + + self.__ending = datetime + + self.__status = 2 #__TODO__ STATUS NUMBER + self.__changed = True + + def __repr__( self ): + + if( hasattr( self, "__beginning" ) ): + beginning = ", beginning=" + repr( self.__beginning ) + else: + beginning = "" + + if( hasattr( self, "__ending" ) ): + ending = ", ending=" + repr( self.__ending ) + else: + ending = "" + + __repr = "RED_FAM( " + repr( self.__articlesList ) + beginning + ending + ", status=" + repr( self.__status ) + " )" + + return __repr + +x = RED_FAM( [ "Test", "Foo", "Bar" ] ) +print( repr( x ) ) From 122cc8b309ea4670f1d0c9ab2b2ab47526fd1e00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 6 Sep 2015 14:37:27 +0200 Subject: [PATCH 006/192] Set date attributes to none if nothing given, rather than don't set the attributes --- red_fam.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/red_fam.py b/red_fam.py index d302599..443be72 100644 --- a/red_fam.py +++ b/red_fam.py @@ -1,5 +1,6 @@ import hashlib +from datetime import datetime class RED_FAM: @@ -14,17 +15,17 @@ class RED_FAM: self.__articlesList = articlesList - # Make sure we have 8 entrys for pages, if not fill with empty list items - while len( self.__articlesList ) < 8: - 
self.__articlesList.append( "" ) - self.__hash = self.__get_fam_hash( ) if( beginning ): self.__beginning = beginning + else: + self.__beginning = None if( ending ): self.__ending = ending + else: + self.__ending = None self.__status = status # __TODO__ STATUS CODE @@ -65,12 +66,13 @@ class RED_FAM: def __repr__( self ): - if( hasattr( self, "__beginning" ) ): + if( self.__beginning ): + print( self.__beginning ) beginning = ", beginning=" + repr( self.__beginning ) else: beginning = "" - if( hasattr( self, "__ending" ) ): + if( self.__ending ): ending = ", ending=" + repr( self.__ending ) else: ending = "" From 28d9a26cf12b6d2e0609542be7cc70977b272ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 6 Sep 2015 15:04:49 +0200 Subject: [PATCH 007/192] Always handle beginning and ending definitions via setter functions Setter functions for beginning and ending decide wether it is a datetime object or a parseable timestamp string --- red_fam.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/red_fam.py b/red_fam.py index 443be72..9cc358b 100644 --- a/red_fam.py +++ b/red_fam.py @@ -4,6 +4,9 @@ from datetime import datetime class RED_FAM: + # Define the timestamp format + __timestamp_format = "%H:%M, %d. %b. 
%Y (%Z)" + def __init__( self, articlesList, beginning=None, ending=None, status=0 ): """ Generates a new RED_FAM object @@ -18,12 +21,12 @@ class RED_FAM: self.__hash = self.__get_fam_hash( ) if( beginning ): - self.__beginning = beginning + self.add_beginning( beginning ) else: self.__beginning = None if( ending ): - self.__ending = ending + self.add_ending( ending ) else: self.__ending = None @@ -42,32 +45,47 @@ class RED_FAM: return h.hexdigest() - def add_beginning( self, datetime ): + def add_beginning( self, beginning ): """ Adds the beginning date of a redundance diskussion to the object and sets changed to True @param datetime datetime Beginning date of redundance diskussion """ - self.__beginning = datetime + self.__beginning = self.__datetime( beginning ) + self.__changed = True - def add_ending( self, datetime ): + def add_ending( self, ending ): """ Adds the ending date of a redundance diskussion to the object. Also sets the status to __TODO__ STATUS NUMBER and changed to True @param datetime datetime Ending date of redundance diskussion """ - self.__ending = datetime + self.__ending = self.__datetime( ending ) self.__status = 2 #__TODO__ STATUS NUMBER self.__changed = True + + def __datetime( self, timestamp ): + """ + Decides wether given timestamp is a parseable string or a datetime object and returns a datetime object in both cases + @param timestamp datetime Datetime object + str Parseable string with timestamp in format __timestamp_format + + @returns datetime Datetime object + """ + + if( isinstance( timestamp, datetime ) ): + return timestamp + else: + return datetime.strptime( timestamp, type( self ).__timestamp_format ) + def __repr__( self ): if( self.__beginning ): - print( self.__beginning ) beginning = ", beginning=" + repr( self.__beginning ) else: beginning = "" From d2dfa15ddff7833f3493b23615a4c647bfe745d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 6 Sep 2015 15:43:10 +0200 Subject: 
[PATCH 008/192] Implement a class RED_PAGE for handling redundance pages --- red_page.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 red_page.py diff --git a/red_page.py b/red_page.py new file mode 100644 index 0000000..4eb0fa3 --- /dev/null +++ b/red_page.py @@ -0,0 +1,12 @@ + +class RED_PAGE: + """Class for handling redundance discussion pages and archives""" + + def __init__( self, page ): + """ + Generate a new RED_PAGE object based on the given pywikibot page object + + @param page page + """ + + return From 773557a59194b7a2ecef828e08b4da580cd855cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 6 Sep 2015 16:02:30 +0200 Subject: [PATCH 009/192] Use DictCursor ouf oursql to get better readability in MYSQL_RED_PAGE --- mysql_red.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mysql_red.py b/mysql_red.py index d6456a8..616ac3e 100644 --- a/mysql_red.py +++ b/mysql_red.py @@ -46,7 +46,7 @@ class MYSQL_RED_PAGE( MYSQL_RED ): @returns tuple Tuple with data for given page_id otherwise if none found bool FALSE """ - cursor = type( self ).connection.cursor() + cursor = type( self ).connection.cursor(mysqldb.DictCursor) format_str = """SELECT * FROM `red_pages` WHERE page_id={page_id};""" query = format_str.format( page_id=int( page_id ) ) From 0beb97d67db834c44b5aa321fbe4573c96b28d6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Mon, 7 Sep 2015 16:21:57 +0200 Subject: [PATCH 010/192] Use qmark parametrization style for mysql query --- mysql_red.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mysql_red.py b/mysql_red.py index 616ac3e..144e7ef 100644 --- a/mysql_red.py +++ b/mysql_red.py @@ -48,10 +48,7 @@ class MYSQL_RED_PAGE( MYSQL_RED ): """ cursor = type( self ).connection.cursor(mysqldb.DictCursor) - format_str = """SELECT * FROM `red_pages` WHERE page_id={page_id};""" - query = format_str.format( 
page_id=int( page_id ) ) - - cursor.execute( query ) + cursor.execute( 'SELECT * FROM `red_pages` WHERE `page_id` = ?;', ( page_id, ) ) res = cursor.fetchone() if res: From c800a13f0c8cecb01d39c2a0316ac3109fc364d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Mon, 7 Sep 2015 19:00:04 +0200 Subject: [PATCH 011/192] Change behavior of MYSQL_RED(_PAGE), handle save requested data in object for simple update handling --- mysql_red.py | 72 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/mysql_red.py b/mysql_red.py index 144e7ef..f576f2d 100644 --- a/mysql_red.py +++ b/mysql_red.py @@ -8,8 +8,12 @@ class MYSQL_RED: #Save mysqldb-connection as class attribute to use only one in descendant classes connection = False + db_hostname='localhost' + db_username='gwlocal' + db_password='RigTawwewmyagPepotugco' + db_name='gwlocal_wiki' - def __init__( self, db_hostname, db_username, db_password, db_name ): + def __init__( self ): """ Opens a connection to MySQL-DB @@ -19,7 +23,7 @@ class MYSQL_RED: # Connect to mysqldb only once if( type( self ).connection == False ): - type( self ).connection = mysqldb.connect( host=db_hostname, user=db_username, passwd=db_password, db=db_name ) + type( self ).connection = mysqldb.connect( host=type( self ).db_hostname, user=type( self ).db_username, passwd=type( self ).db_password, db=type( self ).db_name ) def __del__( self ): """ @@ -30,14 +34,18 @@ class MYSQL_RED: class MYSQL_RED_PAGE( MYSQL_RED ): - def __init__( self, db_hostname, db_username, db_password, db_name ): + def __init__( self, page_id ): """ Creates a new instance, runs __init__ of parent class """ - super().__init__( db_hostname, db_username, db_password, db_name ) + super().__init__( ) + + self.__page_id = int( page_id ); + + self.data = self.get_page() - def get_page( self, page_id ): + def get_page( self ): """ Retrieves a red page row from MySQL-Database for 
given page_id @@ -48,19 +56,67 @@ class MYSQL_RED_PAGE( MYSQL_RED ): """ cursor = type( self ).connection.cursor(mysqldb.DictCursor) - cursor.execute( 'SELECT * FROM `red_pages` WHERE `page_id` = ?;', ( page_id, ) ) + cursor.execute( 'SELECT * FROM `red_pages` WHERE `page_id` = ?;', ( self.__page_id, ) ) res = cursor.fetchone() if res: return res else: return False + + def add_page( self, page_title, rev_id, status=0 ): + """ + Inserts a red page row in MySQL-Database for given page_id + + @param int rev_id MediaWiki current rev_id for page to update + @param str page_title MediaWiki new page_title for page to update + @param int status Page parsing status (0 - not (successfully) parsed; 1 - successfully parsed) + """ + + cursor = type( self ).connection.cursor() + + if not page_title: + page_title = self.data[ 'page_title' ] + if not rev_id: + rev_id = self.data[ 'rev_id' ] + + query = 'INSERT INTO `red_pages` ( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' + data = ( self.__page_id, str( page_title ), int( rev_id ), int( status ) ) + + cursor.execute( query, data) + + type( self ).connection.commit() + + + def update_page( self, rev_id=None, page_title=None, status=0 ): + """ + Updates the red page row in MySQL-Database for given page_id + + @param int rev_id MediaWiki current rev_id for page to update + @param str page_title MediaWiki new page_title for page to update + @param int status Page parsing status (0 - not (successfully) parsed; 1 - successfully parsed) + """ + + cursor = type( self ).connection.cursor() + + if not page_title: + page_title = self.data[ 'page_title' ] + if not rev_id: + rev_id = self.data[ 'rev_id' ] + + query = 'UPDATE `red_pages` SET `page_title` = ?, `rev_id` = ?, `status`= ? 
WHERE `page_id` = ?;' + data = ( str( page_title ), int( rev_id ), int( status ), self.__page_id ) + + cursor.execute( query, data) + + type( self ).connection.commit() + class MYSQL_RED_FAM( MYSQL_RED ): - def __init__( self, db_hostname, db_username, db_password, db_name ): + def __init__( self ): """ Creates a new instance, runs __init__ of parent class """ - super().__init__( db_hostname, db_username, db_password, db_name ) + super().__init__( ) From cef90e4faca36ec6addb94133937108dcd74c534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Mon, 7 Sep 2015 19:04:16 +0200 Subject: [PATCH 012/192] Handle building of MYSQL_RED_PAGE objects --- red_page.py | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/red_page.py b/red_page.py index 4eb0fa3..e4892ef 100644 --- a/red_page.py +++ b/red_page.py @@ -1,3 +1,4 @@ +from mysql_red import MYSQL_RED_PAGE class RED_PAGE: """Class for handling redundance discussion pages and archives""" @@ -8,5 +9,42 @@ class RED_PAGE: @param page page """ + + # Safe the pywikibot page object + self.page = page + + self.__handle_db( ) + + # if( self._mysql_page ): + + # self.is_page_changed() + + # else: + # self.__mysql.add_page() - return + def __handle_db( self ): + """ + Handles opening of db connection + """ + # We need a connection to our mysqldb + self.__mysql = MYSQL_RED_PAGE( self.page._pageid ) + + if not self.__mysql.data: + self.__mysql.add_page( self.page.title, self.page._revid ) + + def is_page_changed( self ): + """ + Check wether the page was changed since last run + """ + if( self._mysql_page[ 'rev_id' ] != self.page._revid ): + changed_rev_id = True + self._mysql_page[ 'rev_id' ] = self.page._revid + + if( self._mysql_page[ 'page_title' ] != self.page.title ): + changed_title = True + self._mysql_page[ 'page_title' ] = self.page.title + + if( changed_rev_id or changed_title ): + return True + else: + return False From 
73b5d87e8d5a71b6f4d3f1e6ebe2cdaa18e31ad0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Mon, 7 Sep 2015 22:51:07 +0200 Subject: [PATCH 013/192] Implemment handling of changed page meta data (rev_id, page_title, status) --- mysql_red.py | 2 +- red_page.py | 42 +++++++++++++++++++++++++++++++----------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/mysql_red.py b/mysql_red.py index f576f2d..72e784d 100644 --- a/mysql_red.py +++ b/mysql_red.py @@ -70,7 +70,7 @@ class MYSQL_RED_PAGE( MYSQL_RED ): @param int rev_id MediaWiki current rev_id for page to update @param str page_title MediaWiki new page_title for page to update - @param int status Page parsing status (0 - not (successfully) parsed; 1 - successfully parsed) + @param int status Page parsing status (0 - not (successfully) parsed; 1 - successfully parsed; 2 - successfully parsed archive) """ cursor = type( self ).connection.cursor() diff --git a/red_page.py b/red_page.py index e4892ef..76293e6 100644 --- a/red_page.py +++ b/red_page.py @@ -15,9 +15,13 @@ class RED_PAGE: self.__handle_db( ) - # if( self._mysql_page ): - - # self.is_page_changed() + self.is_page_changed() + + self.__parsed = None + if( self.__changed or self.__mysql.data[ 'status' ] == 0 ): + self.parse() + + self.__update_db() # else: # self.__mysql.add_page() @@ -36,15 +40,31 @@ class RED_PAGE: """ Check wether the page was changed since last run """ - if( self._mysql_page[ 'rev_id' ] != self.page._revid ): - changed_rev_id = True - self._mysql_page[ 'rev_id' ] = self.page._revid - if( self._mysql_page[ 'page_title' ] != self.page.title ): - changed_title = True - self._mysql_page[ 'page_title' ] = self.page.title - - if( changed_rev_id or changed_title ): + if( self.__mysql.data != { 'page_id': self.page._pageid, 'rev_id': self.page._revid, 'page_title': self.page.title, 'status': self.__mysql.data[ 'status' ] } ): + self.__changed = True + else: + self.__changed = False + + def 
is_archive( self ): + """ + Detects wether current page is an archive of discussions + """ + if u"/Archiv" in self.page.title: return True else: return False + def __update_db( self ): + """ + Updates the page meta data in mysql db + """ + if( self.__parsed ): + status = 1 + + if( self.is_archive() ): + status = 2 + else: + status = 0 + + self.__mysql.update_page( self.page._revid, self.page.title, status ) + From 45df354315b33acca81924d4743b37c2cd191b9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 10 Sep 2015 11:18:22 +0200 Subject: [PATCH 014/192] Prevent descendant classes of MYSQL_RED from deleting connection to db --- mysql_red.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mysql_red.py b/mysql_red.py index 72e784d..a494e1f 100644 --- a/mysql_red.py +++ b/mysql_red.py @@ -44,6 +44,8 @@ class MYSQL_RED_PAGE( MYSQL_RED ): self.data = self.get_page() + def __del__( self ): + pass def get_page( self ): """ @@ -119,4 +121,6 @@ class MYSQL_RED_FAM( MYSQL_RED ): Creates a new instance, runs __init__ of parent class """ super().__init__( ) - + + def __del__( self ): + pass From 6d1ed33699b54d0ddd11457a9ceec68a5e384462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 10 Sep 2015 11:20:20 +0200 Subject: [PATCH 015/192] Add methods for MySQL actions while parsing RED_FAMs --- mysql_red.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/mysql_red.py b/mysql_red.py index a494e1f..5a69aaf 100644 --- a/mysql_red.py +++ b/mysql_red.py @@ -116,11 +116,70 @@ class MYSQL_RED_PAGE( MYSQL_RED ): class MYSQL_RED_FAM( MYSQL_RED ): - def __init__( self ): + def __init__( self, fam_hash ): """ Creates a new instance, runs __init__ of parent class """ super().__init__( ) + + self.__fam_hash = fam_hash + + self.data = self.get_fam() def __del__( self ): pass + + def get_fam( self ): + """ + 
Retrieves a red family row from MySQL-Database for given fam_hash + + @returns dict Dictionairy with data for given fam hash otherwise if none found + bool FALSE + """ + cursor = type( self ).connection.cursor(mysqldb.DictCursor) + + cursor.execute( 'SELECT * FROM `red_families` WHERE `fam_hash` = ?;', ( self.__fam_hash, ) ) + res = cursor.fetchone() + + if res: + return res + else: + return False + + def add_fam( self, articlesList, red_page_id, beginning, ending=None, status=0 ): + + cursor = type( self ).connection.cursor() + + query = 'INSERT INTO `red_families` ( fam_hash, red_page_id, beginning, ending, status, article0, article1, article2, article3, article4, article5, article6, article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' + data = [ str( self.__fam_hash ), red_page_id, beginning, ending, status ] + + for article in articlesList: + data.append( str( article ) ) + + while len( data ) < 13: + data.append( None ) + + data = tuple( data ) + + cursor.execute( query, data) + + type( self ).connection.commit() + + def update_fam( self, red_page_id, beginning, ending, status ): + """ + Updates the red fam row in MySQL-Database for given fam_hash + + @param int red_page_id MediaWiki page_id which contains red_fam + @param datetime beginning Timestamp of beginning of redundance discussion + qparam datetime ending Timestamp of ending of redundance discussion + @param int status red_fam status (0 - discussion is running; 1 - discussion over; 2 - discussion archived) + """ + + cursor = type( self ).connection.cursor() + + query = 'UPDATE `red_families` SET `red_page_id` = ?, `beginning` = ?, `ending` = ?, `status`= ? 
WHERE `fam_hash` = ?;' + data = ( int(red_page_id ), beginning, ending, int( status ), self.__fam_hash ) + + cursor.execute( query, data) + + type( self ).connection.commit() From 30e1672557732f3734c0c8293af2e46c55bdf19a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 10 Sep 2015 11:23:15 +0200 Subject: [PATCH 016/192] Implement methods for parsing red_fams Add Interfaces for RED_FAM --- red_fam.py | 166 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 128 insertions(+), 38 deletions(-) diff --git a/red_fam.py b/red_fam.py index 9cc358b..77afd4c 100644 --- a/red_fam.py +++ b/red_fam.py @@ -1,13 +1,15 @@ import hashlib +import re from datetime import datetime +from mysql_red import MYSQL_RED_FAM + class RED_FAM: - # Define the timestamp format - __timestamp_format = "%H:%M, %d. %b. %Y (%Z)" + - def __init__( self, articlesList, beginning=None, ending=None, status=0 ): + def __init__( self, fam_hash=None, articlesList=None, red_page_id=None, beginning=None, ending=None, status=0 ): """ Generates a new RED_FAM object @@ -16,23 +18,109 @@ class RED_FAM: @param ending datetime Ending date of redundance diskussion """ - self.__articlesList = articlesList + #if( beginning ): + # self.add_beginning( beginning ) + # self._beginning = None - self.__hash = self.__get_fam_hash( ) + #if( ending ): + # self.add_ending( ending ) + #else: + # self._ending = None + + #self._status = status # __TODO__ STATUS CODE - if( beginning ): - self.add_beginning( beginning ) + #self._handle_db() + + + + def __repr__( self ): + + if( self._beginning ): + beginning = ", beginning=" + repr( self._beginning ) else: - self.__beginning = None + beginning = "" + + if( self._ending ): + ending = ", ending=" + repr( self._ending ) + else: + ending = "" + __repr = "RED_FAM( " + repr( self._articlesList ) + beginning + ending + ", status=" + repr( self._status ) + " )" + + return __repr + +class RED_FAM_PARSER( RED_FAM ): + """ + 
Provides an interface to RED_FAM for adding/updating redundance families while parsig redundance pages + """ + + # Define the timestamp format + __timestamp_format = "%H:%M, %d. %b. %Y (%Z)" + + def __init__( self, red_fam_heading, red_page_id, red_page_archive, beginning, ending=None ): + """ + Creates a RED_FAM object based on data collected while parsing red_pages combined with possibly former known data from db + + @param red_fam_heading string String with wikitext heading of redundance section + @param red_page_id int MediaWiki page_id of red_page containing red_fam + @param red_page_archive bool Is red_page an archive + @param beginning datetime Timestamp of beginning of redundance discussion + string Timestamp of beginning of redundance discussion as srftime parseable string + @param ending datetime Timestamp of ending of redundance discussion + string Timestamp of ending of redundance discussion as srftime parseable string + """ + ## Set object attributes: + self._red_page_id = red_page_id + self._red_page_archive = red_page_archive + + # Method self.add_beginning sets self._beginning directly + self.add_beginning( beginning ) + + # Method self.add_ending sets self._ending directly if( ending ): self.add_ending( ending ) else: - self.__ending = None - - self.__status = status # __TODO__ STATUS CODE + #If no ending was provided set to None + self._ending = None - def __get_fam_hash( self ): + # Parse the provided heading of redundance section to set self._articlesList + self.heading_parser( red_fam_heading ) + + # Calculates the sha1 hash over self._articlesList to rediscover known redundance families + self.fam_hash() + + # Open database connection, ask for data if existing, otherwise create entry + self.__handle_db() + + # Check status changes + self.status() + + # Since status change means something has changed, update database + if( self._status != self.__mysql.data[ 'status' ] ): + self.__mysql.update_fam( self._red_page_id, self._beginning, 
self._ending, self._status ) + + def __handle_db( self ): + """ + Handles opening of db connection + """ + # We need a connection to our mysqldb + self.__mysql = MYSQL_RED_FAM( self._fam_hash ) + + if not self.__mysql.data: + self.__mysql.add_fam( self._articlesList, self._red_page_id, self._beginning, self._ending ) + + def heading_parser( self, red_fam_heading): + """ + Parses given red_fam_heading string and saves articles list + """ + + # Predefine a pattern for wikilinks' destination + wikilink_pat = re.compile( r"\[\[([^\[\]\|]*)(\]\]|\|)" ) + + # We get the pages in first [0] element iterating over wikilink_pat.findall( line ) + self._articlesList = [ link[0] for link in wikilink_pat.findall( red_fam_heading ) ] + + def fam_hash( self ): """ Calculates the SHA-1 hash for the articlesList of redundance family. Since we don't need security SHA-1 is just fine. @@ -41,10 +129,10 @@ class RED_FAM: """ h = hashlib.sha1() - h.update( str( self.__articlesList ).encode('utf-8') ) + h.update( str( self._articlesList ).encode('utf-8') ) - return h.hexdigest() - + self._fam_hash= h.hexdigest() + def add_beginning( self, beginning ): """ Adds the beginning date of a redundance diskussion to the object and sets changed to True @@ -52,9 +140,7 @@ class RED_FAM: @param datetime datetime Beginning date of redundance diskussion """ - self.__beginning = self.__datetime( beginning ) - - self.__changed = True + self._beginning = self.__datetime( beginning ) def add_ending( self, ending ): """ @@ -63,11 +149,11 @@ class RED_FAM: @param datetime datetime Ending date of redundance diskussion """ - self.__ending = self.__datetime( ending ) + self._ending = self.__datetime( ending ) - self.__status = 2 #__TODO__ STATUS NUMBER - self.__changed = True - + self._status = 1 #__TODO__ STATUS NUMBER + self._changed = True + def __datetime( self, timestamp ): """ Decides wether given timestamp is a parseable string or a datetime object and returns a datetime object in both cases @@ -82,22 
+168,26 @@ class RED_FAM: return timestamp else: return datetime.strptime( timestamp, type( self ).__timestamp_format ) - - def __repr__( self ): - - if( self.__beginning ): - beginning = ", beginning=" + repr( self.__beginning ) - else: - beginning = "" - if( self.__ending ): - ending = ", ending=" + repr( self.__ending ) + def status( self ): + """ + Handles detection of correct status + """ + # Diskussion läuft --> Status ? --> 0 ! _ending + # Diskussion beendet --> Status 0 --> 1 _ending + # Diskussion archiviert --> Status 0/1 --> 2 ??? + + if not self._ending: + self._status = 0 else: - ending = "" - - __repr = "RED_FAM( " + repr( self.__articlesList ) + beginning + ending + ", status=" + repr( self.__status ) + " )" - - return __repr - -x = RED_FAM( [ "Test", "Foo", "Bar" ] ) + if not self._red_page_archive: + self._status = 1 + else: + self._status = 2 + +class RED_FAM_WORKER( RED_FAM ): + """ + Handles working with redundance families stored in database where discussion is finished + """ + pass print( repr( x ) ) From ea92415b377c50264980ec446e8d0d55fed6c4f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 10 Sep 2015 11:24:25 +0200 Subject: [PATCH 017/192] Add dummy -method for parsing to RED_PAGE --- red_page.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/red_page.py b/red_page.py index 76293e6..c7c6ec3 100644 --- a/red_page.py +++ b/red_page.py @@ -54,6 +54,13 @@ class RED_PAGE: return True else: return False + + def parse( self ): + """ + Handles the parsing process + """ + pass + def __update_db( self ): """ Updates the page meta data in mysql db From 79e232d52b5701cc02212fb4690bd07a8185fa9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 10 Sep 2015 11:46:05 +0200 Subject: [PATCH 018/192] Add gitignore file --- .gitignore | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 .gitignore 
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a68441e --- /dev/null +++ b/.gitignore @@ -0,0 +1,64 @@ +# Created by https://www.gitignore.io/api/python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Test +test.py From 9e865e1c2f5a76b9f4107290c9957badf3e9302a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 10 Sep 2015 11:57:10 +0200 Subject: [PATCH 019/192] Some clean ups --- mysql_red.py | 2 ++ red_fam.py | 3 ++- red_page.py | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/mysql_red.py b/mysql_red.py index 5a69aaf..e95ad6d 100644 --- a/mysql_red.py +++ b/mysql_red.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- try: import oursql as mysqldb diff --git a/red_fam.py b/red_fam.py index 77afd4c..9d3afa9 100644 --- a/red_fam.py +++ b/red_fam.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- import hashlib import re @@ -190,4 +192,3 @@ class RED_FAM_WORKER( RED_FAM ): Handles working with redundance families stored in database where discussion is finished """ pass -print( repr( x ) ) diff --git a/red_page.py b/red_page.py index c7c6ec3..d0df358 100644 --- a/red_page.py +++ b/red_page.py @@ -1,3 +1,6 @@ 
+#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + from mysql_red import MYSQL_RED_PAGE class RED_PAGE: @@ -74,4 +77,3 @@ class RED_PAGE: status = 0 self.__mysql.update_page( self.page._revid, self.page.title, status ) - From 7535172d30595b4f25ccb825b4602e32c35e07a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 10 Sep 2015 11:57:35 +0200 Subject: [PATCH 020/192] Update mysql db if anything of re_fam changes --- red_fam.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/red_fam.py b/red_fam.py index 9d3afa9..b054d1f 100644 --- a/red_fam.py +++ b/red_fam.py @@ -97,9 +97,8 @@ class RED_FAM_PARSER( RED_FAM ): # Check status changes self.status() - # Since status change means something has changed, update database - if( self._status != self.__mysql.data[ 'status' ] ): - self.__mysql.update_fam( self._red_page_id, self._beginning, self._ending, self._status ) + # Triggers db update if anything changed + self.changed() def __handle_db( self ): """ @@ -186,6 +185,15 @@ class RED_FAM_PARSER( RED_FAM ): self._status = 1 else: self._status = 2 + + def changed( self ): + """ + Checks wether anything has changed and maybe triggers db update + """ + + # Since status change means something has changed, update database + if( self._status != self.__mysql.data[ 'status' ] or self._beginning != self.__mysql.data[ 'beginning' ] or self._ending != self.__mysql.data[ 'ending' ] or self._red_page_id != self.__mysql.data[ 'red_page_id' ] ): + self.__mysql.update_fam( self._red_page_id, self._beginning, self._ending, self._status ) class RED_FAM_WORKER( RED_FAM ): """ From 0f4ce7c4c994362bb94f0f36e80d94f9dc9af879 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 10 Sep 2015 23:02:38 +0200 Subject: [PATCH 021/192] Add workaround to get mysql.data dictionary while adding new pages/fams --- mysql_red.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/mysql_red.py b/mysql_red.py index e95ad6d..f6d9535 100644 --- a/mysql_red.py +++ b/mysql_red.py @@ -90,6 +90,8 @@ class MYSQL_RED_PAGE( MYSQL_RED ): cursor.execute( query, data) type( self ).connection.commit() + + self.data = self.get_page() def update_page( self, rev_id=None, page_title=None, status=0 ): @@ -166,6 +168,8 @@ class MYSQL_RED_FAM( MYSQL_RED ): cursor.execute( query, data) type( self ).connection.commit() + + self.data = self.get_fam() def update_fam( self, red_page_id, beginning, ending, status ): """ From 984c269aa4eaad950afe3134e529998d60a1c36d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 10 Sep 2015 23:06:54 +0200 Subject: [PATCH 022/192] Implement classmethods for detecting sectionheading, beginning and ending in class RED_FAM_PARSER --- red_fam.py | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/red_fam.py b/red_fam.py index b054d1f..dec14eb 100644 --- a/red_fam.py +++ b/red_fam.py @@ -3,6 +3,7 @@ import hashlib import re +import locale from datetime import datetime from mysql_red import MYSQL_RED_FAM @@ -57,7 +58,16 @@ class RED_FAM_PARSER( RED_FAM ): """ # Define the timestamp format - __timestamp_format = "%H:%M, %d. %b. %Y (%Z)" + __timestamp_format = "%H:%M, %d. %b. %Y (%Z)" + + # Define section heading re.pattern + __sectionhead_pat = re.compile( r"={3,4}[^=]*={3,4}" ) + + # Define timestamp re.pattern + __timestamp_pat = re.compile( r"\d{2}:\d{2}, (\d{1,2}. (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? 
\d{4}) \(CES?T\)" ) + + # Textpattern for recognisation of done-notices + __done_notice = ":Archivierung dieses Abschnittes wurde gewünscht von:" def __init__( self, red_fam_heading, red_page_id, red_page_archive, beginning, ending=None ): """ @@ -164,6 +174,7 @@ class RED_FAM_PARSER( RED_FAM ): @returns datetime Datetime object """ + locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') if( isinstance( timestamp, datetime ) ): return timestamp @@ -194,6 +205,51 @@ class RED_FAM_PARSER( RED_FAM ): # Since status change means something has changed, update database if( self._status != self.__mysql.data[ 'status' ] or self._beginning != self.__mysql.data[ 'beginning' ] or self._ending != self.__mysql.data[ 'ending' ] or self._red_page_id != self.__mysql.data[ 'red_page_id' ] ): self.__mysql.update_fam( self._red_page_id, self._beginning, self._ending, self._status ) + + @classmethod + def is_sectionheading( cls, line ): + """ + Checks wether given line is a red_fam section heading + + @param line string String to check + + @returns bool Returns True if it is a section heading, otherwise false + """ + + if cls.__sectionhead_pat.search( line ): + return True + else: + return False + + @classmethod + def is_beginning( cls, line ): + """ + Returns the first timestamp found in line, otherwise None + + @param str line String to search in + + @returns str Timestamp, otherwise None + """ + + result = cls.__timestamp_pat.search( line ) + if result: + return result.group() + else: + return None + + @classmethod + def is_ending( cls, line ): + """ + Returns the timestamp of done notice ( if one ), otherwise None + @param str line String to search in + + @returns str Timestamp, otherwise None + """ + if cls.__done_notice in line: + result = cls.__timestamp_pat.search( line ) + if result: + return result.group() + return None class RED_FAM_WORKER( RED_FAM ): """ From a26b92082b5c52615a4305a49b9592647df6fd35 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 10 Sep 2015 23:08:51 +0200 Subject: [PATCH 023/192] Implement parsing process in RED_PAGE.parse() --- red_page.py | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/red_page.py b/red_page.py index d0df358..5a5889c 100644 --- a/red_page.py +++ b/red_page.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- from mysql_red import MYSQL_RED_PAGE +from red_fam import RED_FAM_PARSER class RED_PAGE: """Class for handling redundance discussion pages and archives""" @@ -62,8 +63,59 @@ class RED_PAGE: """ Handles the parsing process """ - pass - + + # Since @param text is a string we need to split it in lines + text_lines = self.page.text.split( "\n" ) + + # Initialise line counter + i = 0 + fam_heading = None + beginning = None + ending = None + + # Set line for last detected Redundance-Family to 0 + last_fam = 0 + + # Iterate over the lines of the page + for line in text_lines: + + # Check wether we have an "Redundance-Family"-Section heading (Level 3) + if RED_FAM_PARSER.is_sectionheading( line ): + + # Before working with next red_fam create the object for the one before (if one) + if( fam_heading and beginning ): + try: + red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive, beginning, ending ) + except: + pass + + # Save line number for last detected Redundance-Family + last_fam = i + # Save heading + fam_heading = line + + # Defined (re)initialisation of dates + beginning = None + ending = None + + # Check wether we are currently in an "Redundance-Family"-Section Body + if i > last_fam and last_fam > 0: + + # Check if we have alredy recognized the beginning date of the discussion (in former iteration) or if we have a done-notice + if not beginning: + beginning = RED_FAM_PARSER.is_beginning( line ) + else: + ending = RED_FAM_PARSER.is_ending( line ) + + # Increment line counter + i += 1 + else: + # For the last red_fam create 
the object + if( fam_heading and beginning ): + try: + red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive, beginning, ending ) + except: + pass def __update_db( self ): """ Updates the page meta data in mysql db From 4f7cfc65ccb72a3e590f5e9b091ab86bfb6ab244 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 10 Sep 2015 23:09:35 +0200 Subject: [PATCH 024/192] Catch ValueErrors caused by missing points after month abreviation in RED_PAGE_PARSER.__timestamp --- red_fam.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/red_fam.py b/red_fam.py index dec14eb..76f087b 100644 --- a/red_fam.py +++ b/red_fam.py @@ -59,6 +59,7 @@ class RED_FAM_PARSER( RED_FAM ): # Define the timestamp format __timestamp_format = "%H:%M, %d. %b. %Y (%Z)" + __timestamp_format2 = "%H:%M, %d. %b %Y (%Z)" # Catch missing point after month abreviation # Define section heading re.pattern __sectionhead_pat = re.compile( r"={3,4}[^=]*={3,4}" ) @@ -179,7 +180,12 @@ class RED_FAM_PARSER( RED_FAM ): if( isinstance( timestamp, datetime ) ): return timestamp else: - return datetime.strptime( timestamp, type( self ).__timestamp_format ) + # Catch missing point after month abreviation + try: + result = datetime.strptime( timestamp, type( self ).__timestamp_format ) + except ValueError: + result = datetime.strptime( timestamp, type( self ).__timestamp_format2 ) + return result def status( self ): """ From 31ea512c41275ca6c310a5e6d229cd19edfb696a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 13:01:04 +0200 Subject: [PATCH 025/192] Prevent overwriting of ending with None by only calling RED_FAM_PARSER if there was no ending before --- red_page.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/red_page.py b/red_page.py index 5a5889c..e9fcfc6 100644 --- a/red_page.py +++ b/red_page.py @@ -104,9 +104,8 @@ class RED_PAGE: # Check if we have 
alredy recognized the beginning date of the discussion (in former iteration) or if we have a done-notice if not beginning: beginning = RED_FAM_PARSER.is_beginning( line ) - else: + elif not ending: ending = RED_FAM_PARSER.is_ending( line ) - # Increment line counter i += 1 else: From 907ccad63ad7034eb9e5e45d17c9086a912a01cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 13:12:29 +0200 Subject: [PATCH 026/192] Detect if red_fam is archived but has no detectable ending, set status 2 anyway --- red_fam.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/red_fam.py b/red_fam.py index 76f087b..5ce43a3 100644 --- a/red_fam.py +++ b/red_fam.py @@ -190,12 +190,18 @@ class RED_FAM_PARSER( RED_FAM ): def status( self ): """ Handles detection of correct status + There are three possible stati: + - 0 Discussion is running --> no ending, page is not an archive + - 1 Discussion is over --> ending present, page is not an archive + - 2 Discussion is archived --> ending (normaly) present, page is an archive """ # Diskussion läuft --> Status ? --> 0 ! _ending # Diskussion beendet --> Status 0 --> 1 _ending # Diskussion archiviert --> Status 0/1 --> 2 ??? 
- if not self._ending: + # No ending, discussion is running: + # Sometimes archived discussions also have no detectable ending + if not self._ending and not self._red_page_archive: self._status = 0 else: if not self._red_page_archive: From 47238f0f96fb238aaaa292bf9b869d37f3c1f2e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 13:37:58 +0200 Subject: [PATCH 027/192] On archived red_fams do not delete possibly existing ending --- red_fam.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/red_fam.py b/red_fam.py index 5ce43a3..8a7b20f 100644 --- a/red_fam.py +++ b/red_fam.py @@ -162,9 +162,6 @@ class RED_FAM_PARSER( RED_FAM ): """ self._ending = self.__datetime( ending ) - - self._status = 1 #__TODO__ STATUS NUMBER - self._changed = True def __datetime( self, timestamp ): """ @@ -194,26 +191,36 @@ class RED_FAM_PARSER( RED_FAM ): - 0 Discussion is running --> no ending, page is not an archive - 1 Discussion is over --> ending present, page is not an archive - 2 Discussion is archived --> ending (normaly) present, page is an archive + - 3 and greater status was set by worker script, do not change it """ - # Diskussion läuft --> Status ? --> 0 ! _ending - # Diskussion beendet --> Status 0 --> 1 _ending - # Diskussion archiviert --> Status 0/1 --> 2 ??? - # No ending, discussion is running: - # Sometimes archived discussions also have no detectable ending - if not self._ending and not self._red_page_archive: - self._status = 0 - else: - if not self._red_page_archive: - self._status = 1 + # Do not change stati set by worker script etc. 
+ if not self.__mysql.data['status'] > 2: + + # No ending, discussion is running: + # Sometimes archived discussions also have no detectable ending + if not self._ending and not self._red_page_archive: + self._status = 0 else: - self._status = 2 + if not self._red_page_archive: + self._status = 1 + else: + self._status = 2 + else: + self._status = self.__mysql.data[ 'status' ] + + def changed( self ): """ Checks wether anything has changed and maybe triggers db update """ + # On archived red_fams do not delete possibly existing ending + if not self._ending and self._status > 1 and self.__mysql.data[ 'ending' ]: + self._ending = self.__mysql.data[ 'ending' ] + + # Since status change means something has changed, update database if( self._status != self.__mysql.data[ 'status' ] or self._beginning != self.__mysql.data[ 'beginning' ] or self._ending != self.__mysql.data[ 'ending' ] or self._red_page_id != self.__mysql.data[ 'red_page_id' ] ): self.__mysql.update_fam( self._red_page_id, self._beginning, self._ending, self._status ) From ebf7a8fe0b2cce6f1ece4f1297803f7531c6b13e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 13:56:16 +0200 Subject: [PATCH 028/192] Add alternative syntax for done_notice --- red_fam.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/red_fam.py b/red_fam.py index 8a7b20f..8a77856 100644 --- a/red_fam.py +++ b/red_fam.py @@ -69,6 +69,7 @@ class RED_FAM_PARSER( RED_FAM ): # Textpattern for recognisation of done-notices __done_notice = ":Archivierung dieses Abschnittes wurde gewünscht von:" + __done_notice2 = "{{Erledigt|" def __init__( self, red_fam_heading, red_page_id, red_page_archive, beginning, ending=None ): """ @@ -264,7 +265,7 @@ class RED_FAM_PARSER( RED_FAM ): @returns str Timestamp, otherwise None """ - if cls.__done_notice in line: + if ( cls.__done_notice in line ) or ( cls.__done_notice2 in line ): result = cls.__timestamp_pat.search( line ) if 
result: return result.group() From f8dacb53e1af84601384846a6b57937329ce5429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 14:10:33 +0200 Subject: [PATCH 029/192] Add param to RED_PAGE for predefining page status archived when using cat of archives --- red_page.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/red_page.py b/red_page.py index e9fcfc6..1a790ec 100644 --- a/red_page.py +++ b/red_page.py @@ -7,7 +7,7 @@ from red_fam import RED_FAM_PARSER class RED_PAGE: """Class for handling redundance discussion pages and archives""" - def __init__( self, page ): + def __init__( self, page, archive=False ): """ Generate a new RED_PAGE object based on the given pywikibot page object @@ -16,6 +16,7 @@ class RED_PAGE: # Safe the pywikibot page object self.page = page + self._archive = archive self.__handle_db( ) @@ -54,7 +55,7 @@ class RED_PAGE: """ Detects wether current page is an archive of discussions """ - if u"/Archiv" in self.page.title: + if self._archive or ( u"/Archiv" in self.page.title() ): return True else: return False From 5e39ea0b0653c81b8ccd9de54873f60df47a9e2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 14:12:26 +0200 Subject: [PATCH 030/192] Correct use of pywikibot page.title() --- red_fam.py | 2 +- red_page.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/red_fam.py b/red_fam.py index 8a77856..ba550e3 100644 --- a/red_fam.py +++ b/red_fam.py @@ -197,7 +197,7 @@ class RED_FAM_PARSER( RED_FAM ): # Do not change stati set by worker script etc. 
if not self.__mysql.data['status'] > 2: - + # No ending, discussion is running: # Sometimes archived discussions also have no detectable ending if not self._ending and not self._red_page_archive: diff --git a/red_page.py b/red_page.py index 1a790ec..3f870d0 100644 --- a/red_page.py +++ b/red_page.py @@ -39,14 +39,14 @@ class RED_PAGE: self.__mysql = MYSQL_RED_PAGE( self.page._pageid ) if not self.__mysql.data: - self.__mysql.add_page( self.page.title, self.page._revid ) + self.__mysql.add_page( self.page.title(), self.page._revid ) def is_page_changed( self ): """ Check wether the page was changed since last run """ - if( self.__mysql.data != { 'page_id': self.page._pageid, 'rev_id': self.page._revid, 'page_title': self.page.title, 'status': self.__mysql.data[ 'status' ] } ): + if( self.__mysql.data != { 'page_id': self.page._pageid, 'rev_id': self.page._revid, 'page_title': self.page.title(), 'status': self.__mysql.data[ 'status' ] } ): self.__changed = True else: self.__changed = False @@ -86,7 +86,7 @@ class RED_PAGE: # Before working with next red_fam create the object for the one before (if one) if( fam_heading and beginning ): try: - red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive, beginning, ending ) + red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) except: pass @@ -112,10 +112,10 @@ class RED_PAGE: else: # For the last red_fam create the object if( fam_heading and beginning ): - try: - red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive, beginning, ending ) - except: - pass + #~ try: + red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) + #~ except: + #~ pass def __update_db( self ): """ Updates the page meta data in mysql db @@ -128,4 +128,4 @@ class RED_PAGE: else: status = 0 - self.__mysql.update_page( self.page._revid, self.page.title, status ) + self.__mysql.update_page( self.page._revid, self.page.title(), 
status ) From 8b7dc5dbf28915b4f5efa0558aa75047a3d45c7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 16:18:46 +0200 Subject: [PATCH 031/192] Improve sectionheading regex to reduce false positives, require at least two links --- red_fam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/red_fam.py b/red_fam.py index ba550e3..2215dc6 100644 --- a/red_fam.py +++ b/red_fam.py @@ -62,7 +62,7 @@ class RED_FAM_PARSER( RED_FAM ): __timestamp_format2 = "%H:%M, %d. %b %Y (%Z)" # Catch missing point after month abreviation # Define section heading re.pattern - __sectionhead_pat = re.compile( r"={3,4}[^=]*={3,4}" ) + __sectionhead_pat = re.compile( r"^=+.*\[\[.+\]\].*\[\[.+\]\].*=+$" ) # Define timestamp re.pattern __timestamp_pat = re.compile( r"\d{2}:\d{2}, (\d{1,2}. (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? \d{4}) \(CES?T\)" ) From 99f050acd33ada72aaa0c9bff7a78b62452ddce0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 16:22:19 +0200 Subject: [PATCH 032/192] Add workaround to detect ending datetime if there is no done notice --- red_fam.py | 16 +++++++++++++++- red_page.py | 12 ++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/red_fam.py b/red_fam.py index 2215dc6..c332eac 100644 --- a/red_fam.py +++ b/red_fam.py @@ -65,7 +65,7 @@ class RED_FAM_PARSER( RED_FAM ): __sectionhead_pat = re.compile( r"^=+.*\[\[.+\]\].*\[\[.+\]\].*=+$" ) # Define timestamp re.pattern - __timestamp_pat = re.compile( r"\d{2}:\d{2}, (\d{1,2}. (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? \d{4}) \(CES?T\)" ) + __timestamp_pat = re.compile( r"(\d{2}:\d{2}, \d{1,2}. (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? 
\d{4} \(CES?T\))" ) # Textpattern for recognisation of done-notices __done_notice = ":Archivierung dieses Abschnittes wurde gewünscht von:" @@ -270,6 +270,20 @@ class RED_FAM_PARSER( RED_FAM ): if result: return result.group() return None + + @classmethod + def is_ending2( cls, line ): + """ + Returns the last timestamp found in line, otherwise None + @param str line String to search in + + @returns str Timestamp, otherwise None + """ + result = cls.__timestamp_pat.findall( line ) + if result: + return result[-1][0] + else: + return None class RED_FAM_WORKER( RED_FAM ): """ diff --git a/red_page.py b/red_page.py index 3f870d0..d719994 100644 --- a/red_page.py +++ b/red_page.py @@ -85,9 +85,15 @@ class RED_PAGE: # Before working with next red_fam create the object for the one before (if one) if( fam_heading and beginning ): - try: + #Maybe we can find a ending by feed + if not ending: + j = i + while (j > last_fam) and not ending: + j -= 1 + ending = RED_FAM_PARSER.is_ending2( text_lines[ j ] ) + + red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) - except: pass # Save line number for last detected Redundance-Family @@ -116,6 +122,8 @@ class RED_PAGE: red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) #~ except: #~ pass + # Set status of red_page to parsed + self.__parsed = True def __update_db( self ): """ Updates the page meta data in mysql db From 1892c6d015b9c9512fd852dda0337de30ec0fa3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 16:41:04 +0200 Subject: [PATCH 033/192] Fix wrong intendet line (RED_FAM_PARSER only was instantiated when ending-datetime workaround fired) --- red_page.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/red_page.py b/red_page.py index d719994..6dfb242 100644 --- a/red_page.py +++ b/red_page.py @@ -92,9 +92,8 @@ class RED_PAGE: j -= 1 ending = 
RED_FAM_PARSER.is_ending2( text_lines[ j ] ) - - red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) - pass + red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) + print( red_fam ) # Save line number for last detected Redundance-Family last_fam = i From 529ed097029157854421659e65236858a393aab9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 16:42:12 +0200 Subject: [PATCH 034/192] Use pywikibot.output instead of print --- red_page.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/red_page.py b/red_page.py index 6dfb242..d2452e0 100644 --- a/red_page.py +++ b/red_page.py @@ -1,9 +1,12 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import pywikibot + from mysql_red import MYSQL_RED_PAGE from red_fam import RED_FAM_PARSER + class RED_PAGE: """Class for handling redundance discussion pages and archives""" @@ -99,6 +102,7 @@ class RED_PAGE: last_fam = i # Save heading fam_heading = line + pywikibot.output( fam_heading ) # Defined (re)initialisation of dates beginning = None From 90da23171211bab0ea1fe4a534dbcbe39150b98b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 17:05:18 +0200 Subject: [PATCH 035/192] Move the code for instanciating of RED_FAM_PARSER to the end of loop --- red_page.py | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/red_page.py b/red_page.py index d2452e0..0d7d2f3 100644 --- a/red_page.py +++ b/red_page.py @@ -70,7 +70,8 @@ class RED_PAGE: # Since @param text is a string we need to split it in lines text_lines = self.page.text.split( "\n" ) - + length = len( text_lines ) + # Initialise line counter i = 0 fam_heading = None @@ -86,18 +87,6 @@ class RED_PAGE: # Check wether we have an "Redundance-Family"-Section heading (Level 3) if RED_FAM_PARSER.is_sectionheading( line 
): - # Before working with next red_fam create the object for the one before (if one) - if( fam_heading and beginning ): - #Maybe we can find a ending by feed - if not ending: - j = i - while (j > last_fam) and not ending: - j -= 1 - ending = RED_FAM_PARSER.is_ending2( text_lines[ j ] ) - - red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) - print( red_fam ) - # Save line number for last detected Redundance-Family last_fam = i # Save heading @@ -116,16 +105,31 @@ class RED_PAGE: beginning = RED_FAM_PARSER.is_beginning( line ) elif not ending: ending = RED_FAM_PARSER.is_ending( line ) + + # Detect end of red_fam section (next line is new sectionheading) or end of file + # Prevent from running out of index + if i < (length - 1): + test = RED_FAM_PARSER.is_sectionheading( text_lines[ i + 1 ] ) + else: + test = False + if ( test or ( length == ( i + 1 ) ) ): + + # Create the red_fam object + if( fam_heading and beginning ): + + #Maybe we can find a ending by feed if we have None yet (No done notice on archive pages) + if not ending and self.is_archive(): + j = i + while (j > last_fam) and not ending: + j -= 1 + ending = RED_FAM_PARSER.is_ending2( text_lines[ j ] ) + + red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) + pywikibot.output( red_fam ) + # Increment line counter i += 1 else: - # For the last red_fam create the object - if( fam_heading and beginning ): - #~ try: - red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) - #~ except: - #~ pass - # Set status of red_page to parsed self.__parsed = True def __update_db( self ): """ From deaa1d855b3f353c94cb776ed73405e0e352f668 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 22:27:25 +0200 Subject: [PATCH 036/192] Fix bug which prevents detecting of red_fam sectionheading when there is whitespace after closing heading tags 
(===) --- red_fam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/red_fam.py b/red_fam.py index c332eac..e7df494 100644 --- a/red_fam.py +++ b/red_fam.py @@ -62,7 +62,7 @@ class RED_FAM_PARSER( RED_FAM ): __timestamp_format2 = "%H:%M, %d. %b %Y (%Z)" # Catch missing point after month abreviation # Define section heading re.pattern - __sectionhead_pat = re.compile( r"^=+.*\[\[.+\]\].*\[\[.+\]\].*=+$" ) + __sectionhead_pat = re.compile( r"^=+.*\[\[.+\]\].*\[\[.+\]\].*=+" ) # Define timestamp re.pattern __timestamp_pat = re.compile( r"(\d{2}:\d{2}, \d{1,2}. (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? \d{4} \(CES?T\))" ) From 6e64d8448e54122542958a5c379545bf184a2e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 22:29:16 +0200 Subject: [PATCH 037/192] Prevent RED_PAGE from resetting status if parser is not fired --- red_page.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/red_page.py b/red_page.py index 0d7d2f3..1ca6d66 100644 --- a/red_page.py +++ b/red_page.py @@ -135,7 +135,7 @@ class RED_PAGE: """ Updates the page meta data in mysql db """ - if( self.__parsed ): + if( self.__parsed or not self.__changed ): status = 1 if( self.is_archive() ): From a97e2cea5a9f3f5a3ef9cb3263905b3484f00d38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 11 Sep 2015 23:47:35 +0200 Subject: [PATCH 038/192] Since some timestamps are broken we need to reconstruct them by regex match groups Prevents ValueErrors of datetime.strptime in most cases --- red_fam.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/red_fam.py b/red_fam.py index e7df494..072321e 100644 --- a/red_fam.py +++ b/red_fam.py @@ -59,13 +59,12 @@ class RED_FAM_PARSER( RED_FAM ): # Define the timestamp format __timestamp_format = "%H:%M, %d. %b. %Y (%Z)" - __timestamp_format2 = "%H:%M, %d. 
%b %Y (%Z)" # Catch missing point after month abreviation # Define section heading re.pattern __sectionhead_pat = re.compile( r"^=+.*\[\[.+\]\].*\[\[.+\]\].*=+" ) # Define timestamp re.pattern - __timestamp_pat = re.compile( r"(\d{2}:\d{2}, \d{1,2}. (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? \d{4} \(CES?T\))" ) + __timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? (\d{4}) (\(CES?T\))" ) # Textpattern for recognisation of done-notices __done_notice = ":Archivierung dieses Abschnittes wurde gewünscht von:" @@ -178,11 +177,7 @@ class RED_FAM_PARSER( RED_FAM ): if( isinstance( timestamp, datetime ) ): return timestamp else: - # Catch missing point after month abreviation - try: - result = datetime.strptime( timestamp, type( self ).__timestamp_format ) - except ValueError: - result = datetime.strptime( timestamp, type( self ).__timestamp_format2 ) + result = datetime.strptime( timestamp, type( self ).__timestamp_format ) return result def status( self ): @@ -251,9 +246,11 @@ class RED_FAM_PARSER( RED_FAM ): @returns str Timestamp, otherwise None """ - result = cls.__timestamp_pat.search( line ) - if result: - return result.group() + match = cls.__timestamp_pat.search( line ) + if match: + # Since some timestamps are broken we need to reconstruct them by regex match groups + result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". " + match.group(4) + " " + match.group(5) + return result else: return None @@ -266,9 +263,11 @@ class RED_FAM_PARSER( RED_FAM ): @returns str Timestamp, otherwise None """ if ( cls.__done_notice in line ) or ( cls.__done_notice2 in line ): - result = cls.__timestamp_pat.search( line ) - if result: - return result.group() + match = cls.__timestamp_pat.search( line ) + if match: + # Since some timestamps are broken we need to reconstruct them by regex match groups + result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". 
" + match.group(4) + " " + match.group(5) + return result return None @classmethod @@ -279,9 +278,11 @@ class RED_FAM_PARSER( RED_FAM ): @returns str Timestamp, otherwise None """ - result = cls.__timestamp_pat.findall( line ) - if result: - return result[-1][0] + matches = cls.__timestamp_pat.findall( line ) + if matches: + # Since some timestamps are broken we need to reconstruct them by regex match groups + result = matches[-1][0] + ", " + matches[-1][1] + ". " + matches[-1][2] + ". " + matches[-1][3] + " " + matches[-1][4] + return result else: return None From 14e865c6e6d0a2286f60b5895365ae8dd40bb98e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 12 Sep 2015 00:36:51 +0200 Subject: [PATCH 039/192] We need to save the red_fam section heading in database for generating section links since format is not consistent --- mysql_red.py | 14 +++++++------- red_fam.py | 23 +++++++++++++++-------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/mysql_red.py b/mysql_red.py index f6d9535..3293912 100644 --- a/mysql_red.py +++ b/mysql_red.py @@ -150,17 +150,17 @@ class MYSQL_RED_FAM( MYSQL_RED ): else: return False - def add_fam( self, articlesList, red_page_id, beginning, ending=None, status=0 ): + def add_fam( self, articlesList, heading, red_page_id, beginning, ending=None, status=0 ): cursor = type( self ).connection.cursor() - query = 'INSERT INTO `red_families` ( fam_hash, red_page_id, beginning, ending, status, article0, article1, article2, article3, article4, article5, article6, article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' - data = [ str( self.__fam_hash ), red_page_id, beginning, ending, status ] + query = 'INSERT INTO `red_families` ( fam_hash, red_page_id, beginning, ending, status, heading, article0, article1, article2, article3, article4, article5, article6, article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' + data = [ str( self.__fam_hash ), red_page_id, beginning, ending, status, heading ] for article in articlesList: data.append( str( article ) ) - while len( data ) < 13: + while len( data ) < 14: data.append( None ) data = tuple( data ) @@ -171,7 +171,7 @@ class MYSQL_RED_FAM( MYSQL_RED ): self.data = self.get_fam() - def update_fam( self, red_page_id, beginning, ending, status ): + def update_fam( self, red_page_id, heading, beginning, ending, status ): """ Updates the red fam row in MySQL-Database for given fam_hash @@ -183,8 +183,8 @@ class MYSQL_RED_FAM( MYSQL_RED ): cursor = type( self ).connection.cursor() - query = 'UPDATE `red_families` SET `red_page_id` = ?, `beginning` = ?, `ending` = ?, `status`= ? WHERE `fam_hash` = ?;' - data = ( int(red_page_id ), beginning, ending, int( status ), self.__fam_hash ) + query = 'UPDATE `red_families` SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, `status`= ? WHERE `fam_hash` = ?;' + data = ( int(red_page_id ), str( heading ), beginning, ending, int( status ), self.__fam_hash ) cursor.execute( query, data) diff --git a/red_fam.py b/red_fam.py index 072321e..0eb16fb 100644 --- a/red_fam.py +++ b/red_fam.py @@ -61,7 +61,7 @@ class RED_FAM_PARSER( RED_FAM ): __timestamp_format = "%H:%M, %d. %b. %Y (%Z)" # Define section heading re.pattern - __sectionhead_pat = re.compile( r"^=+.*\[\[.+\]\].*\[\[.+\]\].*=+" ) + __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" ) # Define timestamp re.pattern __timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? 
(\d{4}) (\(CES?T\))" ) @@ -70,7 +70,7 @@ class RED_FAM_PARSER( RED_FAM ): __done_notice = ":Archivierung dieses Abschnittes wurde gewünscht von:" __done_notice2 = "{{Erledigt|" - def __init__( self, red_fam_heading, red_page_id, red_page_archive, beginning, ending=None ): + def __init__( self, heading, red_page_id, red_page_archive, beginning, ending=None ): """ Creates a RED_FAM object based on data collected while parsing red_pages combined with possibly former known data from db @@ -97,7 +97,7 @@ class RED_FAM_PARSER( RED_FAM ): self._ending = None # Parse the provided heading of redundance section to set self._articlesList - self.heading_parser( red_fam_heading ) + self.heading_parser( heading ) # Calculates the sha1 hash over self._articlesList to rediscover known redundance families self.fam_hash() @@ -119,9 +119,9 @@ class RED_FAM_PARSER( RED_FAM ): self.__mysql = MYSQL_RED_FAM( self._fam_hash ) if not self.__mysql.data: - self.__mysql.add_fam( self._articlesList, self._red_page_id, self._beginning, self._ending ) + self.__mysql.add_fam( self._articlesList, self._heading, self._red_page_id, self._beginning, self._ending ) - def heading_parser( self, red_fam_heading): + def heading_parser( self, heading ): """ Parses given red_fam_heading string and saves articles list """ @@ -129,8 +129,15 @@ class RED_FAM_PARSER( RED_FAM ): # Predefine a pattern for wikilinks' destination wikilink_pat = re.compile( r"\[\[([^\[\]\|]*)(\]\]|\|)" ) + # Parse content of heading for generating section links later + match = self.__sectionhead_pat.search( heading ) + if match: + self._heading = match.group(2).lstrip() + else: + raise ValueError( "Heading is not valid" ) + # We get the pages in first [0] element iterating over wikilink_pat.findall( line ) - self._articlesList = [ link[0] for link in wikilink_pat.findall( red_fam_heading ) ] + self._articlesList = [ link[0] for link in wikilink_pat.findall( self._heading ) ] def fam_hash( self ): """ @@ -218,8 +225,8 @@ class 
RED_FAM_PARSER( RED_FAM ): # Since status change means something has changed, update database - if( self._status != self.__mysql.data[ 'status' ] or self._beginning != self.__mysql.data[ 'beginning' ] or self._ending != self.__mysql.data[ 'ending' ] or self._red_page_id != self.__mysql.data[ 'red_page_id' ] ): - self.__mysql.update_fam( self._red_page_id, self._beginning, self._ending, self._status ) + if( self._status != self.__mysql.data[ 'status' ] or self._beginning != self.__mysql.data[ 'beginning' ] or self._ending != self.__mysql.data[ 'ending' ] or self._red_page_id != self.__mysql.data[ 'red_page_id' ] or self._heading != self.__mysql.data[ 'heading' ]): + self.__mysql.update_fam( self._red_page_id, self._heading, self._beginning, self._ending, self._status ) @classmethod def is_sectionheading( cls, line ): From e70655f6790ba0e9a83984c2c725fee097cdea3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 12 Sep 2015 12:42:23 +0200 Subject: [PATCH 040/192] Catch sections with more then 8 articles --- red_fam.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/red_fam.py b/red_fam.py index 0eb16fb..ccf1044 100644 --- a/red_fam.py +++ b/red_fam.py @@ -6,6 +6,8 @@ import re import locale from datetime import datetime +import pywikibot + from mysql_red import MYSQL_RED_FAM class RED_FAM: @@ -96,6 +98,8 @@ class RED_FAM_PARSER( RED_FAM ): #If no ending was provided set to None self._ending = None + self._status = None + # Parse the provided heading of redundance section to set self._articlesList self.heading_parser( heading ) @@ -138,6 +142,11 @@ class RED_FAM_PARSER( RED_FAM ): # We get the pages in first [0] element iterating over wikilink_pat.findall( line ) self._articlesList = [ link[0] for link in wikilink_pat.findall( self._heading ) ] + + # Catch sections with more then 8 articles, print error + if len( self._articlesList ) > 8: + pywikibot.output( "{datetime} – \03{{lightred}}[WARNING] – Maximum 
number of articles in red_fam exceeded, maximum number is 8, {number:d} were given\n{repress}".format( datetime=datetime.now().strftime("%Y-%m-%d %H:%M:%S (%Z)"), number=len( self._articlesList ), repress=repr( self ) ) ) + self._articlesList = self._articlesList[:8] def fam_hash( self ): """ From f50f4b664faebcd490759d89af359c82594b21c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 12 Sep 2015 13:48:01 +0200 Subject: [PATCH 041/192] Add additional possibilities to detect wether a red_page is an archive or not --- red_page.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/red_page.py b/red_page.py index 1ca6d66..436c57c 100644 --- a/red_page.py +++ b/red_page.py @@ -58,7 +58,7 @@ class RED_PAGE: """ Detects wether current page is an archive of discussions """ - if self._archive or ( u"/Archiv" in self.page.title() ): + if self._archive or ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ): return True else: return False @@ -91,7 +91,6 @@ class RED_PAGE: last_fam = i # Save heading fam_heading = line - pywikibot.output( fam_heading ) # Defined (re)initialisation of dates beginning = None @@ -124,8 +123,8 @@ class RED_PAGE: j -= 1 ending = RED_FAM_PARSER.is_ending2( text_lines[ j ] ) + red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) - pywikibot.output( red_fam ) # Increment line counter i += 1 From 6e0d3200c270afa592241b5975bab02d04b5f8ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 12 Sep 2015 14:24:45 +0200 Subject: [PATCH 042/192] Change visibility of attributes changed and parsed in RED_PAGE to protected --- red_page.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/red_page.py b/red_page.py index 436c57c..4ba27d8 100644 --- a/red_page.py +++ b/red_page.py @@ -25,8 +25,8 @@ class RED_PAGE: 
self.is_page_changed() - self.__parsed = None - if( self.__changed or self.__mysql.data[ 'status' ] == 0 ): + self._parsed = None + if( self._changed or self.__mysql.data[ 'status' ] == 0 ): self.parse() self.__update_db() @@ -50,9 +50,9 @@ class RED_PAGE: """ if( self.__mysql.data != { 'page_id': self.page._pageid, 'rev_id': self.page._revid, 'page_title': self.page.title(), 'status': self.__mysql.data[ 'status' ] } ): - self.__changed = True + self._changed = True else: - self.__changed = False + self._changed = False def is_archive( self ): """ @@ -129,12 +129,12 @@ class RED_PAGE: # Increment line counter i += 1 else: - self.__parsed = True + self._parsed = True def __update_db( self ): """ Updates the page meta data in mysql db """ - if( self.__parsed or not self.__changed ): + if( self._parsed or not self._changed ): status = 1 if( self.is_archive() ): From c78480a75d737e03523f5f97c3e998852a2784b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 12 Sep 2015 22:33:44 +0200 Subject: [PATCH 043/192] Use mysql config from pywikibot user-config.py --- mysql_red.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mysql_red.py b/mysql_red.py index 3293912..4fb2384 100644 --- a/mysql_red.py +++ b/mysql_red.py @@ -6,14 +6,16 @@ try: except ImportError: import MySQLdb as mysqldb +from pywikibot import config + class MYSQL_RED: #Save mysqldb-connection as class attribute to use only one in descendant classes connection = False - db_hostname='localhost' - db_username='gwlocal' - db_password='RigTawwewmyagPepotugco' - db_name='gwlocal_wiki' + db_hostname=config.db_hostname + db_username=config.db_username + db_password=config.db_password + db_name=config.db_username + '__bot' def __init__( self ): """ From ad100e96eb80d38573aca397121a6a93aec851ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 12 Sep 2015 22:35:12 +0200 Subject: [PATCH 044/192] Remove 
timezone from timestamp patterns since it is not stored and causes errors on labs --- red_fam.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/red_fam.py b/red_fam.py index ccf1044..df699e1 100644 --- a/red_fam.py +++ b/red_fam.py @@ -60,13 +60,13 @@ class RED_FAM_PARSER( RED_FAM ): """ # Define the timestamp format - __timestamp_format = "%H:%M, %d. %b. %Y (%Z)" + __timestamp_format = "%H:%M, %d. %b. %Y" # Define section heading re.pattern __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" ) # Define timestamp re.pattern - __timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? (\d{4}) (\(CES?T\))" ) + __timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? (\d{4})" ) # Textpattern for recognisation of done-notices __done_notice = ":Archivierung dieses Abschnittes wurde gewünscht von:" @@ -265,7 +265,7 @@ class RED_FAM_PARSER( RED_FAM ): match = cls.__timestamp_pat.search( line ) if match: # Since some timestamps are broken we need to reconstruct them by regex match groups - result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". " + match.group(4) + " " + match.group(5) + result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". " + match.group(4) return result else: return None @@ -282,7 +282,7 @@ class RED_FAM_PARSER( RED_FAM ): match = cls.__timestamp_pat.search( line ) if match: # Since some timestamps are broken we need to reconstruct them by regex match groups - result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". " + match.group(4) + " " + match.group(5) + result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". 
" + match.group(4) return result return None @@ -297,7 +297,7 @@ class RED_FAM_PARSER( RED_FAM ): matches = cls.__timestamp_pat.findall( line ) if matches: # Since some timestamps are broken we need to reconstruct them by regex match groups - result = matches[-1][0] + ", " + matches[-1][1] + ". " + matches[-1][2] + ". " + matches[-1][3] + " " + matches[-1][4] + result = matches[-1][0] + ", " + matches[-1][1] + ". " + matches[-1][2] + ". " + matches[-1][3] return result else: return None From cafe08dd7f555081f61d0ca09ba53e7027a9ea6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 13 Sep 2015 01:22:27 +0200 Subject: [PATCH 045/192] Added license GPLv3 + Reworked code Better compatibility with https://www.mediawiki.org/wiki/Manual:Pywikibot/Development_guidelines --- license.txt | 188 ++++++++++++++++++++++++++++ mysql_red.py | 193 ----------------------------- mysqlred.py | 231 +++++++++++++++++++++++++++++++++++ red_fam.py | 309 ---------------------------------------------- red_page.py | 145 ---------------------- redfam.py | 338 +++++++++++++++++++++++++++++++++++++++++++++++++++ redpage.py | 170 ++++++++++++++++++++++++++ 7 files changed, 927 insertions(+), 647 deletions(-) create mode 100644 license.txt delete mode 100644 mysql_red.py create mode 100644 mysqlred.py delete mode 100644 red_fam.py delete mode 100644 red_page.py create mode 100644 redfam.py create mode 100644 redpage.py diff --git a/license.txt b/license.txt new file mode 100644 index 0000000..b977fb9 --- /dev/null +++ b/license.txt @@ -0,0 +1,188 @@ +GNU GENERAL PUBLIC LICENSE + +Version 3, 29 June 2007 + +Copyright © 2007 Free Software Foundation, Inc. + +Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. + +Preamble + +The GNU General Public License is a free, copyleft license for software and other kinds of works. 
+ +The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. + +To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. + +For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. + +Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. + +For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. 
For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. + +Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. + +Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. + +The precise terms and conditions for copying, distribution and modification follow. +TERMS AND CONDITIONS +0. Definitions. + +“This License” refers to version 3 of the GNU General Public License. + +“Copyright” also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. + +“The Program” refers to any copyrightable work licensed under this License. Each licensee is addressed as “you”. “Licensees” and “recipients” may be individuals or organizations. + +To “modify” a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. 
The resulting work is called a “modified version” of the earlier work or a work “based on” the earlier work. + +A “covered work” means either the unmodified Program or a work based on the Program. + +To “propagate” a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. + +To “convey” a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. + +An interactive user interface displays “Appropriate Legal Notices” to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. +1. Source Code. + +The “source code” for a work means the preferred form of the work for making modifications to it. “Object code” means any non-source form of a work. + +A “Standard Interface” means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. 
+ +The “System Libraries” of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A “Major Component”, in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. + +The “Corresponding Source” for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. + +The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same work. +2. Basic Permissions. + +All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. 
The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. + +You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. + +Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. +3. Protecting Users' Legal Rights From Anti-Circumvention Law. + +No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. + +When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. +4. Conveying Verbatim Copies. 
+ +You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. + +You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. +5. Conveying Modified Source Versions. + +You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified it, and giving a relevant date. + b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to “keep intact all notices”. + c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. + d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. 
+ +A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an “aggregate” if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. +6. Conveying Non-Source Forms. + +You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: + + a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. + b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. + c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. 
This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. + d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. + e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. + +A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. + +A “User Product” is either (1) a “consumer product”, which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, “normally used” refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. 
A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. + +“Installation Information” for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. + +If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). + +The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. 
+ +Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. +7. Additional Terms. + +“Additional permissions” are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. + +When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
+ +Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or + b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or + c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or + d) Limiting the use for publicity purposes of names of licensors or authors of the material; or + e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or + f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. + +All other non-permissive additional terms are considered “further restrictions” within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
+ +If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. + +Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. +8. Termination. + +You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). + +However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. + +Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. + +Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. +9. Acceptance Not Required for Having Copies. + +You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. +10. Automatic Licensing of Downstream Recipients. + +Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. + +An “entity transaction” is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. + +You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. +11. Patents. + +A “contributor” is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's “contributor version”. + +A contributor's “essential patent claims” are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, “control” includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. + +In the following three paragraphs, a “patent license” is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To “grant” such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. + +If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
“Knowingly relying” means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. + +A patent license is “discriminatory” if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. +12. No Surrender of Others' Freedom. 
+ +If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. +13. Use with the GNU Affero General Public License. + +Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. +14. Revised Versions of this License. + +The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License “or any later version” applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. 
If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. + +If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. + +Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. +15. Disclaimer of Warranty. + +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. +16. Limitation of Liability. + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +17. Interpretation of Sections 15 and 16. 
+ +If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. \ No newline at end of file diff --git a/mysql_red.py b/mysql_red.py deleted file mode 100644 index 4fb2384..0000000 --- a/mysql_red.py +++ /dev/null @@ -1,193 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -try: - import oursql as mysqldb -except ImportError: - import MySQLdb as mysqldb - -from pywikibot import config - -class MYSQL_RED: - - #Save mysqldb-connection as class attribute to use only one in descendant classes - connection = False - db_hostname=config.db_hostname - db_username=config.db_username - db_password=config.db_password - db_name=config.db_username + '__bot' - - def __init__( self ): - """ - Opens a connection to MySQL-DB - - @returns mysql-stream MySQL Connection - """ - - # Connect to mysqldb only once - if( type( self ).connection == False ): - - type( self ).connection = mysqldb.connect( host=type( self ).db_hostname, user=type( self ).db_username, passwd=type( self ).db_password, db=type( self ).db_name ) - - def __del__( self ): - """ - Before deleting class, close connection to MySQL-DB - """ - - type( self ).connection.close() - -class MYSQL_RED_PAGE( MYSQL_RED ): - - def __init__( self, page_id ): - """ - Creates a new instance, runs __init__ of parent class - """ - super().__init__( ) - - self.__page_id = int( page_id ); - - self.data = self.get_page() - - def __del__( self ): - pass - - def get_page( self ): - """ - Retrieves a red page row from MySQL-Database for given page_id - - @param int page_id MediaWiki page_id for page to retrieve - - @returns tuple Tuple with data for given page_id otherwise if none found - bool FALSE - """ - cursor = 
type( self ).connection.cursor(mysqldb.DictCursor) - - cursor.execute( 'SELECT * FROM `red_pages` WHERE `page_id` = ?;', ( self.__page_id, ) ) - res = cursor.fetchone() - - if res: - return res - else: - return False - - def add_page( self, page_title, rev_id, status=0 ): - """ - Inserts a red page row in MySQL-Database for given page_id - - @param int rev_id MediaWiki current rev_id for page to update - @param str page_title MediaWiki new page_title for page to update - @param int status Page parsing status (0 - not (successfully) parsed; 1 - successfully parsed; 2 - successfully parsed archive) - """ - - cursor = type( self ).connection.cursor() - - if not page_title: - page_title = self.data[ 'page_title' ] - if not rev_id: - rev_id = self.data[ 'rev_id' ] - - query = 'INSERT INTO `red_pages` ( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' - data = ( self.__page_id, str( page_title ), int( rev_id ), int( status ) ) - - cursor.execute( query, data) - - type( self ).connection.commit() - - self.data = self.get_page() - - - def update_page( self, rev_id=None, page_title=None, status=0 ): - """ - Updates the red page row in MySQL-Database for given page_id - - @param int rev_id MediaWiki current rev_id for page to update - @param str page_title MediaWiki new page_title for page to update - @param int status Page parsing status (0 - not (successfully) parsed; 1 - successfully parsed) - """ - - cursor = type( self ).connection.cursor() - - if not page_title: - page_title = self.data[ 'page_title' ] - if not rev_id: - rev_id = self.data[ 'rev_id' ] - - query = 'UPDATE `red_pages` SET `page_title` = ?, `rev_id` = ?, `status`= ? 
WHERE `page_id` = ?;' - data = ( str( page_title ), int( rev_id ), int( status ), self.__page_id ) - - cursor.execute( query, data) - - type( self ).connection.commit() - - -class MYSQL_RED_FAM( MYSQL_RED ): - - def __init__( self, fam_hash ): - """ - Creates a new instance, runs __init__ of parent class - """ - super().__init__( ) - - self.__fam_hash = fam_hash - - self.data = self.get_fam() - - def __del__( self ): - pass - - def get_fam( self ): - """ - Retrieves a red family row from MySQL-Database for given fam_hash - - @returns dict Dictionairy with data for given fam hash otherwise if none found - bool FALSE - """ - cursor = type( self ).connection.cursor(mysqldb.DictCursor) - - cursor.execute( 'SELECT * FROM `red_families` WHERE `fam_hash` = ?;', ( self.__fam_hash, ) ) - res = cursor.fetchone() - - if res: - return res - else: - return False - - def add_fam( self, articlesList, heading, red_page_id, beginning, ending=None, status=0 ): - - cursor = type( self ).connection.cursor() - - query = 'INSERT INTO `red_families` ( fam_hash, red_page_id, beginning, ending, status, heading, article0, article1, article2, article3, article4, article5, article6, article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' - data = [ str( self.__fam_hash ), red_page_id, beginning, ending, status, heading ] - - for article in articlesList: - data.append( str( article ) ) - - while len( data ) < 14: - data.append( None ) - - data = tuple( data ) - - cursor.execute( query, data) - - type( self ).connection.commit() - - self.data = self.get_fam() - - def update_fam( self, red_page_id, heading, beginning, ending, status ): - """ - Updates the red fam row in MySQL-Database for given fam_hash - - @param int red_page_id MediaWiki page_id which contains red_fam - @param datetime beginning Timestamp of beginning of redundance discussion - qparam datetime ending Timestamp of ending of redundance discussion - @param int status red_fam status (0 - discussion is running; 1 - discussion over; 2 - discussion archived) - """ - - cursor = type( self ).connection.cursor() - - query = 'UPDATE `red_families` SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, `status`= ? WHERE `fam_hash` = ?;' - data = ( int(red_page_id ), str( heading ), beginning, ending, int( status ), self.__fam_hash ) - - cursor.execute( query, data) - - type( self ).connection.commit() diff --git a/mysqlred.py b/mysqlred.py new file mode 100644 index 0000000..ecd1abc --- /dev/null +++ b/mysqlred.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# mysqlred.py +# +# Copyright 2015 GOLDERWEB – Jonathan Golder +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. +# +# +""" +Provides interface classes for communication of redundances bot with mysql-db +""" + +# Prefere using oursql then MySQLdb +try: + import oursql as mysqldb +except ImportError: + import MySQLdb as mysqldb + +from pywikibot import config + +class MysqlRed: + """ + Basic interface class, containing opening of connection + + Specific querys should be defined in descendant classes per data type + """ + + #Save mysqldb-connection as class attribute to use only one in descendant classes + connection = False + db_hostname=config.db_hostname + db_username=config.db_username + db_password=config.db_password + db_name=config.db_username + '__bot' + + def __init__( self ): + """ + Opens a connection to MySQL-DB + + @returns mysql-stream MySQL Connection + """ + + # Connect to mysqldb only once + if( type( self ).connection == False ): + + type( self ).connection = mysqldb.connect( host=type( self ).db_hostname, user=type( self ).db_username, passwd=type( self ).db_password, db=type( self ).db_name ) + + def __del__( self ): + """ + Before deleting class, close connection to MySQL-DB + """ + + type( self ).connection.close() + +class MysqlRedPage( MysqlRed ): + """ + MySQL-db Interface for handling querys for RedPages + """ + + def __init__( self, page_id ): + """ + Creates a new instance, runs __init__ of parent class + """ + + super().__init__( ) + + self.__page_id = int( page_id ); + + self.data = self.get_page() + + def __del__( self ): + pass + + def get_page( self ): + """ + Retrieves a red page row from MySQL-Database for given page_id + + @param int page_id MediaWiki page_id for page to retrieve + + @returns tuple Tuple with data for given page_id otherwise if none found + bool FALSE + """ + + cursor = type( self 
).connection.cursor(mysqldb.DictCursor) + + cursor.execute( 'SELECT * FROM `red_pages` WHERE `page_id` = ?;', ( self.__page_id, ) ) + res = cursor.fetchone() + + if res: + return res + else: + return False + + def add_page( self, page_title, rev_id, status=0 ): + """ + Inserts a red page row in MySQL-Database for given page_id + + @param int rev_id MediaWiki current rev_id for page to update + @param str page_title MediaWiki new page_title for page to update + @param int status Page parsing status (0 - not (successfully) parsed; 1 - successfully parsed; 2 - successfully parsed archive) + """ + + cursor = type( self ).connection.cursor() + + if not page_title: + page_title = self.data[ 'page_title' ] + if not rev_id: + rev_id = self.data[ 'rev_id' ] + + query = 'INSERT INTO `red_pages` ( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' + data = ( self.__page_id, str( page_title ), int( rev_id ), int( status ) ) + + cursor.execute( query, data) + + type( self ).connection.commit() + + self.data = self.get_page() + + def update_page( self, rev_id=None, page_title=None, status=0 ): + """ + Updates the red page row in MySQL-Database for given page_id + + @param int rev_id MediaWiki current rev_id for page to update + @param str page_title MediaWiki new page_title for page to update + @param int status Page parsing status (0 - not (successfully) parsed; 1 - successfully parsed) + """ + + cursor = type( self ).connection.cursor() + + if not page_title: + page_title = self.data[ 'page_title' ] + if not rev_id: + rev_id = self.data[ 'rev_id' ] + + query = 'UPDATE `red_pages` SET `page_title` = ?, `rev_id` = ?, `status`= ? 
WHERE `page_id` = ?;' + data = ( str( page_title ), int( rev_id ), int( status ), self.__page_id ) + + cursor.execute( query, data) + + type( self ).connection.commit() + +class MysqlRedFam( MysqlRed ): + """ + MySQL-db Interface for handling querys for RedFams + """ + + def __init__( self, fam_hash ): + """ + Creates a new instance, runs __init__ of parent class + """ + + super().__init__( ) + + self.__fam_hash = fam_hash + + self.data = self.get_fam() + + def __del__( self ): + pass + + def get_fam( self ): + """ + Retrieves a red family row from MySQL-Database for given fam_hash + + @returns dict Dictionairy with data for given fam hash otherwise if none found + bool FALSE + """ + + cursor = type( self ).connection.cursor(mysqldb.DictCursor) + + cursor.execute( 'SELECT * FROM `red_families` WHERE `fam_hash` = ?;', ( self.__fam_hash, ) ) + res = cursor.fetchone() + + if res: + return res + else: + return False + + def add_fam( self, articlesList, heading, red_page_id, beginning, ending=None, status=0 ): + + cursor = type( self ).connection.cursor() + + query = 'INSERT INTO `red_families` ( fam_hash, red_page_id, beginning, ending, status, heading, article0, article1, article2, article3, article4, article5, article6, article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' + data = [ str( self.__fam_hash ), red_page_id, beginning, ending, status, heading ] + + for article in articlesList: + data.append( str( article ) ) + + while len( data ) < 14: + data.append( None ) + + data = tuple( data ) + + cursor.execute( query, data) + + type( self ).connection.commit() + + self.data = self.get_fam() + + def update_fam( self, red_page_id, heading, beginning, ending, status ): + """ + Updates the red fam row in MySQL-Database for given fam_hash + + @param int red_page_id MediaWiki page_id which contains red_fam + @param datetime beginning Timestamp of beginning of redundance discussion + qparam datetime ending Timestamp of ending of redundance discussion + @param int status red_fam status (0 - discussion is running; 1 - discussion over; 2 - discussion archived) + """ + + cursor = type( self ).connection.cursor() + + query = 'UPDATE `red_families` SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, `status`= ? WHERE `fam_hash` = ?;' + data = ( int(red_page_id ), str( heading ), beginning, ending, int( status ), self.__fam_hash ) + + cursor.execute( query, data) + + type( self ).connection.commit() diff --git a/red_fam.py b/red_fam.py deleted file mode 100644 index df699e1..0000000 --- a/red_fam.py +++ /dev/null @@ -1,309 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import hashlib -import re -import locale -from datetime import datetime - -import pywikibot - -from mysql_red import MYSQL_RED_FAM - -class RED_FAM: - - - - def __init__( self, fam_hash=None, articlesList=None, red_page_id=None, beginning=None, ending=None, status=0 ): - """ - Generates a new RED_FAM object - - @param articlesList list List of articles of redundance family - @param beginning datetime Beginning date of redundance diskussion - @param ending datetime Ending date of redundance diskussion - """ - - #if( beginning ): - # self.add_beginning( beginning ) - # self._beginning = None - - #if( ending ): - # self.add_ending( ending ) - #else: 
- # self._ending = None - - #self._status = status # __TODO__ STATUS CODE - - #self._handle_db() - - - - def __repr__( self ): - - if( self._beginning ): - beginning = ", beginning=" + repr( self._beginning ) - else: - beginning = "" - - if( self._ending ): - ending = ", ending=" + repr( self._ending ) - else: - ending = "" - - __repr = "RED_FAM( " + repr( self._articlesList ) + beginning + ending + ", status=" + repr( self._status ) + " )" - - return __repr - -class RED_FAM_PARSER( RED_FAM ): - """ - Provides an interface to RED_FAM for adding/updating redundance families while parsig redundance pages - """ - - # Define the timestamp format - __timestamp_format = "%H:%M, %d. %b. %Y" - - # Define section heading re.pattern - __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" ) - - # Define timestamp re.pattern - __timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? (\d{4})" ) - - # Textpattern for recognisation of done-notices - __done_notice = ":Archivierung dieses Abschnittes wurde gewünscht von:" - __done_notice2 = "{{Erledigt|" - - def __init__( self, heading, red_page_id, red_page_archive, beginning, ending=None ): - """ - Creates a RED_FAM object based on data collected while parsing red_pages combined with possibly former known data from db - - @param red_fam_heading string String with wikitext heading of redundance section - @param red_page_id int MediaWiki page_id of red_page containing red_fam - @param red_page_archive bool Is red_page an archive - @param beginning datetime Timestamp of beginning of redundance discussion - string Timestamp of beginning of redundance discussion as srftime parseable string - @param ending datetime Timestamp of ending of redundance discussion - string Timestamp of ending of redundance discussion as srftime parseable string - """ - ## Set object attributes: - self._red_page_id = red_page_id - self._red_page_archive = red_page_archive - - # Method 
self.add_beginning sets self._beginning directly - self.add_beginning( beginning ) - - # Method self.add_ending sets self._ending directly - if( ending ): - self.add_ending( ending ) - else: - #If no ending was provided set to None - self._ending = None - - self._status = None - - # Parse the provided heading of redundance section to set self._articlesList - self.heading_parser( heading ) - - # Calculates the sha1 hash over self._articlesList to rediscover known redundance families - self.fam_hash() - - # Open database connection, ask for data if existing, otherwise create entry - self.__handle_db() - - # Check status changes - self.status() - - # Triggers db update if anything changed - self.changed() - - def __handle_db( self ): - """ - Handles opening of db connection - """ - # We need a connection to our mysqldb - self.__mysql = MYSQL_RED_FAM( self._fam_hash ) - - if not self.__mysql.data: - self.__mysql.add_fam( self._articlesList, self._heading, self._red_page_id, self._beginning, self._ending ) - - def heading_parser( self, heading ): - """ - Parses given red_fam_heading string and saves articles list - """ - - # Predefine a pattern for wikilinks' destination - wikilink_pat = re.compile( r"\[\[([^\[\]\|]*)(\]\]|\|)" ) - - # Parse content of heading for generating section links later - match = self.__sectionhead_pat.search( heading ) - if match: - self._heading = match.group(2).lstrip() - else: - raise ValueError( "Heading is not valid" ) - - # We get the pages in first [0] element iterating over wikilink_pat.findall( line ) - self._articlesList = [ link[0] for link in wikilink_pat.findall( self._heading ) ] - - # Catch sections with more then 8 articles, print error - if len( self._articlesList ) > 8: - pywikibot.output( "{datetime} – \03{{lightred}}[WARNING] – Maximum number of articles in red_fam exceeded, maximum number is 8, {number:d} were given\n{repress}".format( datetime=datetime.now().strftime("%Y-%m-%d %H:%M:%S (%Z)"), number=len( 
self._articlesList ), repress=repr( self ) ) ) - self._articlesList = self._articlesList[:8] - - def fam_hash( self ): - """ - Calculates the SHA-1 hash for the articlesList of redundance family. - Since we don't need security SHA-1 is just fine. - - @returns str String with the hexadecimal hash digest - """ - - h = hashlib.sha1() - h.update( str( self._articlesList ).encode('utf-8') ) - - self._fam_hash= h.hexdigest() - - def add_beginning( self, beginning ): - """ - Adds the beginning date of a redundance diskussion to the object and sets changed to True - - @param datetime datetime Beginning date of redundance diskussion - """ - - self._beginning = self.__datetime( beginning ) - - def add_ending( self, ending ): - """ - Adds the ending date of a redundance diskussion to the object. Also sets the status to __TODO__ STATUS NUMBER and changed to True - - @param datetime datetime Ending date of redundance diskussion - """ - - self._ending = self.__datetime( ending ) - - def __datetime( self, timestamp ): - """ - Decides wether given timestamp is a parseable string or a datetime object and returns a datetime object in both cases - - @param timestamp datetime Datetime object - str Parseable string with timestamp in format __timestamp_format - - @returns datetime Datetime object - """ - locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') - - if( isinstance( timestamp, datetime ) ): - return timestamp - else: - result = datetime.strptime( timestamp, type( self ).__timestamp_format ) - return result - - def status( self ): - """ - Handles detection of correct status - There are three possible stati: - - 0 Discussion is running --> no ending, page is not an archive - - 1 Discussion is over --> ending present, page is not an archive - - 2 Discussion is archived --> ending (normaly) present, page is an archive - - 3 and greater status was set by worker script, do not change it - """ - - # Do not change stati set by worker script etc. 
- if not self.__mysql.data['status'] > 2: - - # No ending, discussion is running: - # Sometimes archived discussions also have no detectable ending - if not self._ending and not self._red_page_archive: - self._status = 0 - else: - if not self._red_page_archive: - self._status = 1 - else: - self._status = 2 - else: - self._status = self.__mysql.data[ 'status' ] - - - - def changed( self ): - """ - Checks wether anything has changed and maybe triggers db update - """ - - # On archived red_fams do not delete possibly existing ending - if not self._ending and self._status > 1 and self.__mysql.data[ 'ending' ]: - self._ending = self.__mysql.data[ 'ending' ] - - - # Since status change means something has changed, update database - if( self._status != self.__mysql.data[ 'status' ] or self._beginning != self.__mysql.data[ 'beginning' ] or self._ending != self.__mysql.data[ 'ending' ] or self._red_page_id != self.__mysql.data[ 'red_page_id' ] or self._heading != self.__mysql.data[ 'heading' ]): - self.__mysql.update_fam( self._red_page_id, self._heading, self._beginning, self._ending, self._status ) - - @classmethod - def is_sectionheading( cls, line ): - """ - Checks wether given line is a red_fam section heading - - @param line string String to check - - @returns bool Returns True if it is a section heading, otherwise false - """ - - if cls.__sectionhead_pat.search( line ): - return True - else: - return False - - @classmethod - def is_beginning( cls, line ): - """ - Returns the first timestamp found in line, otherwise None - - @param str line String to search in - - @returns str Timestamp, otherwise None - """ - - match = cls.__timestamp_pat.search( line ) - if match: - # Since some timestamps are broken we need to reconstruct them by regex match groups - result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". 
" + match.group(4) - return result - else: - return None - - @classmethod - def is_ending( cls, line ): - """ - Returns the timestamp of done notice ( if one ), otherwise None - @param str line String to search in - - @returns str Timestamp, otherwise None - """ - if ( cls.__done_notice in line ) or ( cls.__done_notice2 in line ): - match = cls.__timestamp_pat.search( line ) - if match: - # Since some timestamps are broken we need to reconstruct them by regex match groups - result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". " + match.group(4) - return result - return None - - @classmethod - def is_ending2( cls, line ): - """ - Returns the last timestamp found in line, otherwise None - @param str line String to search in - - @returns str Timestamp, otherwise None - """ - matches = cls.__timestamp_pat.findall( line ) - if matches: - # Since some timestamps are broken we need to reconstruct them by regex match groups - result = matches[-1][0] + ", " + matches[-1][1] + ". " + matches[-1][2] + ". 
" + matches[-1][3] - return result - else: - return None - -class RED_FAM_WORKER( RED_FAM ): - """ - Handles working with redundance families stored in database where discussion is finished - """ - pass diff --git a/red_page.py b/red_page.py deleted file mode 100644 index 4ba27d8..0000000 --- a/red_page.py +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import pywikibot - -from mysql_red import MYSQL_RED_PAGE -from red_fam import RED_FAM_PARSER - - -class RED_PAGE: - """Class for handling redundance discussion pages and archives""" - - def __init__( self, page, archive=False ): - """ - Generate a new RED_PAGE object based on the given pywikibot page object - - @param page page - """ - - # Safe the pywikibot page object - self.page = page - self._archive = archive - - self.__handle_db( ) - - self.is_page_changed() - - self._parsed = None - if( self._changed or self.__mysql.data[ 'status' ] == 0 ): - self.parse() - - self.__update_db() - - # else: - # self.__mysql.add_page() - - def __handle_db( self ): - """ - Handles opening of db connection - """ - # We need a connection to our mysqldb - self.__mysql = MYSQL_RED_PAGE( self.page._pageid ) - - if not self.__mysql.data: - self.__mysql.add_page( self.page.title(), self.page._revid ) - - def is_page_changed( self ): - """ - Check wether the page was changed since last run - """ - - if( self.__mysql.data != { 'page_id': self.page._pageid, 'rev_id': self.page._revid, 'page_title': self.page.title(), 'status': self.__mysql.data[ 'status' ] } ): - self._changed = True - else: - self._changed = False - - def is_archive( self ): - """ - Detects wether current page is an archive of discussions - """ - if self._archive or ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ): - return True - else: - return False - - def parse( self ): - """ - Handles the parsing process - """ - - # Since @param text is a string we need to split it in 
lines - text_lines = self.page.text.split( "\n" ) - length = len( text_lines ) - - # Initialise line counter - i = 0 - fam_heading = None - beginning = None - ending = None - - # Set line for last detected Redundance-Family to 0 - last_fam = 0 - - # Iterate over the lines of the page - for line in text_lines: - - # Check wether we have an "Redundance-Family"-Section heading (Level 3) - if RED_FAM_PARSER.is_sectionheading( line ): - - # Save line number for last detected Redundance-Family - last_fam = i - # Save heading - fam_heading = line - - # Defined (re)initialisation of dates - beginning = None - ending = None - - # Check wether we are currently in an "Redundance-Family"-Section Body - if i > last_fam and last_fam > 0: - - # Check if we have alredy recognized the beginning date of the discussion (in former iteration) or if we have a done-notice - if not beginning: - beginning = RED_FAM_PARSER.is_beginning( line ) - elif not ending: - ending = RED_FAM_PARSER.is_ending( line ) - - # Detect end of red_fam section (next line is new sectionheading) or end of file - # Prevent from running out of index - if i < (length - 1): - test = RED_FAM_PARSER.is_sectionheading( text_lines[ i + 1 ] ) - else: - test = False - if ( test or ( length == ( i + 1 ) ) ): - - # Create the red_fam object - if( fam_heading and beginning ): - - #Maybe we can find a ending by feed if we have None yet (No done notice on archive pages) - if not ending and self.is_archive(): - j = i - while (j > last_fam) and not ending: - j -= 1 - ending = RED_FAM_PARSER.is_ending2( text_lines[ j ] ) - - - red_fam = RED_FAM_PARSER( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) - - # Increment line counter - i += 1 - else: - self._parsed = True - def __update_db( self ): - """ - Updates the page meta data in mysql db - """ - if( self._parsed or not self._changed ): - status = 1 - - if( self.is_archive() ): - status = 2 - else: - status = 0 - - self.__mysql.update_page( 
self.page._revid, self.page.title(), status ) diff --git a/redfam.py b/redfam.py new file mode 100644 index 0000000..ab33771 --- /dev/null +++ b/redfam.py @@ -0,0 +1,338 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# redfam.py +# +# Copyright 2015 GOLDERWEB – Jonathan Golder +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. 
+# +# +""" +Provides classes for working with RedFams +""" + +import hashlib +import re +import locale +from datetime import datetime + +import pywikibot + +from mysqlred import MysqlRedFam + +class RedFam: + """ + Basic class for RedFams, containing the basic data structure + """ + + def __init__( self, fam_hash=None, articlesList=None, red_page_id=None, beginning=None, ending=None, status=0 ): + """ + Generates a new RedFam object + + @param articlesList list List of articles of redundance family + @param beginning datetime Beginning date of redundance diskussion + @param ending datetime Ending date of redundance diskussion + """ + + #if( beginning ): + # self.add_beginning( beginning ) + # self._beginning = None + + #if( ending ): + # self.add_ending( ending ) + #else: + # self._ending = None + + #self._status = status # __TODO__ STATUS CODE + + #self._handle_db() + + def __repr__( self ): + + if( self._beginning ): + beginning = ", beginning=" + repr( self._beginning ) + else: + beginning = "" + + if( self._ending ): + ending = ", ending=" + repr( self._ending ) + else: + ending = "" + + __repr = "RedFam( " + repr( self._articlesList ) + beginning + ending + ", status=" + repr( self._status ) + " )" + + return __repr + +class RedFamParser( RedFam ): + """ + Provides an interface to RedFam for adding/updating redundance families while parsig redundance pages + """ + + # Define the timestamp format + __timestamp_format = "%H:%M, %d. %b. %Y" + + # Define section heading re.pattern + __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" ) + + # Define timestamp re.pattern + __timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? 
(\d{4})" ) + + # Textpattern for recognisation of done-notices + __done_notice = ":Archivierung dieses Abschnittes wurde gewünscht von:" + __done_notice2 = "{{Erledigt|" + + def __init__( self, heading, red_page_id, red_page_archive, beginning, ending=None ): + """ + Creates a RedFam object based on data collected while parsing red_pages combined with possibly former known data from db + + @param red_fam_heading string String with wikitext heading of redundance section + @param red_page_id int MediaWiki page_id of red_page containing red_fam + @param red_page_archive bool Is red_page an archive + @param beginning datetime Timestamp of beginning of redundance discussion + string Timestamp of beginning of redundance discussion as srftime parseable string + @param ending datetime Timestamp of ending of redundance discussion + string Timestamp of ending of redundance discussion as srftime parseable string + """ + + ## Set object attributes: + self._red_page_id = red_page_id + self._red_page_archive = red_page_archive + + # Method self.add_beginning sets self._beginning directly + self.add_beginning( beginning ) + + # Method self.add_ending sets self._ending directly + if( ending ): + self.add_ending( ending ) + else: + #If no ending was provided set to None + self._ending = None + + self._status = None + + # Parse the provided heading of redundance section to set self._articlesList + self.heading_parser( heading ) + + # Calculates the sha1 hash over self._articlesList to rediscover known redundance families + self.fam_hash() + + # Open database connection, ask for data if existing, otherwise create entry + self.__handle_db() + + # Check status changes + self.status() + + # Triggers db update if anything changed + self.changed() + + def __handle_db( self ): + """ + Handles opening of db connection + """ + + # We need a connection to our mysqldb + self.__mysql = MysqlRedFam( self._fam_hash ) + + if not self.__mysql.data: + self.__mysql.add_fam( self._articlesList, 
self._heading, self._red_page_id, self._beginning, self._ending ) + + def heading_parser( self, heading ): + """ + Parses given red_fam_heading string and saves articles list + """ + + # Predefine a pattern for wikilinks' destination + wikilink_pat = re.compile( r"\[\[([^\[\]\|]*)(\]\]|\|)" ) + + # Parse content of heading for generating section links later + match = self.__sectionhead_pat.search( heading ) + if match: + self._heading = match.group(2).lstrip() + else: + raise ValueError( "Heading is not valid" ) + + # We get the pages in first [0] element iterating over wikilink_pat.findall( line ) + self._articlesList = [ link[0] for link in wikilink_pat.findall( self._heading ) ] + + # Catch sections with more then 8 articles, print error + if len( self._articlesList ) > 8: + pywikibot.output( "{datetime} – \03{{lightred}}[WARNING] – Maximum number of articles in red_fam exceeded, maximum number is 8, {number:d} were given\n{repress}".format( datetime=datetime.now().strftime("%Y-%m-%d %H:%M:%S (%Z)"), number=len( self._articlesList ), repress=repr( self ) ) ) + self._articlesList = self._articlesList[:8] + + def fam_hash( self ): + """ + Calculates the SHA-1 hash for the articlesList of redundance family. + Since we don't need security SHA-1 is just fine. + + @returns str String with the hexadecimal hash digest + """ + + h = hashlib.sha1() + h.update( str( self._articlesList ).encode('utf-8') ) + + self._fam_hash= h.hexdigest() + + def add_beginning( self, beginning ): + """ + Adds the beginning date of a redundance diskussion to the object and sets changed to True + + @param datetime datetime Beginning date of redundance diskussion + """ + + self._beginning = self.__datetime( beginning ) + + def add_ending( self, ending ): + """ + Adds the ending date of a redundance diskussion to the object. 
Also sets the status to __TODO__ STATUS NUMBER and changed to True + + @param datetime datetime Ending date of redundance diskussion + """ + + self._ending = self.__datetime( ending ) + + def __datetime( self, timestamp ): + """ + Decides wether given timestamp is a parseable string or a datetime object and returns a datetime object in both cases + + @param datetime timestamp Datetime object + str timestamp Parseable string with timestamp in format __timestamp_format + + @returns datetime Datetime object + """ + + # Make sure locale is set to 'de_DE.UTF-8' to prevent problems with wrong month abreviations in strptime + locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') + + if( isinstance( timestamp, datetime ) ): + return timestamp + else: + result = datetime.strptime( timestamp, type( self ).__timestamp_format ) + return result + + def status( self ): + """ + Handles detection of correct status + There are three possible stati: + - 0 Discussion is running --> no ending, page is not an archive + - 1 Discussion is over --> ending present, page is not an archive + - 2 Discussion is archived --> ending (normaly) present, page is an archive + - 3 and greater status was set by worker script, do not change it + """ + + # Do not change stati set by worker script etc. 
+ if not self.__mysql.data['status'] > 2: + + # No ending, discussion is running: + # Sometimes archived discussions also have no detectable ending + if not self._ending and not self._red_page_archive: + self._status = 0 + else: + if not self._red_page_archive: + self._status = 1 + else: + self._status = 2 + else: + self._status = self.__mysql.data[ 'status' ] + + + + def changed( self ): + """ + Checks wether anything has changed and maybe triggers db update + """ + + # On archived red_fams do not delete possibly existing ending + if not self._ending and self._status > 1 and self.__mysql.data[ 'ending' ]: + self._ending = self.__mysql.data[ 'ending' ] + + + # Since status change means something has changed, update database + if( self._status != self.__mysql.data[ 'status' ] or self._beginning != self.__mysql.data[ 'beginning' ] or self._ending != self.__mysql.data[ 'ending' ] or self._red_page_id != self.__mysql.data[ 'red_page_id' ] or self._heading != self.__mysql.data[ 'heading' ]): + self.__mysql.update_fam( self._red_page_id, self._heading, self._beginning, self._ending, self._status ) + + @classmethod + def is_sectionheading( cls, line ): + """ + Checks wether given line is a red_fam section heading + + @param str line String to check + + @returns bool Returns True if it is a section heading, otherwise false + """ + + if cls.__sectionhead_pat.search( line ): + return True + else: + return False + + @classmethod + def is_beginning( cls, line ): + """ + Returns the first timestamp found in line, otherwise None + + @param str line String to search in + + @returns str Timestamp, otherwise None + """ + + match = cls.__timestamp_pat.search( line ) + if match: + # Since some timestamps are broken we need to reconstruct them by regex match groups + result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". 
" + match.group(4) + return result + else: + return None + + @classmethod + def is_ending( cls, line ): + """ + Returns the timestamp of done notice ( if one ), otherwise None + @param str line String to search in + + @returns str Timestamp, otherwise None + """ + + if ( cls.__done_notice in line ) or ( cls.__done_notice2 in line ): + match = cls.__timestamp_pat.search( line ) + if match: + # Since some timestamps are broken we need to reconstruct them by regex match groups + result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". " + match.group(4) + return result + return None + + @classmethod + def is_ending2( cls, line ): + """ + Returns the last timestamp found in line, otherwise None + @param str line String to search in + + @returns str Timestamp, otherwise None + """ + + matches = cls.__timestamp_pat.findall( line ) + if matches: + # Since some timestamps are broken we need to reconstruct them by regex match groups + result = matches[-1][0] + ", " + matches[-1][1] + ". " + matches[-1][2] + ". " + matches[-1][3] + return result + else: + return None + +class RedFamWorker( RedFam ): + """ + Handles working with redundance families stored in database where discussion is finished + """ + pass diff --git a/redpage.py b/redpage.py new file mode 100644 index 0000000..af32154 --- /dev/null +++ b/redpage.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# redpage.py +# +# Copyright 2015 GOLDERWEB – Jonathan Golder +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. +# +# +""" +Provides a class for handling redundance discussion pages and archives +""" + +import pywikibot + +from mysqlred import MysqlRedPage +from redfam import RedFamParser + +class RedPage: + """ + Class for handling redundance discussion pages and archives + """ + + def __init__( self, page, archive=False ): + """ + Generate a new RedPage object based on the given pywikibot page object + + @param page page Pywikibot/MediaWiki page object for page to work on + """ + + # Safe the pywikibot page object + self.page = page + self._archive = archive + + self.__handle_db( ) + + self.is_page_changed() + + self._parsed = None + if( self._changed or self.__mysql.data[ 'status' ] == 0 ): + self.parse() + + self.__update_db() + + def __handle_db( self ): + """ + Handles opening of db connection + """ + + # We need a connection to our mysqldb + self.__mysql = MysqlRedPage( self.page._pageid ) + + if not self.__mysql.data: + self.__mysql.add_page( self.page.title(), self.page._revid ) + + def is_page_changed( self ): + """ + Check wether the page was changed since last run + """ + + if( self.__mysql.data != { 'page_id': self.page._pageid, 'rev_id': self.page._revid, 'page_title': self.page.title(), 'status': self.__mysql.data[ 'status' ] } ): + self._changed = True + else: + self._changed = False + + def is_archive( self ): + """ + Detects wether current page is an archive of discussions + """ + + if self._archive or ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ): + return True + else: + return False + + def parse( self ): + """ + Handles the parsing process + """ + + # Since @param text is a string we need to split it in lines + text_lines = self.page.text.split( "\n" ) + length = len( 
text_lines ) + + # Initialise line counter + i = 0 + fam_heading = None + beginning = None + ending = None + + # Set line for last detected Redundance-Family to 0 + last_fam = 0 + + # Iterate over the lines of the page + for line in text_lines: + + # Check wether we have an "Redundance-Family"-Section heading (Level 3) + if RedFamParser.is_sectionheading( line ): + + # Save line number for last detected Redundance-Family + last_fam = i + # Save heading + fam_heading = line + + # Defined (re)initialisation of dates + beginning = None + ending = None + + # Check wether we are currently in an "Redundance-Family"-Section Body + if i > last_fam and last_fam > 0: + + # Check if we have alredy recognized the beginning date of the discussion (in former iteration) or if we have a done-notice + if not beginning: + beginning = RedFamParser.is_beginning( line ) + elif not ending: + ending = RedFamParser.is_ending( line ) + + # Detect end of red_fam section (next line is new sectionheading) or end of file + # Prevent from running out of index + if i < (length - 1): + test = RedFamParser.is_sectionheading( text_lines[ i + 1 ] ) + else: + test = False + if ( test or ( length == ( i + 1 ) ) ): + + # Create the red_fam object + if( fam_heading and beginning ): + + #Maybe we can find a ending by feed if we have None yet (No done notice on archive pages) + if not ending and self.is_archive(): + j = i + while (j > last_fam) and not ending: + j -= 1 + ending = RedFamParser.is_ending2( text_lines[ j ] ) + + # Create the RedFam object + red_fam = RedFamParser( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) + + # Increment line counter + i += 1 + else: + self._parsed = True + + def __update_db( self ): + """ + Updates the page meta data in mysql db + """ + if( self._parsed or not self._changed ): + status = 1 + + if( self.is_archive() ): + status = 2 + else: + status = 0 + + self.__mysql.update_page( self.page._revid, self.page.title(), status ) From 
74b2dc727c619dc6b7afa4fbd71369de9f62ea5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 13 Sep 2015 11:53:47 +0200 Subject: [PATCH 046/192] Clean up PEP8 styleguide compatibility with flake8 --- mysqlred.py | 115 +++++++++++++++++++------------- redfam.py | 184 ++++++++++++++++++++++++++++++---------------------- redpage.py | 46 ++++++++----- tox.ini | 2 + 4 files changed, 205 insertions(+), 142 deletions(-) create mode 100644 tox.ini diff --git a/mysqlred.py b/mysqlred.py index ecd1abc..8b105fa 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -2,25 +2,25 @@ # -*- coding: utf-8 -*- # # mysqlred.py -# +# # Copyright 2015 GOLDERWEB – Jonathan Golder -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, # MA 02110-1301, USA. 
-# -# +# +# """ Provides interface classes for communication of redundances bot with mysql-db """ @@ -33,6 +33,7 @@ except ImportError: from pywikibot import config + class MysqlRed: """ Basic interface class, containing opening of connection @@ -40,12 +41,13 @@ class MysqlRed: Specific querys should be defined in descendant classes per data type """ - #Save mysqldb-connection as class attribute to use only one in descendant classes + # Save mysqldb-connection as class attribute to use only one + # in descendant classes connection = False - db_hostname=config.db_hostname - db_username=config.db_username - db_password=config.db_password - db_name=config.db_username + '__bot' + db_hostname = config.db_hostname + db_username = config.db_username + db_password = config.db_password + db_name = config.db_username + '__bot' def __init__( self ): """ @@ -55,9 +57,13 @@ class MysqlRed: """ # Connect to mysqldb only once - if( type( self ).connection == False ): + if not type( self ).connection: - type( self ).connection = mysqldb.connect( host=type( self ).db_hostname, user=type( self ).db_username, passwd=type( self ).db_password, db=type( self ).db_name ) + type( self ).connection = mysqldb.connect( + host=type( self ).db_hostname, + user=type( self ).db_username, + passwd=type( self ).db_password, + db=type( self ).db_name ) def __del__( self ): """ @@ -66,10 +72,11 @@ class MysqlRed: type( self ).connection.close() + class MysqlRedPage( MysqlRed ): - """ - MySQL-db Interface for handling querys for RedPages - """ + """ + MySQL-db Interface for handling querys for RedPages + """ def __init__( self, page_id ): """ @@ -78,7 +85,7 @@ class MysqlRedPage( MysqlRed ): super().__init__( ) - self.__page_id = int( page_id ); + self.__page_id = int( page_id ) self.data = self.get_page() @@ -91,13 +98,14 @@ class MysqlRedPage( MysqlRed ): @param int page_id MediaWiki page_id for page to retrieve - @returns tuple Tuple with data for given page_id otherwise if none found - bool FALSE 
+ @returns tuple Tuple with data for given page_id + bool FALSE if none found """ cursor = type( self ).connection.cursor(mysqldb.DictCursor) - cursor.execute( 'SELECT * FROM `red_pages` WHERE `page_id` = ?;', ( self.__page_id, ) ) + cursor.execute( 'SELECT * FROM `red_pages` WHERE `page_id` = ?;', + ( self.__page_id, ) ) res = cursor.fetchone() if res: @@ -109,9 +117,9 @@ class MysqlRedPage( MysqlRed ): """ Inserts a red page row in MySQL-Database for given page_id - @param int rev_id MediaWiki current rev_id for page to update - @param str page_title MediaWiki new page_title for page to update - @param int status Page parsing status (0 - not (successfully) parsed; 1 - successfully parsed; 2 - successfully parsed archive) + @param int rev_id MediaWiki current rev_id + @param str page_title MediaWiki new page_title + @param int status Page parsing status """ cursor = type( self ).connection.cursor() @@ -121,8 +129,10 @@ class MysqlRedPage( MysqlRed ): if not rev_id: rev_id = self.data[ 'rev_id' ] - query = 'INSERT INTO `red_pages` ( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' - data = ( self.__page_id, str( page_title ), int( rev_id ), int( status ) ) + query = 'INSERT INTO `red_pages` \ + ( page_id, page_title, rev_id, status ) \ + VALUES ( ?, ?, ?, ? 
);' + data = ( self.__page_id, page_title, rev_id, status ) cursor.execute( query, data) @@ -134,9 +144,9 @@ class MysqlRedPage( MysqlRed ): """ Updates the red page row in MySQL-Database for given page_id - @param int rev_id MediaWiki current rev_id for page to update - @param str page_title MediaWiki new page_title for page to update - @param int status Page parsing status (0 - not (successfully) parsed; 1 - successfully parsed) + @param int rev_id MediaWiki current rev_id + @param str page_title MediaWiki new page_title + @param int status Page parsing status """ cursor = type( self ).connection.cursor() @@ -146,17 +156,20 @@ class MysqlRedPage( MysqlRed ): if not rev_id: rev_id = self.data[ 'rev_id' ] - query = 'UPDATE `red_pages` SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' - data = ( str( page_title ), int( rev_id ), int( status ), self.__page_id ) + query = 'UPDATE `red_pages` \ + SET `page_title` = ?, `rev_id` = ?, `status`= ? \ + WHERE `page_id` = ?;' + data = ( page_title, rev_id, status, self.__page_id ) cursor.execute( query, data) type( self ).connection.commit() - + + class MysqlRedFam( MysqlRed ): """ - MySQL-db Interface for handling querys for RedFams - """ + MySQL-db Interface for handling querys for RedFams + """ def __init__( self, fam_hash ): """ @@ -176,13 +189,14 @@ class MysqlRedFam( MysqlRed ): """ Retrieves a red family row from MySQL-Database for given fam_hash - @returns dict Dictionairy with data for given fam hash otherwise if none found - bool FALSE + @returns dict Dictionairy with data for given fam hash + False if none found """ - cursor = type( self ).connection.cursor(mysqldb.DictCursor) + cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - cursor.execute( 'SELECT * FROM `red_families` WHERE `fam_hash` = ?;', ( self.__fam_hash, ) ) + cursor.execute( 'SELECT * FROM `red_families` WHERE `fam_hash` = ?;', + ( self.__fam_hash, ) ) res = cursor.fetchone() if res: @@ -190,12 +204,18 @@ class 
MysqlRedFam( MysqlRed ): else: return False - def add_fam( self, articlesList, heading, red_page_id, beginning, ending=None, status=0 ): + def add_fam( self, articlesList, heading, red_page_id, + beginning, ending=None, status=0 ): cursor = type( self ).connection.cursor() - query = 'INSERT INTO `red_families` ( fam_hash, red_page_id, beginning, ending, status, heading, article0, article1, article2, article3, article4, article5, article6, article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' - data = [ str( self.__fam_hash ), red_page_id, beginning, ending, status, heading ] + query = 'INSERT INTO `red_families` \ + ( fam_hash, red_page_id, beginning, ending, status, heading, \ + article0, article1, article2, article3, \ + article4, article5, article6, article7 ) \ + VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' + data = [ self.__fam_hash, red_page_id, beginning, ending, + status, heading ] for article in articlesList: data.append( str( article ) ) @@ -215,16 +235,19 @@ class MysqlRedFam( MysqlRed ): """ Updates the red fam row in MySQL-Database for given fam_hash - @param int red_page_id MediaWiki page_id which contains red_fam - @param datetime beginning Timestamp of beginning of redundance discussion - qparam datetime ending Timestamp of ending of redundance discussion - @param int status red_fam status (0 - discussion is running; 1 - discussion over; 2 - discussion archived) + @param int red_page_id MediaWiki page_id + @param datetime beginning Timestamp of beginning + qparam datetime ending Timestamp of ending of + @param int status red_fam status """ cursor = type( self ).connection.cursor() - query = 'UPDATE `red_families` SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, `status`= ? 
WHERE `fam_hash` = ?;' - data = ( int(red_page_id ), str( heading ), beginning, ending, int( status ), self.__fam_hash ) + query = 'UPDATE `red_families` \ + SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, \ + `ending` = ?, `status`= ? WHERE `fam_hash` = ?;' + data = ( red_page_id, heading, beginning, + ending, status, self.__fam_hash ) cursor.execute( query, data) diff --git a/redfam.py b/redfam.py index ab33771..5119a38 100644 --- a/redfam.py +++ b/redfam.py @@ -2,64 +2,54 @@ # -*- coding: utf-8 -*- # # redfam.py -# +# # Copyright 2015 GOLDERWEB – Jonathan Golder -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, # MA 02110-1301, USA. 
-# -# +# +# """ Provides classes for working with RedFams """ import hashlib -import re import locale +import re from datetime import datetime import pywikibot from mysqlred import MysqlRedFam + class RedFam: """ Basic class for RedFams, containing the basic data structure """ - def __init__( self, fam_hash=None, articlesList=None, red_page_id=None, beginning=None, ending=None, status=0 ): + def __init__( self, fam_hash=None, articlesList=None, red_page_id=None, + beginning=None, ending=None, status=0 ): """ Generates a new RedFam object - @param articlesList list List of articles of redundance family - @param beginning datetime Beginning date of redundance diskussion - @param ending datetime Ending date of redundance diskussion + @param articlesList list List of articles + @param beginning datetime Beginning date + @param ending datetime Ending date """ - - #if( beginning ): - # self.add_beginning( beginning ) - # self._beginning = None - - #if( ending ): - # self.add_ending( ending ) - #else: - # self._ending = None - - #self._status = status # __TODO__ STATUS CODE - - #self._handle_db() + pass def __repr__( self ): @@ -73,13 +63,16 @@ class RedFam: else: ending = "" - __repr = "RedFam( " + repr( self._articlesList ) + beginning + ending + ", status=" + repr( self._status ) + " )" + __repr = "RedFam( " + repr( self._articlesList ) + beginning +\ + ending + ", status=" + repr( self._status ) + " )" return __repr - + + class RedFamParser( RedFam ): """ - Provides an interface to RedFam for adding/updating redundance families while parsig redundance pages + Provides an interface to RedFam for adding/updating redundance families + while parsig redundance pages """ # Define the timestamp format @@ -89,26 +82,29 @@ class RedFamParser( RedFam ): __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" ) # Define timestamp re.pattern - __timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? 
(\d{4})" ) + __timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? (\d{4})" ) # noqa # Textpattern for recognisation of done-notices - __done_notice = ":Archivierung dieses Abschnittes wurde gewünscht von:" + __done_notice = ":Archivierung dieses Abschnittes \ + wurde gewünscht von:" __done_notice2 = "{{Erledigt|" - def __init__( self, heading, red_page_id, red_page_archive, beginning, ending=None ): + def __init__( self, heading, red_page_id, red_page_archive, + beginning, ending=None ): """ - Creates a RedFam object based on data collected while parsing red_pages combined with possibly former known data from db + Creates a RedFam object based on data collected while parsing red_pages + combined with possibly former known data from db - @param red_fam_heading string String with wikitext heading of redundance section - @param red_page_id int MediaWiki page_id of red_page containing red_fam - @param red_page_archive bool Is red_page an archive - @param beginning datetime Timestamp of beginning of redundance discussion - string Timestamp of beginning of redundance discussion as srftime parseable string - @param ending datetime Timestamp of ending of redundance discussion - string Timestamp of ending of redundance discussion as srftime parseable string + @param red_fam_heading str Wikitext heading of section + @param red_page_id int MediaWiki page_id + @param red_page_archive bool Is red_page an archive + @param beginning datetime Timestamp of beginning + str as strptime parseable string + @param ending datetime Timestamp of ending + str strptime parseable string """ - ## Set object attributes: + # Set object attributes: self._red_page_id = red_page_id self._red_page_archive = red_page_archive @@ -119,18 +115,21 @@ class RedFamParser( RedFam ): if( ending ): self.add_ending( ending ) else: - #If no ending was provided set to None + # If no ending was provided set to None self._ending = None self._status = None - 
# Parse the provided heading of redundance section to set self._articlesList + # Parse the provided heading of redundance section + # to set self._articlesList self.heading_parser( heading ) - # Calculates the sha1 hash over self._articlesList to rediscover known redundance families + # Calculates the sha1 hash over self._articlesList to + # rediscover known redundance families self.fam_hash() - # Open database connection, ask for data if existing, otherwise create entry + # Open database connection, ask for data if existing, + # otherwise create entry self.__handle_db() # Check status changes @@ -148,7 +147,9 @@ class RedFamParser( RedFam ): self.__mysql = MysqlRedFam( self._fam_hash ) if not self.__mysql.data: - self.__mysql.add_fam( self._articlesList, self._heading, self._red_page_id, self._beginning, self._ending ) + self.__mysql.add_fam( self._articlesList, self._heading, + self._red_page_id, self._beginning, + self._ending ) def heading_parser( self, heading ): """ @@ -165,12 +166,20 @@ class RedFamParser( RedFam ): else: raise ValueError( "Heading is not valid" ) - # We get the pages in first [0] element iterating over wikilink_pat.findall( line ) - self._articlesList = [ link[0] for link in wikilink_pat.findall( self._heading ) ] + # We get the pages in first [0] element iterating over + # wikilink_pat.findall( line ) + self._articlesList = [ link[0] for link + in wikilink_pat.findall( self._heading ) ] # Catch sections with more then 8 articles, print error if len( self._articlesList ) > 8: - pywikibot.output( "{datetime} – \03{{lightred}}[WARNING] – Maximum number of articles in red_fam exceeded, maximum number is 8, {number:d} were given\n{repress}".format( datetime=datetime.now().strftime("%Y-%m-%d %H:%M:%S (%Z)"), number=len( self._articlesList ), repress=repr( self ) ) ) + pywikibot.output( "{datetime} – \03{{lightred}}[WARNING] – \ + Maximum number of articles in red_fam exceeded, \ + maximum number is 8, {number:d} were given\n\ + 
{repress}".format( + datetime=datetime.now().strftime( "%Y-%m-%d %H:%M:%S" ), + number=len( self._articlesList ), repress=repr( self ) ) ) + self._articlesList = self._articlesList[:8] def fam_hash( self ): @@ -178,58 +187,61 @@ class RedFamParser( RedFam ): Calculates the SHA-1 hash for the articlesList of redundance family. Since we don't need security SHA-1 is just fine. - @returns str String with the hexadecimal hash digest + @returns str String with the hexadecimal hash digest """ h = hashlib.sha1() h.update( str( self._articlesList ).encode('utf-8') ) - self._fam_hash= h.hexdigest() + self._fam_hash = h.hexdigest() def add_beginning( self, beginning ): """ - Adds the beginning date of a redundance diskussion to the object and sets changed to True + Adds the beginning date of a redundance diskussion to the object - @param datetime datetime Beginning date of redundance diskussion + @param datetime datetime Beginning date """ self._beginning = self.__datetime( beginning ) def add_ending( self, ending ): """ - Adds the ending date of a redundance diskussion to the object. Also sets the status to __TODO__ STATUS NUMBER and changed to True + Adds the ending date of a redundance diskussion to the object. 
- @param datetime datetime Ending date of redundance diskussion + @param datetime datetime Ending date """ self._ending = self.__datetime( ending ) def __datetime( self, timestamp ): """ - Decides wether given timestamp is a parseable string or a datetime object and returns a datetime object in both cases + Decides wether given timestamp is a parseable string or a + datetime object and returns a datetime object in both cases @param datetime timestamp Datetime object - str timestamp Parseable string with timestamp in format __timestamp_format + str timestamp Parseable string with timestamp @returns datetime Datetime object """ - # Make sure locale is set to 'de_DE.UTF-8' to prevent problems with wrong month abreviations in strptime + # Make sure locale is set to 'de_DE.UTF-8' to prevent problems + # with wrong month abreviations in strptime locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') if( isinstance( timestamp, datetime ) ): return timestamp else: - result = datetime.strptime( timestamp, type( self ).__timestamp_format ) + result = datetime.strptime( timestamp, + type( self ).__timestamp_format ) return result def status( self ): """ Handles detection of correct status There are three possible stati: - - 0 Discussion is running --> no ending, page is not an archive - - 1 Discussion is over --> ending present, page is not an archive - - 2 Discussion is archived --> ending (normaly) present, page is an archive + - 0 Discussion running --> no ending, page is not an archive + - 1 Discussion over --> ending present, page is not an archive + - 2 Discussion archived --> ending (normaly) present, page is archive - 3 and greater status was set by worker script, do not change it """ @@ -247,22 +259,28 @@ class RedFamParser( RedFam ): self._status = 2 else: self._status = self.__mysql.data[ 'status' ] - - - + def changed( self ): """ Checks wether anything has changed and maybe triggers db update """ # On archived red_fams do not delete possibly existing ending - if not 
self._ending and self._status > 1 and self.__mysql.data[ 'ending' ]: - self._ending = self.__mysql.data[ 'ending' ] - + if( not self._ending and self._status > 1 + and self.__mysql.data[ 'ending' ] ): + + self._ending = self.__mysql.data[ 'ending' ] # Since status change means something has changed, update database - if( self._status != self.__mysql.data[ 'status' ] or self._beginning != self.__mysql.data[ 'beginning' ] or self._ending != self.__mysql.data[ 'ending' ] or self._red_page_id != self.__mysql.data[ 'red_page_id' ] or self._heading != self.__mysql.data[ 'heading' ]): - self.__mysql.update_fam( self._red_page_id, self._heading, self._beginning, self._ending, self._status ) + if( self._status != self.__mysql.data[ 'status' ] or + self._beginning != self.__mysql.data[ 'beginning' ] or + self._ending != self.__mysql.data[ 'ending' ] or + self._red_page_id != self.__mysql.data[ 'red_page_id' ] or + self._heading != self.__mysql.data[ 'heading' ]): + + self.__mysql.update_fam( self._red_page_id, self._heading, + self._beginning, self._ending, + self._status ) @classmethod def is_sectionheading( cls, line ): @@ -271,7 +289,7 @@ class RedFamParser( RedFam ): @param str line String to check - @returns bool Returns True if it is a section heading, otherwise false + @returns bool Returns True if it is a section heading """ if cls.__sectionhead_pat.search( line ): @@ -291,8 +309,10 @@ class RedFamParser( RedFam ): match = cls.__timestamp_pat.search( line ) if match: - # Since some timestamps are broken we need to reconstruct them by regex match groups - result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". " + match.group(4) + # Since some timestamps are broken we need to reconstruct them + # by regex match groups + result = match.group(1) + ", " + match.group(2) + ". " +\ + match.group(3) + ". 
" + match.group(4) return result else: return None @@ -301,16 +321,18 @@ class RedFamParser( RedFam ): def is_ending( cls, line ): """ Returns the timestamp of done notice ( if one ), otherwise None - @param str line String to search in + @param str line String to search in - @returns str Timestamp, otherwise None + @returns str Timestamp, otherwise None """ if ( cls.__done_notice in line ) or ( cls.__done_notice2 in line ): match = cls.__timestamp_pat.search( line ) if match: - # Since some timestamps are broken we need to reconstruct them by regex match groups - result = match.group(1) + ", " + match.group(2) + ". " + match.group(3) + ". " + match.group(4) + # Since some timestamps are broken we need to reconstruct them + # by regex match groups + result = match.group(1) + ", " + match.group(2) + ". " +\ + match.group(3) + ". " + match.group(4) return result return None @@ -325,14 +347,18 @@ class RedFamParser( RedFam ): matches = cls.__timestamp_pat.findall( line ) if matches: - # Since some timestamps are broken we need to reconstruct them by regex match groups - result = matches[-1][0] + ", " + matches[-1][1] + ". " + matches[-1][2] + ". " + matches[-1][3] + # Since some timestamps are broken we need to reconstruct them + # by regex match groups + result = matches[-1][0] + ", " + matches[-1][1] + ". " +\ + matches[-1][2] + ". 
" + matches[-1][3] return result else: - return None + return None + class RedFamWorker( RedFam ): """ - Handles working with redundance families stored in database where discussion is finished + Handles working with redundance families stored in database + where discussion is finished """ pass diff --git a/redpage.py b/redpage.py index af32154..0f4d8fe 100644 --- a/redpage.py +++ b/redpage.py @@ -2,34 +2,35 @@ # -*- coding: utf-8 -*- # # redpage.py -# +# # Copyright 2015 GOLDERWEB – Jonathan Golder -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, # MA 02110-1301, USA. 
-# +# # """ Provides a class for handling redundance discussion pages and archives """ -import pywikibot +import pywikibot # noqa from mysqlred import MysqlRedPage from redfam import RedFamParser + class RedPage: """ Class for handling redundance discussion pages and archives @@ -39,7 +40,7 @@ class RedPage: """ Generate a new RedPage object based on the given pywikibot page object - @param page page Pywikibot/MediaWiki page object for page to work on + @param page page Pywikibot/MediaWiki page object for page """ # Safe the pywikibot page object @@ -72,7 +73,10 @@ class RedPage: Check wether the page was changed since last run """ - if( self.__mysql.data != { 'page_id': self.page._pageid, 'rev_id': self.page._revid, 'page_title': self.page.title(), 'status': self.__mysql.data[ 'status' ] } ): + if( self.__mysql.data != { 'page_id': self.page._pageid, + 'rev_id': self.page._revid, + 'page_title': self.page.title(), + 'status': self.__mysql.data[ 'status' ] } ): self._changed = True else: self._changed = False @@ -82,10 +86,13 @@ class RedPage: Detects wether current page is an archive of discussions """ - if self._archive or ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ): - return True + if( self._archive or ( u"/Archiv" in self.page.title() ) or + ( "{{Archiv}}" in self.page.text ) or + ( "{{Archiv|" in self.page.text ) ): + + return True else: - return False + return False def parse( self ): """ @@ -108,7 +115,7 @@ class RedPage: # Iterate over the lines of the page for line in text_lines: - # Check wether we have an "Redundance-Family"-Section heading (Level 3) + # Check wether we have an "Redundance-Family"-Section heading if RedFamParser.is_sectionheading( line ): # Save line number for last detected Redundance-Family @@ -120,16 +127,18 @@ class RedPage: beginning = None ending = None - # Check wether we are currently in an "Redundance-Family"-Section Body + # Check wether we are currently in an 
"Redundance-Family"-Section if i > last_fam and last_fam > 0: - # Check if we have alredy recognized the beginning date of the discussion (in former iteration) or if we have a done-notice + # Check if we have alredy recognized the beginning date of the + # discussion (in former iteration) or if we have a done-notice if not beginning: beginning = RedFamParser.is_beginning( line ) elif not ending: ending = RedFamParser.is_ending( line ) - # Detect end of red_fam section (next line is new sectionheading) or end of file + # Detect end of red_fam section (next line is new sectionheading) + # or end of file # Prevent from running out of index if i < (length - 1): test = RedFamParser.is_sectionheading( text_lines[ i + 1 ] ) @@ -140,7 +149,8 @@ class RedPage: # Create the red_fam object if( fam_heading and beginning ): - #Maybe we can find a ending by feed if we have None yet (No done notice on archive pages) + # Maybe we can find a ending by feed if we have None yet + # (No done notice on archive pages) if not ending and self.is_archive(): j = i while (j > last_fam) and not ending: @@ -148,7 +158,9 @@ class RedPage: ending = RedFamParser.is_ending2( text_lines[ j ] ) # Create the RedFam object - red_fam = RedFamParser( fam_heading, self.page._pageid, self.is_archive(), beginning, ending ) + red_fam = RedFamParser( fam_heading, self.page._pageid, + self.is_archive(), beginning, + ending ) # Increment line counter i += 1 diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..9236f4f --- /dev/null +++ b/tox.ini @@ -0,0 +1,2 @@ +[flake8] +ignore = E129,E201,E202,W293 From 7c9061d37f7db7fec56f920f39940ecbe6024724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 13 Sep 2015 13:13:28 +0200 Subject: [PATCH 047/192] Use redundances as python package --- __init__.py | 28 ++++++++++++++++++++++++++++ redfam.py | 2 +- redpage.py | 4 ++-- 3 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 __init__.py diff 
--git a/__init__.py b/__init__.py new file mode 100644 index 0000000..ebbaf6c --- /dev/null +++ b/__init__.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# __init__.py +# +# Copyright 2015 GOLDERWEB – Jonathan Golder +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. +# +# +""" +Scripts for our redundances bot +""" + +from redundances import mysqlred, redpage, redfam diff --git a/redfam.py b/redfam.py index 5119a38..a183431 100644 --- a/redfam.py +++ b/redfam.py @@ -32,7 +32,7 @@ from datetime import datetime import pywikibot -from mysqlred import MysqlRedFam +from .mysqlred import MysqlRedFam class RedFam: diff --git a/redpage.py b/redpage.py index 0f4d8fe..1ecd6e7 100644 --- a/redpage.py +++ b/redpage.py @@ -27,8 +27,8 @@ Provides a class for handling redundance discussion pages and archives import pywikibot # noqa -from mysqlred import MysqlRedPage -from redfam import RedFamParser +from .mysqlred import MysqlRedPage +from .redfam import RedFamParser class RedPage: From af6d4832805b76284b0c18f3cb3ed00a12f2ec91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 13 Sep 2015 13:14:37 +0200 Subject: [PATCH 048/192] Use a additional module to store custom config ... 
--- mysqlred.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mysqlred.py b/mysqlred.py index 8b105fa..0f400f4 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -32,7 +32,7 @@ except ImportError: import MySQLdb as mysqldb from pywikibot import config - +import jogobot class MysqlRed: """ @@ -47,7 +47,7 @@ class MysqlRed: db_hostname = config.db_hostname db_username = config.db_username db_password = config.db_password - db_name = config.db_username + '__bot' + db_name = config.db_username + jogobot.db_namesuffix def __init__( self ): """ From be0041804af46a2f3dc87d76e9d47b154a8f46b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 13 Sep 2015 13:19:22 +0200 Subject: [PATCH 049/192] Clean up --- __init__.py | 2 -- mysqlred.py | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/__init__.py b/__init__.py index ebbaf6c..1aef520 100644 --- a/__init__.py +++ b/__init__.py @@ -24,5 +24,3 @@ """ Scripts for our redundances bot """ - -from redundances import mysqlred, redpage, redfam diff --git a/mysqlred.py b/mysqlred.py index 0f400f4..947d6e5 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -34,6 +34,7 @@ except ImportError: from pywikibot import config import jogobot + class MysqlRed: """ Basic interface class, containing opening of connection From db5bb7401e817a9ee922208542f4cd390fa7277d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 15 Sep 2015 21:11:06 +0200 Subject: [PATCH 050/192] Update RedFam class to rebuild the whole structure of RedFamPaser generated object Move fam_hash() method from RedFamParser to RedFam Define custom Error classes --- mysqlred.py | 1 + redfam.py | 120 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 92 insertions(+), 29 deletions(-) diff --git a/mysqlred.py b/mysqlred.py index 947d6e5..1a4da14 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -32,6 +32,7 @@ except ImportError: import MySQLdb as 
mysqldb from pywikibot import config + import jogobot diff --git a/redfam.py b/redfam.py index a183431..36ee687 100644 --- a/redfam.py +++ b/redfam.py @@ -32,6 +32,7 @@ from datetime import datetime import pywikibot +import jogobot from .mysqlred import MysqlRedFam @@ -40,34 +41,72 @@ class RedFam: Basic class for RedFams, containing the basic data structure """ - def __init__( self, fam_hash=None, articlesList=None, red_page_id=None, - beginning=None, ending=None, status=0 ): + def __init__( self, articlesList, beginning, ending=None, red_page_id=None, + status=0, fam_hash=None, heading=None ): """ Generates a new RedFam object @param articlesList list List of articles @param beginning datetime Beginning date @param ending datetime Ending date + @param red_page_id int MW pageid of containing RedPage + @param status int Status of RedFam + @param fam_hash str SHA1 hash of articlesList + @param heading str Original heading of RedFam (Link) """ - pass - + + # Initial attribute values + self._articlesList = articlesList + self._beginning = beginning + self._ending = ending + self._red_page_id = red_page_id + self._status = status + self._fam_hash = fam_hash + self._heading = heading + + # Calculates the sha1 hash over self._articlesList to + # rediscover known redundance families + self.calc_fam_hash() + def __repr__( self ): + """ + Returns repression str of RedFam object - if( self._beginning ): - beginning = ", beginning=" + repr( self._beginning ) - else: - beginning = "" - - if( self._ending ): - ending = ", ending=" + repr( self._ending ) - else: - ending = "" + @returns str repr() string + """ - __repr = "RedFam( " + repr( self._articlesList ) + beginning +\ - ending + ", status=" + repr( self._status ) + " )" + __repr = "RedFam( " + \ + "articlesList=" + repr( self._articlesList ) + \ + ", heading=" + repr( self._heading ) + \ + ", beginning=" + repr( self._beginning ) + \ + ", ending=" + repr( self._ending ) + \ + ", red_page_id=" + repr( self._red_page_id ) + 
\ + ", status=" + repr( self._status ) + \ + ", fam_hash=" + repr( self._fam_hash ) + \ + ", heading=" + repr( self._heading ) + \ + " )" return __repr + def calc_fam_hash( self ): + """ + Calculates the SHA-1 hash for the articlesList of redundance family. + Since we don't need security SHA-1 is just fine. + + @returns str String with the hexadecimal hash digest + """ + + h = hashlib.sha1() + h.update( str( self._articlesList ).encode('utf-8') ) + + if self._fam_hash and h.hexdigest() != self._fam_hash: + raise RedFamHashError( self._fam_hash, h.hexdigest() ) + + elif self._fam_hash: + return + else: + self._fam_hash = h.hexdigest() + class RedFamParser( RedFam ): """ @@ -126,7 +165,7 @@ class RedFamParser( RedFam ): # Calculates the sha1 hash over self._articlesList to # rediscover known redundance families - self.fam_hash() + self.calc_fam_hash() # Open database connection, ask for data if existing, # otherwise create entry @@ -181,19 +220,6 @@ class RedFamParser( RedFam ): number=len( self._articlesList ), repress=repr( self ) ) ) self._articlesList = self._articlesList[:8] - - def fam_hash( self ): - """ - Calculates the SHA-1 hash for the articlesList of redundance family. - Since we don't need security SHA-1 is just fine. 
- - @returns str String with the hexadecimal hash digest - """ - - h = hashlib.sha1() - h.update( str( self._articlesList ).encode('utf-8') ) - - self._fam_hash = h.hexdigest() def add_beginning( self, beginning ): """ @@ -362,3 +388,39 @@ class RedFamWorker( RedFam ): where discussion is finished """ pass + + +class RedFamError( Exception ): + """ + Base class for all Errors of RedFam-Module + """ + + def __init__( self, message=None ): + """ + Handles Instantiation of RedFamError's + """ + if not message: + self.message = "An Error occured while executing a RedFam action" + else: + self.message = message + + def __str__( self ): + """ + Output of error message + """ + + return message + + +class RedFamHashError( RedFamError ): + """ + Raised when given RedFamHash does not match with calculated + """ + + def __init__( self, givenHash, calculatedHash ): + + message = "Error: Given fam_hash ('{given}') does not match with \ + calculated ('{calc}'".format( given=givenHash, + calc=calculatedHash ) + + super().__init__( message ) From b514eb5c42bf7d20a89b08426d43303d2f671c5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 15 Sep 2015 21:21:05 +0200 Subject: [PATCH 051/192] Move configuration to jogobot module Use custom Error classes --- redfam.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/redfam.py b/redfam.py index 36ee687..8947fe9 100644 --- a/redfam.py +++ b/redfam.py @@ -115,13 +115,13 @@ class RedFamParser( RedFam ): """ # Define the timestamp format - __timestamp_format = "%H:%M, %d. %b. %Y" + __timestamp_format = jogobot.timestamp_format # Define section heading re.pattern __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" ) # Define timestamp re.pattern - __timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2}). (Jan|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez).? 
(\d{4})" ) # noqa + __timestamp_pat = re.compile( jogobot.timestamp_regex ) # Textpattern for recognisation of done-notices __done_notice = ":Archivierung dieses Abschnittes \ @@ -199,11 +199,11 @@ class RedFamParser( RedFam ): wikilink_pat = re.compile( r"\[\[([^\[\]\|]*)(\]\]|\|)" ) # Parse content of heading for generating section links later - match = self.__sectionhead_pat.search( heading ) + match = type( self ).__sectionhead_pat.search( heading ) if match: self._heading = match.group(2).lstrip() else: - raise ValueError( "Heading is not valid" ) + raise RedFamHeadingError( heading ) # We get the pages in first [0] element iterating over # wikilink_pat.findall( line ) @@ -424,3 +424,16 @@ class RedFamHashError( RedFamError ): calc=calculatedHash ) super().__init__( message ) + + +class RedFamHeadingError ( RedFamError ): + """ + Raised when given RedFamHeading does not match __sectionhead_pat Regex + """ + def __init__( self, heading ): + + message = "Error while trying to parse section heading. Given heading \ + '{heading}' does not match RegEx".format( + heading=heading ) + + super().__init__( message ) From 1dea5d7e84f77c9272525e49579ef8a10ea7028d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 16 Sep 2015 18:31:54 +0200 Subject: [PATCH 052/192] NOT WORKING Cache SQL querys to reduce amount of querys --- mysqlred.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/mysqlred.py b/mysqlred.py index 1a4da14..13f65c8 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -80,6 +80,13 @@ class MysqlRedPage( MysqlRed ): MySQL-db Interface for handling querys for RedPages """ + # Class variables for storing cached querys + __cached_update = [] + __cached_insert_data = [] + __insert_query = 'INSERT INTO `red_pages` \ + ( page_id, page_title, rev_id, status ) \ + VALUES ( ?, ?, ?, ? 
);' + def __init__( self, page_id ): """ Creates a new instance, runs __init__ of parent class @@ -124,23 +131,12 @@ class MysqlRedPage( MysqlRed ): @param int status Page parsing status """ - cursor = type( self ).connection.cursor() + __cached_insert_data.apend( ( self.__page_id, page_title, + rev_id, status ) ) - if not page_title: - page_title = self.data[ 'page_title' ] - if not rev_id: - rev_id = self.data[ 'rev_id' ] - - query = 'INSERT INTO `red_pages` \ - ( page_id, page_title, rev_id, status ) \ - VALUES ( ?, ?, ?, ? );' - data = ( self.__page_id, page_title, rev_id, status ) - - cursor.execute( query, data) - - type( self ).connection.commit() - - self.data = self.get_page() + # Manualy construct self.data dict + self.data = { 'page_id' : self.__page_id, 'rev_id' : rev_id, + 'page_title' : page_title, 'status' : status } def update_page( self, rev_id=None, page_title=None, status=0 ): """ From 26f5912f88c680ba532fbda770fd71bae0430c4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 16 Sep 2015 21:01:30 +0200 Subject: [PATCH 053/192] Collect writing db querys for running once in MysqlRedPage Add classmethod to MysqlRed for executing collected querys --- mysqlred.py | 54 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/mysqlred.py b/mysqlred.py index 13f65c8..c8b23a2 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -51,6 +51,12 @@ class MysqlRed: db_password = config.db_password db_name = config.db_username + jogobot.db_namesuffix + # Class variables for storing cached querys + __cached_update_data = [] + __update_query = '' + __cached_insert_data = [] + __insert_query = '' + def __init__( self ): """ Opens a connection to MySQL-DB @@ -73,6 +79,23 @@ class MysqlRed: """ type( self ).connection.close() + + @classmethod + def flush( cls ): + """ + Run cached querys + """ + cursor = cls.connection.cursor() + + # Execute insert query + 
cursor.execute( cls.__insert_query, cls.__cached_insert_data ) + + # Execute update query + # Use executemany since update could not be reduced to one query + cursor.executemany( cls.__update_query, cls.__cached_update_data ) + + # Commit db changes + cls.connection.commit() class MysqlRedPage( MysqlRed ): @@ -80,12 +103,15 @@ class MysqlRedPage( MysqlRed ): MySQL-db Interface for handling querys for RedPages """ - # Class variables for storing cached querys - __cached_update = [] + # Class variables for storing cached querys + __cached_update_data = [] + __update_query = 'UPDATE `red_pages` \ + SET `page_title` = ?, `rev_id` = ?, `status`= ? \ + WHERE `page_id` = ?;' __cached_insert_data = [] __insert_query = 'INSERT INTO `red_pages` \ - ( page_id, page_title, rev_id, status ) \ - VALUES ( ?, ?, ?, ? );' + ( page_id, page_title, rev_id, status ) \ + VALUES ( ?, ?, ?, ? );' def __init__( self, page_id ): """ @@ -131,12 +157,12 @@ class MysqlRedPage( MysqlRed ): @param int status Page parsing status """ - __cached_insert_data.apend( ( self.__page_id, page_title, - rev_id, status ) ) + type( self ).__cached_insert_data.apend( ( self.__page_id, page_title, + rev_id, status ) ) # Manualy construct self.data dict - self.data = { 'page_id' : self.__page_id, 'rev_id' : rev_id, - 'page_title' : page_title, 'status' : status } + self.data = { 'page_id': self.__page_id, 'rev_id': rev_id, + 'page_title': page_title, 'status': status } def update_page( self, rev_id=None, page_title=None, status=0 ): """ @@ -147,21 +173,13 @@ class MysqlRedPage( MysqlRed ): @param int status Page parsing status """ - cursor = type( self ).connection.cursor() - if not page_title: page_title = self.data[ 'page_title' ] if not rev_id: rev_id = self.data[ 'rev_id' ] - query = 'UPDATE `red_pages` \ - SET `page_title` = ?, `rev_id` = ?, `status`= ? 
\ - WHERE `page_id` = ?;' - data = ( page_title, rev_id, status, self.__page_id ) - - cursor.execute( query, data) - - type( self ).connection.commit() + type( self ).__cached_update_data.append( ( page_title, rev_id, + status, self.__page_id ) ) class MysqlRedFam( MysqlRed ): From 53f53ddb8bf81e2ab5fff8537e46f86a33150a1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 16 Sep 2015 21:26:55 +0200 Subject: [PATCH 054/192] Implement cached querys in MysqlRedFam --- mysqlred.py | 54 ++++++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/mysqlred.py b/mysqlred.py index c8b23a2..63cb10f 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -88,14 +88,19 @@ class MysqlRed: cursor = cls.connection.cursor() # Execute insert query - cursor.execute( cls.__insert_query, cls.__cached_insert_data ) + if cls.__cached_insert_data: + cursor.execute( cls.__insert_query, cls.__cached_insert_data ) + cls.__cached_insert_data = [] # Execute update query # Use executemany since update could not be reduced to one query - cursor.executemany( cls.__update_query, cls.__cached_update_data ) + if cls.__cached_update_data: + cursor.executemany( cls.__update_query, cls.__cached_update_data ) + cls.__cached_update_data = [] # Commit db changes - cls.connection.commit() + if cls.__cached_insert_data or cls.__cached_update_data: + cls.connection.commit() class MysqlRedPage( MysqlRed ): @@ -186,6 +191,17 @@ class MysqlRedFam( MysqlRed ): """ MySQL-db Interface for handling querys for RedFams """ + # Class variables for storing cached querys + __cached_update_data = [] + __update_query = 'UPDATE `red_families` \ + SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, \ + `ending` = ?, `status`= ? 
WHERE `fam_hash` = ?;' + __cached_insert_data = [] + __insert_query = 'INSERT INTO `red_families` \ + ( fam_hash, red_page_id, beginning, ending, status, \ + heading, article0, article1, article2, article3, \ + article4, article5, article6, article7 ) \ + VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' def __init__( self, fam_hash ): """ @@ -223,13 +239,6 @@ class MysqlRedFam( MysqlRed ): def add_fam( self, articlesList, heading, red_page_id, beginning, ending=None, status=0 ): - cursor = type( self ).connection.cursor() - - query = 'INSERT INTO `red_families` \ - ( fam_hash, red_page_id, beginning, ending, status, heading, \ - article0, article1, article2, article3, \ - article4, article5, article6, article7 ) \ - VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' data = [ self.__fam_hash, red_page_id, beginning, ending, status, heading ] @@ -241,11 +250,14 @@ class MysqlRedFam( MysqlRed ): data = tuple( data ) - cursor.execute( query, data) + type( self ).__cached_insert_data.append( data ) - type( self ).connection.commit() - - self.data = self.get_fam() + # Manualy construct self.data dict + data_keys = ( 'fam_hash', 'red_page_id', 'beginning', 'ending', + 'status', 'heading', 'article0', 'article1', 'article2', + 'article3', 'article4', 'article5', 'article6', + 'article7' ) + self.data = dict( zip( data_keys, data ) ) def update_fam( self, red_page_id, heading, beginning, ending, status ): """ @@ -257,14 +269,6 @@ class MysqlRedFam( MysqlRed ): @param int status red_fam status """ - cursor = type( self ).connection.cursor() - - query = 'UPDATE `red_families` \ - SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, \ - `ending` = ?, `status`= ? 
WHERE `fam_hash` = ?;' - data = ( red_page_id, heading, beginning, - ending, status, self.__fam_hash ) - - cursor.execute( query, data) - - type( self ).connection.commit() + type( self ).__cached_update_data.append( ( red_page_id, heading, + beginning, ending, status, + self.__fam_hash ) ) From 8dc7fe678dd277a6622742717ee860c96017bbe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 17 Sep 2015 19:57:53 +0200 Subject: [PATCH 055/192] Fix bug caused by adding fam_hash to repr of RedFam class since it was not defined yet while output of warning caused by to many articles --- redfam.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/redfam.py b/redfam.py index 8947fe9..e62b70d 100644 --- a/redfam.py +++ b/redfam.py @@ -97,7 +97,7 @@ class RedFam: """ h = hashlib.sha1() - h.update( str( self._articlesList ).encode('utf-8') ) + h.update( str( self._articlesList[:8] ).encode('utf-8') ) if self._fam_hash and h.hexdigest() != self._fam_hash: raise RedFamHashError( self._fam_hash, h.hexdigest() ) @@ -146,6 +146,7 @@ class RedFamParser( RedFam ): # Set object attributes: self._red_page_id = red_page_id self._red_page_archive = red_page_archive + self._fam_hash = None # Method self.add_beginning sets self._beginning directly self.add_beginning( beginning ) @@ -165,6 +166,7 @@ class RedFamParser( RedFam ): # Calculates the sha1 hash over self._articlesList to # rediscover known redundance families + self.calc_fam_hash() # Open database connection, ask for data if existing, @@ -212,10 +214,12 @@ class RedFamParser( RedFam ): # Catch sections with more then 8 articles, print error if len( self._articlesList ) > 8: - pywikibot.output( "{datetime} – \03{{lightred}}[WARNING] – \ - Maximum number of articles in red_fam exceeded, \ - maximum number is 8, {number:d} were given\n\ - {repress}".format( + # For repression in output we need to know the fam hash + self.calc_fam_hash() + pywikibot.output( "\ 
+{datetime} – \03{{lightred}}[WARNING] – \ +Maximum number of articles in red_fam exceeded, maximum number is 8, \ +{number:d} were given \n {repress}".format( datetime=datetime.now().strftime( "%Y-%m-%d %H:%M:%S" ), number=len( self._articlesList ), repress=repr( self ) ) ) @@ -409,7 +413,7 @@ class RedFamError( Exception ): Output of error message """ - return message + return self.message class RedFamHashError( RedFamError ): From b1b37f9b9ed98175c5f0cb8698a9517986cd31d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 17 Sep 2015 20:00:13 +0200 Subject: [PATCH 056/192] Implement functions for flushing db query caches --- redfam.py | 7 +++++++ redpage.py | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/redfam.py b/redfam.py index e62b70d..0a23315 100644 --- a/redfam.py +++ b/redfam.py @@ -107,6 +107,13 @@ class RedFam: else: self._fam_hash = h.hexdigest() + @classmethod + def flush_db_cache( cls ): + """ + Calls flush method of Mysql Interface class + """ + MysqlRedFam.flush() + class RedFamParser( RedFam ): """ diff --git a/redpage.py b/redpage.py index 1ecd6e7..6a016bd 100644 --- a/redpage.py +++ b/redpage.py @@ -165,6 +165,7 @@ class RedPage: # Increment line counter i += 1 else: + RedFamParser.flush_db_cache() self._parsed = True def __update_db( self ): @@ -180,3 +181,10 @@ class RedPage: status = 0 self.__mysql.update_page( self.page._revid, self.page.title(), status ) + + @classmethod + def flush_db_cache( cls ): + """ + Calls flush method of Mysql Interface class + """ + MysqlRedPage.flush() From 4518efc504966027406d2270717f651f181472af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 18 Sep 2015 18:08:13 +0200 Subject: [PATCH 057/192] Fix bug (Cached querys not executed) caused by class attribute protection level --> changed from private to protected Reformat MySQL querys to remove whitespace generated by indetation --- mysqlred.py | 72 
++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/mysqlred.py b/mysqlred.py index 63cb10f..1a782b8 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -52,10 +52,10 @@ class MysqlRed: db_name = config.db_username + jogobot.db_namesuffix # Class variables for storing cached querys - __cached_update_data = [] - __update_query = '' - __cached_insert_data = [] - __insert_query = '' + _cached_update_data = [] + _update_query = '' + _cached_insert_data = [] + _insert_query = '' def __init__( self ): """ @@ -88,18 +88,18 @@ class MysqlRed: cursor = cls.connection.cursor() # Execute insert query - if cls.__cached_insert_data: - cursor.execute( cls.__insert_query, cls.__cached_insert_data ) - cls.__cached_insert_data = [] + if cls._cached_insert_data: + cursor.executemany( cls._insert_query, cls._cached_insert_data ) + cls._cached_insert_data = [] # Execute update query # Use executemany since update could not be reduced to one query - if cls.__cached_update_data: - cursor.executemany( cls.__update_query, cls.__cached_update_data ) - cls.__cached_update_data = [] + if cls._cached_update_data: + cursor.executemany( cls._update_query, cls._cached_update_data ) + cls._cached_update_data = [] # Commit db changes - if cls.__cached_insert_data or cls.__cached_update_data: + if cls._cached_insert_data or cls._cached_update_data: cls.connection.commit() @@ -109,14 +109,13 @@ class MysqlRedPage( MysqlRed ): """ # Class variables for storing cached querys - __cached_update_data = [] - __update_query = 'UPDATE `red_pages` \ - SET `page_title` = ?, `rev_id` = ?, `status`= ? \ - WHERE `page_id` = ?;' - __cached_insert_data = [] - __insert_query = 'INSERT INTO `red_pages` \ - ( page_id, page_title, rev_id, status ) \ - VALUES ( ?, ?, ?, ? );' + _cached_update_data = [] + _update_query = 'UPDATE `red_pages` \ +SET `page_title` = ?, `rev_id` = ?, `status`= ? 
WHERE `page_id` = ?;' + + _cached_insert_data = [] + _insert_query = 'INSERT INTO `red_pages` \ +( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' def __init__( self, page_id ): """ @@ -162,7 +161,7 @@ class MysqlRedPage( MysqlRed ): @param int status Page parsing status """ - type( self ).__cached_insert_data.apend( ( self.__page_id, page_title, + type( self )._cached_insert_data.append( ( self.__page_id, page_title, rev_id, status ) ) # Manualy construct self.data dict @@ -183,25 +182,26 @@ class MysqlRedPage( MysqlRed ): if not rev_id: rev_id = self.data[ 'rev_id' ] - type( self ).__cached_update_data.append( ( page_title, rev_id, - status, self.__page_id ) ) + type( self )._cached_update_data.append( ( page_title, rev_id, + status, self.__page_id ) ) class MysqlRedFam( MysqlRed ): """ MySQL-db Interface for handling querys for RedFams """ + # Class variables for storing cached querys - __cached_update_data = [] - __update_query = 'UPDATE `red_families` \ - SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, \ - `ending` = ?, `status`= ? WHERE `fam_hash` = ?;' - __cached_insert_data = [] - __insert_query = 'INSERT INTO `red_families` \ - ( fam_hash, red_page_id, beginning, ending, status, \ - heading, article0, article1, article2, article3, \ - article4, article5, article6, article7 ) \ - VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' + _cached_update_data = [] + _update_query = 'UPDATE `red_families` \ +SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ +`status`= ? WHERE `fam_hash` = ?;' + + _cached_insert_data = [] + _insert_query = 'INSERT INTO `red_families` \ +( fam_hash, red_page_id, beginning, ending, status, heading, \ +article0, article1, article2, article3, article4, article5, article6, \ +article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' def __init__( self, fam_hash ): """ @@ -250,7 +250,7 @@ class MysqlRedFam( MysqlRed ): data = tuple( data ) - type( self ).__cached_insert_data.append( data ) + type( self )._cached_insert_data.append( data ) # Manualy construct self.data dict data_keys = ( 'fam_hash', 'red_page_id', 'beginning', 'ending', @@ -269,6 +269,6 @@ class MysqlRedFam( MysqlRed ): @param int status red_fam status """ - type( self ).__cached_update_data.append( ( red_page_id, heading, - beginning, ending, status, - self.__fam_hash ) ) + type( self )._cached_update_data.append( ( red_page_id, heading, + beginning, ending, status, + self.__fam_hash ) ) From 523d029fdc71e84a932d8b2ecfb718765463f670 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 19 Sep 2015 19:45:34 +0200 Subject: [PATCH 058/192] Fix bug causing db table cells containing empty strings --- redfam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redfam.py b/redfam.py index 0a23315..3bc68a0 100644 --- a/redfam.py +++ b/redfam.py @@ -205,7 +205,7 @@ class RedFamParser( RedFam ): """ # Predefine a pattern for wikilinks' destination - wikilink_pat = re.compile( r"\[\[([^\[\]\|]*)(\]\]|\|)" ) + wikilink_pat = re.compile( r"\[\[([^\[\]\|]+)(?:\]\]|\|)" ) # Parse content of heading for generating section links later match = type( self ).__sectionhead_pat.search( heading ) From b5ca69077c5f3a3644fb206921f52fcc570d91ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 19 Sep 2015 19:47:09 +0200 Subject: [PATCH 059/192] Remove double appearence of heading parameter in repression of RedFam --- redfam.py | 1 - 1 file changed, 1 deletion(-) diff --git a/redfam.py b/redfam.py index 3bc68a0..3b4581b 100644 --- a/redfam.py +++ b/redfam.py @@ -83,7 +83,6 @@ class RedFam: ", red_page_id=" + repr( self._red_page_id ) + \ ", status=" + repr( self._status ) + \ ", fam_hash=" + repr( self._fam_hash ) + \ - ", heading=" + repr( 
self._heading ) + \ " )" return __repr From 8059bb999260c9b8d49105dfdc77404b8c0afbfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 19 Sep 2015 19:49:20 +0200 Subject: [PATCH 060/192] Change behavior of MysqlRedFam to be able to get instance without knowen fam_hash --- mysqlred.py | 18 +++++------------- redfam.py | 3 ++- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/mysqlred.py b/mysqlred.py index 1a782b8..ea964d4 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -203,38 +203,30 @@ SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ article0, article1, article2, article3, article4, article5, article6, \ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' - def __init__( self, fam_hash ): + def __init__( self ): """ Creates a new instance, runs __init__ of parent class """ super().__init__( ) - - self.__fam_hash = fam_hash - - self.data = self.get_fam() def __del__( self ): pass - def get_fam( self ): + def get_fam( self, fam_hash ): """ Retrieves a red family row from MySQL-Database for given fam_hash @returns dict Dictionairy with data for given fam hash False if none found """ + self.__fam_hash = fam_hash cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor.execute( 'SELECT * FROM `red_families` WHERE `fam_hash` = ?;', - ( self.__fam_hash, ) ) - res = cursor.fetchone() - - if res: - return res - else: - return False + ( fam_hash, ) ) + self.data = cursor.fetchone() def add_fam( self, articlesList, heading, red_page_id, beginning, ending=None, status=0 ): diff --git a/redfam.py b/redfam.py index 3b4581b..ba33d1e 100644 --- a/redfam.py +++ b/redfam.py @@ -191,7 +191,8 @@ class RedFamParser( RedFam ): """ # We need a connection to our mysqldb - self.__mysql = MysqlRedFam( self._fam_hash ) + self.__mysql = MysqlRedFam( ) + self.__mysql.get_fam( self._fam_hash ) if not self.__mysql.data: self.__mysql.add_fam( self._articlesList, self._heading, From 
dbcfe8f1067335725d9897f01619ed22038e0f29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 19 Sep 2015 19:50:38 +0200 Subject: [PATCH 061/192] Add a generator to MysqlRedFam to retrieve redfams from db by status --- mysqlred.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/mysqlred.py b/mysqlred.py index ea964d4..cb9754d 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -264,3 +264,20 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' type( self )._cached_update_data.append( ( red_page_id, heading, beginning, ending, status, self.__fam_hash ) ) + + def get_by_status( self, status ): + """ + Generator witch fetches redFams with given status from DB + """ + + cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + + cursor.execute( 'SELECT * FROM `red_families` WHERE `status` = ?;', + ( status, ) ) + + while True: + res = cursor.fetchmany( 1000 ) + if not res: + break + for row in res: + yield row From 6992f82f02f3e9489efc3da90249ecc9ee7b44eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 19 Sep 2015 20:51:21 +0200 Subject: [PATCH 062/192] Start Implementing of RedFamWorker --- redfam.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/redfam.py b/redfam.py index ba33d1e..5504b7b 100644 --- a/redfam.py +++ b/redfam.py @@ -398,8 +398,31 @@ class RedFamWorker( RedFam ): Handles working with redundance families stored in database where discussion is finished """ - pass + def __init__( self, mysql_data ): + + articlesList = [] + for key in sorted( mysql_data.keys() ): + if 'article' in key and mysql_data[ key ]: + articlesList.append( mysql_data[ key ] ) + + super().__init__( articlesList, mysql_data[ 'beginning' ], + mysql_data[ 'ending' ], mysql_data[ 'red_page_id' ], + mysql_data[ 'status' ], mysql_data[ 'fam_hash' ], + mysql_data[ 'heading' ] ) + @classmethod + def list_by_status( cls, 
status ): + """ + Lists red_fams stored in db by given status + """ + mysql = MysqlRedFam() + for fam in mysql.get_by_status( status ): + try: + print( cls( fam ) ) + except RedFamHashError: + print(fam) + raise + class RedFamError( Exception ): """ From 4e21b6696a377eeb3cb9e86d6e46a2c04ef18612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 19 Sep 2015 20:51:52 +0200 Subject: [PATCH 063/192] Remove unnecessary whitespace from error messages --- redfam.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/redfam.py b/redfam.py index 5504b7b..fd96556 100644 --- a/redfam.py +++ b/redfam.py @@ -453,9 +453,8 @@ class RedFamHashError( RedFamError ): def __init__( self, givenHash, calculatedHash ): - message = "Error: Given fam_hash ('{given}') does not match with \ - calculated ('{calc}'".format( given=givenHash, - calc=calculatedHash ) + message = "Given fam_hash ('{given}') does not match with \ +calculated ('{calc}'".format( given=givenHash, calc=calculatedHash ) super().__init__( message ) @@ -467,7 +466,6 @@ class RedFamHeadingError ( RedFamError ): def __init__( self, heading ): message = "Error while trying to parse section heading. 
Given heading \ - '{heading}' does not match RegEx".format( - heading=heading ) +'{heading}' does not match RegEx".format( heading=heading ) super().__init__( message ) From 7d6cd8bb306541b2a96acb2003c885fab9b67786 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 19 Sep 2015 21:29:12 +0200 Subject: [PATCH 064/192] Strip leading and trailing whitespace in Links to prevent wrong fam_hashes (when receiving redfam from db) since MySQL drops it --- redfam.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/redfam.py b/redfam.py index fd96556..f056cc4 100644 --- a/redfam.py +++ b/redfam.py @@ -210,13 +210,15 @@ class RedFamParser( RedFam ): # Parse content of heading for generating section links later match = type( self ).__sectionhead_pat.search( heading ) if match: - self._heading = match.group(2).lstrip() + self._heading = match.group(2).strip() else: raise RedFamHeadingError( heading ) # We get the pages in first [0] element iterating over # wikilink_pat.findall( line ) - self._articlesList = [ link[0] for link + # Strip leading and trailing whitespace in Links to prevent wrong + # fam_hashes (when receiving redfam from db) since MySQL drops it + self._articlesList = [ link.strip() for link in wikilink_pat.findall( self._heading ) ] # Catch sections with more then 8 articles, print error @@ -405,7 +407,7 @@ class RedFamWorker( RedFam ): if 'article' in key and mysql_data[ key ]: articlesList.append( mysql_data[ key ] ) - super().__init__( articlesList, mysql_data[ 'beginning' ], + super().__init__( articlesList, mysql_data[ 'beginning' ], mysql_data[ 'ending' ], mysql_data[ 'red_page_id' ], mysql_data[ 'status' ], mysql_data[ 'fam_hash' ], mysql_data[ 'heading' ] ) From e186f2f22b9ac86c3d0946d26378cb8f394b4aca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 20 Sep 2015 17:45:07 +0200 Subject: [PATCH 065/192] Use dictionary with page_id / fam_hash as 
key for cached_insert_data to prevent double entrys --- mysqlred.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/mysqlred.py b/mysqlred.py index cb9754d..589c648 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -54,7 +54,7 @@ class MysqlRed: # Class variables for storing cached querys _cached_update_data = [] _update_query = '' - _cached_insert_data = [] + _cached_insert_data = {} _insert_query = '' def __init__( self ): @@ -89,8 +89,9 @@ class MysqlRed: # Execute insert query if cls._cached_insert_data: - cursor.executemany( cls._insert_query, cls._cached_insert_data ) - cls._cached_insert_data = [] + print( cls._cached_insert_data ) + cursor.executemany( cls._insert_query, ( cls._cached_insert_data[ key ] for key in cls._cached_insert_data ) ) + cls._cached_insert_data = {} # Execute update query # Use executemany since update could not be reduced to one query @@ -113,7 +114,7 @@ class MysqlRedPage( MysqlRed ): _update_query = 'UPDATE `red_pages` \ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' - _cached_insert_data = [] + _cached_insert_data = {} _insert_query = 'INSERT INTO `red_pages` \ ( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' @@ -161,8 +162,10 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' @param int status Page parsing status """ - type( self )._cached_insert_data.append( ( self.__page_id, page_title, - rev_id, status ) ) + insert_data = { self.__page_id: ( self.__page_id, page_title, + rev_id, status ) } + + type( self )._cached_insert_data.update( insert_data ) # Manualy construct self.data dict self.data = { 'page_id': self.__page_id, 'rev_id': rev_id, @@ -197,7 +200,7 @@ class MysqlRedFam( MysqlRed ): SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ `status`= ? 
WHERE `fam_hash` = ?;' - _cached_insert_data = [] + _cached_insert_data = {} _insert_query = 'INSERT INTO `red_families` \ ( fam_hash, red_page_id, beginning, ending, status, heading, \ article0, article1, article2, article3, article4, article5, article6, \ @@ -242,7 +245,8 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' data = tuple( data ) - type( self )._cached_insert_data.append( data ) + insert_data = { self.__fam_hash: data } + type( self )._cached_insert_data.update( insert_data ) # Manualy construct self.data dict data_keys = ( 'fam_hash', 'red_page_id', 'beginning', 'ending', From ef9c13324aa34979202855e617354e1bc4b4b785 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 20 Sep 2015 18:17:59 +0200 Subject: [PATCH 066/192] Improve documentation of MysqlRed.flush() --- mysqlred.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mysqlred.py b/mysqlred.py index 589c648..83dd48c 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -89,14 +89,19 @@ class MysqlRed: # Execute insert query if cls._cached_insert_data: - print( cls._cached_insert_data ) - cursor.executemany( cls._insert_query, ( cls._cached_insert_data[ key ] for key in cls._cached_insert_data ) ) + # Since cls._cached_insert_data is a dict, we need to have a custom + # Generator to iterate over it + cursor.executemany( cls._insert_query, + ( cls._cached_insert_data[ key ] + for key in cls._cached_insert_data ) ) + # Reset after writing cls._cached_insert_data = {} # Execute update query # Use executemany since update could not be reduced to one query if cls._cached_update_data: cursor.executemany( cls._update_query, cls._cached_update_data ) + # Reset after writing cls._cached_update_data = [] # Commit db changes From f29dfd50039d74f605676f51054e2f8467fba370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Feb 2016 18:00:46 +0100 Subject: [PATCH 067/192] Use new 
jogobot module --- .gitmodules | 3 +++ jogobot | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 jogobot diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..07bf754 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "jogobot"] + path = jogobot + url = ../jogobot diff --git a/jogobot b/jogobot new file mode 160000 index 0000000..b0c844b --- /dev/null +++ b/jogobot @@ -0,0 +1 @@ +Subproject commit b0c844b2de2503eb77a172e57a288f947a978530 From b26f04db8c3bc460664dd93040d457b00ab8160d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Mon, 29 Feb 2016 11:13:14 +0100 Subject: [PATCH 068/192] Use updated version of jogobot with ast.literal_eval parsed config entrys --- jogobot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jogobot b/jogobot index b0c844b..dac01a2 160000 --- a/jogobot +++ b/jogobot @@ -1 +1 @@ -Subproject commit b0c844b2de2503eb77a172e57a288f947a978530 +Subproject commit dac01a224b8b190ed8c5bf8c0183dc879e775f81 From 24adafeee7953b67339ed7b5e3df1fa4c7fdf783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Mon, 29 Feb 2016 11:35:48 +0100 Subject: [PATCH 069/192] Changes for new jogobot-module --- mysqlred.py | 2 +- redfam.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mysqlred.py b/mysqlred.py index 83dd48c..c7e27ab 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -49,7 +49,7 @@ class MysqlRed: db_hostname = config.db_hostname db_username = config.db_username db_password = config.db_password - db_name = config.db_username + jogobot.db_namesuffix + db_name = config.db_username + jogobot.config['db_suffix'] # Class variables for storing cached querys _cached_update_data = [] diff --git a/redfam.py b/redfam.py index f056cc4..771d9f5 100644 --- a/redfam.py +++ b/redfam.py @@ -121,13 +121,14 @@ class RedFamParser( RedFam ): """ # Define the timestamp format - 
__timestamp_format = jogobot.timestamp_format + __timestamp_format = jogobot.config['redundances']['timestamp_format'] # Define section heading re.pattern __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" ) # Define timestamp re.pattern - __timestamp_pat = re.compile( jogobot.timestamp_regex ) + __timestamp_pat = re.compile( jogobot.config['redundances'] + ['timestamp_regex'] ) # Textpattern for recognisation of done-notices __done_notice = ":Archivierung dieses Abschnittes \ From f53a5b3745ad5ebf5c37961072efd76be653e6d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 2 Mar 2016 17:10:08 +0100 Subject: [PATCH 070/192] Output a warning if there are update/insert querys cached when exit programm --- mysqlred.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/mysqlred.py b/mysqlred.py index c7e27ab..5c2d48b 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -31,6 +31,8 @@ try: except ImportError: import MySQLdb as mysqldb +import atexit + from pywikibot import config import jogobot @@ -72,7 +74,10 @@ class MysqlRed: user=type( self ).db_username, passwd=type( self ).db_password, db=type( self ).db_name ) - + + # Register callback for warnig if exit with cached db write querys + atexit.register( type(self).warn_if_not_flushed ) + def __del__( self ): """ Before deleting class, close connection to MySQL-DB @@ -108,6 +113,16 @@ class MysqlRed: if cls._cached_insert_data or cls._cached_update_data: cls.connection.commit() + @classmethod + def warn_if_not_flushed(cls): + """ + Outputs a warning if there are db write querys cached and not flushed + before exiting programm! + """ + if cls._cached_update_data or cls._cached_insert_data: + jogobot.output( "Cached Database write querys not flushed!!! 
" + + "Data loss is possible!", "WARNING" ) + class MysqlRedPage( MysqlRed ): """ From 24f1a7f516c576dfdf901fa7442713f19444c4a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 2 Mar 2016 17:14:30 +0100 Subject: [PATCH 071/192] Remove __init__.py as we won't use it as a package --- __init__.py | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 __init__.py diff --git a/__init__.py b/__init__.py deleted file mode 100644 index 1aef520..0000000 --- a/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# __init__.py -# -# Copyright 2015 GOLDERWEB – Jonathan Golder -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -# MA 02110-1301, USA. 
-# -# -""" -Scripts for our redundances bot -""" From 10f64199abde1dc200b2de27d78ce8fbba8856a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 2 Mar 2016 17:19:11 +0100 Subject: [PATCH 072/192] Remove relativ imports as we don't are in a package anymore --- redfam.py | 2 +- redpage.py | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/redfam.py b/redfam.py index 771d9f5..519fe81 100644 --- a/redfam.py +++ b/redfam.py @@ -33,7 +33,7 @@ from datetime import datetime import pywikibot import jogobot -from .mysqlred import MysqlRedFam +from mysqlred import MysqlRedFam class RedFam: diff --git a/redpage.py b/redpage.py index 6a016bd..96a853d 100644 --- a/redpage.py +++ b/redpage.py @@ -27,8 +27,8 @@ Provides a class for handling redundance discussion pages and archives import pywikibot # noqa -from .mysqlred import MysqlRedPage -from .redfam import RedFamParser +from mysqlred import MysqlRedPage +from redfam import RedFamParser class RedPage: @@ -94,7 +94,7 @@ class RedPage: else: return False - def parse( self ): + def parse( self ): # noqa """ Handles the parsing process """ @@ -158,9 +158,8 @@ class RedPage: ending = RedFamParser.is_ending2( text_lines[ j ] ) # Create the RedFam object - red_fam = RedFamParser( fam_heading, self.page._pageid, - self.is_archive(), beginning, - ending ) + RedFamParser( fam_heading, self.page._pageid, + self.is_archive(), beginning, ending ) # Increment line counter i += 1 From 163972c9249cbb52512af5fc58c8c94869bf9a5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 3 Mar 2016 17:20:57 +0100 Subject: [PATCH 073/192] New method dates_extract which finds begining and ending at once --- redfam.py | 176 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 110 insertions(+), 66 deletions(-) diff --git a/redfam.py b/redfam.py index 519fe81..2fcddba 100644 --- a/redfam.py +++ b/redfam.py @@ -30,7 +30,9 @@ 
import locale import re from datetime import datetime +import mwparserfromhell as mwparser # noqa import pywikibot +from pywikibot.tools import deprecated # noqa import jogobot from mysqlred import MysqlRedFam @@ -40,12 +42,12 @@ class RedFam: """ Basic class for RedFams, containing the basic data structure """ - + def __init__( self, articlesList, beginning, ending=None, red_page_id=None, status=0, fam_hash=None, heading=None ): """ Generates a new RedFam object - + @param articlesList list List of articles @param beginning datetime Beginning date @param ending datetime Ending date @@ -54,7 +56,7 @@ class RedFam: @param fam_hash str SHA1 hash of articlesList @param heading str Original heading of RedFam (Link) """ - + # Initial attribute values self._articlesList = articlesList self._beginning = beginning @@ -63,18 +65,18 @@ class RedFam: self._status = status self._fam_hash = fam_hash self._heading = heading - + # Calculates the sha1 hash over self._articlesList to # rediscover known redundance families self.calc_fam_hash() - + def __repr__( self ): """ Returns repression str of RedFam object - + @returns str repr() string """ - + __repr = "RedFam( " + \ "articlesList=" + repr( self._articlesList ) + \ ", heading=" + repr( self._heading ) + \ @@ -84,28 +86,28 @@ class RedFam: ", status=" + repr( self._status ) + \ ", fam_hash=" + repr( self._fam_hash ) + \ " )" - + return __repr def calc_fam_hash( self ): """ Calculates the SHA-1 hash for the articlesList of redundance family. Since we don't need security SHA-1 is just fine. 
- + @returns str String with the hexadecimal hash digest """ - + h = hashlib.sha1() h.update( str( self._articlesList[:8] ).encode('utf-8') ) - + if self._fam_hash and h.hexdigest() != self._fam_hash: raise RedFamHashError( self._fam_hash, h.hexdigest() ) - + elif self._fam_hash: return else: self._fam_hash = h.hexdigest() - + @classmethod def flush_db_cache( cls ): """ @@ -119,28 +121,28 @@ class RedFamParser( RedFam ): Provides an interface to RedFam for adding/updating redundance families while parsig redundance pages """ - + # Define the timestamp format __timestamp_format = jogobot.config['redundances']['timestamp_format'] - + # Define section heading re.pattern __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" ) # Define timestamp re.pattern __timestamp_pat = re.compile( jogobot.config['redundances'] ['timestamp_regex'] ) - + # Textpattern for recognisation of done-notices __done_notice = ":Archivierung dieses Abschnittes \ wurde gewünscht von:" __done_notice2 = "{{Erledigt|" - + def __init__( self, heading, red_page_id, red_page_archive, beginning, ending=None ): """ Creates a RedFam object based on data collected while parsing red_pages combined with possibly former known data from db - + @param red_fam_heading str Wikitext heading of section @param red_page_id int MediaWiki page_id @param red_page_archive bool Is red_page an archive @@ -149,57 +151,57 @@ class RedFamParser( RedFam ): @param ending datetime Timestamp of ending str strptime parseable string """ - + # Set object attributes: self._red_page_id = red_page_id self._red_page_archive = red_page_archive self._fam_hash = None - + # Method self.add_beginning sets self._beginning directly self.add_beginning( beginning ) - + # Method self.add_ending sets self._ending directly if( ending ): self.add_ending( ending ) else: # If no ending was provided set to None self._ending = None - + self._status = None - + # Parse the provided heading of redundance section # to set 
self._articlesList self.heading_parser( heading ) - + # Calculates the sha1 hash over self._articlesList to # rediscover known redundance families - + self.calc_fam_hash() - + # Open database connection, ask for data if existing, # otherwise create entry self.__handle_db() - + # Check status changes self.status() - + # Triggers db update if anything changed self.changed() - + def __handle_db( self ): """ Handles opening of db connection """ - + # We need a connection to our mysqldb self.__mysql = MysqlRedFam( ) self.__mysql.get_fam( self._fam_hash ) - + if not self.__mysql.data: self.__mysql.add_fam( self._articlesList, self._heading, self._red_page_id, self._beginning, self._ending ) - + def heading_parser( self, heading ): """ Parses given red_fam_heading string and saves articles list @@ -238,43 +240,43 @@ Maximum number of articles in red_fam exceeded, maximum number is 8, \ def add_beginning( self, beginning ): """ Adds the beginning date of a redundance diskussion to the object - + @param datetime datetime Beginning date """ - + self._beginning = self.__datetime( beginning ) - + def add_ending( self, ending ): """ Adds the ending date of a redundance diskussion to the object. 
- + @param datetime datetime Ending date """ - + self._ending = self.__datetime( ending ) - + def __datetime( self, timestamp ): """ Decides wether given timestamp is a parseable string or a datetime object and returns a datetime object in both cases - + @param datetime timestamp Datetime object str timestamp Parseable string with timestamp - + @returns datetime Datetime object """ - + # Make sure locale is set to 'de_DE.UTF-8' to prevent problems # with wrong month abreviations in strptime locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') - + if( isinstance( timestamp, datetime ) ): return timestamp else: result = datetime.strptime( timestamp, type( self ).__timestamp_format ) return result - + def status( self ): """ Handles detection of correct status @@ -284,10 +286,10 @@ Maximum number of articles in red_fam exceeded, maximum number is 8, \ - 2 Discussion archived --> ending (normaly) present, page is archive - 3 and greater status was set by worker script, do not change it """ - + # Do not change stati set by worker script etc. 
if not self.__mysql.data['status'] > 2: - + # No ending, discussion is running: # Sometimes archived discussions also have no detectable ending if not self._ending and not self._red_page_archive: @@ -299,36 +301,36 @@ Maximum number of articles in red_fam exceeded, maximum number is 8, \ self._status = 2 else: self._status = self.__mysql.data[ 'status' ] - + def changed( self ): """ Checks wether anything has changed and maybe triggers db update """ - + # On archived red_fams do not delete possibly existing ending if( not self._ending and self._status > 1 and self.__mysql.data[ 'ending' ] ): - + self._ending = self.__mysql.data[ 'ending' ] - + # Since status change means something has changed, update database if( self._status != self.__mysql.data[ 'status' ] or self._beginning != self.__mysql.data[ 'beginning' ] or self._ending != self.__mysql.data[ 'ending' ] or self._red_page_id != self.__mysql.data[ 'red_page_id' ] or self._heading != self.__mysql.data[ 'heading' ]): - + self.__mysql.update_fam( self._red_page_id, self._heading, self._beginning, self._ending, self._status ) - + @classmethod def is_sectionheading( cls, line ): """ Checks wether given line is a red_fam section heading - + @param str line String to check - + @returns bool Returns True if it is a section heading """ @@ -336,7 +338,49 @@ Maximum number of articles in red_fam exceeded, maximum number is 8, \ return True else: return False - + + @classmethod + def extract_dates( cls, text, isarchive=False ): + """ + Returns tuple of the first and maybe last timestamp of a section. 
+ Last timestamp is only returned if there is a done notice or param + *isarchiv* is set to 'True' + + @param text Text to search in + @type line Any Type castable to str + @param isarchive If true skip searching done notice (on archivepages) + @type isarchive bool + + @returns Timestamps, otherwise None + @returntype tuple of strs + """ + + # Match all timestamps + matches = cls.__timestamp_pat.findall( str( text ) ) + if matches: + + # First one is beginning + # Since some timestamps are broken we need to reconstruct them + # by regex match groups + beginning = ( matches[0][0] + ", " + matches[0][1] + ". " + + matches[0][2] + ". " + matches[0][3] ) + + # Last one maybe is ending + # Done notice format 1 + # Done notice format 2 + # Or on archivepages + if ( cls.__done_notice in text or + cls.__done_notice2 in text or + isarchive ): + + ending = ( matches[-1][0] + ", " + matches[-1][1] + ". " + + matches[-1][2] + ". " + matches[-1][3] ) + + else: + ending = None + + return (beginning, ending) + @classmethod def is_beginning( cls, line ): """ @@ -402,17 +446,17 @@ class RedFamWorker( RedFam ): where discussion is finished """ def __init__( self, mysql_data ): - + articlesList = [] for key in sorted( mysql_data.keys() ): if 'article' in key and mysql_data[ key ]: articlesList.append( mysql_data[ key ] ) - + super().__init__( articlesList, mysql_data[ 'beginning' ], mysql_data[ 'ending' ], mysql_data[ 'red_page_id' ], mysql_data[ 'status' ], mysql_data[ 'fam_hash' ], mysql_data[ 'heading' ] ) - + @classmethod def list_by_status( cls, status ): """ @@ -426,12 +470,12 @@ class RedFamWorker( RedFam ): print(fam) raise - + class RedFamError( Exception ): """ Base class for all Errors of RedFam-Module """ - + def __init__( self, message=None ): """ Handles Instantiation of RedFamError's @@ -440,12 +484,12 @@ class RedFamError( Exception ): self.message = "An Error occured while executing a RedFam action" else: self.message = message - + def __str__( self ): """ Output of 
error message """ - + return self.message @@ -453,12 +497,12 @@ class RedFamHashError( RedFamError ): """ Raised when given RedFamHash does not match with calculated """ - + def __init__( self, givenHash, calculatedHash ): - + message = "Given fam_hash ('{given}') does not match with \ calculated ('{calc}'".format( given=givenHash, calc=calculatedHash ) - + super().__init__( message ) @@ -467,8 +511,8 @@ class RedFamHeadingError ( RedFamError ): Raised when given RedFamHeading does not match __sectionhead_pat Regex """ def __init__( self, heading ): - + message = "Error while trying to parse section heading. Given heading \ '{heading}' does not match RegEx".format( heading=heading ) - + super().__init__( message ) From a2dfffc74bfe834d2680dcbea30fef8a55092e42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 3 Mar 2016 17:23:44 +0100 Subject: [PATCH 074/192] Let old date-extracting methods use dates_extract and mark them as deprecated --- redfam.py | 62 +++++++++++++++++++++---------------------------------- 1 file changed, 23 insertions(+), 39 deletions(-) diff --git a/redfam.py b/redfam.py index 2fcddba..c06e26a 100644 --- a/redfam.py +++ b/redfam.py @@ -382,62 +382,46 @@ Maximum number of articles in red_fam exceeded, maximum number is 8, \ return (beginning, ending) @classmethod + @deprecated( extract_dates ) def is_beginning( cls, line ): """ Returns the first timestamp found in line, otherwise None - + @param str line String to search in - + @returns str Timestamp, otherwise None """ - - match = cls.__timestamp_pat.search( line ) - if match: - # Since some timestamps are broken we need to reconstruct them - # by regex match groups - result = match.group(1) + ", " + match.group(2) + ". " +\ - match.group(3) + ". 
" + match.group(4) - return result - else: - return None - + + return cls.extract_dates( line )[0] + @classmethod - def is_ending( cls, line ): + @deprecated( extract_dates ) + def is_ending( cls, line, isarchive=False ): """ Returns the timestamp of done notice ( if one ), otherwise None - @param str line String to search in - - @returns str Timestamp, otherwise None + + @param line String to search in + @type line str + @param isarchive If true skip searching done notice (on archivepages) + @type isarchive bool + + @returns Timestamp, otherwise None + @returntype str """ - - if ( cls.__done_notice in line ) or ( cls.__done_notice2 in line ): - match = cls.__timestamp_pat.search( line ) - if match: - # Since some timestamps are broken we need to reconstruct them - # by regex match groups - result = match.group(1) + ", " + match.group(2) + ". " +\ - match.group(3) + ". " + match.group(4) - return result - return None - + + return cls.extract_dates( line )[1] + @classmethod + @deprecated( extract_dates ) def is_ending2( cls, line ): """ Returns the last timestamp found in line, otherwise None @param str line String to search in - + @returns str Timestamp, otherwise None """ - - matches = cls.__timestamp_pat.findall( line ) - if matches: - # Since some timestamps are broken we need to reconstruct them - # by regex match groups - result = matches[-1][0] + ", " + matches[-1][1] + ". " +\ - matches[-1][2] + ". 
" + matches[-1][3] - return result - else: - return None + + return cls.extract_dates( line, True )[1] class RedFamWorker( RedFam ): From b81694c6d3ec40a8b4936b94ae63985edcc83b66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 3 Mar 2016 17:30:39 +0100 Subject: [PATCH 075/192] Rewrite heading_parser using mwparserfromhell to make it simpler --- redfam.py | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/redfam.py b/redfam.py index c06e26a..048777a 100644 --- a/redfam.py +++ b/redfam.py @@ -31,7 +31,7 @@ import re from datetime import datetime import mwparserfromhell as mwparser # noqa -import pywikibot +import pywikibot # noqa from pywikibot.tools import deprecated # noqa import jogobot @@ -205,36 +205,36 @@ class RedFamParser( RedFam ): def heading_parser( self, heading ): """ Parses given red_fam_heading string and saves articles list + + @param heading Heading of RedFam-Section + @type heading wikicode or mwparser-parseable """ - - # Predefine a pattern for wikilinks' destination - wikilink_pat = re.compile( r"\[\[([^\[\]\|]+)(?:\]\]|\|)" ) - - # Parse content of heading for generating section links later - match = type( self ).__sectionhead_pat.search( heading ) - if match: - self._heading = match.group(2).strip() - else: - raise RedFamHeadingError( heading ) - - # We get the pages in first [0] element iterating over - # wikilink_pat.findall( line ) - # Strip leading and trailing whitespace in Links to prevent wrong - # fam_hashes (when receiving redfam from db) since MySQL drops it - self._articlesList = [ link.strip() for link - in wikilink_pat.findall( self._heading ) ] - + + # Parse heading with mwparse if needed + if not isinstance( heading, mwparser.wikicode.Wikicode ): + heading = mwparser.parse( heading ) + + # Save heading as string + self._heading = str( heading ) + + # Save destinations of wikilinks in headings + self._articlesList = 
[ str( link.title ) for link + in heading.ifilter_wikilinks() ] + # Catch sections with more then 8 articles, print error if len( self._articlesList ) > 8: # For repression in output we need to know the fam hash self.calc_fam_hash() - pywikibot.output( "\ -{datetime} – \03{{lightred}}[WARNING] – \ -Maximum number of articles in red_fam exceeded, maximum number is 8, \ -{number:d} were given \n {repress}".format( - datetime=datetime.now().strftime( "%Y-%m-%d %H:%M:%S" ), - number=len( self._articlesList ), repress=repr( self ) ) ) - + jogobot.output( + ( "\03{{lightred}}" + + "Maximum number of articles in red_fam exceeded, " + + "maximum number is 8, {number:d} were given \n {repress}" + ).format( datetime=datetime.now().strftime( + "%Y-%m-%d %H:%M:%S" ), number=len( self._articlesList ), + repress=repr( self ) ), + "WARNING" ) + + # Only save the first 8 articles self._articlesList = self._articlesList[:8] def add_beginning( self, beginning ): From 74223079859caded0f526da1f9943f6d9806af87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 3 Mar 2016 17:37:46 +0100 Subject: [PATCH 076/192] Rewrite RedPage.parse using mwparserfromhell to make it simpler --- redfam.py | 8 ++-- redpage.py | 130 +++++++++++++++++++---------------------------------- 2 files changed, 51 insertions(+), 87 deletions(-) diff --git a/redfam.py b/redfam.py index 048777a..1632841 100644 --- a/redfam.py +++ b/redfam.py @@ -126,8 +126,8 @@ class RedFamParser( RedFam ): __timestamp_format = jogobot.config['redundances']['timestamp_format'] # Define section heading re.pattern - __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" ) - + __sectionhead_pat = re.compile( r"^(.*\[\[.+\]\].*\[\[.+\]\].*)" ) + # Define timestamp re.pattern __timestamp_pat = re.compile( jogobot.config['redundances'] ['timestamp_regex'] ) @@ -333,8 +333,8 @@ class RedFamParser( RedFam ): @returns bool Returns True if it is a section heading """ - - if 
cls.__sectionhead_pat.search( line ): + + if cls.__sectionhead_pat.search( str(line) ): return True else: return False diff --git a/redpage.py b/redpage.py index 96a853d..a35cf9e 100644 --- a/redpage.py +++ b/redpage.py @@ -26,6 +26,7 @@ Provides a class for handling redundance discussion pages and archives """ import pywikibot # noqa +import mwparserfromhell as mwparser from mysqlred import MysqlRedPage from redfam import RedFamParser @@ -35,44 +36,44 @@ class RedPage: """ Class for handling redundance discussion pages and archives """ - + def __init__( self, page, archive=False ): """ Generate a new RedPage object based on the given pywikibot page object - + @param page page Pywikibot/MediaWiki page object for page """ - + # Safe the pywikibot page object self.page = page self._archive = archive - + self.__handle_db( ) self.is_page_changed() - + self._parsed = None if( self._changed or self.__mysql.data[ 'status' ] == 0 ): self.parse() - + self.__update_db() - + def __handle_db( self ): """ Handles opening of db connection """ - + # We need a connection to our mysqldb self.__mysql = MysqlRedPage( self.page._pageid ) - + if not self.__mysql.data: self.__mysql.add_page( self.page.title(), self.page._revid ) - + def is_page_changed( self ): """ Check wether the page was changed since last run """ - + if( self.__mysql.data != { 'page_id': self.page._pageid, 'rev_id': self.page._revid, 'page_title': self.page.title(), @@ -85,102 +86,65 @@ class RedPage: """ Detects wether current page is an archive of discussions """ - + if( self._archive or ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ) ): - + return True else: return False - - def parse( self ): # noqa + + def parse( self ): """ Handles the parsing process """ - # Since @param text is a string we need to split it in lines - text_lines = self.page.text.split( "\n" ) - length = len( text_lines ) - - # Initialise line counter - i = 0 - fam_heading = None - 
beginning = None - ending = None - - # Set line for last detected Redundance-Family to 0 - last_fam = 0 - - # Iterate over the lines of the page - for line in text_lines: - - # Check wether we have an "Redundance-Family"-Section heading - if RedFamParser.is_sectionheading( line ): - - # Save line number for last detected Redundance-Family - last_fam = i - # Save heading - fam_heading = line - - # Defined (re)initialisation of dates - beginning = None - ending = None - - # Check wether we are currently in an "Redundance-Family"-Section - if i > last_fam and last_fam > 0: - - # Check if we have alredy recognized the beginning date of the - # discussion (in former iteration) or if we have a done-notice - if not beginning: - beginning = RedFamParser.is_beginning( line ) - elif not ending: - ending = RedFamParser.is_ending( line ) - - # Detect end of red_fam section (next line is new sectionheading) - # or end of file - # Prevent from running out of index - if i < (length - 1): - test = RedFamParser.is_sectionheading( text_lines[ i + 1 ] ) - else: - test = False - if ( test or ( length == ( i + 1 ) ) ): - - # Create the red_fam object - if( fam_heading and beginning ): - - # Maybe we can find a ending by feed if we have None yet - # (No done notice on archive pages) - if not ending and self.is_archive(): - j = i - while (j > last_fam) and not ending: - j -= 1 - ending = RedFamParser.is_ending2( text_lines[ j ] ) - - # Create the RedFam object - RedFamParser( fam_heading, self.page._pageid, - self.is_archive(), beginning, ending ) - - # Increment line counter - i += 1 + # Generate Wikicode object + self.wikicode = mwparser.parse( self.page.text ) + + # Select RedFam-sections + # matches=Regexp or + # function( gets heading content as wikicode as param 1) + # include_lead = if true include first section (intro) + # include_heading = if true include heading + fams = self.wikicode.get_sections( + matches=RedFamParser.is_sectionheading, + include_lead=False, 
include_headings=True ) + + # Iterate over RedFam + for fam in fams: + + # Extract heading text + heading = next( fam.ifilter_headings() ).title + + # Extract beginnig and maybe ending + (beginning, ending) = RedFamParser.extract_dates( fam, + self.is_archive() + ) + + # Create the RedFam object + RedFamParser( heading, self.page._pageid, + self.is_archive(), beginning, ending ) + else: RedFamParser.flush_db_cache() self._parsed = True - + def __update_db( self ): """ Updates the page meta data in mysql db """ if( self._parsed or not self._changed ): status = 1 - + if( self.is_archive() ): status = 2 else: status = 0 - + self.__mysql.update_page( self.page._revid, self.page.title(), status ) - + @classmethod def flush_db_cache( cls ): """ From 0af7eb11d65261c157e97ec0099927777b7b682a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 3 Mar 2016 20:41:14 +0100 Subject: [PATCH 077/192] Move parsing of redfams from RedPageParser to RedFamParser.parser so RedPageParse won't do anything with redfams except for returning a generator of text-sections --- redfam.py | 23 +++++++++++++++++++++++ redpage.py | 33 +++++++++++++++------------------ 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/redfam.py b/redfam.py index 1632841..0e316a7 100644 --- a/redfam.py +++ b/redfam.py @@ -325,6 +325,7 @@ class RedFamParser( RedFam ): self._status ) @classmethod + @deprecated def is_sectionheading( cls, line ): """ Checks wether given line is a red_fam section heading @@ -339,6 +340,28 @@ class RedFamParser( RedFam ): else: return False + @classmethod + def parser( cls, text, pageid, isarchive=False ): + """ + Handles parsing of redfam section + + @param text Text of RedFam-Section + @type text wikicode or mwparser-parseable + """ + + # Parse heading with mwparse if needed + if not isinstance( text, mwparser.wikicode.Wikicode ): + text = mwparser.parse( text ) + + # Extract heading text + heading = next( 
text.ifilter_headings() ).title + + # Extract beginnig and maybe ending + (beginning, ending) = RedFamParser.extract_dates( text, isarchive ) + + # Create the RedFam object + RedFamParser( heading, pageid, isarchive, beginning, ending ) + @classmethod def extract_dates( cls, text, isarchive=False ): """ diff --git a/redpage.py b/redpage.py index a35cf9e..2b93ae8 100644 --- a/redpage.py +++ b/redpage.py @@ -28,8 +28,9 @@ Provides a class for handling redundance discussion pages and archives import pywikibot # noqa import mwparserfromhell as mwparser +import jogobot + from mysqlred import MysqlRedPage -from redfam import RedFamParser class RedPage: @@ -53,10 +54,6 @@ class RedPage: self.is_page_changed() self._parsed = None - if( self._changed or self.__mysql.data[ 'status' ] == 0 ): - self.parse() - - self.__update_db() def __handle_db( self ): """ @@ -95,6 +92,16 @@ class RedPage: else: return False + def is_parsing_needed( self ): + """ + Decides wether current RedPage needs to be parsed or not + """ + + if( self._changed or self.__mysql.data[ 'status' ] == 0 ): + return True + else: + return False + def parse( self ): """ Handles the parsing process @@ -109,27 +116,17 @@ class RedPage: # include_lead = if true include first section (intro) # include_heading = if true include heading fams = self.wikicode.get_sections( - matches=RedFamParser.is_sectionheading, + matches=jogobot.config["redundances"]["section_heading_regex"], include_lead=False, include_headings=True ) # Iterate over RedFam for fam in fams: - # Extract heading text - heading = next( fam.ifilter_headings() ).title - - # Extract beginnig and maybe ending - (beginning, ending) = RedFamParser.extract_dates( fam, - self.is_archive() - ) - - # Create the RedFam object - RedFamParser( heading, self.page._pageid, - self.is_archive(), beginning, ending ) + yield fam else: - RedFamParser.flush_db_cache() self._parsed = True + self.__update_db() def __update_db( self ): """ From 
a24f208449c891c29ca4bf6fb1d165c8546e513b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 3 Mar 2016 21:05:16 +0100 Subject: [PATCH 078/192] Add parse-pages.py Script --- parse-pages.py | 107 +++++++++++++++++++++++++++++++++++++++++++++++++ tox.ini | 2 - 2 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 parse-pages.py delete mode 100644 tox.ini diff --git a/parse-pages.py b/parse-pages.py new file mode 100644 index 0000000..4545aef --- /dev/null +++ b/parse-pages.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# parse-pages.py +# +# Copyright 2016 GOLDERWEB – Jonathan Golder +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. 
+# +# +""" +Script to parse all redpages in configured categories +""" + +import pywikibot +from pywikibot import pagegenerators + +import jogobot + +import redpage +import redfam + + +def get_cat_pages( cat ): + """ + Generates a iteratable generator-object with all pages listet in given + category + + @param cat Category to request + @type cat str + + @returns generator Iteratable object with pages of given category + """ + + # Get site to work on from pywikibot config + site = pywikibot.Site() + + # Retrieve the content of given category + category = pywikibot.Category( site, cat ) + + # Build an iteratable generator object with page objects for given category + generator = pagegenerators.CategorizedPageGenerator( category ) + + return generator + + +def main(*args): + """ + Handles process + """ + + try: + jogobot.output( "BEGINN – parser-pages.py" ) + + # Iterate over configured categories + for cat in ( jogobot.config["redundances"]["redpage_cats"] ): + + # Iterate over pages in current cat + for page in get_cat_pages( cat ): + + # For pages configured to exclude, go on with next page + if page.title() in ( + jogobot.config["redundances"]["redpage_exclude"] ): + + continue + + # Initiate RedPage object + red_page = redpage.RedPage( page ) + + # Check whether parsing is needed + if red_page.is_parsing_needed(): + + # Iterate over returned generator with redfam sections + for fam in red_page.parse(): + + # Run RedFamParser on section text + redfam.RedFamParser.parser( fam, red_page.page._pageid, + red_page.is_archive() ) + else: + # If successfully parsed whole page, flush + # db write cache + redfam.RedFamParser.flush_db_cache() + jogobot.output( "Page '%s' parsed" % + red_page.page.title() ) + else: + # If successfully parsed all pages in cat, flush db write cache + redpage.RedPage.flush_db_cache() + + finally: + jogobot.output( "END – parser-pages.py" ) + pywikibot.stopme() + +if( __name__ == "__main__" ): + main() diff --git a/tox.ini b/tox.ini deleted file 
mode 100644 index 9236f4f..0000000 --- a/tox.ini +++ /dev/null @@ -1,2 +0,0 @@ -[flake8] -ignore = E129,E201,E202,W293 From bd9dbdfa17a01036fe6ac6ed661158bb9d4a17db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 23 Aug 2016 21:12:07 +0200 Subject: [PATCH 079/192] Make use of declared db_host_port The port to connect to MySQL-Server was previously always assumed as the default one. So the library was incompatible to db's on nonstandard ports Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=68 FS#68] --- mysqlred.py | 114 ++++++++++++++++++++++++++-------------------------- 1 file changed, 58 insertions(+), 56 deletions(-) diff --git a/mysqlred.py b/mysqlred.py index 5c2d48b..055b995 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -41,36 +41,38 @@ import jogobot class MysqlRed: """ Basic interface class, containing opening of connection - + Specific querys should be defined in descendant classes per data type """ - + # Save mysqldb-connection as class attribute to use only one # in descendant classes connection = False db_hostname = config.db_hostname + db_port = config.db_port db_username = config.db_username db_password = config.db_password db_name = config.db_username + jogobot.config['db_suffix'] - + # Class variables for storing cached querys _cached_update_data = [] _update_query = '' _cached_insert_data = {} _insert_query = '' - + def __init__( self ): """ Opens a connection to MySQL-DB - + @returns mysql-stream MySQL Connection """ - + # Connect to mysqldb only once if not type( self ).connection: - + type( self ).connection = mysqldb.connect( host=type( self ).db_hostname, + port=type( self ).db_port, user=type( self ).db_username, passwd=type( self ).db_password, db=type( self ).db_name ) @@ -82,16 +84,16 @@ class MysqlRed: """ Before deleting class, close connection to MySQL-DB """ - + type( self ).connection.close() - + @classmethod def flush( cls ): """ Run cached querys """ cursor = 
cls.connection.cursor() - + # Execute insert query if cls._cached_insert_data: # Since cls._cached_insert_data is a dict, we need to have a custom @@ -101,14 +103,14 @@ class MysqlRed: for key in cls._cached_insert_data ) ) # Reset after writing cls._cached_insert_data = {} - + # Execute update query # Use executemany since update could not be reduced to one query if cls._cached_update_data: cursor.executemany( cls._update_query, cls._cached_update_data ) # Reset after writing cls._cached_update_data = [] - + # Commit db changes if cls._cached_insert_data or cls._cached_update_data: cls.connection.commit() @@ -128,7 +130,7 @@ class MysqlRedPage( MysqlRed ): """ MySQL-db Interface for handling querys for RedPages """ - + # Class variables for storing cached querys _cached_update_data = [] _update_query = 'UPDATE `red_pages` \ @@ -137,74 +139,74 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' _cached_insert_data = {} _insert_query = 'INSERT INTO `red_pages` \ ( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? 
);' - + def __init__( self, page_id ): """ Creates a new instance, runs __init__ of parent class """ - + super().__init__( ) - + self.__page_id = int( page_id ) - + self.data = self.get_page() - + def __del__( self ): pass - + def get_page( self ): """ Retrieves a red page row from MySQL-Database for given page_id - + @param int page_id MediaWiki page_id for page to retrieve - + @returns tuple Tuple with data for given page_id bool FALSE if none found """ - + cursor = type( self ).connection.cursor(mysqldb.DictCursor) - + cursor.execute( 'SELECT * FROM `red_pages` WHERE `page_id` = ?;', ( self.__page_id, ) ) res = cursor.fetchone() - + if res: return res else: return False - + def add_page( self, page_title, rev_id, status=0 ): """ Inserts a red page row in MySQL-Database for given page_id - + @param int rev_id MediaWiki current rev_id @param str page_title MediaWiki new page_title @param int status Page parsing status """ - + insert_data = { self.__page_id: ( self.__page_id, page_title, rev_id, status ) } - + type( self )._cached_insert_data.update( insert_data ) - + # Manualy construct self.data dict self.data = { 'page_id': self.__page_id, 'rev_id': rev_id, 'page_title': page_title, 'status': status } - + def update_page( self, rev_id=None, page_title=None, status=0 ): """ Updates the red page row in MySQL-Database for given page_id - + @param int rev_id MediaWiki current rev_id @param str page_title MediaWiki new page_title @param int status Page parsing status """ - + if not page_title: page_title = self.data[ 'page_title' ] if not rev_id: rev_id = self.data[ 'rev_id' ] - + type( self )._cached_update_data.append( ( page_title, rev_id, status, self.__page_id ) ) @@ -213,92 +215,92 @@ class MysqlRedFam( MysqlRed ): """ MySQL-db Interface for handling querys for RedFams """ - + # Class variables for storing cached querys _cached_update_data = [] _update_query = 'UPDATE `red_families` \ SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ 
`status`= ? WHERE `fam_hash` = ?;' - + _cached_insert_data = {} _insert_query = 'INSERT INTO `red_families` \ ( fam_hash, red_page_id, beginning, ending, status, heading, \ article0, article1, article2, article3, article4, article5, article6, \ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' - + def __init__( self ): """ Creates a new instance, runs __init__ of parent class """ - + super().__init__( ) - + def __del__( self ): pass - + def get_fam( self, fam_hash ): """ Retrieves a red family row from MySQL-Database for given fam_hash - + @returns dict Dictionairy with data for given fam hash False if none found """ self.__fam_hash = fam_hash - + cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - + cursor.execute( 'SELECT * FROM `red_families` WHERE `fam_hash` = ?;', ( fam_hash, ) ) self.data = cursor.fetchone() - + def add_fam( self, articlesList, heading, red_page_id, beginning, ending=None, status=0 ): - + data = [ self.__fam_hash, red_page_id, beginning, ending, status, heading ] - + for article in articlesList: data.append( str( article ) ) - + while len( data ) < 14: data.append( None ) - + data = tuple( data ) - + insert_data = { self.__fam_hash: data } type( self )._cached_insert_data.update( insert_data ) - + # Manualy construct self.data dict data_keys = ( 'fam_hash', 'red_page_id', 'beginning', 'ending', 'status', 'heading', 'article0', 'article1', 'article2', 'article3', 'article4', 'article5', 'article6', 'article7' ) self.data = dict( zip( data_keys, data ) ) - + def update_fam( self, red_page_id, heading, beginning, ending, status ): """ Updates the red fam row in MySQL-Database for given fam_hash - + @param int red_page_id MediaWiki page_id @param datetime beginning Timestamp of beginning qparam datetime ending Timestamp of ending of @param int status red_fam status """ - + type( self )._cached_update_data.append( ( red_page_id, heading, beginning, ending, status, self.__fam_hash ) ) - + def get_by_status( self, status 
): """ Generator witch fetches redFams with given status from DB """ - + cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - + cursor.execute( 'SELECT * FROM `red_families` WHERE `status` = ?;', ( status, ) ) - + while True: res = cursor.fetchmany( 1000 ) if not res: From 79dbde2413d8ece0986e89462f1ebffef4d51e05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 23 Aug 2016 21:23:24 +0200 Subject: [PATCH 080/192] Provide Replacement to @deprecated() as str Since use of pywikibot-master (or Python3.5 @see ticket below) the @deprecator requires a str as param and no callable object like done before Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=69 FS#69] --- redfam.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/redfam.py b/redfam.py index 0e316a7..3dec12f 100644 --- a/redfam.py +++ b/redfam.py @@ -308,8 +308,8 @@ class RedFamParser( RedFam ): """ # On archived red_fams do not delete possibly existing ending - if( not self._ending and self._status > 1 - and self.__mysql.data[ 'ending' ] ): + if( not self._ending and self._status > 1 and + self.__mysql.data[ 'ending' ] ): self._ending = self.__mysql.data[ 'ending' ] @@ -405,7 +405,7 @@ class RedFamParser( RedFam ): return (beginning, ending) @classmethod - @deprecated( extract_dates ) + @deprecated( 'extract_dates' ) def is_beginning( cls, line ): """ Returns the first timestamp found in line, otherwise None @@ -418,7 +418,7 @@ class RedFamParser( RedFam ): return cls.extract_dates( line )[0] @classmethod - @deprecated( extract_dates ) + @deprecated( 'extract_dates' ) def is_ending( cls, line, isarchive=False ): """ Returns the timestamp of done notice ( if one ), otherwise None @@ -435,7 +435,7 @@ class RedFamParser( RedFam ): return cls.extract_dates( line )[1] @classmethod - @deprecated( extract_dates ) + @deprecated( 'extract_dates' ) def is_ending2( cls, line ): """ Returns the last timestamp found in 
line, otherwise None From 5d31bdd7eb681312e2625028ca61f3b7a8c7cf41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 23 Aug 2016 21:28:13 +0200 Subject: [PATCH 081/192] Jogobot submodule updated --- jogobot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jogobot b/jogobot index dac01a2..2173f29 160000 --- a/jogobot +++ b/jogobot @@ -1 +1 @@ -Subproject commit dac01a224b8b190ed8c5bf8c0183dc879e775f81 +Subproject commit 2173f2984f1de6950728a15709bf93db5188731d From a8605bcee64a12fc2787492d978622710c4132a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 23 Aug 2016 21:50:22 +0200 Subject: [PATCH 082/192] Mv pages-parser.py to reddiscparser.py New, more meaningfull naming conventions, from redpage to reddisc (page) Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72] --- parse-pages.py => reddiscparser.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename parse-pages.py => reddiscparser.py (100%) diff --git a/parse-pages.py b/reddiscparser.py similarity index 100% rename from parse-pages.py rename to reddiscparser.py From 6cb92c1da7cac0ecfa5875968c11aae9e8252aaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 23 Aug 2016 21:53:44 +0200 Subject: [PATCH 083/192] Rewrite parse control using pywikibot.bot classes To use the default pywikibot.classes making life easier at some point Beeing standardconform with pywikibot in handling args Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72] --- reddiscparser.py | 190 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 140 insertions(+), 50 deletions(-) diff --git a/reddiscparser.py b/reddiscparser.py index 4545aef..2d7164f 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -22,11 +22,15 @@ # # """ -Script to parse all redpages in configured categories +Script to parse all reddisc pages in 
configured categories """ +import os +import sys + import pywikibot from pywikibot import pagegenerators +from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot @@ -34,74 +38,160 @@ import redpage import redfam -def get_cat_pages( cat ): +class DiscussionParserBot( + # CurrentPageBot, # via next two sets 'current_page' on each treat() + ExistingPageBot, # CurrentPageBot only treats existing pages + NoRedirectPageBot ): # class which only treats non-redirects """ - Generates a iteratable generator-object with all pages listet in given - category - - @param cat Category to request - @type cat str - - @returns generator Iteratable object with pages of given category + Botclass witch initialises the parsing process of Redundancy Discussions """ - # Get site to work on from pywikibot config - site = pywikibot.Site() + def __init__( self, generator ): + """ + Constructor - # Retrieve the content of given category - category = pywikibot.Category( site, cat ) + Parameters: + @param generator: The page generator that determines on which pages + to work. + @type generator: generator. 
+ """ + super( DiscussionParserBot, self ).__init__(generator=generator) - # Build an iteratable generator object with page objects for given category - generator = pagegenerators.CategorizedPageGenerator( category ) + def run( self ): + """ + Controls the overal parsing process, using super class for page switch - return generator + Needed to do things before/after treating pages is done + """ + try: + super( DiscussionParserBot, self ).run() -def main(*args): - """ - Handles process - """ + except: + raise - try: - jogobot.output( "BEGINN – parser-pages.py" ) + else: - # Iterate over configured categories - for cat in ( jogobot.config["redundances"]["redpage_cats"] ): + # If successfully parsed all pages in cat, flush db write cache + redpage.RedPage.flush_db_cache() - # Iterate over pages in current cat - for page in get_cat_pages( cat ): + def treat_page( self ): + """ + Handles work on current page + """ - # For pages configured to exclude, go on with next page - if page.title() in ( - jogobot.config["redundances"]["redpage_exclude"] ): + # Short circuit excluded pages + if self.current_page.title() in ( + jogobot.config["redundances"]["redpage_exclude"] ): - continue + return - # Initiate RedPage object - red_page = redpage.RedPage( page ) + # Initiate RedPage object + red_page = redpage.RedPage( self.current_page ) - # Check whether parsing is needed - if red_page.is_parsing_needed(): + # Check whether parsing is needed + if red_page.is_parsing_needed(): - # Iterate over returned generator with redfam sections - for fam in red_page.parse(): + # Iterate over returned generator with redfam sections + for fam in red_page.parse(): - # Run RedFamParser on section text - redfam.RedFamParser.parser( fam, red_page.page._pageid, - red_page.is_archive() ) - else: - # If successfully parsed whole page, flush - # db write cache - redfam.RedFamParser.flush_db_cache() - jogobot.output( "Page '%s' parsed" % - red_page.page.title() ) + # Run RedFamParser on section text + 
redfam.RedFamParser.parser( fam, red_page.page._pageid, + red_page.is_archive() ) else: - # If successfully parsed all pages in cat, flush db write cache - redpage.RedPage.flush_db_cache() + # If successfully parsed whole page, flush + # db write cache + redfam.RedFamParser.flush_db_cache() + jogobot.output( "Page [[{redisc}]] parsed".format( + reddisc=red_page.page.title() ) ) - finally: - jogobot.output( "END – parser-pages.py" ) - pywikibot.stopme() + +def main(*args): # noqa + """ + Process command line arguments and invoke bot. + + If args is an empty list, sys.argv is used. + + @param args: command line arguments + @type args: list of unicode + """ + + # Process global arguments to determine desired site + local_args = pywikibot.handle_args(args) + + # Get the jogobot-task_slug (basename of current file without ending) + task_slug = os.path.basename(__file__)[:-len(".py")] + + # Before run, we need to check wether we are currently active or not + try: + # Will throw Exception if disabled/blocked + # jogobot.is_active( task_slug ) + pass + + except jogobot.jogobot.Blocked: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), + "CRITICAL" ) + + except jogobot.jogobot.Disabled: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{red} %s (%s)" % (value, type ), + "ERROR" ) + + # Bot/Task is active + else: + + # This factory is responsible for processing command line arguments + # that are also used by other scripts and that determine on which pages + # to work on. + genFactory = pagegenerators.GeneratorFactory() + # The generator gives the pages that should be worked upon. 
+ gen = None + + # If always is True, bot won't ask for confirmation of edit (automode) + # always = False + + # If force_reload is True, bot will always parse Countrylist regardless + # if parsing is needed or not + # force_reload = False + + # Parse command line arguments + for arg in local_args: + if arg.startswith("-always"): + # always = True + pass + else: + genFactory.handleArg(arg) + + if not gen: + + # Check wether there are generators waiting for factoring, if not + # use configured categories + if not genFactory.gens: + + # Create Generators for configured Categories + for category in jogobot.config["redundances"]["redpage_cats"]: + cgen = genFactory.getCategoryGen( + category, + gen_func=pagegenerators.CategorizedPageGenerator) + + # If there is one, append to genFactory + if cgen: + genFactory.gens.append(cgen) + + # Create combined Generator (Union of all Generators) + gen = genFactory.getCombinedGenerator() + + if gen: + # Log beginning of parsing + jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) ) + + # The preloading generator is responsible for downloading multiple + # pages from the wiki simultaneously. 
+ gen = pagegenerators.PreloadingGenerator(gen) + DiscussionParserBot( gen ).run() + else: + pywikibot.showHelp() if( __name__ == "__main__" ): main() From 17bfb32dede157bf33272c1a025b357729850561 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 11:13:12 +0200 Subject: [PATCH 084/192] Building generators of config cats in sep Function Since the main()-Function was too complex the logic to build generators out of categories provided in jogobot.conf was moved in a separate function [https://fs.golderweb.de/index.php?do=details&task_id=73 FS#73] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72] --- reddiscparser.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/reddiscparser.py b/reddiscparser.py index 2d7164f..cd9cf29 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -106,7 +106,26 @@ class DiscussionParserBot( reddisc=red_page.page.title() ) ) -def main(*args): # noqa +def apply_conf_cat_generators( genFactory ): + """ + Builds generators for categories which are read from jogobot.config + + Parameters: + @param genFactory: The GeneratorFactory to which the builded generators + should be added. + @type genFactory: pagegenerators.GeneratorFactory + """ + # Create Generators for configured Categories + for category in jogobot.config["redundances"]["redpage_cats"]: + cgen = genFactory.getCategoryGen( + category, gen_func=pagegenerators.CategorizedPageGenerator) + + # If there is one, append to genFactory + if cgen: + genFactory.gens.append(cgen) + + +def main(*args): """ Process command line arguments and invoke bot. 
@@ -168,16 +187,7 @@ def main(*args): # noqa # Check wether there are generators waiting for factoring, if not # use configured categories if not genFactory.gens: - - # Create Generators for configured Categories - for category in jogobot.config["redundances"]["redpage_cats"]: - cgen = genFactory.getCategoryGen( - category, - gen_func=pagegenerators.CategorizedPageGenerator) - - # If there is one, append to genFactory - if cgen: - genFactory.gens.append(cgen) + apply_conf_cat_generators( genFactory ) # Create combined Generator (Union of all Generators) gen = genFactory.getCombinedGenerator() From 2f878ee901051c5d58eb22f640d882dae99eaa92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 11:20:28 +0200 Subject: [PATCH 085/192] Correct filename in header Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=72 FS#72] --- reddiscparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reddiscparser.py b/reddiscparser.py index cd9cf29..6525ac9 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # -# parse-pages.py +# reddiscparser.py # # Copyright 2016 GOLDERWEB – Jonathan Golder # From dcc485151392a9c05d75e6b845d5c7d3fd1044a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 15:27:42 +0200 Subject: [PATCH 086/192] Check reddisc page titles against regex To prevent parsing Pages which have been categorized in configured cats wrong or are given via cmd params Parsing them results in unexpected behaviour Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] --- reddiscparser.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/reddiscparser.py b/reddiscparser.py index 6525ac9..00329e4 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -27,6 +27,7 @@ Script to parse all reddisc pages in configured 
categories import os import sys +import re import pywikibot from pywikibot import pagegenerators @@ -46,6 +47,10 @@ class DiscussionParserBot( Botclass witch initialises the parsing process of Redundancy Discussions """ + # RegEx to filter wrong pages + onlyinclude_re = re.compile( + jogobot.config["redundances"]["reddiscs_onlyinclude_re"] ) + def __init__( self, generator ): """ Constructor @@ -86,6 +91,11 @@ class DiscussionParserBot( return + # Exclude pages which does not match pattern + if not type(self).onlyinclude_re.search( self.current_page.title() ): + + return + # Initiate RedPage object red_page = redpage.RedPage( self.current_page ) @@ -102,7 +112,7 @@ class DiscussionParserBot( # If successfully parsed whole page, flush # db write cache redfam.RedFamParser.flush_db_cache() - jogobot.output( "Page [[{redisc}]] parsed".format( + jogobot.output( "Page [[{reddisc}]] parsed".format( reddisc=red_page.page.title() ) ) From ee8ebbc8bc088d41ba15801f7d42ac3f29bbbf1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 15:41:13 +0200 Subject: [PATCH 087/192] Make sure only flush db if there are redfams To prevent from doing unnecessary stuff and trying to use not existing db connection Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] --- reddiscparser.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/reddiscparser.py b/reddiscparser.py index 00329e4..962eb5a 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -102,18 +102,31 @@ class DiscussionParserBot( # Check whether parsing is needed if red_page.is_parsing_needed(): + # Count families for failure analysis + fam_counter = 0 + # Iterate over returned generator with redfam sections for fam in red_page.parse(): # Run RedFamParser on section text redfam.RedFamParser.parser( fam, red_page.page._pageid, 
red_page.is_archive() ) + + fam_counter += 1 + else: # If successfully parsed whole page, flush # db write cache - redfam.RedFamParser.flush_db_cache() - jogobot.output( "Page [[{reddisc}]] parsed".format( - reddisc=red_page.page.title() ) ) + if( fam_counter ): + redfam.RedFamParser.flush_db_cache() + jogobot.output( "Page [[{reddisc}]] parsed".format( + reddisc=red_page.page.title() ) ) + else: + jogobot.output( + "\03{red} Page [[{reddisc}]], ".format( + reddisc=red_page.page.title() ) + + "containing no redfam, parsed!", + "WARNING" ) def apply_conf_cat_generators( genFactory ): From bd2d221c488d80cf992bd4d141d2db27db1b8ce4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 15:48:30 +0200 Subject: [PATCH 088/192] Prevent flush from creating cursor without con MysqlRed.flush() tried to create a cursor in any case. If there was no connection (because the subclasses haven't been instantiated an oursql Error occured. Instead, check before if there is a connection and otherwise raise an Error Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=75 FS#75] --- mysqlred.py | 17 +++++++++++++++++ reddiscparser.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/mysqlred.py b/mysqlred.py index 055b995..77eae35 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -92,6 +92,9 @@ class MysqlRed: """ Run cached querys """ + if not cls.connection: + raise MysqlRedConnectionError( "No connection exists!" ) + cursor = cls.connection.cursor() # Execute insert query @@ -307,3 +310,17 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' break for row in res: yield row + + +class MysqlRedError(Exception): + """ + Basic Exception class for this module + """ + pass + + +class MysqlRedConnectionError(MysqlRedError): + """ + Raised if there are Errors with Mysql-Connections + """ + pass diff --git a/reddiscparser.py b/reddiscparser.py index 962eb5a..3a6f43b 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -123,7 +123,7 @@ class DiscussionParserBot( reddisc=red_page.page.title() ) ) else: jogobot.output( - "\03{red} Page [[{reddisc}]], ".format( + "\03{red}" + "Page [[{reddisc}]], ".format( reddisc=red_page.page.title() ) + "containing no redfam, parsed!", "WARNING" ) From 0bb0b2d95756a0ea8c334054a9cf10514583adfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 16:51:23 +0200 Subject: [PATCH 089/192] Make sure var beginning is always defined To prevent unbound Errors caused by using undeclared variable beginning if the redfam-section does not contain any timestamp Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=76 FS#76] --- redfam.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/redfam.py b/redfam.py index 3dec12f..7b03131 100644 --- a/redfam.py +++ b/redfam.py @@ -401,6 +401,10 @@ class RedFamParser( RedFam ): else: ending = None + # Missing dates (Task: FS#76) + else: + beginning = None + ending = None return (beginning, ending) From 95be31385982180a3fd352d54f908b385eec30aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 16:53:45 +0200 Subject: [PATCH 090/192] Pass reddisc pywikibot.page object to redfam To access page information like page title (eg. 
to get dates from it) of the reddisc page Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=76 FS#76] --- reddiscparser.py | 2 +- redfam.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/reddiscparser.py b/reddiscparser.py index 3a6f43b..43417f3 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -109,7 +109,7 @@ class DiscussionParserBot( for fam in red_page.parse(): # Run RedFamParser on section text - redfam.RedFamParser.parser( fam, red_page.page._pageid, + redfam.RedFamParser.parser( fam, red_page.page, red_page.is_archive() ) fam_counter += 1 diff --git a/redfam.py b/redfam.py index 7b03131..26b3c76 100644 --- a/redfam.py +++ b/redfam.py @@ -137,14 +137,14 @@ class RedFamParser( RedFam ): wurde gewünscht von:" __done_notice2 = "{{Erledigt|" - def __init__( self, heading, red_page_id, red_page_archive, + def __init__( self, heading, red_page, red_page_archive, beginning, ending=None ): """ Creates a RedFam object based on data collected while parsing red_pages combined with possibly former known data from db @param red_fam_heading str Wikitext heading of section - @param red_page_id int MediaWiki page_id + @param red_page page Pywikibot.page object @param red_page_archive bool Is red_page an archive @param beginning datetime Timestamp of beginning str as strptime parseable string @@ -153,7 +153,7 @@ class RedFamParser( RedFam ): """ # Set object attributes: - self._red_page_id = red_page_id + self._red_page_id = red_page._pageid self._red_page_archive = red_page_archive self._fam_hash = None @@ -341,7 +341,7 @@ class RedFamParser( RedFam ): return False @classmethod - def parser( cls, text, pageid, isarchive=False ): + def parser( cls, text, page, isarchive=False ): """ Handles parsing of redfam section @@ -360,7 +360,7 @@ class RedFamParser( RedFam ): (beginning, ending) = RedFamParser.extract_dates( text, isarchive ) # Create the RedFam object - RedFamParser( heading, pageid, isarchive, beginning, ending ) 
+ RedFamParser( heading, page, isarchive, beginning, ending ) @classmethod def extract_dates( cls, text, isarchive=False ): From ab430e00857f380d2738e25a4e276d22eb08146e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 16:56:54 +0200 Subject: [PATCH 091/192] Use month of reddisc as beginning if missing Construct a fictive but sensfull beginning if we cant detect one Needed since beginning is mandatory Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=76 FS#76] --- redfam.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/redfam.py b/redfam.py index 26b3c76..a78b150 100644 --- a/redfam.py +++ b/redfam.py @@ -359,6 +359,19 @@ class RedFamParser( RedFam ): # Extract beginnig and maybe ending (beginning, ending) = RedFamParser.extract_dates( text, isarchive ) + # Missing beginning (Task: FS#76) + # Use first day of month of reddisc + if not beginning: + match = re.search( + jogobot.config["redundances"]["reddiscs_onlyinclude_re"], + page.title() ) + + if match: + beginning = datetime.strptime( + "01. {month} {year}".format( + month=match.group(1), year=match.group(2)), + "%d. 
%B %Y" ) + # Create the RedFam object RedFamParser( heading, page, isarchive, beginning, ending ) From 1e4c8646bf890081bf04c757d205e38740d5bf83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 19:57:25 +0200 Subject: [PATCH 092/192] Reparse redfam-heading with mwparser See related ticked for detailed failure explanation Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=77 FS#77] --- redfam.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/redfam.py b/redfam.py index a78b150..41e6367 100644 --- a/redfam.py +++ b/redfam.py @@ -210,13 +210,14 @@ class RedFamParser( RedFam ): @type heading wikicode or mwparser-parseable """ - # Parse heading with mwparse if needed - if not isinstance( heading, mwparser.wikicode.Wikicode ): - heading = mwparser.parse( heading ) - # Save heading as string self._heading = str( heading ) + # Parse string heading with mwparse again everytime + # In some cases the given wikicode is broken due to syntax errors + # (Task FS#77) + heading = mwparser.parse( self._heading ) + # Save destinations of wikilinks in headings self._articlesList = [ str( link.title ) for link in heading.ifilter_wikilinks() ] From ac54aea69832baa92d4bcb3cac86f7adf6b1991d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 20:02:48 +0200 Subject: [PATCH 093/192] Use callback to detect redfam.section Detecting redfam-Sections via RegExp caused some false positives due to wrong formated things in wikisyntax. 
See Task Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=78 FS#78] --- redfam.py | 16 ++++++++++++++++ redpage.py | 5 +++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/redfam.py b/redfam.py index 41e6367..b58b94a 100644 --- a/redfam.py +++ b/redfam.py @@ -341,6 +341,22 @@ class RedFamParser( RedFam ): else: return False + @classmethod + def is_section_redfam_cb( cls, heading ): + """ + Used as callback for wikicode.get_sections in redpage.parse to + select sections which are redfams + """ + # Because of strange behavior in some cases, parse heading again + # (Task FS#77) + heading = mwparser.parse( str( heading ) ) + + # Make sure we have min. two wikilinks in heading to assume a redfam + if len( heading.filter_wikilinks() ) >= 2: + return True + else: + return False + @classmethod def parser( cls, text, page, isarchive=False ): """ diff --git a/redpage.py b/redpage.py index 2b93ae8..6bb6cc4 100644 --- a/redpage.py +++ b/redpage.py @@ -28,9 +28,10 @@ Provides a class for handling redundance discussion pages and archives import pywikibot # noqa import mwparserfromhell as mwparser -import jogobot +import jogobot # noqa from mysqlred import MysqlRedPage +from redfam import RedFamParser class RedPage: @@ -116,7 +117,7 @@ class RedPage: # include_lead = if true include first section (intro) # include_heading = if true include heading fams = self.wikicode.get_sections( - matches=jogobot.config["redundances"]["section_heading_regex"], + matches=RedFamParser.is_section_redfam_cb, include_lead=False, include_headings=True ) # Iterate over RedFam From e28acf88d1e81908107081127f0b54cf943c3b50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 22:41:41 +0200 Subject: [PATCH 094/192] Introduce new directory structure To clarify which is a bot and which are helper scripts Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=74 FS#74] --- mysqlred.py => lib/mysqlred.py | 
0 redfam.py => lib/redfam.py | 2 +- redpage.py => lib/redpage.py | 4 ++-- reddiscparser.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) rename mysqlred.py => lib/mysqlred.py (100%) rename redfam.py => lib/redfam.py (99%) rename redpage.py => lib/redpage.py (98%) diff --git a/mysqlred.py b/lib/mysqlred.py similarity index 100% rename from mysqlred.py rename to lib/mysqlred.py diff --git a/redfam.py b/lib/redfam.py similarity index 99% rename from redfam.py rename to lib/redfam.py index b58b94a..a0f566f 100644 --- a/redfam.py +++ b/lib/redfam.py @@ -35,7 +35,7 @@ import pywikibot # noqa from pywikibot.tools import deprecated # noqa import jogobot -from mysqlred import MysqlRedFam +from lib.mysqlred import MysqlRedFam class RedFam: diff --git a/redpage.py b/lib/redpage.py similarity index 98% rename from redpage.py rename to lib/redpage.py index 6bb6cc4..176f6bc 100644 --- a/redpage.py +++ b/lib/redpage.py @@ -30,8 +30,8 @@ import mwparserfromhell as mwparser import jogobot # noqa -from mysqlred import MysqlRedPage -from redfam import RedFamParser +from lib.mysqlred import MysqlRedPage +from lib.redfam import RedFamParser class RedPage: diff --git a/reddiscparser.py b/reddiscparser.py index 43417f3..f9b2059 100644 --- a/reddiscparser.py +++ b/reddiscparser.py @@ -35,8 +35,8 @@ from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot -import redpage -import redfam +from lib import redpage +from lib import redfam class DiscussionParserBot( From 77d1de44731b1aa51649a3e0a4a0550488dea63c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 24 Aug 2016 23:53:10 +0200 Subject: [PATCH 095/192] Add a tablename prefix depending on Site To be able to run the bot on different wikis the db tables should be named pywikibot.Site dependend and changed automatically Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=79 FS#79] --- lib/mysqlred.py | 39 +++++++++++++++++++++++++-------------- 1 
file changed, 25 insertions(+), 14 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 77eae35..9eb7f4b 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -33,6 +33,7 @@ except ImportError: import atexit +import pywikibot from pywikibot import config import jogobot @@ -53,6 +54,7 @@ class MysqlRed: db_username = config.db_username db_password = config.db_password db_name = config.db_username + jogobot.config['db_suffix'] + db_table_prefix = pywikibot.Site().family.dbName(pywikibot.Site().code) # Class variables for storing cached querys _cached_update_data = [] @@ -136,12 +138,14 @@ class MysqlRedPage( MysqlRed ): # Class variables for storing cached querys _cached_update_data = [] - _update_query = 'UPDATE `red_pages` \ -SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' + _update_query = 'UPDATE `{pre}_red_pages` \ +SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'.format( + pre=MysqlRed.db_table_prefix) _cached_insert_data = {} - _insert_query = 'INSERT INTO `red_pages` \ -( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' + _insert_query = 'INSERT INTO `{pre}_red_pages` \ +( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );'.format( + pre=MysqlRed.db_table_prefix) def __init__( self, page_id ): """ @@ -169,8 +173,10 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? 
WHERE `page_id` = ?;' cursor = type( self ).connection.cursor(mysqldb.DictCursor) - cursor.execute( 'SELECT * FROM `red_pages` WHERE `page_id` = ?;', - ( self.__page_id, ) ) + cursor.execute( + 'SELECT * FROM `{pre}_red_pages` WHERE `page_id` = ?;'.format( + pre=MysqlRed.db_table_prefix), ( self.__page_id, ) ) + res = cursor.fetchone() if res: @@ -221,15 +227,17 @@ class MysqlRedFam( MysqlRed ): # Class variables for storing cached querys _cached_update_data = [] - _update_query = 'UPDATE `red_families` \ + _update_query = 'UPDATE `{pre}_red_families` \ SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ -`status`= ? WHERE `fam_hash` = ?;' +`status`= ? WHERE `fam_hash` = ?;'.format( + pre=MysqlRed.db_table_prefix) _cached_insert_data = {} - _insert_query = 'INSERT INTO `red_families` \ + _insert_query = 'INSERT INTO `{pre}_red_families` \ ( fam_hash, red_page_id, beginning, ending, status, heading, \ article0, article1, article2, article3, article4, article5, article6, \ -article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' +article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'.format( + pre=MysqlRed.db_table_prefix) def __init__( self ): """ @@ -252,8 +260,10 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - cursor.execute( 'SELECT * FROM `red_families` WHERE `fam_hash` = ?;', - ( fam_hash, ) ) + cursor.execute( + 'SELECT * FROM `{pre}_red_families` WHERE `fam_hash` = ?;'.format( + pre=MysqlRed.db_table_prefix), ( fam_hash, ) ) + self.data = cursor.fetchone() def add_fam( self, articlesList, heading, red_page_id, @@ -301,8 +311,9 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - cursor.execute( 'SELECT * FROM `red_families` WHERE `status` = ?;', - ( status, ) ) + cursor.execute( + 'SELECT * FROM `{pre}_red_families` WHERE `status` = ?;'.format( + pre=type( self ).db_table_prefix), ( status, ) ) while True: res = cursor.fetchmany( 1000 ) From 71b99b5f5837e43af1cc57f1890bfbf6d4d0e382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 25 Aug 2016 13:06:32 +0200 Subject: [PATCH 096/192] Delay definition of db_table_prefix db_table_prefix should be defined at init of MysqlRed and not at import to have cmdline args already parsed Otherwise it uses default family Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=79 FS#79] --- lib/mysqlred.py | 53 ++++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 9eb7f4b..499816f 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -54,7 +54,7 @@ class MysqlRed: db_username = config.db_username db_password = config.db_password db_name = config.db_username + jogobot.config['db_suffix'] - db_table_prefix = pywikibot.Site().family.dbName(pywikibot.Site().code) + db_table_prefix = False # Class variables for storing cached querys _cached_update_data = [] @@ -69,6 +69,14 @@ class MysqlRed: @returns mysql-stream MySQL Connection """ + # Needs to be generated after Parsing of Args (not at import time) + if not type(self).db_table_prefix: + type(self).db_table_prefix = \ + pywikibot.Site().family.dbName(pywikibot.Site().code) + + # Now we can setup prepared queries + self._prepare_queries() + # Connect to mysqldb only once if not type( self ).connection: @@ -89,6 +97,15 @@ class MysqlRed: type( self ).connection.close() + def _prepare_queries( self ): + """ + Used to replace placeholders in prepared queries + """ + type(self)._update_query = type(self)._update_query.format( + 
prefix=type(self).db_table_prefix) + type(self)._insert_query = type(self)._insert_query.format( + prefix=type(self).db_table_prefix) + @classmethod def flush( cls ): """ @@ -137,15 +154,14 @@ class MysqlRedPage( MysqlRed ): """ # Class variables for storing cached querys + # '{prefix}' will be replaced during super().__init__() _cached_update_data = [] - _update_query = 'UPDATE `{pre}_red_pages` \ -SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'.format( - pre=MysqlRed.db_table_prefix) + _update_query = 'UPDATE `{prefix}_red_pages` \ +SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' _cached_insert_data = {} - _insert_query = 'INSERT INTO `{pre}_red_pages` \ -( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );'.format( - pre=MysqlRed.db_table_prefix) + _insert_query = 'INSERT INTO `{prefix}_red_pages` \ +( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' def __init__( self, page_id ): """ @@ -174,8 +190,8 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;'.format( cursor = type( self ).connection.cursor(mysqldb.DictCursor) cursor.execute( - 'SELECT * FROM `{pre}_red_pages` WHERE `page_id` = ?;'.format( - pre=MysqlRed.db_table_prefix), ( self.__page_id, ) ) + 'SELECT * FROM `{prefix}_red_pages` WHERE `page_id` = ?;'.format( + prefix=type(self).db_table_prefix), ( self.__page_id, ) ) res = cursor.fetchone() @@ -227,17 +243,14 @@ class MysqlRedFam( MysqlRed ): # Class variables for storing cached querys _cached_update_data = [] - _update_query = 'UPDATE `{pre}_red_families` \ + _update_query = 'UPDATE `{prefix}_red_families` \ SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ -`status`= ? WHERE `fam_hash` = ?;'.format( - pre=MysqlRed.db_table_prefix) - +`status`= ? 
WHERE `fam_hash` = ?;' _cached_insert_data = {} - _insert_query = 'INSERT INTO `{pre}_red_families` \ + _insert_query = 'INSERT INTO `{prefix}_red_families` \ ( fam_hash, red_page_id, beginning, ending, status, heading, \ article0, article1, article2, article3, article4, article5, article6, \ -article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'.format( - pre=MysqlRed.db_table_prefix) +article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' def __init__( self ): """ @@ -261,8 +274,8 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );'.format( cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor.execute( - 'SELECT * FROM `{pre}_red_families` WHERE `fam_hash` = ?;'.format( - pre=MysqlRed.db_table_prefix), ( fam_hash, ) ) + 'SELECT * FROM `{prefix}_red_families` WHERE `fam_hash` = ?;'. + format( prefix=type(self).db_table_prefix), ( fam_hash, ) ) self.data = cursor.fetchone() @@ -312,8 +325,8 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);'.format( cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor.execute( - 'SELECT * FROM `{pre}_red_families` WHERE `status` = ?;'.format( - pre=type( self ).db_table_prefix), ( status, ) ) + 'SELECT * FROM `{prefix}_red_families` WHERE `status` = ?;'.format( + prefix=type( self ).db_table_prefix), ( status, ) ) while True: res = cursor.fetchmany( 1000 ) From 78eda105622c2692e2dd0e1c825119d2b14a2e94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Thu, 25 Aug 2016 22:41:13 +0200 Subject: [PATCH 097/192] Remove deprecated methods Deprecated functions which are not used anymore can be removed to make code more clearer and improve maintainability Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=80 FS#80] --- lib/redfam.py | 58 --------------------------------------------------- 1 file changed, 58 deletions(-) diff --git a/lib/redfam.py b/lib/redfam.py index a0f566f..30dd22d 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -325,22 +325,6 @@ class RedFamParser( RedFam ): self._beginning, self._ending, self._status ) - @classmethod - @deprecated - def is_sectionheading( cls, line ): - """ - Checks wether given line is a red_fam section heading - - @param str line String to check - - @returns bool Returns True if it is a section heading - """ - - if cls.__sectionhead_pat.search( str(line) ): - return True - else: - return False - @classmethod def is_section_redfam_cb( cls, heading ): """ @@ -438,48 +422,6 @@ class RedFamParser( RedFam ): return (beginning, ending) - @classmethod - @deprecated( 'extract_dates' ) - def is_beginning( cls, line ): - """ - Returns the first timestamp found in line, otherwise None - - @param str line String to search in - - @returns str Timestamp, otherwise None - """ - - return cls.extract_dates( line )[0] - - @classmethod - @deprecated( 'extract_dates' ) - def is_ending( cls, line, isarchive=False ): - """ - Returns the timestamp of done notice ( if one ), 
otherwise None - - @param line String to search in - @type line str - @param isarchive If true skip searching done notice (on archivepages) - @type isarchive bool - - @returns Timestamp, otherwise None - @returntype str - """ - - return cls.extract_dates( line )[1] - - @classmethod - @deprecated( 'extract_dates' ) - def is_ending2( cls, line ): - """ - Returns the last timestamp found in line, otherwise None - @param str line String to search in - - @returns str Timestamp, otherwise None - """ - - return cls.extract_dates( line, True )[1] - class RedFamWorker( RedFam ): """ From 177a8f920f9396a6480efab60fc7c084e0234308 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 10:55:22 +0200 Subject: [PATCH 098/192] Prepare new structure to use subtasks To have only one entry point for the bot we want to have a single file (red.py) which is calling the specfic task class from bots dir with a standardized call Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- reddiscparser.py => bots/reddiscparser.py | 0 red.py | 230 ++++++++++++++++++++++ 2 files changed, 230 insertions(+) rename reddiscparser.py => bots/reddiscparser.py (100%) create mode 100644 red.py diff --git a/reddiscparser.py b/bots/reddiscparser.py similarity index 100% rename from reddiscparser.py rename to bots/reddiscparser.py diff --git a/red.py b/red.py new file mode 100644 index 0000000..f9b2059 --- /dev/null +++ b/red.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# reddiscparser.py +# +# Copyright 2016 GOLDERWEB – Jonathan Golder +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. +# +# +""" +Script to parse all reddisc pages in configured categories +""" + +import os +import sys +import re + +import pywikibot +from pywikibot import pagegenerators +from pywikibot.bot import ExistingPageBot, NoRedirectPageBot + +import jogobot + +from lib import redpage +from lib import redfam + + +class DiscussionParserBot( + # CurrentPageBot, # via next two sets 'current_page' on each treat() + ExistingPageBot, # CurrentPageBot only treats existing pages + NoRedirectPageBot ): # class which only treats non-redirects + """ + Botclass witch initialises the parsing process of Redundancy Discussions + """ + + # RegEx to filter wrong pages + onlyinclude_re = re.compile( + jogobot.config["redundances"]["reddiscs_onlyinclude_re"] ) + + def __init__( self, generator ): + """ + Constructor + + Parameters: + @param generator: The page generator that determines on which pages + to work. + @type generator: generator. 
+ """ + super( DiscussionParserBot, self ).__init__(generator=generator) + + def run( self ): + """ + Controls the overal parsing process, using super class for page switch + + Needed to do things before/after treating pages is done + """ + try: + + super( DiscussionParserBot, self ).run() + + except: + raise + + else: + + # If successfully parsed all pages in cat, flush db write cache + redpage.RedPage.flush_db_cache() + + def treat_page( self ): + """ + Handles work on current page + """ + + # Short circuit excluded pages + if self.current_page.title() in ( + jogobot.config["redundances"]["redpage_exclude"] ): + + return + + # Exclude pages which does not match pattern + if not type(self).onlyinclude_re.search( self.current_page.title() ): + + return + + # Initiate RedPage object + red_page = redpage.RedPage( self.current_page ) + + # Check whether parsing is needed + if red_page.is_parsing_needed(): + + # Count families for failure analysis + fam_counter = 0 + + # Iterate over returned generator with redfam sections + for fam in red_page.parse(): + + # Run RedFamParser on section text + redfam.RedFamParser.parser( fam, red_page.page, + red_page.is_archive() ) + + fam_counter += 1 + + else: + # If successfully parsed whole page, flush + # db write cache + if( fam_counter ): + redfam.RedFamParser.flush_db_cache() + jogobot.output( "Page [[{reddisc}]] parsed".format( + reddisc=red_page.page.title() ) ) + else: + jogobot.output( + "\03{red}" + "Page [[{reddisc}]], ".format( + reddisc=red_page.page.title() ) + + "containing no redfam, parsed!", + "WARNING" ) + + +def apply_conf_cat_generators( genFactory ): + """ + Builds generators for categories which are read from jogobot.config + + Parameters: + @param genFactory: The GeneratorFactory to which the builded generators + should be added. 
+ @type genFactory: pagegenerators.GeneratorFactory + """ + # Create Generators for configured Categories + for category in jogobot.config["redundances"]["redpage_cats"]: + cgen = genFactory.getCategoryGen( + category, gen_func=pagegenerators.CategorizedPageGenerator) + + # If there is one, append to genFactory + if cgen: + genFactory.gens.append(cgen) + + +def main(*args): + """ + Process command line arguments and invoke bot. + + If args is an empty list, sys.argv is used. + + @param args: command line arguments + @type args: list of unicode + """ + + # Process global arguments to determine desired site + local_args = pywikibot.handle_args(args) + + # Get the jogobot-task_slug (basename of current file without ending) + task_slug = os.path.basename(__file__)[:-len(".py")] + + # Before run, we need to check wether we are currently active or not + try: + # Will throw Exception if disabled/blocked + # jogobot.is_active( task_slug ) + pass + + except jogobot.jogobot.Blocked: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), + "CRITICAL" ) + + except jogobot.jogobot.Disabled: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{red} %s (%s)" % (value, type ), + "ERROR" ) + + # Bot/Task is active + else: + + # This factory is responsible for processing command line arguments + # that are also used by other scripts and that determine on which pages + # to work on. + genFactory = pagegenerators.GeneratorFactory() + # The generator gives the pages that should be worked upon. 
+ gen = None + + # If always is True, bot won't ask for confirmation of edit (automode) + # always = False + + # If force_reload is True, bot will always parse Countrylist regardless + # if parsing is needed or not + # force_reload = False + + # Parse command line arguments + for arg in local_args: + if arg.startswith("-always"): + # always = True + pass + else: + genFactory.handleArg(arg) + + if not gen: + + # Check wether there are generators waiting for factoring, if not + # use configured categories + if not genFactory.gens: + apply_conf_cat_generators( genFactory ) + + # Create combined Generator (Union of all Generators) + gen = genFactory.getCombinedGenerator() + + if gen: + # Log beginning of parsing + jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) ) + + # The preloading generator is responsible for downloading multiple + # pages from the wiki simultaneously. + gen = pagegenerators.PreloadingGenerator(gen) + DiscussionParserBot( gen ).run() + else: + pywikibot.showHelp() + +if( __name__ == "__main__" ): + main() From b88efb6bdde64ea9d1dc736da224c990464eb863 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 12:17:12 +0200 Subject: [PATCH 099/192] Reflect stucture changes in Code Since bot class is moved to separate dir/file we need to do some changes to rebuild functionality Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- bots/__init__.py | 2 + bots/reddiscparser.py | 109 ++---------------------------------------- red.py | 98 +------------------------------------ 3 files changed, 7 insertions(+), 202 deletions(-) create mode 100644 bots/__init__.py diff --git a/bots/__init__.py b/bots/__init__.py new file mode 100644 index 0000000..9327388 --- /dev/null +++ b/bots/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index f9b2059..7f66a2f 100644 --- 
a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -22,15 +22,13 @@ # # """ -Script to parse all reddisc pages in configured categories +Bot to parse all reddisc pages in given Generator or configured categories """ -import os -import sys import re -import pywikibot -from pywikibot import pagegenerators +import pywikibot # noqa +from pywikibot import pagegenerators # noqa from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot @@ -127,104 +125,3 @@ class DiscussionParserBot( reddisc=red_page.page.title() ) + "containing no redfam, parsed!", "WARNING" ) - - -def apply_conf_cat_generators( genFactory ): - """ - Builds generators for categories which are read from jogobot.config - - Parameters: - @param genFactory: The GeneratorFactory to which the builded generators - should be added. - @type genFactory: pagegenerators.GeneratorFactory - """ - # Create Generators for configured Categories - for category in jogobot.config["redundances"]["redpage_cats"]: - cgen = genFactory.getCategoryGen( - category, gen_func=pagegenerators.CategorizedPageGenerator) - - # If there is one, append to genFactory - if cgen: - genFactory.gens.append(cgen) - - -def main(*args): - """ - Process command line arguments and invoke bot. - - If args is an empty list, sys.argv is used. 
- - @param args: command line arguments - @type args: list of unicode - """ - - # Process global arguments to determine desired site - local_args = pywikibot.handle_args(args) - - # Get the jogobot-task_slug (basename of current file without ending) - task_slug = os.path.basename(__file__)[:-len(".py")] - - # Before run, we need to check wether we are currently active or not - try: - # Will throw Exception if disabled/blocked - # jogobot.is_active( task_slug ) - pass - - except jogobot.jogobot.Blocked: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), - "CRITICAL" ) - - except jogobot.jogobot.Disabled: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{red} %s (%s)" % (value, type ), - "ERROR" ) - - # Bot/Task is active - else: - - # This factory is responsible for processing command line arguments - # that are also used by other scripts and that determine on which pages - # to work on. - genFactory = pagegenerators.GeneratorFactory() - # The generator gives the pages that should be worked upon. - gen = None - - # If always is True, bot won't ask for confirmation of edit (automode) - # always = False - - # If force_reload is True, bot will always parse Countrylist regardless - # if parsing is needed or not - # force_reload = False - - # Parse command line arguments - for arg in local_args: - if arg.startswith("-always"): - # always = True - pass - else: - genFactory.handleArg(arg) - - if not gen: - - # Check wether there are generators waiting for factoring, if not - # use configured categories - if not genFactory.gens: - apply_conf_cat_generators( genFactory ) - - # Create combined Generator (Union of all Generators) - gen = genFactory.getCombinedGenerator() - - if gen: - # Log beginning of parsing - jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) ) - - # The preloading generator is responsible for downloading multiple - # pages from the wiki simultaneously. 
- gen = pagegenerators.PreloadingGenerator(gen) - DiscussionParserBot( gen ).run() - else: - pywikibot.showHelp() - -if( __name__ == "__main__" ): - main() diff --git a/red.py b/red.py index f9b2059..bee76b8 100644 --- a/red.py +++ b/red.py @@ -22,111 +22,17 @@ # # """ -Script to parse all reddisc pages in configured categories +Wrapper script to invoke all redundances bot tasks """ import os import sys -import re import pywikibot from pywikibot import pagegenerators -from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot - -from lib import redpage -from lib import redfam - - -class DiscussionParserBot( - # CurrentPageBot, # via next two sets 'current_page' on each treat() - ExistingPageBot, # CurrentPageBot only treats existing pages - NoRedirectPageBot ): # class which only treats non-redirects - """ - Botclass witch initialises the parsing process of Redundancy Discussions - """ - - # RegEx to filter wrong pages - onlyinclude_re = re.compile( - jogobot.config["redundances"]["reddiscs_onlyinclude_re"] ) - - def __init__( self, generator ): - """ - Constructor - - Parameters: - @param generator: The page generator that determines on which pages - to work. - @type generator: generator. 
- """ - super( DiscussionParserBot, self ).__init__(generator=generator) - - def run( self ): - """ - Controls the overal parsing process, using super class for page switch - - Needed to do things before/after treating pages is done - """ - try: - - super( DiscussionParserBot, self ).run() - - except: - raise - - else: - - # If successfully parsed all pages in cat, flush db write cache - redpage.RedPage.flush_db_cache() - - def treat_page( self ): - """ - Handles work on current page - """ - - # Short circuit excluded pages - if self.current_page.title() in ( - jogobot.config["redundances"]["redpage_exclude"] ): - - return - - # Exclude pages which does not match pattern - if not type(self).onlyinclude_re.search( self.current_page.title() ): - - return - - # Initiate RedPage object - red_page = redpage.RedPage( self.current_page ) - - # Check whether parsing is needed - if red_page.is_parsing_needed(): - - # Count families for failure analysis - fam_counter = 0 - - # Iterate over returned generator with redfam sections - for fam in red_page.parse(): - - # Run RedFamParser on section text - redfam.RedFamParser.parser( fam, red_page.page, - red_page.is_archive() ) - - fam_counter += 1 - - else: - # If successfully parsed whole page, flush - # db write cache - if( fam_counter ): - redfam.RedFamParser.flush_db_cache() - jogobot.output( "Page [[{reddisc}]] parsed".format( - reddisc=red_page.page.title() ) ) - else: - jogobot.output( - "\03{red}" + "Page [[{reddisc}]], ".format( - reddisc=red_page.page.title() ) + - "containing no redfam, parsed!", - "WARNING" ) +from bots.reddiscparser import DiscussionParserBot def apply_conf_cat_generators( genFactory ): From 1679e2ad6a8b10bd0d319abbab6ad4653615586e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 13:36:14 +0200 Subject: [PATCH 100/192] Prepare environment for starting subtasks Before init and run bot we need to provide a environment for it, like parsed args 
Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- red.py | 65 ++++++++++++++++++++++------------------------------------ 1 file changed, 25 insertions(+), 40 deletions(-) diff --git a/red.py b/red.py index bee76b8..dd14625 100644 --- a/red.py +++ b/red.py @@ -32,26 +32,6 @@ import pywikibot from pywikibot import pagegenerators import jogobot -from bots.reddiscparser import DiscussionParserBot - - -def apply_conf_cat_generators( genFactory ): - """ - Builds generators for categories which are read from jogobot.config - - Parameters: - @param genFactory: The GeneratorFactory to which the builded generators - should be added. - @type genFactory: pagegenerators.GeneratorFactory - """ - # Create Generators for configured Categories - for category in jogobot.config["redundances"]["redpage_cats"]: - cgen = genFactory.getCategoryGen( - category, gen_func=pagegenerators.CategorizedPageGenerator) - - # If there is one, append to genFactory - if cgen: - genFactory.gens.append(cgen) def main(*args): @@ -68,7 +48,7 @@ def main(*args): local_args = pywikibot.handle_args(args) # Get the jogobot-task_slug (basename of current file without ending) - task_slug = os.path.basename(__file__)[:-len(".py")] + task_slug = os.path.basename(__file__)[:-len(".py")] # noqa (temp) # Before run, we need to check wether we are currently active or not try: @@ -93,8 +73,6 @@ def main(*args): # that are also used by other scripts and that determine on which pages # to work on. genFactory = pagegenerators.GeneratorFactory() - # The generator gives the pages that should be worked upon. 
- gen = None # If always is True, bot won't ask for confirmation of edit (automode) # always = False @@ -103,34 +81,41 @@ def main(*args): # if parsing is needed or not # force_reload = False + # Subtask selects the specific bot to run + # Default is reddiscparser + subtask = None + + # kwargs are passed to selected bot as **kwargs + kwargs = dict() # noqa (temp) + # Parse command line arguments for arg in local_args: + + # Split args + arg, sep, value = arg.partition(':') + if arg.startswith("-always"): # always = True pass + elif arg.startswith("-task"): + subtask = value else: genFactory.handleArg(arg) - if not gen: + # After parsing args we can select bot to run + if not subtask or subtask == "discparser": + # Default case: discparser + subtask = "discparser" - # Check wether there are generators waiting for factoring, if not - # use configured categories - if not genFactory.gens: - apply_conf_cat_generators( genFactory ) + # Import related bot + from bots.reddiscparser import DiscussionParserBot as Bot # noqa (temp) - # Create combined Generator (Union of all Generators) - gen = genFactory.getCombinedGenerator() - - if gen: - # Log beginning of parsing - jogobot.output( "{task_slug} invoked".format(task_slug=task_slug) ) - - # The preloading generator is responsible for downloading multiple - # pages from the wiki simultaneously. - gen = pagegenerators.PreloadingGenerator(gen) - DiscussionParserBot( gen ).run() + # else: - pywikibot.showHelp() + jogobot.output( ( + "\03{{red}} Given subtask \"{subtask} \"" + + "is not existing!" ).format( subtask=subtask ), "ERROR" ) + if( __name__ == "__main__" ): main() From 156f117b18ebd997a7e08454ab21455ca6491e98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 13:49:19 +0200 Subject: [PATCH 101/192] Add Bot initiation with exception handling Bot initiation needs to catch errors by Bot to enforce at least a basic logging. 
And also to be sure Init was successfull before starting bot. Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- red.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/red.py b/red.py index dd14625..7a26f24 100644 --- a/red.py +++ b/red.py @@ -48,7 +48,7 @@ def main(*args): local_args = pywikibot.handle_args(args) # Get the jogobot-task_slug (basename of current file without ending) - task_slug = os.path.basename(__file__)[:-len(".py")] # noqa (temp) + task_slug = os.path.basename(__file__)[:-len(".py")] # Before run, we need to check wether we are currently active or not try: @@ -86,7 +86,7 @@ def main(*args): subtask = None # kwargs are passed to selected bot as **kwargs - kwargs = dict() # noqa (temp) + kwargs = dict() # Parse command line arguments for arg in local_args: @@ -108,7 +108,7 @@ def main(*args): subtask = "discparser" # Import related bot - from bots.reddiscparser import DiscussionParserBot as Bot # noqa (temp) + from bots.reddiscparser import DiscussionParserBot as Bot # else: @@ -116,6 +116,25 @@ def main(*args): "\03{{red}} Given subtask \"{subtask} \"" + "is not existing!" ).format( subtask=subtask ), "ERROR" ) + # Bot gets prepared genFactory as first param and possible kwargs dict + # It has to threw an exception if something does not work properly + try: + # Init bot with genFactory and **kwargs + bot = Bot( genFactory, **kwargs ) # noqa (temp) + + except: + # Catch Errors while initiation + jogobot.output( ( + "\03{{red}} Error while trying to init " + + "subtask \"{task_slug}-{subtask} \"!" ). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + raise + else: + # Init successfull + jogobot.output( ( + "{task_slug}-{subtask} init successfull" ). 
+ format(task_slug=task_slug, subtask=subtask) ) + if( __name__ == "__main__" ): main() From 460d2db18396939c13f7fbb9ca1627fbf4cb02a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 14:00:29 +0200 Subject: [PATCH 102/192] Add Bot run with exception handling Errors, especially caused by missing run-method, need to be catched to provide information in Logfile. And also to get information wether bot run was successfull Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- red.py | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/red.py b/red.py index 7a26f24..3d398f4 100644 --- a/red.py +++ b/red.py @@ -34,7 +34,7 @@ from pywikibot import pagegenerators import jogobot -def main(*args): +def main(*args): # noqa (temp) """ Process command line arguments and invoke bot. @@ -120,7 +120,7 @@ def main(*args): # It has to threw an exception if something does not work properly try: # Init bot with genFactory and **kwargs - bot = Bot( genFactory, **kwargs ) # noqa (temp) + bot = Bot( genFactory, **kwargs ) except: # Catch Errors while initiation @@ -132,7 +132,44 @@ def main(*args): else: # Init successfull jogobot.output( ( - "{task_slug}-{subtask} init successfull" ). + "Subtask \"{task_slug}-{subtask}\" was" + + "initiated successfully" ). 
+ format(task_slug=task_slug, subtask=subtask) ) + + # Fire up Bot + # Bot must have implemented a run()-method + # It has to threw an exception if something does not work properly + try: + # Call run method on Bot + bot.run() + + # Special event on AttributeError to catch missing run()-method + except AttributeError: + (type, value, traceback) = sys.exc_info() + + # Catch missing run()-method + if "has no attribute 'run'" in value: + jogobot.output( ( + "\03{{red}} Error while trying to run " + + "subtask \"{task_slug}-{subtask} \": +" + "Run-method is missing! "). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + + # Pass through other AttributeError + else: + raise + + except: + jogobot.output( ( + "\03{{red}} Error while trying to run " + + "subtask \"{task_slug}-{subtask} \"!" ). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + raise + + else: + # Run successfull + jogobot.output( ( + "Subtask \"{task_slug}-{subtask}\" was finished successfully"). format(task_slug=task_slug, subtask=subtask) ) From 3540cc2a7d68e4c15d7e96bf84c11a21fd59e723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 15:18:17 +0200 Subject: [PATCH 103/192] Move functional sections to functions in main() To make main() function less complicated functional sections are moved to dedicated functions Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=82 FS#82] --- red.py | 357 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 240 insertions(+), 117 deletions(-) diff --git a/red.py b/red.py index 3d398f4..d9bda88 100644 --- a/red.py +++ b/red.py @@ -34,7 +34,235 @@ from pywikibot import pagegenerators import jogobot -def main(*args): # noqa (temp) +def active(task_slug): + """ + Checks up if bot with given task_slug is active via jogobot.framework + + @param task_slug Task slug to check + @type task_slug str + + @return True if active, otherwise False + @rtype bool + """ + 
+ try: + # Will throw Exception if disabled/blocked + # jogobot.is_active( task_slug ) + pass + + except jogobot.jogobot.Blocked: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), + "CRITICAL" ) + return False + + except jogobot.jogobot.Disabled: + (type, value, traceback) = sys.exc_info() + jogobot.output( "\03{red} %s (%s)" % (value, type ), + "ERROR" ) + return False + + # Bot/Task is active + else: + return True + + +def parse_local_args( local_args ): + """ + Parses local cmd args which are not parsed by pywikibot + + @param local_args Local args returned by pywikibot.handle_args(args) + @type iterable + + @returns The following tuple + @return 1 Slug of given subtask (Arg "-task") + @rtype str + @return 2 GenFactory with parsed pagegenerator args + @rtype pagegenerators.GeneratorFactory + @return 3 Additional args for subtasks + @rtype dict + @rtype tuple + """ + + # This factory is responsible for processing command line arguments + # that are also used by other scripts and that determine on which pages + # to work on. 
+ genFactory = pagegenerators.GeneratorFactory() + + # If always is True, bot won't ask for confirmation of edit (automode) + # always = False + + # If force_reload is True, bot will always parse Countrylist regardless + # if parsing is needed or not + # force_reload = False + + # Subtask selects the specific bot to run + # Default is reddiscparser + subtask = None + + # kwargs are passed to selected bot as **kwargs + kwargs = dict() + + # Parse command line arguments + for arg in local_args: + + # Split args + arg, sep, value = arg.partition(':') + + if arg.startswith("-always"): + # always = True + pass + elif arg.startswith("-task"): + subtask = value + else: + genFactory.handleArg(arg) + + # Return Tuple + return ( subtask, genFactory, kwargs ) + + +def prepare_bot( task_slug, subtask, genFactory, subtask_args ): + """ + Handles importing subtask Bot class and prepares specific args + + Throws exception if bot not exists + + @param task_slug Task slug, needed for logging + @type task_slug str + @param subtask Slug of given subtask + @type subtask str + @param genFactory GenFactory with parsed pagegenerator args + @type genFactory pagegenerators.GeneratorFactory + @param subtask_args Additional args for subtasks + @type subtask_args dict\ + + @returns The following tuple + @return 1 Subtask slug (replaced None for default) + @rtype str + @return 2 Botclass of given subtask (Arg "-task") + @rtype Class + @return 3 GenFactory with parsed pagegenerator args + @rtype pagegenerators.GeneratorFactory + @return 4 Additional args for subtasks + @rtype dict + @rtype tuple + """ + # kwargs are passed to selected bot as **kwargs + kwargs = dict() + + if not subtask or subtask == "discparser": + # Default case: discparser + subtask = "discparser" + + # Import related bot + from bots.reddiscparser import DiscussionParserBot as Bot + + # Subtask error + else: + jogobot.output( ( + "\03{{red}} Given subtask \"{subtask} \"" + + "is not existing!" 
).format( subtask=subtask ), "ERROR" ) + raise Exception + + return ( subtask, Bot, genFactory, kwargs ) + + +def init_bot( task_slug, subtask, Bot, genFactory, **kwargs ): + """ + Initiates Bot-Object with Class given in Bot and passes params genFactory + and kwargs to it + + Passes through exception generated by Bot.__init__() after logging. + + @param task_slug Task slug, needed for logging + @type task_slug str + @param subtask Slug of given subtask + @type subtask str + @param Bot Bot class to build bot-object from + @type Class + @param genFactory GenFactory with parsed pagegenerator args + @type genFactory pagegenerators.GeneratorFactory + @param **kwargs Additional args for Bot() + @type **kwargs dict + + @returns bot-object + @type type(Bot()) + """ + # Bot gets prepared genFactory as first param and possible kwargs dict + # It has to threw an exception if something does not work properly + try: + # Init bot with genFactory and **kwargs + bot = Bot( genFactory, **kwargs ) + + except: + # Catch Errors while initiation + jogobot.output( ( + "\03{{red}} Error while trying to init " + + "subtask \"{task_slug}-{subtask}\"!" ). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + raise + else: + # Init successfull + jogobot.output( ( + "Subtask \"{task_slug}-{subtask}\" was " + + "initiated successfully" ). + format(task_slug=task_slug, subtask=subtask) ) + return bot + + +def run_bot( task_slug, subtask, bot ): + """ + Calls the run()-method of bot-object + + Passes through exceptions generated by Bot.__init__() after logging. + Catches Errors caused by missing run(0-method. 
+ + @param task_slug Task slug, needed for logging + @type task_slug str + @param subtask Slug of given subtask + @type subtask str + @param bot Bot object to call run()-method on + @type object with method run + """ + + # Fire up Bot + # Bot must have implemented a run()-method + # It has to threw an exception if something does not work properly + try: + # Call run method on Bot + bot.run() + + # Special event on AttributeError to catch missing run()-method + except AttributeError: + (type, value, traceback) = sys.exc_info() + + # Catch missing run()-method + if "has no attribute 'run'" in value: + jogobot.output( ( + "\03{{red}} Error while trying to run " + + "subtask \"{task_slug}-{subtask} \": +" + "Run-method is missing! "). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + + # Pass through other AttributeError + else: + raise + + except: + jogobot.output( ( + "\03{{red}} Error while trying to run " + + "subtask \"{task_slug}-{subtask} \"!" ). + format( task_slug=task_slug, subtask=subtask ), "ERROR" ) + raise + + else: + # Run successfull + jogobot.output( ( + "Subtask \"{task_slug}-{subtask}\" was finished successfully"). + format(task_slug=task_slug, subtask=subtask) ) + + +def main(*args): """ Process command line arguments and invoke bot. 
@@ -51,126 +279,21 @@ def main(*args): # noqa (temp) task_slug = os.path.basename(__file__)[:-len(".py")] # Before run, we need to check wether we are currently active or not - try: - # Will throw Exception if disabled/blocked - # jogobot.is_active( task_slug ) - pass + if not active( task_slug ): + return - except jogobot.jogobot.Blocked: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), - "CRITICAL" ) + # Parse local Args to get information about subtask + ( subtask, genFactory, subtask_args ) = parse_local_args( local_args ) - except jogobot.jogobot.Disabled: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{red} %s (%s)" % (value, type ), - "ERROR" ) + # select subtask and prepare args + ( subtask, Bot, genFactory, kwargs ) = prepare_bot( + task_slug, subtask, genFactory, subtask_args ) - # Bot/Task is active - else: + # Init Bot + bot = init_bot( task_slug, subtask, Bot, genFactory, **kwargs) - # This factory is responsible for processing command line arguments - # that are also used by other scripts and that determine on which pages - # to work on. 
- genFactory = pagegenerators.GeneratorFactory() - - # If always is True, bot won't ask for confirmation of edit (automode) - # always = False - - # If force_reload is True, bot will always parse Countrylist regardless - # if parsing is needed or not - # force_reload = False - - # Subtask selects the specific bot to run - # Default is reddiscparser - subtask = None - - # kwargs are passed to selected bot as **kwargs - kwargs = dict() - - # Parse command line arguments - for arg in local_args: - - # Split args - arg, sep, value = arg.partition(':') - - if arg.startswith("-always"): - # always = True - pass - elif arg.startswith("-task"): - subtask = value - else: - genFactory.handleArg(arg) - - # After parsing args we can select bot to run - if not subtask or subtask == "discparser": - # Default case: discparser - subtask = "discparser" - - # Import related bot - from bots.reddiscparser import DiscussionParserBot as Bot - - # - else: - jogobot.output( ( - "\03{{red}} Given subtask \"{subtask} \"" + - "is not existing!" ).format( subtask=subtask ), "ERROR" ) - - # Bot gets prepared genFactory as first param and possible kwargs dict - # It has to threw an exception if something does not work properly - try: - # Init bot with genFactory and **kwargs - bot = Bot( genFactory, **kwargs ) - - except: - # Catch Errors while initiation - jogobot.output( ( - "\03{{red}} Error while trying to init " + - "subtask \"{task_slug}-{subtask} \"!" ). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - raise - else: - # Init successfull - jogobot.output( ( - "Subtask \"{task_slug}-{subtask}\" was" + - "initiated successfully" ). 
- format(task_slug=task_slug, subtask=subtask) ) - - # Fire up Bot - # Bot must have implemented a run()-method - # It has to threw an exception if something does not work properly - try: - # Call run method on Bot - bot.run() - - # Special event on AttributeError to catch missing run()-method - except AttributeError: - (type, value, traceback) = sys.exc_info() - - # Catch missing run()-method - if "has no attribute 'run'" in value: - jogobot.output( ( - "\03{{red}} Error while trying to run " + - "subtask \"{task_slug}-{subtask} \": +" - "Run-method is missing! "). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - - # Pass through other AttributeError - else: - raise - - except: - jogobot.output( ( - "\03{{red}} Error while trying to run " + - "subtask \"{task_slug}-{subtask} \"!" ). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - raise - - else: - # Run successfull - jogobot.output( ( - "Subtask \"{task_slug}-{subtask}\" was finished successfully"). - format(task_slug=task_slug, subtask=subtask) ) + # Run bot + run_bot( task_slug, subtask, bot ) if( __name__ == "__main__" ): From 0ceb2e6e836dfd19a225227b521dc1e99bb9f54d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 16:58:20 +0200 Subject: [PATCH 104/192] Add methods to build gen to DiscussionParser With the new wrapper script the Bot gets a GenFactory and has to build a generator out of it by its own Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=83 FS#83] --- bots/reddiscparser.py | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 7f66a2f..2a47642 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -58,7 +58,47 @@ class DiscussionParserBot( to work. @type generator: generator. 
""" - super( DiscussionParserBot, self ).__init__(generator=generator) + + def build_generator(self): + """ + Builds generator to work on, based on self.genFactory + """ + # Check wether there are generators waiting for factoring, if not + # use configured categories + if not self.genFactory.gens: + self.apply_conf_cat_generators() + + # Create combined Generator (Union of all Generators) + gen = self.genFactory.getCombinedGenerator() + + if gen: + # The preloading generator is responsible for downloading multiple + # pages from the wiki simultaneously. + self.gen = pagegenerators.PreloadingGenerator(gen) + + else: + pywikibot.showHelp() + + def apply_conf_cat_generators( self ): + """ + Builds generators for categories which are read from jogobot.config + + Parameters: + @param genFactory: The GeneratorFactory to which the builded + generators should be added. + @type genFactory: pagegenerators.GeneratorFactory + """ + # Create Generators for configured Categories + for category in jogobot.config["redundances"]["redpage_cats"]: + gen = self.genFactory.getCategoryGen( + category, gen_func=pagegenerators.CategorizedPageGenerator) + + # If there is one, append to genFactory + if gen: + self.genFactory.gens.append(gen) + + # Reset gen for next iteration + gen = None def run( self ): """ From 2be0a8903de6600939999f6dcb3da813ef584be5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 17:02:51 +0200 Subject: [PATCH 105/192] Adjust constructor for wrapper-script The new wrapper-script calls a standardized API We need to be conform with that Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=83 FS#83] --- bots/reddiscparser.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 2a47642..818eb05 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -49,16 +49,27 @@ class DiscussionParserBot( 
onlyinclude_re = re.compile( jogobot.config["redundances"]["reddiscs_onlyinclude_re"] ) - def __init__( self, generator ): + def __init__( self, genFactory, **kwargs ): """ Constructor Parameters: - @param generator: The page generator that determines on which pages - to work. - @type generator: generator. + @param genFactory GenFactory with parsed pagegenerator args to + build generator + @type genFactory pagegenerators.GeneratorFactory + @param **kwargs Additional args + @type iterable """ + # Copy needed args + self.genFactory = genFactory + + # Build generator with genFactory + self.build_generator() + + # Run super class init with builded generator + super( DiscussionParserBot, self ).__init__(generator=self.gen) + def build_generator(self): """ Builds generator to work on, based on self.genFactory From d0fa15d0edd12c5e17f2e915d08f13b1f712b928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 18:27:11 +0200 Subject: [PATCH 106/192] Update jogobot module to get standart Start-API [FS#84] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] --- jogobot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jogobot b/jogobot index 2173f29..28d03f3 160000 --- a/jogobot +++ b/jogobot @@ -1 +1 @@ -Subproject commit 2173f2984f1de6950728a15709bf93db5188731d +Subproject commit 28d03f35b848a33ad45d3f5f8f3f82e8c45534ec From 604b7bd8b726fb56f2ae6fb4b6d3871a6518eedc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 18:51:42 +0200 Subject: [PATCH 107/192] Now use Bot-Start API from jogobot framework API was moved to jogobot to share with other tasks Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] Related Task: 
[https://fs.golderweb.de/index.php?do=details&task_id=85 FS#85] --- red.py | 196 +++------------------------------------------------------ 1 file changed, 7 insertions(+), 189 deletions(-) diff --git a/red.py b/red.py index d9bda88..733def2 100644 --- a/red.py +++ b/red.py @@ -26,101 +26,12 @@ Wrapper script to invoke all redundances bot tasks """ import os -import sys import pywikibot -from pywikibot import pagegenerators import jogobot -def active(task_slug): - """ - Checks up if bot with given task_slug is active via jogobot.framework - - @param task_slug Task slug to check - @type task_slug str - - @return True if active, otherwise False - @rtype bool - """ - - try: - # Will throw Exception if disabled/blocked - # jogobot.is_active( task_slug ) - pass - - except jogobot.jogobot.Blocked: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{lightpurple} %s (%s)" % (value, type ), - "CRITICAL" ) - return False - - except jogobot.jogobot.Disabled: - (type, value, traceback) = sys.exc_info() - jogobot.output( "\03{red} %s (%s)" % (value, type ), - "ERROR" ) - return False - - # Bot/Task is active - else: - return True - - -def parse_local_args( local_args ): - """ - Parses local cmd args which are not parsed by pywikibot - - @param local_args Local args returned by pywikibot.handle_args(args) - @type iterable - - @returns The following tuple - @return 1 Slug of given subtask (Arg "-task") - @rtype str - @return 2 GenFactory with parsed pagegenerator args - @rtype pagegenerators.GeneratorFactory - @return 3 Additional args for subtasks - @rtype dict - @rtype tuple - """ - - # This factory is responsible for processing command line arguments - # that are also used by other scripts and that determine on which pages - # to work on. 
- genFactory = pagegenerators.GeneratorFactory() - - # If always is True, bot won't ask for confirmation of edit (automode) - # always = False - - # If force_reload is True, bot will always parse Countrylist regardless - # if parsing is needed or not - # force_reload = False - - # Subtask selects the specific bot to run - # Default is reddiscparser - subtask = None - - # kwargs are passed to selected bot as **kwargs - kwargs = dict() - - # Parse command line arguments - for arg in local_args: - - # Split args - arg, sep, value = arg.partition(':') - - if arg.startswith("-always"): - # always = True - pass - elif arg.startswith("-task"): - subtask = value - else: - genFactory.handleArg(arg) - - # Return Tuple - return ( subtask, genFactory, kwargs ) - - def prepare_bot( task_slug, subtask, genFactory, subtask_args ): """ Handles importing subtask Bot class and prepares specific args @@ -167,101 +78,6 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ): return ( subtask, Bot, genFactory, kwargs ) -def init_bot( task_slug, subtask, Bot, genFactory, **kwargs ): - """ - Initiates Bot-Object with Class given in Bot and passes params genFactory - and kwargs to it - - Passes through exception generated by Bot.__init__() after logging. 
- - @param task_slug Task slug, needed for logging - @type task_slug str - @param subtask Slug of given subtask - @type subtask str - @param Bot Bot class to build bot-object from - @type Class - @param genFactory GenFactory with parsed pagegenerator args - @type genFactory pagegenerators.GeneratorFactory - @param **kwargs Additional args for Bot() - @type **kwargs dict - - @returns bot-object - @type type(Bot()) - """ - # Bot gets prepared genFactory as first param and possible kwargs dict - # It has to threw an exception if something does not work properly - try: - # Init bot with genFactory and **kwargs - bot = Bot( genFactory, **kwargs ) - - except: - # Catch Errors while initiation - jogobot.output( ( - "\03{{red}} Error while trying to init " + - "subtask \"{task_slug}-{subtask}\"!" ). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - raise - else: - # Init successfull - jogobot.output( ( - "Subtask \"{task_slug}-{subtask}\" was " + - "initiated successfully" ). - format(task_slug=task_slug, subtask=subtask) ) - return bot - - -def run_bot( task_slug, subtask, bot ): - """ - Calls the run()-method of bot-object - - Passes through exceptions generated by Bot.__init__() after logging. - Catches Errors caused by missing run(0-method. 
- - @param task_slug Task slug, needed for logging - @type task_slug str - @param subtask Slug of given subtask - @type subtask str - @param bot Bot object to call run()-method on - @type object with method run - """ - - # Fire up Bot - # Bot must have implemented a run()-method - # It has to threw an exception if something does not work properly - try: - # Call run method on Bot - bot.run() - - # Special event on AttributeError to catch missing run()-method - except AttributeError: - (type, value, traceback) = sys.exc_info() - - # Catch missing run()-method - if "has no attribute 'run'" in value: - jogobot.output( ( - "\03{{red}} Error while trying to run " + - "subtask \"{task_slug}-{subtask} \": +" - "Run-method is missing! "). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - - # Pass through other AttributeError - else: - raise - - except: - jogobot.output( ( - "\03{{red}} Error while trying to run " + - "subtask \"{task_slug}-{subtask} \"!" ). - format( task_slug=task_slug, subtask=subtask ), "ERROR" ) - raise - - else: - # Run successfull - jogobot.output( ( - "Subtask \"{task_slug}-{subtask}\" was finished successfully"). - format(task_slug=task_slug, subtask=subtask) ) - - def main(*args): """ Process command line arguments and invoke bot. 
@@ -278,22 +94,24 @@ def main(*args): # Get the jogobot-task_slug (basename of current file without ending) task_slug = os.path.basename(__file__)[:-len(".py")] + # Disabled until [FS#86] is done # Before run, we need to check wether we are currently active or not - if not active( task_slug ): - return + # if not jogobot.bot.active( task_slug ): + # return # Parse local Args to get information about subtask - ( subtask, genFactory, subtask_args ) = parse_local_args( local_args ) + ( subtask, genFactory, subtask_args ) = jogobot.bot.parse_local_args( + local_args ) # select subtask and prepare args ( subtask, Bot, genFactory, kwargs ) = prepare_bot( task_slug, subtask, genFactory, subtask_args ) # Init Bot - bot = init_bot( task_slug, subtask, Bot, genFactory, **kwargs) + bot = jogobot.bot.init_bot( task_slug, subtask, Bot, genFactory, **kwargs) # Run bot - run_bot( task_slug, subtask, bot ) + jogobot.bot.run_bot( task_slug, subtask, bot ) if( __name__ == "__main__" ): From 9481116777ba73f84f9b47d5425516467f637764 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 2 Mar 2016 16:48:56 +0100 Subject: [PATCH 108/192] Add new generator-method to fetch RedFams by Status and Ending Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=26 FS#26] --- lib/mysqlred.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 499816f..0be345e 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -246,6 +246,7 @@ class MysqlRedFam( MysqlRed ): _update_query = 'UPDATE `{prefix}_red_families` \ SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ `status`= ? WHERE `fam_hash` = ?;' + _cached_insert_data = {} _insert_query = 'INSERT INTO `{prefix}_red_families` \ ( fam_hash, red_page_id, beginning, ending, status, heading, \ @@ -259,9 +260,6 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' super().__init__( ) - def __del__( self ): - pass - def get_fam( self, fam_hash ): """ Retrieves a red family row from MySQL-Database for given fam_hash @@ -335,6 +333,25 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' for row in res: yield row + def get_by_status_and_ending( self, status, ending ): + """ + Generator witch fetches redFams with given status from DB + """ + + cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + + cursor.execute( + 'SELECT * FROM `{prefix}_red_families` WHERE `status` = ? AND'. + format(prefix=type( self ).db_table_prefix) + + '`ending` >= ?;', ( status, ending ) ) + + while True: + res = cursor.fetchmany( 1000 ) + if not res: + break + for row in res: + yield row + class MysqlRedError(Exception): """ From 58dfd8c86a652b29c9037dbb698b162173c1a341 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 2 Mar 2016 16:50:47 +0100 Subject: [PATCH 109/192] For RedFamilies not fetched individually we need to provide the fam hash as index Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=26 FS#26] --- lib/mysqlred.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 0be345e..600c2a3 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -174,9 +174,6 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' self.data = self.get_page() - def __del__( self ): - pass - def get_page( self ): """ Retrieves a red page row from MySQL-Database for given page_id @@ -253,11 +250,13 @@ SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ article0, article1, article2, article3, article4, article5, article6, \ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' - def __init__( self ): + def __init__( self, fam_hash=None ): """ Creates a new instance, runs __init__ of parent class """ + self.__fam_hash = fam_hash + super().__init__( ) def get_fam( self, fam_hash ): From a97d8c722e229ae717841647feee5ae62bd7f28c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 2 Mar 2016 17:00:55 +0100 Subject: [PATCH 110/192] Move handling of mysql-Connection from RedFamParser and RedFamWorker to RedFam-Class and make it protected instead of private Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=26 FS#26] --- lib/redfam.py | 63 +++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/lib/redfam.py b/lib/redfam.py index 30dd22d..07d0820 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -57,6 +57,9 @@ class RedFam: @param heading str Original heading of RedFam (Link) """ + # Database interface + self._mysql = MysqlRedFam( fam_hash ) + # Initial attribute values self._articlesList = articlesList self._beginning = beginning @@ -108,6 +111,28 @@ class RedFam: else: self._fam_hash = h.hexdigest() + def changed( self ): + """ + Checks wether anything has changed and maybe triggers db update + """ + + # On archived red_fams do not delete possibly existing ending + if( not self._ending and self._status > 1 + and self._mysql.data[ 'ending' ] ): + + self._ending = self._mysql.data[ 'ending' ] + + # Since status change means something has changed, update database + if( self._status != self._mysql.data[ 'status' ] or + self._beginning != self._mysql.data[ 'beginning' ] or + self._ending != self._mysql.data[ 'ending' ] or + self._red_page_id != self._mysql.data[ 'red_page_id' ] or + self._heading != self._mysql.data[ 'heading' ]): + + self._mysql.update_fam( self._red_page_id, self._heading, + self._beginning, self._ending, + self._status ) + @classmethod def flush_db_cache( cls ): """ @@ -194,13 +219,13 @@ class RedFamParser( 
RedFam ): """ # We need a connection to our mysqldb - self.__mysql = MysqlRedFam( ) - self.__mysql.get_fam( self._fam_hash ) + self._mysql = MysqlRedFam( ) + self._mysql.get_fam( self._fam_hash ) - if not self.__mysql.data: - self.__mysql.add_fam( self._articlesList, self._heading, - self._red_page_id, self._beginning, - self._ending ) + if not self._mysql.data: + self._mysql.add_fam( self._articlesList, self._heading, + self._red_page_id, self._beginning, + self._ending ) def heading_parser( self, heading ): """ @@ -226,6 +251,7 @@ class RedFamParser( RedFam ): if len( self._articlesList ) > 8: # For repression in output we need to know the fam hash self.calc_fam_hash() + jogobot.output( ( "\03{{lightred}}" + "Maximum number of articles in red_fam exceeded, " + @@ -289,7 +315,7 @@ class RedFamParser( RedFam ): """ # Do not change stati set by worker script etc. - if not self.__mysql.data['status'] > 2: + if not self._mysql.data['status'] > 2: # No ending, discussion is running: # Sometimes archived discussions also have no detectable ending @@ -301,29 +327,8 @@ class RedFamParser( RedFam ): else: self._status = 2 else: - self._status = self.__mysql.data[ 'status' ] - def changed( self ): - """ - Checks wether anything has changed and maybe triggers db update - """ - - # On archived red_fams do not delete possibly existing ending - if( not self._ending and self._status > 1 and - self.__mysql.data[ 'ending' ] ): - - self._ending = self.__mysql.data[ 'ending' ] - - # Since status change means something has changed, update database - if( self._status != self.__mysql.data[ 'status' ] or - self._beginning != self.__mysql.data[ 'beginning' ] or - self._ending != self.__mysql.data[ 'ending' ] or - self._red_page_id != self.__mysql.data[ 'red_page_id' ] or - self._heading != self.__mysql.data[ 'heading' ]): - - self.__mysql.update_fam( self._red_page_id, self._heading, - self._beginning, self._ending, - self._status ) + self._status = self._mysql.data[ 'status' ] 
@classmethod def is_section_redfam_cb( cls, heading ): From 151c22a735d082525f1a1934e17ad2c897c969cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 2 Mar 2016 17:04:06 +0100 Subject: [PATCH 111/192] Add fetched mysql_data to _mysql-Object of parent class for using change-method to update db Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=26 FS#26] --- lib/redfam.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/redfam.py b/lib/redfam.py index 07d0820..2e1594a 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -445,6 +445,8 @@ class RedFamWorker( RedFam ): mysql_data[ 'status' ], mysql_data[ 'fam_hash' ], mysql_data[ 'heading' ] ) + self._mysql.data = mysql_data + @classmethod def list_by_status( cls, status ): """ From ad088126e703b3bb4703c778a3717f502d9a937f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 2 Mar 2016 17:06:08 +0100 Subject: [PATCH 112/192] Define method to update Status after Working with RedFam Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=26 FS#26] --- lib/redfam.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/redfam.py b/lib/redfam.py index 2e1594a..e513a0b 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -447,6 +447,13 @@ class RedFamWorker( RedFam ): self._mysql.data = mysql_data + def update_status( self ): + """ + Sets status to 3 when worked on + """ + + self._status = 3 + @classmethod def list_by_status( cls, status ): """ From b271a0b0b1860919d88b1df89e41031f4b5059b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 2 Mar 2016 17:06:46 +0100 Subject: [PATCH 113/192] Add generator wrapper to fetch RedFams by status and ending Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=26 FS#26] --- lib/redfam.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lib/redfam.py b/lib/redfam.py index 
e513a0b..d4086ea 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -467,6 +467,19 @@ class RedFamWorker( RedFam ): print(fam) raise + @classmethod + def gen_by_status_and_ending( cls, status, ending ): + """ + Yield red_fams stored in db by given status which have an ending after + given one + """ + mysql = MysqlRedFam() + for fam in mysql.get_by_status_and_ending( status, ending ): + try: + yield cls( fam ) + except RedFamHashError: + print(fam) + raise class RedFamError( Exception ): """ From 594130c8a65ee22988c9db530f30bb7934e8ba97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Wed, 2 Mar 2016 22:23:32 +0100 Subject: [PATCH 114/192] Restore changes from 45df35431 Documented to prevent deleting again Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=26 FS#26] --- lib/mysqlred.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 600c2a3..dc313d9 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -174,6 +174,13 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' self.data = self.get_page() + def __del__( self ): + """ + Needed to prevent descendant classes of MYSQL_RED from deleting + connection to db + """ + pass + def get_page( self ): """ Retrieves a red page row from MySQL-Database for given page_id @@ -259,6 +266,13 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' super().__init__( ) + def __del__( self ): + """ + Needed to prevent descendant classes of MYSQL_RED from deleting + connection to db + """ + pass + def get_fam( self, fam_hash ): """ Retrieves a red family row from MySQL-Database for given fam_hash From 4055dc52d8df3e9567c4bfb162692201b298aa46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 5 Mar 2016 15:55:38 +0100 Subject: [PATCH 115/192] Make it possible to get a RedPage-Object by pageid When working on redfams it is necessary to have information about redpage Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=26 FS#26] --- lib/redpage.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/lib/redpage.py b/lib/redpage.py index 176f6bc..ebedaba 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -39,19 +39,22 @@ class RedPage: Class for handling redundance discussion pages and archives """ - def __init__( self, page, archive=False ): + def __init__( self, page=None, pageid=None, archive=False ): """ Generate a new RedPage object based on the given pywikibot page object - @param page page Pywikibot/MediaWiki page object for page + @param page Pywikibot/MediaWiki page object for page + @type page pywikibot.Page + @param pageid MW-Pageid for related page + @type pageid int """ # Safe the pywikibot page object self.page = page + self.pageid = pageid self._archive = archive self.__handle_db( ) - self.is_page_changed() self._parsed = None @@ -62,7 +65,16 @@ class RedPage: """ # We need a connection to our mysqldb - self.__mysql = MysqlRedPage( self.page._pageid ) + if self.page: + self.__mysql = MysqlRedPage( self.page._pageid ) + self.pageid = self.page._pageid + elif self.pageid: + self.__mysql = MysqlRedPage( self.pageid ) + self.page = pywikibot.Page( pywikibot.Site(), + self.__mysql.data['page_title'] ) + self.page.exists() + else: + raise ValueError( "Page NOR pagid provided!" 
) if not self.__mysql.data: self.__mysql.add_page( self.page.title(), self.page._revid ) From b36dc250d20b2443be2472ab1ec39dd28c38cefb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 26 Aug 2016 18:17:53 +0200 Subject: [PATCH 116/192] Request information about reddisc page for redfams To generate links to related reddisc it is necessary to get at least the Title of the related reddisc page. As saving the same data in db is worse, we retreive it via join from red_pages table Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=26 FS#26] --- lib/mysqlred.py | 12 ++++++++---- lib/redfam.py | 13 +++++++++++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index dc313d9..f57ae2b 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -353,10 +353,14 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - cursor.execute( - 'SELECT * FROM `{prefix}_red_families` WHERE `status` = ? AND'. - format(prefix=type( self ).db_table_prefix) + - '`ending` >= ?;', ( status, ending ) ) + cursor.execute( ( + 'SELECT * ' + + 'FROM `{prefix}_red_families` `F` ' + + 'INNER JOIN `{prefix}_red_pages` `P` ' + + 'ON `F`.`status` = ? ' + + 'AND `F`.`ending` >= ? 
' + 'AND `F`.`red_page_id` = `P`.`page_id`;').format( + prefix=type( self ).db_table_prefix), ( status, ending ) ) while True: res = cursor.fetchmany( 1000 ) diff --git a/lib/redfam.py b/lib/redfam.py index d4086ea..37162c4 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -117,8 +117,8 @@ class RedFam: """ # On archived red_fams do not delete possibly existing ending - if( not self._ending and self._status > 1 - and self._mysql.data[ 'ending' ] ): + if( not self._ending and self._status > 1 and + self._mysql.data[ 'ending' ] ): self._ending = self._mysql.data[ 'ending' ] @@ -447,6 +447,14 @@ class RedFamWorker( RedFam ): self._mysql.data = mysql_data + # Get related RedPage-Information + self.redpageid = mysql_data[ 'page_id' ] + self.redpagetitle = mysql_data[ 'page_title' ] + + # Make sure locale is set to 'de_DE.UTF-8' to prevent problems + # with wrong month abreviations in strptime + locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') + def update_status( self ): """ Sets status to 3 when worked on @@ -481,6 +489,7 @@ class RedFamWorker( RedFam ): print(fam) raise + class RedFamError( Exception ): """ Base class for all Errors of RedFam-Module From 3723aba5781d0647cb21bdc0c89e64736614b6fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Fri, 26 Aug 2016 19:17:16 +0200 Subject: [PATCH 117/192] Add a method to get link to related reddisc To generate notices or other stuff it is necessary to add links to the related reddisc. This method returns a wikilink to text the redfam's reddisc. 
Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=81 FS#81] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=81 FS#81] --- lib/redfam.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/lib/redfam.py b/lib/redfam.py index 37162c4..4da1ea4 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -462,6 +462,31 @@ class RedFamWorker( RedFam ): self._status = 3 + def get_disc_link( self ): + """ + Constructs and returns the link to Redundancy discussion + + @returns Link to diskussion + @rtype str + """ + + # We need to Replace Links with their linktext + anchor_code = mwparser.parse( self._mysql.data[ 'heading' ].strip() ) + for link in anchor_code.ifilter_wikilinks(): + if link.text: + text = link.text + else: + text = link.title + + anchor_code.replace( link, text ) + + # Whitespace is replaced with underscores + anchor_code.replace( " ", "_" ) + + # We try it with out any more parsing as mw will do while parsing page + return ( self.redpagetitle + "#" + + str(anchor_code).strip() ) + @classmethod def list_by_status( cls, status ): """ From 6717fa4fba2b7ec4e007e895f09ed088e04d1d67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sat, 27 Aug 2016 00:07:28 +0200 Subject: [PATCH 118/192] Add method to generate notice for article discpage We need a method to generate the template to add to article discpages Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=29 FS#29] --- lib/redfam.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/lib/redfam.py b/lib/redfam.py index 4da1ea4..b2277fc 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -487,6 +487,50 @@ class RedFamWorker( RedFam ): return ( self.redpagetitle + "#" + str(anchor_code).strip() ) + def generate_disc_notice_template( self ): + """ + Generates notice template to add on discussion Pages of Articles when + redundancy discussion is finished + + 
@return Notice template to add on article disc + @rtype wikicode-node + """ + + # Generate template boilerplate + template = mwparser.nodes.template.Template( + jogobot.config['redundances']['disc_notice_template_name']) + + # Index of first article's param + param_cnt = 3 + + # Iterate over articles in redfam + for article in self._articlesList: + # Make sure to only use 8 articles (max. param 10) + if param_cnt > 10: + break + + # Add param for article + template.add( param_cnt, article, True ) + + param_cnt += 1 + + # Add begin + template.add( "Beginn", self._mysql.data[ 'beginning' ].strftime( + "%d. %B %Y").lstrip("0"), True ) + + # Add end + template.add( "Ende", self._mysql.data[ 'ending' ].strftime( + "%d. %B %Y").lstrip("0"), True ) + + # Add link to related reddisc + template.add( "Diskussion", self.get_disc_link(), True ) + + # Add signature and timestamp + # Not used atm + # template.add( 1, "-- ~~~~", True ) + + return template + @classmethod def list_by_status( cls, status ): """ From e5989305a4a2fe44f4e17d7548fad5fbcdbf8421 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 13:20:13 +0200 Subject: [PATCH 119/192] Add a generator to redfam yielding article pages To work on articles of a redfam a generator which yields belonging articles is necessary Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=87 FS#87] --- lib/redfam.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/redfam.py b/lib/redfam.py index b2277fc..284b54e 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -57,6 +57,9 @@ class RedFam: @param heading str Original heading of RedFam (Link) """ + # Having pywikibot.Site() is a good idea most of the time + self.site = pywikibot.Site() + # Database interface self._mysql = MysqlRedFam( fam_hash ) @@ -455,6 +458,15 @@ class RedFamWorker( RedFam ): # with wrong month abreviations in strptime locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') + def 
article_generator(self): + """ + Yields pywikibot pageobjects for articles belonging to this redfams + in a generator + self. + """ + for article in self._articlesList: + yield pywikibot.Page(pywikibot.Link(article), self.site) + def update_status( self ): """ Sets status to 3 when worked on From c0b18f88e5ea4cb3654b7e92bf9adfcba16b4e1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 15:06:17 +0200 Subject: [PATCH 120/192] Add filter options to redfam.article_generator To give the posibility to filter not existing pages or redirect pages or vice versa. Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=87 FS#87] --- lib/redfam.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/lib/redfam.py b/lib/redfam.py index 284b54e..9889908 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -458,14 +458,41 @@ class RedFamWorker( RedFam ): # with wrong month abreviations in strptime locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') - def article_generator(self): + def article_generator(self, filter_existing=None, filter_redirects=None ): """ Yields pywikibot pageobjects for articles belonging to this redfams in a generator self. 
+ + @param filter_existing Set to True to only get existing pages + set to False to only get nonexisting pages + unset/None results in not filtering + @type filter_existing bool/None + @param filter_redirects Set to True to get only noredirectpages, + set to False to get only redirectpages, + unset/None results in not filtering + @type filter_redirects bool/None """ + # Iterate over articles in redfam for article in self._articlesList: - yield pywikibot.Page(pywikibot.Link(article), self.site) + page = pywikibot.Page(pywikibot.Link(article), self.site) + + # Filter non existing Pages if requested with filter_existing=True + if filter_existing and not page.exists(): + continue + # Filter existing pages if requested with filter_existing=False + elif filter_existing is False and page.exists(): + continue + + # Filter redirects if requested with filter_redirects=True + if filter_redirects and page.isRedirectPage(): + continue + # Filter noredirects if requested with filter_redirects=False + elif filter_redirects is False and not page.isRedirectPage(): + continue + + # Yield filtered pages + yield page def update_status( self ): """ From efa919ff2739ae8093a42bc160628bfdab07d7db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 16:39:32 +0200 Subject: [PATCH 121/192] Add new bot with basic structure We need a bot to work on pages which are subjects of redfams and on the belonging talk page Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 bots/markpages.py diff --git a/bots/markpages.py b/bots/markpages.py new file mode 100644 index 0000000..6bdcb5a --- /dev/null +++ b/bots/markpages.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# markpages.py +# +# Copyright 2016 GOLDERWEB – Jonathan Golder +# +# This program is free software; you can 
redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. +# +# +""" +Bot to mark pages which were/are subjects of redundance discussions +with templates +""" + +from pywikibot.bot import CurrentPageBot + + +class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() + """ + Bot class to mark pages which were/are subjects of redundance discussions + with templates + """ + + def __init__( self, genFactory, **kwargs ): + """ + Constructor + + Parameters: + @param genFactory GenFactory with parsed pagegenerator args to + build generator + @type genFactory pagegenerators.GeneratorFactory + @param **kwargs Additional args + @type iterable + """ + + # Copy needed args + self.genFactory = genFactory + + # Build generator with genFactory + self.build_generator() + + # Run super class init with builded generator + super( MarkPagesBot, self ).__init__(generator=self.gen) + + def build_generator( self ): + """ + Builds generator + """ + self.gen = self.genFactory.getCombinedGenerator() + + def treat_page( self ): + """ + Handles work on current page + """ + + # Here is the place where to do what ever you want + print( self.current_page.title() ) From ecc78bef96494a05945a8ed87b7f153d09639163 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 18:01:02 +0200 Subject: [PATCH 122/192] Import needed 
modules and add redfams-generator We will need a couple of modules to build the needed generator Also we will need a generator with redfams to work with Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 6bdcb5a..ca9d586 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -26,8 +26,15 @@ Bot to mark pages which were/are subjects of redundance discussions with templates """ +from datetime import datetime + +from pywikibot import pagegenerators from pywikibot.bot import CurrentPageBot +import jogobot + +from lib.redfam import RedFamWorker + class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() """ @@ -47,8 +54,12 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() @type iterable """ - # Copy needed args - self.genFactory = genFactory + # Init attribute + self.__redfams = None # Will hold a generator with our redfams + + # We do not use predefined genFactory as there is no sensefull case to + # give a generator via cmd-line for this right now + self.genFactory = pagegenerators.GeneratorFactory() # Build generator with genFactory self.build_generator() @@ -56,6 +67,21 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Run super class init with builded generator super( MarkPagesBot, self ).__init__(generator=self.gen) + @property + def redfams(self): + """ + Holds redfams generator to work on in this bot + """ + # Create generator if not present + if not self.__redfams: + end_after = datetime.strptime( + jogobot.config["red.markpages"]["mark_done_after"], + "%Y-%m-%d" ) + self.__redfams = RedFamWorker.gen_by_status_and_ending( + 2, end_after) + + return self.__redfams + def build_generator( self ): """ Builds generator From da4f9b5d6bc3eaad44ee5373bf45b8fb7e410703 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 18:09:04 +0200 Subject: [PATCH 123/192] Add wrapper-generator to redfam.article_generator We need a wrapper around redfam.article_generator to pass it to pagegenerators.PageWithTalkPageGenerator and to add a reference to related redfam to each pywikibot.page-object before yielding it Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/bots/markpages.py b/bots/markpages.py index ca9d586..d20951f 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -88,6 +88,26 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() """ self.gen = self.genFactory.getCombinedGenerator() + def redfam_talkpages_generator( self ): + """ + Wrappers the redfam.article_generator and + passes it to pagegenerators.PageWithTalkPageGenerator(). + Then it iterates over the generator and adds a reference to the + related redfam to each talkpage-object. 
+ """ + + for redfam in self.redfams: + + # We need the talkpage (and only this) of each existing page + for talkpage in pagegenerators.PageWithTalkPageGenerator( + redfam.article_generator( filter_existing=True ), + return_talk_only=True ): + + # Add reference to redfam to talkpages + talkpage.redfam = redfam + + yield talkpage + def treat_page( self ): """ Handles work on current page From c4d8a95672e83d0b1b46551505a5238196eefef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 18:13:27 +0200 Subject: [PATCH 124/192] Implement build_generator-method Build_generator will add the redfam_talkpages_generator to the genFactory, build a generator of the genFactory and sets self.gen which is used as generator for run() Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index d20951f..b08776c 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -84,9 +84,14 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() def build_generator( self ): """ - Builds generator + Builds generator to pass to super class """ - self.gen = self.genFactory.getCombinedGenerator() + # Add Talkpages to work on to generatorFactory + self.genFactory.gens.append( self.redfam_talkpages_generator() ) + + # Set generator to pass to super class + self.gen = pagegenerators.PreloadingGenerator( + self.genFactory.getCombinedGenerator() ) def redfam_talkpages_generator( self ): """ From 9beca7f6c905a6ea87f632d00c5b2734570f854a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 20:53:31 +0200 Subject: [PATCH 125/192] Implement method to add notice to disk page Adds the generated notice to the talkpage and starts the saving of the page Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 
FS#88] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 53 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index b08776c..7fae7c8 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -31,6 +31,8 @@ from datetime import datetime from pywikibot import pagegenerators from pywikibot.bot import CurrentPageBot +import mwparserfromhell as mwparser + import jogobot from lib.redfam import RedFamWorker @@ -116,7 +118,54 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() def treat_page( self ): """ Handles work on current page + + We get a reference to related redfam in current_page.redfam """ - # Here is the place where to do what ever you want - print( self.current_page.title() ) + # First we need to have the current text of page + # and parse it as wikicode + self.current_wikicode = mwparser.parse( self.current_page.text ) + + # Add notice + self.add_disc_notice_template() + + # Convert wikicode back to string to save + self.new_text = str( self.current_wikicode ) + + # Save + self.put_current( self.new_text ) + + def add_disc_notice_template( self ): + """ + Will take self.current_wikicode and adds disc notice template after the + last template in leading section or as first element if there is no + other template in leading section + """ + # The notice to add + notice = self.current_page.redfam.generate_disc_notice_template() + + # Find the right place to insert notice template + # Therfore we need the first section (if there is one) + leadsec = self.current_wikicode.get_sections( + flat=False, include_lead=True )[0] + + # There is none on empty pages, so we need to check + if leadsec: + # Get the last template in leadsec + ltemplate = leadsec.filter_templates()[-1] + + # If there is one, add notice after this + if ltemplate: + self.current_wikicode.insert_after(ltemplate, notice ) + + # 
To have it in its own line we need to add a linbreak before + self.current_wikicode.insert_before(notice, "\n" ) + + # If there is no template, add before first element on page + else: + self.current_wikicode.insert( 0, notice ) + + # If there is no leadsec (and therefore no template in it, we will add + # before the first element + else: + self.current_wikicode.insert( 0, notice ) From 2b93e4cf16c176ce13da3a78cd9afd873c14b79b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 28 Aug 2016 21:39:54 +0200 Subject: [PATCH 126/192] Check if notice is present before add To prevent duplications we need to check wether notice is already present on talkpage Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 7fae7c8..9be668a 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -142,7 +142,12 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() other template in leading section """ # The notice to add - notice = self.current_page.redfam.generate_disc_notice_template() + self.disc_notice = \ + self.current_page.redfam.generate_disc_notice_template() + + # Check if it is already present in wikicode + if self.disc_notice_present(): + return False # Find the right place to insert notice template # Therfore we need the first section (if there is one) @@ -156,16 +161,43 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # If there is one, add notice after this if ltemplate: - self.current_wikicode.insert_after(ltemplate, notice ) + self.current_wikicode.insert_after(ltemplate, self.disc_notice) # To have it in its own line we need to add a linbreak before - self.current_wikicode.insert_before(notice, "\n" ) + self.current_wikicode.insert_before(self.disc_notice, "\n" ) # If there 
is no template, add before first element on page else: - self.current_wikicode.insert( 0, notice ) + self.current_wikicode.insert( 0, self.disc_notice ) # If there is no leadsec (and therefore no template in it, we will add # before the first element else: - self.current_wikicode.insert( 0, notice ) + self.current_wikicode.insert( 0, self.disc_notice ) + + # Notice was added + return True + + def disc_notice_present(self): + """ + Checks if disc notice which shall be added is already present. + """ + # Iterate over Templates with same name (if any) to search equal + # Link to decide if they are the same + for present_notice in self.current_wikicode.ifilter_templates( + matches=self.disc_notice.name ): + + # Get reddisc page.title of notice to add + add_notice_link_tile = self.disc_notice.get( + "Diskussion").partition("#")[0] + # Get reddisc page.title of possible present notice + present_notice_link_tile = present_notice.get( + "Diskussion").partition("#")[0] + + # If those are equal, notice is already present + if add_notice_link_tile == present_notice_link_tile: + return True + + # If nothing is found, loop will run till its end + else: + return False From 59d4d23c83a9863ca0cb798c59e21002ff51e3f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 11:33:54 +0200 Subject: [PATCH 127/192] Set edit summary for each edit Each edit of bot needs a edit summary Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=92 FS#92] --- bots/markpages.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bots/markpages.py b/bots/markpages.py index 9be668a..754e1bf 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -132,8 +132,12 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Convert wikicode back to string to save self.new_text = str( self.current_wikicode ) + # Define edit summary + summary = 
jogobot.config["red.markpages"]["mark_done_summary"].format( + reddisc=self.current_page.redfam.get_disc_link() ) + # Save - self.put_current( self.new_text ) + self.put_current( self.new_text, summary=summary ) def add_disc_notice_template( self ): """ From 20b811bc2a67bd40b9bca6aa59dc948d0ff2b9e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 11:48:07 +0200 Subject: [PATCH 128/192] Make sure edit summary starts with bot Due to bot policy all edit summaries of bot edits have to start with "Bot:" Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=92 FS#92] --- bots/markpages.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bots/markpages.py b/bots/markpages.py index 754e1bf..e47f4d7 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -134,7 +134,11 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Define edit summary summary = jogobot.config["red.markpages"]["mark_done_summary"].format( - reddisc=self.current_page.redfam.get_disc_link() ) + reddisc=self.current_page.redfam.get_disc_link() ).strip() + + # Make sure summary starts with "Bot:" + if not summary[:len("Bot:")] == "Bot:": + summary = "Bot: " + summary.strip() # Save self.put_current( self.new_text, summary=summary ) From 8c56125a7b56617369d11cecb0c69359d323da7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 12:07:11 +0200 Subject: [PATCH 129/192] Update talkpage notice template Exact date is not necessary and end could be ommited if of the same month Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=93 FS#93] --- lib/redfam.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/redfam.py b/lib/redfam.py index 9889908..f0b36fd 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -554,12 +554,13 @@ class RedFamWorker( RedFam ): param_cnt += 1 # Add begin - 
template.add( "Beginn", self._mysql.data[ 'beginning' ].strftime( - "%d. %B %Y").lstrip("0"), True ) + begin = self._mysql.data[ 'beginning' ].strftime( "%B %Y" ) + template.add( "Beginn", begin, True ) - # Add end - template.add( "Ende", self._mysql.data[ 'ending' ].strftime( - "%d. %B %Y").lstrip("0"), True ) + # Add end (if not same as begin) + end = self._mysql.data[ 'ending' ].strftime( "%B %Y" ) + if not end == begin: + template.add( "Ende", end, True ) # Add link to related reddisc template.add( "Diskussion", self.get_disc_link(), True ) From 6149dcdb8b097fb9dccef7ffc930e10641cd8548 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 14:28:28 +0200 Subject: [PATCH 130/192] Apply changes to data structure See related ticket Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=94 FS#94] --- bots/markpages.py | 2 +- lib/mysqlred.py | 98 ++++++++++++++--------------- lib/redfam.py | 156 ++++++++++++++++++++++++++++++---------------- lib/redpage.py | 77 ++++++++++++++++++++--- 4 files changed, 222 insertions(+), 111 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index e47f4d7..244ba14 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -80,7 +80,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() jogobot.config["red.markpages"]["mark_done_after"], "%Y-%m-%d" ) self.__redfams = RedFamWorker.gen_by_status_and_ending( - 2, end_after) + "archived", end_after) return self.__redfams diff --git a/lib/mysqlred.py b/lib/mysqlred.py index f57ae2b..79360a8 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -156,21 +156,21 @@ class MysqlRedPage( MysqlRed ): # Class variables for storing cached querys # '{prefix}' will be replaced during super().__init__() _cached_update_data = [] - _update_query = 'UPDATE `{prefix}_red_pages` \ -SET `page_title` = ?, `rev_id` = ?, `status`= ? 
WHERE `page_id` = ?;' + _update_query = 'UPDATE `{prefix}_redpages` \ +SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;' _cached_insert_data = {} - _insert_query = 'INSERT INTO `{prefix}_red_pages` \ -( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' + _insert_query = 'INSERT INTO `{prefix}_redpages` \ +( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? );' - def __init__( self, page_id ): + def __init__( self, pageid ): """ Creates a new instance, runs __init__ of parent class """ super().__init__( ) - self.__page_id = int( page_id ) + self.__pageid = int( pageid ) self.data = self.get_page() @@ -185,7 +185,7 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' """ Retrieves a red page row from MySQL-Database for given page_id - @param int page_id MediaWiki page_id for page to retrieve + @param int pageid MediaWiki page_id for page to retrieve @returns tuple Tuple with data for given page_id bool FALSE if none found @@ -194,8 +194,8 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' cursor = type( self ).connection.cursor(mysqldb.DictCursor) cursor.execute( - 'SELECT * FROM `{prefix}_red_pages` WHERE `page_id` = ?;'.format( - prefix=type(self).db_table_prefix), ( self.__page_id, ) ) + 'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format( + prefix=type(self).db_table_prefix), ( self.__pageid, ) ) res = cursor.fetchone() @@ -204,40 +204,40 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? 
WHERE `page_id` = ?;' else: return False - def add_page( self, page_title, rev_id, status=0 ): + def add_page( self, pagetitle, revid, status=0 ): """ - Inserts a red page row in MySQL-Database for given page_id + Inserts a red page row in MySQL-Database for given pageid - @param int rev_id MediaWiki current rev_id - @param str page_title MediaWiki new page_title + @param int revid MediaWiki current revid + @param str pagetitle MediaWiki new pagetitle @param int status Page parsing status """ - insert_data = { self.__page_id: ( self.__page_id, page_title, - rev_id, status ) } + insert_data = { self.__pageid: ( self.__pageid, pagetitle, + revid, status ) } type( self )._cached_insert_data.update( insert_data ) # Manualy construct self.data dict - self.data = { 'page_id': self.__page_id, 'rev_id': rev_id, - 'page_title': page_title, 'status': status } + self.data = { 'pageid': self.__pageid, 'revid': revid, + 'pagetitle': pagetitle, 'status': status } - def update_page( self, rev_id=None, page_title=None, status=0 ): + def update_page( self, revid=None, pagetitle=None, status=0 ): """ Updates the red page row in MySQL-Database for given page_id - @param int rev_id MediaWiki current rev_id - @param str page_title MediaWiki new page_title + @param int revid MediaWiki current rev_id + @param str pagetitle MediaWiki new page_title @param int status Page parsing status """ - if not page_title: - page_title = self.data[ 'page_title' ] - if not rev_id: - rev_id = self.data[ 'rev_id' ] + if not pagetitle: + pagetitle = self.data[ 'pagetitle' ] + if not revid: + revid = self.data[ 'revid' ] - type( self )._cached_update_data.append( ( page_title, rev_id, - status, self.__page_id ) ) + type( self )._cached_update_data.append( ( pagetitle, revid, + status, self.__pageid ) ) class MysqlRedFam( MysqlRed ): @@ -247,22 +247,22 @@ class MysqlRedFam( MysqlRed ): # Class variables for storing cached querys _cached_update_data = [] - _update_query = 'UPDATE `{prefix}_red_families` \ 
-SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ -`status`= ? WHERE `fam_hash` = ?;' + _update_query = 'UPDATE `{prefix}_redfams` \ +SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ +`status`= ? WHERE `famhash` = ?;' _cached_insert_data = {} - _insert_query = 'INSERT INTO `{prefix}_red_families` \ -( fam_hash, red_page_id, beginning, ending, status, heading, \ + _insert_query = 'INSERT INTO `{prefix}_redfams` \ +( famhash, redpageid, beginning, ending, status, heading, \ article0, article1, article2, article3, article4, article5, article6, \ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' - def __init__( self, fam_hash=None ): + def __init__( self, famhash=None ): """ Creates a new instance, runs __init__ of parent class """ - self.__fam_hash = fam_hash + self.__famhash = famhash super().__init__( ) @@ -273,27 +273,27 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' """ pass - def get_fam( self, fam_hash ): + def get_fam( self, famhash ): """ Retrieves a red family row from MySQL-Database for given fam_hash @returns dict Dictionairy with data for given fam hash False if none found """ - self.__fam_hash = fam_hash + self.__famhash = famhash cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor.execute( - 'SELECT * FROM `{prefix}_red_families` WHERE `fam_hash` = ?;'. - format( prefix=type(self).db_table_prefix), ( fam_hash, ) ) + 'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'. + format( prefix=type(self).db_table_prefix), ( famhash, ) ) self.data = cursor.fetchone() - def add_fam( self, articlesList, heading, red_page_id, + def add_fam( self, articlesList, heading, redpageid, beginning, ending=None, status=0 ): - data = [ self.__fam_hash, red_page_id, beginning, ending, + data = [ self.__famhash, redpageid, beginning, ending, status, heading ] for article in articlesList: @@ -304,29 +304,29 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' data = tuple( data ) - insert_data = { self.__fam_hash: data } + insert_data = { self.__famhash: data } type( self )._cached_insert_data.update( insert_data ) # Manualy construct self.data dict - data_keys = ( 'fam_hash', 'red_page_id', 'beginning', 'ending', + data_keys = ( 'fam_hash', 'redpageid', 'beginning', 'ending', 'status', 'heading', 'article0', 'article1', 'article2', 'article3', 'article4', 'article5', 'article6', 'article7' ) self.data = dict( zip( data_keys, data ) ) - def update_fam( self, red_page_id, heading, beginning, ending, status ): + def update_fam( self, redpageid, heading, beginning, ending, status ): """ Updates the red fam row in MySQL-Database for given fam_hash - @param int red_page_id MediaWiki page_id + @param int redpageid MediaWiki page_id @param datetime beginning Timestamp of beginning qparam datetime ending Timestamp of ending of @param int status red_fam status """ - type( self )._cached_update_data.append( ( red_page_id, heading, + type( self )._cached_update_data.append( ( redpageid, heading, beginning, ending, status, - self.__fam_hash ) ) + self.__famhash ) ) def get_by_status( self, status ): """ @@ -336,7 +336,7 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor.execute( - 'SELECT * FROM `{prefix}_red_families` WHERE `status` = ?;'.format( + 'SELECT * FROM `{prefix}_redfams` WHERE `status` = ?;'.format( prefix=type( self ).db_table_prefix), ( status, ) ) while True: @@ -355,11 +355,11 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' cursor.execute( ( 'SELECT * ' + - 'FROM `{prefix}_red_families` `F` ' + - 'INNER JOIN `{prefix}_red_pages` `P` ' + + 'FROM `{prefix}_redfams` `F` ' + + 'INNER JOIN `{prefix}_redpages` `P` ' + 'ON `F`.`status` = ? ' + 'AND `F`.`ending` >= ? 
' - 'AND `F`.`red_page_id` = `P`.`page_id`;').format( + 'AND `F`.`redpageid` = `P`.`pageid`;').format( prefix=type( self ).db_table_prefix), ( status, ending ) ) while True: diff --git a/lib/redfam.py b/lib/redfam.py index f0b36fd..798d501 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -43,8 +43,8 @@ class RedFam: Basic class for RedFams, containing the basic data structure """ - def __init__( self, articlesList, beginning, ending=None, red_page_id=None, - status=0, fam_hash=None, heading=None ): + def __init__( self, articlesList, beginning, ending=None, redpageid=None, + status=None, famhash=None, heading=None ): """ Generates a new RedFam object @@ -52,7 +52,7 @@ class RedFam: @param beginning datetime Beginning date @param ending datetime Ending date @param red_page_id int MW pageid of containing RedPage - @param status int Status of RedFam + @param status str Status of RedFam @param fam_hash str SHA1 hash of articlesList @param heading str Original heading of RedFam (Link) """ @@ -61,20 +61,20 @@ class RedFam: self.site = pywikibot.Site() # Database interface - self._mysql = MysqlRedFam( fam_hash ) + self._mysql = MysqlRedFam( famhash ) # Initial attribute values self._articlesList = articlesList self._beginning = beginning self._ending = ending - self._red_page_id = red_page_id - self._status = status - self._fam_hash = fam_hash + self._redpageid = redpageid + self._status = self._parse_status(status) + self._famhash = famhash self._heading = heading # Calculates the sha1 hash over self._articlesList to # rediscover known redundance families - self.calc_fam_hash() + self.calc_famhash() def __repr__( self ): """ @@ -88,14 +88,14 @@ class RedFam: ", heading=" + repr( self._heading ) + \ ", beginning=" + repr( self._beginning ) + \ ", ending=" + repr( self._ending ) + \ - ", red_page_id=" + repr( self._red_page_id ) + \ + ", red_page_id=" + repr( self._redpageid ) + \ ", status=" + repr( self._status ) + \ - ", fam_hash=" + repr( self._fam_hash ) + \ + ", 
fam_hash=" + repr( self._famhash ) + \ " )" return __repr - def calc_fam_hash( self ): + def calc_famhash( self ): """ Calculates the SHA-1 hash for the articlesList of redundance family. Since we don't need security SHA-1 is just fine. @@ -106,35 +106,35 @@ class RedFam: h = hashlib.sha1() h.update( str( self._articlesList[:8] ).encode('utf-8') ) - if self._fam_hash and h.hexdigest() != self._fam_hash: - raise RedFamHashError( self._fam_hash, h.hexdigest() ) + if self._famhash and h.hexdigest() != self._famhash: + raise RedFamHashError( self._famhash, h.hexdigest() ) - elif self._fam_hash: + elif self._famhash: return else: - self._fam_hash = h.hexdigest() + self._famhash = h.hexdigest() def changed( self ): """ Checks wether anything has changed and maybe triggers db update """ - # On archived red_fams do not delete possibly existing ending - if( not self._ending and self._status > 1 and + # On archived redfams do not delete possibly existing ending + if( not self._ending and "archived" in self._status and self._mysql.data[ 'ending' ] ): self._ending = self._mysql.data[ 'ending' ] # Since status change means something has changed, update database - if( self._status != self._mysql.data[ 'status' ] or + if( self._raw_status != self._mysql.data[ 'status' ] or self._beginning != self._mysql.data[ 'beginning' ] or self._ending != self._mysql.data[ 'ending' ] or - self._red_page_id != self._mysql.data[ 'red_page_id' ] or + self._red_page_id != self._mysql.data[ 'redpageid' ] or self._heading != self._mysql.data[ 'heading' ]): - self._mysql.update_fam( self._red_page_id, self._heading, + self._mysql.update_fam( self._redpageid, self._heading, self._beginning, self._ending, - self._status ) + self._raw_status() ) @classmethod def flush_db_cache( cls ): @@ -143,6 +143,61 @@ class RedFam: """ MysqlRedFam.flush() + def add_status(self, status): + """ + Adds a status specified by status, to status set + + @param status Statusstring to add + @type status str + """ + 
self._status.add(status) + + def remove_status(self, status, weak=True): + """ + Removes a status, specified by status from set. If weak is set to + False it will throw a KeyError when trying to remove a status not set. + + @param status Statusstring to add + @type status str + @param weak Change behavior on missing status + @type bool + """ + if weak: + self._status.discard(status) + else: + self._status.remove(status) + + def has_status(self, status): + """ + Returns True, if redfam has given status + + @param status Statusstring to check + @type status str + @returns True if status is present else False + """ + if status in self._status: + return True + else: + return False + + def _parse_status(self, raw_status ): + """ + Sets status based on comma separated list + + @param raw_status Commaseparated string of stati (from DB) + @type raw_status str + """ + self._status = set( raw_status.strip().split(",")) + + def _raw_status( self ): + """ + Returns status as commaseparated string (to save in DB) + + @returns Raw status string + @rtype str + """ + return ",".join( self._status ) + class RedFamParser( RedFam ): """ @@ -165,15 +220,15 @@ class RedFamParser( RedFam ): wurde gewünscht von:" __done_notice2 = "{{Erledigt|" - def __init__( self, heading, red_page, red_page_archive, + def __init__( self, heading, redpage, redpagearchive, beginning, ending=None ): """ Creates a RedFam object based on data collected while parsing red_pages combined with possibly former known data from db - @param red_fam_heading str Wikitext heading of section - @param red_page page Pywikibot.page object - @param red_page_archive bool Is red_page an archive + @param redfam_heading str Wikitext heading of section + @param redpage page Pywikibot.page object + @param redpagearchive bool Is red_page an archive @param beginning datetime Timestamp of beginning str as strptime parseable string @param ending datetime Timestamp of ending @@ -181,9 +236,9 @@ class RedFamParser( RedFam ): """ # Set 
object attributes: - self._red_page_id = red_page._pageid - self._red_page_archive = red_page_archive - self._fam_hash = None + self._redpageid = redpage._pageid + self._redpagearchive = redpagearchive + self._famhash = None # Method self.add_beginning sets self._beginning directly self.add_beginning( beginning ) @@ -195,7 +250,7 @@ class RedFamParser( RedFam ): # If no ending was provided set to None self._ending = None - self._status = None + self._status = set() # Parse the provided heading of redundance section # to set self._articlesList @@ -204,7 +259,7 @@ class RedFamParser( RedFam ): # Calculates the sha1 hash over self._articlesList to # rediscover known redundance families - self.calc_fam_hash() + self.calc_famhash() # Open database connection, ask for data if existing, # otherwise create entry @@ -223,11 +278,11 @@ class RedFamParser( RedFam ): # We need a connection to our mysqldb self._mysql = MysqlRedFam( ) - self._mysql.get_fam( self._fam_hash ) + self._mysql.get_fam( self._famhash ) if not self._mysql.data: self._mysql.add_fam( self._articlesList, self._heading, - self._red_page_id, self._beginning, + self._redpageid, self._beginning, self._ending ) def heading_parser( self, heading ): @@ -253,7 +308,7 @@ class RedFamParser( RedFam ): # Catch sections with more then 8 articles, print error if len( self._articlesList ) > 8: # For repression in output we need to know the fam hash - self.calc_fam_hash() + self.calc_famhash() jogobot.output( ( "\03{{lightred}}" + @@ -317,21 +372,18 @@ class RedFamParser( RedFam ): - 3 and greater status was set by worker script, do not change it """ - # Do not change stati set by worker script etc. 
- if not self._mysql.data['status'] > 2: - - # No ending, discussion is running: - # Sometimes archived discussions also have no detectable ending - if not self._ending and not self._red_page_archive: - self._status = 0 - else: - if not self._red_page_archive: - self._status = 1 - else: - self._status = 2 + # No ending, discussion is running: + # Sometimes archived discussions also have no detectable ending + if not self._ending and not self._redpagearchive: + self.add_status("open") else: - - self._status = self._mysql.data[ 'status' ] + self.remove_status("open") + if not self._redpagearchive: + self.add_status("done") + else: + self.remove_status("done") + self.remove_status("open") + self.add_status("archived") @classmethod def is_section_redfam_cb( cls, heading ): @@ -444,15 +496,15 @@ class RedFamWorker( RedFam ): articlesList.append( mysql_data[ key ] ) super().__init__( articlesList, mysql_data[ 'beginning' ], - mysql_data[ 'ending' ], mysql_data[ 'red_page_id' ], - mysql_data[ 'status' ], mysql_data[ 'fam_hash' ], + mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], + mysql_data[ 'status' ], mysql_data[ 'famhash' ], mysql_data[ 'heading' ] ) self._mysql.data = mysql_data # Get related RedPage-Information - self.redpageid = mysql_data[ 'page_id' ] - self.redpagetitle = mysql_data[ 'page_title' ] + self.redpageid = mysql_data[ 'pageid' ] + self.redpagetitle = mysql_data[ 'pagetitle' ] # Make sure locale is set to 'de_DE.UTF-8' to prevent problems # with wrong month abreviations in strptime @@ -499,7 +551,7 @@ class RedFamWorker( RedFam ): Sets status to 3 when worked on """ - self._status = 3 + pass def get_disc_link( self ): """ diff --git a/lib/redpage.py b/lib/redpage.py index ebedaba..b4361b9 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -49,6 +49,8 @@ class RedPage: @type pageid int """ + self._status = set() + # Safe the pywikibot page object self.page = page self.pageid = pageid @@ -71,7 +73,7 @@ class RedPage: elif self.pageid: self.__mysql = 
MysqlRedPage( self.pageid ) self.page = pywikibot.Page( pywikibot.Site(), - self.__mysql.data['page_title'] ) + self.__mysql.data['pagetitle'] ) self.page.exists() else: raise ValueError( "Page NOR pagid provided!" ) @@ -84,9 +86,9 @@ class RedPage: Check wether the page was changed since last run """ - if( self.__mysql.data != { 'page_id': self.page._pageid, - 'rev_id': self.page._revid, - 'page_title': self.page.title(), + if( self.__mysql.data != { 'pageid': self.page._pageid, + 'revid': self.page._revid, + 'pagetitle': self.page.title(), 'status': self.__mysql.data[ 'status' ] } ): self._changed = True else: @@ -110,7 +112,7 @@ class RedPage: Decides wether current RedPage needs to be parsed or not """ - if( self._changed or self.__mysql.data[ 'status' ] == 0 ): + if( self._changed or self.__mysql.data[ 'status' ] == "" ): return True else: return False @@ -146,14 +148,16 @@ class RedPage: Updates the page meta data in mysql db """ if( self._parsed or not self._changed ): - status = 1 + self.add_status( "open" ) if( self.is_archive() ): - status = 2 + self.remove_status( "open" ) + self.add_status( "archived" ) else: - status = 0 + self._status = set() - self.__mysql.update_page( self.page._revid, self.page.title(), status ) + self.__mysql.update_page( self.page._revid, self.page.title(), + self._raw_status() ) @classmethod def flush_db_cache( cls ): @@ -161,3 +165,58 @@ class RedPage: Calls flush method of Mysql Interface class """ MysqlRedPage.flush() + + def add_status(self, status): + """ + Adds a status specified by status, to status set + + @param status Statusstring to add + @type status str + """ + self._status.add(status) + + def remove_status(self, status, weak=True): + """ + Removes a status, specified by status from set. If weak is set to + False it will throw a KeyError when trying to remove a status not set. 
+ + @param status Statusstring to add + @type status str + @param weak Change behavior on missing status + @type bool + """ + if weak: + self._status.discard(status) + else: + self._status.remove(status) + + def has_status(self, status): + """ + Returns True, if redfam has given status + + @param status Statusstring to check + @type status str + @returns True if status is present else False + """ + if status in self._status: + return True + else: + return False + + def _parse_status(self, raw_status ): + """ + Sets status based on comma separated list + + @param raw_status Commaseparated string of stati (from DB) + @type raw_status str + """ + self._status = set( raw_status.strip().split(",")) + + def _raw_status( self ): + """ + Returns status as commaseparated string (to save in DB) + + @returns Raw status string + @rtype str + """ + return ",".join( self._status ) From e13320820ce635c1844dd9a8cdc66f5fd2db4311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 17:45:18 +0200 Subject: [PATCH 131/192] Add API to manage status per article To be able to track changes to articles to update redfam status Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=89 FS#89] --- lib/mysqlred.py | 9 ++-- lib/redfam.py | 125 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 129 insertions(+), 5 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 79360a8..0bb843c 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -336,8 +336,8 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' cursor = type( self ).connection.cursor( mysqldb.DictCursor ) cursor.execute( - 'SELECT * FROM `{prefix}_redfams` WHERE `status` = ?;'.format( - prefix=type( self ).db_table_prefix), ( status, ) ) + 'SELECT * FROM `{prefix}_redfams` WHERE `status` = LIKE %?%;'. 
+ format( prefix=type( self ).db_table_prefix), ( status, ) ) while True: res = cursor.fetchmany( 1000 ) @@ -358,9 +358,10 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' 'FROM `{prefix}_redfams` `F` ' + 'INNER JOIN `{prefix}_redpages` `P` ' + 'ON `F`.`status` = ? ' + - 'AND `F`.`ending` >= ? ' + 'AND `F`.`ending` >= ? ' + 'AND `F`.`redpageid` = `P`.`pageid`;').format( - prefix=type( self ).db_table_prefix), ( status, ending ) ) + prefix=type( self ).db_table_prefix), + ( status, ending ) ) while True: res = cursor.fetchmany( 1000 ) diff --git a/lib/redfam.py b/lib/redfam.py index 798d501..d5312ca 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -198,6 +198,116 @@ class RedFam: """ return ",".join( self._status ) + def article_add_status(self, status, index=None, title=None ): + """ + Adds a status specified by status, to article (identified by title + or index in articlesList) status set + + @param status Statusstring to add + @type status str + @param index Add to article with index in articlesList + @type index int + @param title Add to article with title in articlesList + @type title str + """ + if title and not index: + index = self._articlesList.index( title ) + + if isinstance( index, int ) and index < len(self._articlesList): + self._article_status[index].add(status) + else: + raise IndexError( "No index given or wrong format!") + + def article_remove_status(self, status, index=None, title=None, weak=True): + """ + Removes a status specified by status, from article (identified by title + or index in articlesList) status set + If weak is set to False it will throw a KeyError when trying to + remove a status not set. 
+ + @param status Statusstring to add + @type status str + @param index Remove from article with index in articlesList + @type index int + @param title Remove from article with title in articlesList + @type title str + @param weak Change behavior on missing status + @type bool + """ + if title and not index: + index = self._articlesList.index( title ) + + if isinstance( index, int ) and index < len(self._articlesList): + if weak: + self._article_status[index].discard(status) + else: + self._article_status[index].remove(status) + else: + raise IndexError( "No index given or wrong format!") + + def article_has_status(self, status, index=None, title=None ): + """ + Adds a status specified by status, to articles (identified by title + or index in articlesList) status set + + @param status Statusstring to add + @type status str + @param index Check article with index in articlesList + @type index int + @param title Check article with title in articlesList + @type title str + """ + if title and not index: + index = self._articlesList.index( title ) + + if isinstance( index, int ) and index < len(self._articlesList): + if status in self._article_status[index]: + return True + else: + return False + else: + raise IndexError( "No index given or wrong format!") + + def _article_parse_status(self, raw_status, index=None, title=None ): + """ + Sets status based on comma separated list to articles (identified by + title or index in articlesList) status set + + @param status Statusstring to set + @type status str + @param index Add to article with index in articlesList + @type index int + @param title Add to article with title in articlesList + @type title str + """ + if title and not index: + index = self._articlesList.index( title ) + + if isinstance( index, int ) and index < len(self._articlesList): + self._article_status[index] = set( raw_status.strip().split(",")) + else: + raise IndexError( "No index given or wrong format!") + + def _article_raw_status( self, index=None, 
title=None ): + """ + Returns status as commaseparated string (to save in DB) of article + (identified by title or index in articlesList) status set + + @param index Get from article with index in articlesList + @type index int + @param title Get from article with title in articlesList + @type title str + @returns Raw status string + @rtype str + """ + if title and not index: + index = self._articlesList.index( title ) + + if isinstance( index, int ) and index < len(self._articlesList): + return ",".join( self._article_status[index] ) + else: + raise IndexError( "No index given or wrong format!") + class RedFamParser( RedFam ): """ @@ -491,10 +601,14 @@ class RedFamWorker( RedFam ): def __init__( self, mysql_data ): articlesList = [] + for key in sorted( mysql_data.keys() ): - if 'article' in key and mysql_data[ key ]: + if 'article' in key and 'status' not in key and mysql_data[ key ]: articlesList.append( mysql_data[ key ] ) + # Preset article status list with empty sets for existing articles + self._article_status = [set() for x in range(0, len(articlesList))] + super().__init__( articlesList, mysql_data[ 'beginning' ], mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], mysql_data[ 'status' ], mysql_data[ 'famhash' ], @@ -502,6 +616,15 @@ class RedFamWorker( RedFam ): self._mysql.data = mysql_data + # Set up article status + index = 0 + for article in self._articlesList: + raw_status = mysql_data[ "article" + str(index) + "_status" ] + if not raw_status: + raw_status = str() + self._article_parse_status( raw_status, index ) + index += 1 + # Get related RedPage-Information self.redpageid = mysql_data[ 'pageid' ] self.redpagetitle = mysql_data[ 'pagetitle' ] From 870ed4bf25cbf6688657f3cb8bd6f70b0ab96afe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 17:47:02 +0200 Subject: [PATCH 132/192] Update redfam.article_generator use article status To be able to filter articles by status of that article Related 
Task: [https://fs.golderweb.de/index.php?do=details&task_id=89 FS#89] --- bots/markpages.py | 4 +++- lib/redfam.py | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 244ba14..aa9597c 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -107,7 +107,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # We need the talkpage (and only this) of each existing page for talkpage in pagegenerators.PageWithTalkPageGenerator( - redfam.article_generator( filter_existing=True ), + redfam.article_generator( + filter_existing=True, + exclude_article_status=["marked"] ), return_talk_only=True ): # Add reference to redfam to talkpages diff --git a/lib/redfam.py b/lib/redfam.py index d5312ca..d82ffbb 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -633,7 +633,9 @@ class RedFamWorker( RedFam ): # with wrong month abreviations in strptime locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') - def article_generator(self, filter_existing=None, filter_redirects=None ): + def article_generator(self, filter_existing=None, filter_redirects=None, + exclude_article_status=[], + onlyinclude_article_status=[] ): """ Yields pywikibot pageobjects for articles belonging to this redfams in a generator @@ -647,11 +649,22 @@ class RedFamWorker( RedFam ): set to False to get only redirectpages, unset/None results in not filtering @type filter_redirects bool/None + """ # Iterate over articles in redfam for article in self._articlesList: page = pywikibot.Page(pywikibot.Link(article), self.site) + # Exclude by article status + for status in exclude_article_status: + if self.article_has_status( status, title=article ): + continue + + # Only include by article status + for status in onlyinclude_article_status: + if not self.article_has_status( status, title=article ): + continue + # Filter non existing Pages if requested with filter_existing=True if filter_existing and not page.exists(): continue From 
d55c81c97b6545e59a36a49c5695b216491c2a16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Tue, 30 Aug 2016 18:05:51 +0200 Subject: [PATCH 133/192] Set article status when worked on talkpage To detect whole redfam status after run over all articles Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=89 FS#89] --- bots/markpages.py | 56 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index aa9597c..7548294 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -129,7 +129,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() self.current_wikicode = mwparser.parse( self.current_page.text ) # Add notice - self.add_disc_notice_template() + # Returns True if added + # None if already present + add_ret = self.add_disc_notice_template() # Convert wikicode back to string to save self.new_text = str( self.current_wikicode ) @@ -142,8 +144,24 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() if not summary[:len("Bot:")] == "Bot:": summary = "Bot: " + summary.strip() - # Save - self.put_current( self.new_text, summary=summary ) + # will return True if saved + # False if not saved because of errors + # None if change was not accepted by user + save_ret = self.put_current( self.new_text ) + + # Status + if add_ret is None or add_ret and save_ret: + self.current_page.redfam.article_add_status( + "marked", + title=self.current_page.title(withNamespace=False)) + elif save_ret is None: + self.current_page.redfam.article_add_status( + "note_rej", + title=self.current_page.title(withNamespace=False)) + else: + self.current_page.redfam.article_add_status( + "sav_err", + title=self.current_page.title(withNamespace=False)) def add_disc_notice_template( self ): """ @@ -157,7 +175,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Check if it is 
already present in wikicode if self.disc_notice_present(): - return False + return # Find the right place to insert notice template # Therfore we need the first section (if there is one) @@ -211,3 +229,33 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # If nothing is found, loop will run till its end else: return False + + # We need to overrite this since orginal from pywikibot.bot.CurrentPageBot + # does not return result of self._save_page + def put_current(self, new_text, ignore_save_related_errors=None, + ignore_server_errors=None, **kwargs): + """ + Call L{Bot.userPut} but use the current page. + + It compares the new_text to the current page text. + + @param new_text: The new text + @type new_text: basestring + @param ignore_save_related_errors: Ignore save related errors and + automatically print a message. If None uses this instances default. + @type ignore_save_related_errors: bool or None + @param ignore_server_errors: Ignore server errors and automatically + print a message. If None uses this instances default. + @type ignore_server_errors: bool or None + @param kwargs: Additional parameters directly given to L{Bot.userPut}. 
+ @type kwargs: dict + """ + if ignore_save_related_errors is None: + ignore_save_related_errors = self.ignore_save_related_errors + if ignore_server_errors is None: + ignore_server_errors = self.ignore_server_errors + return self.userPut( + self.current_page, self.current_page.text, new_text, + ignore_save_related_errors=ignore_save_related_errors, + ignore_server_errors=ignore_server_errors, + **kwargs) From 65fb2ecb287f8060513977dd95fd3a81361cb9ae Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 5 Nov 2016 19:27:56 +0100 Subject: [PATCH 134/192] Generate Fam status based on article status Some article states should be reflected in the RedFam status Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=89 FS#89] --- lib/redfam.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/redfam.py b/lib/redfam.py index d82ffbb..6e8b3d5 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -68,6 +68,7 @@ class RedFam: self._beginning = beginning self._ending = ending self._redpageid = redpageid + self._status = set() self._status = self._parse_status(status) self._famhash = famhash self._heading = heading @@ -686,8 +687,24 @@ class RedFamWorker( RedFam ): """ Sets status to 3 when worked on """ + for article in self._articlesList: + if self.article_has_status( "note_rej", title=article ): + self.add_status( "note_rej" ) + if self.article_has_status( "sav_err", title=article ): + self.add_status( "sav_err" ) - pass + if not self.has_status( "sav_err" ) and \ + not self.has_status( "note_rej" ): + self.add_status( "marked" ) + + self._mysql.data[ 'status' ] = self._raw_status() + index = 0 + for article in self._articlesList: + self._mysql.data[ "article" + str(index) + 'status' ] = \ + self._article_raw_status( index=index ) + index += 1 + + print( repr(self) ) def get_disc_link( self ): """ From 0ebf307bb80daa662a6f20add5fcb853d7de36f1 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 5 Nov 2016 19:32:02 
+0100 Subject: [PATCH 135/192] Add markpages as subtask Markpages is a subtask of our Red-Bot Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=89 FS#89] # The following line will be added automatically # Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=88 FS#88] --- bots/markpages.py | 24 +++++++++++++++++++++--- jogobot | 2 +- lib/mysqlred.py | 2 +- red.py | 4 ++++ 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 7548294..b7b45c0 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -69,6 +69,24 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Run super class init with builded generator super( MarkPagesBot, self ).__init__(generator=self.gen) + def run(self): + """ + Controls the overal parsing process, using super class for page switch + + Needed to do things before/after treating pages is done + """ + try: + + super( MarkPagesBot, self ).run() + + except: + raise + + else: + # Do status redfam status updates + for redfam in self.redfams: + redfam.update_status() + @property def redfams(self): """ @@ -79,8 +97,8 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() end_after = datetime.strptime( jogobot.config["red.markpages"]["mark_done_after"], "%Y-%m-%d" ) - self.__redfams = RedFamWorker.gen_by_status_and_ending( - "archived", end_after) + self.__redfams = list( RedFamWorker.gen_by_status_and_ending( + "archived", end_after) ) return self.__redfams @@ -147,7 +165,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # will return True if saved # False if not saved because of errors # None if change was not accepted by user - save_ret = self.put_current( self.new_text ) + save_ret = self.put_current( self.new_text, summary=summary ) # Status if add_ret is None or add_ret and save_ret: diff --git a/jogobot b/jogobot index 28d03f3..49ada29 160000 --- a/jogobot +++ b/jogobot @@ -1 +1 
@@ -Subproject commit 28d03f35b848a33ad45d3f5f8f3f82e8c45534ec +Subproject commit 49ada2993e345600523c161c5e2516ec65625684 diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 0bb843c..9e2e01b 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -308,7 +308,7 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' type( self )._cached_insert_data.update( insert_data ) # Manualy construct self.data dict - data_keys = ( 'fam_hash', 'redpageid', 'beginning', 'ending', + data_keys = ( 'famhash', 'redpageid', 'beginning', 'ending', 'status', 'heading', 'article0', 'article1', 'article2', 'article3', 'article4', 'article5', 'article6', 'article7' ) diff --git a/red.py b/red.py index 733def2..81388d6 100644 --- a/red.py +++ b/red.py @@ -68,6 +68,10 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ): # Import related bot from bots.reddiscparser import DiscussionParserBot as Bot + elif subtask == "markpages": + # Import related bot + from bots.markpages import MarkPagesBot as Bot + # Subtask error else: jogobot.output( ( From 6e973369cd868d80862c7efc03fe3cb525573ccb Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 26 Nov 2016 22:26:55 +0100 Subject: [PATCH 136/192] sqlalchemy working for parser Needs some testing, presumably contains some bugs --- bots/reddiscparser.py | 30 +- lib/mysqlred.py | 795 +++++++++++++++++++++++++++--------------- lib/redfam.py | 360 +++++++++++-------- lib/redpage.py | 188 ++++++---- 4 files changed, 857 insertions(+), 516 deletions(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 818eb05..c789d86 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -33,8 +33,8 @@ from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot -from lib import redpage -from lib import redfam +from lib.redpage import RedPage +from lib.redfam import RedFamParser class DiscussionParserBot( @@ -127,7 +127,7 @@ class DiscussionParserBot( else: # If successfully parsed all pages in cat, 
flush db write cache - redpage.RedPage.flush_db_cache() + RedPage.flush_db_cache() def treat_page( self ): """ @@ -146,20 +146,23 @@ class DiscussionParserBot( return # Initiate RedPage object - red_page = redpage.RedPage( self.current_page ) + redpage = RedPage.session.query(RedPage).filter(RedPage.pageid == self.current_page.pageid ).one_or_none() - # Check whether parsing is needed - if red_page.is_parsing_needed(): + if redpage: + redpage.update( self.current_page ) + else: + redpage = RedPage( self.current_page ) + #~ # Check whether parsing is needed + if redpage.is_parsing_needed(): # Count families for failure analysis fam_counter = 0 # Iterate over returned generator with redfam sections - for fam in red_page.parse(): - + for fam in redpage.parse(): # Run RedFamParser on section text - redfam.RedFamParser.parser( fam, red_page.page, - red_page.is_archive() ) + RedFamParser.parser( fam, redpage, + redpage.is_archive() ) fam_counter += 1 @@ -167,12 +170,13 @@ class DiscussionParserBot( # If successfully parsed whole page, flush # db write cache if( fam_counter ): - redfam.RedFamParser.flush_db_cache() + + RedFamParser.flush_db_cache() jogobot.output( "Page [[{reddisc}]] parsed".format( - reddisc=red_page.page.title() ) ) + reddisc=redpage.page.title() ) ) else: jogobot.output( "\03{red}" + "Page [[{reddisc}]], ".format( - reddisc=red_page.page.title() ) + + reddisc=redpage.page.title() ) + "containing no redfam, parsed!", "WARNING" ) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 9e2e01b..8257822 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -39,336 +39,553 @@ from pywikibot import config import jogobot -class MysqlRed: - """ - Basic interface class, containing opening of connection +from sqlalchemy import create_engine +from sqlalchemy.engine.url import URL +url = URL( "mysql+oursql", + username=config.db_username, + password=config.db_password, + host=config.db_hostname, + port=config.db_port, + database=config.db_username + 
jogobot.config['db_suffix'] ) +engine = create_engine(url, echo=True) - Specific querys should be defined in descendant classes per data type + +from sqlalchemy.ext.declarative import ( + declarative_base, declared_attr, has_inherited_table ) +Base = declarative_base() + +from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey + +from sqlalchemy.orm import sessionmaker, relationship, composite +from sqlalchemy.ext.mutable import MutableComposite, MutableSet +from sqlalchemy.orm.collections import attribute_mapped_collection +import sqlalchemy.types as types + + +Session = sessionmaker(bind=engine) +session = Session() + +family = "dewpbeta" + +class Mysql(object): + session = session + @declared_attr + def _tableprefix(cls): + return family + "_" + @declared_attr + def _tablesuffix(cls): + return "s" + @declared_attr + def __tablename__(cls): + if has_inherited_table(cls): + return None + prefix = family + "_" + name = cls.__name__[len("Mysql"):].lower() + suffix = "s" + return cls._tableprefix + name + cls._tablesuffix + def changedp(self): + return self in self.session.dirty + +class ColumnList( list, MutableComposite ): + """ + Combines multiple Colums into a list like object """ - # Save mysqldb-connection as class attribute to use only one - # in descendant classes - connection = False - db_hostname = config.db_hostname - db_port = config.db_port - db_username = config.db_username - db_password = config.db_password - db_name = config.db_username + jogobot.config['db_suffix'] - db_table_prefix = False - - # Class variables for storing cached querys - _cached_update_data = [] - _update_query = '' - _cached_insert_data = {} - _insert_query = '' - - def __init__( self ): + def __init__( self, *columns ): """ - Opens a connection to MySQL-DB - - @returns mysql-stream MySQL Connection + Wrapper to the list constructor deciding whether we have initialization + with individual params per article or with an iterable. 
""" - - # Needs to be generated after Parsing of Args (not at import time) - if not type(self).db_table_prefix: - type(self).db_table_prefix = \ - pywikibot.Site().family.dbName(pywikibot.Site().code) - - # Now we can setup prepared queries - self._prepare_queries() - - # Connect to mysqldb only once - if not type( self ).connection: - - type( self ).connection = mysqldb.connect( - host=type( self ).db_hostname, - port=type( self ).db_port, - user=type( self ).db_username, - passwd=type( self ).db_password, - db=type( self ).db_name ) - - # Register callback for warnig if exit with cached db write querys - atexit.register( type(self).warn_if_not_flushed ) - - def __del__( self ): - """ - Before deleting class, close connection to MySQL-DB - """ - - type( self ).connection.close() - - def _prepare_queries( self ): - """ - Used to replace placeholders in prepared queries - """ - type(self)._update_query = type(self)._update_query.format( - prefix=type(self).db_table_prefix) - type(self)._insert_query = type(self)._insert_query.format( - prefix=type(self).db_table_prefix) - - @classmethod - def flush( cls ): - """ - Run cached querys - """ - if not cls.connection: - raise MysqlRedConnectionError( "No connection exists!" 
) - - cursor = cls.connection.cursor() - - # Execute insert query - if cls._cached_insert_data: - # Since cls._cached_insert_data is a dict, we need to have a custom - # Generator to iterate over it - cursor.executemany( cls._insert_query, - ( cls._cached_insert_data[ key ] - for key in cls._cached_insert_data ) ) - # Reset after writing - cls._cached_insert_data = {} - - # Execute update query - # Use executemany since update could not be reduced to one query - if cls._cached_update_data: - cursor.executemany( cls._update_query, cls._cached_update_data ) - # Reset after writing - cls._cached_update_data = [] - - # Commit db changes - if cls._cached_insert_data or cls._cached_update_data: - cls.connection.commit() - - @classmethod - def warn_if_not_flushed(cls): - """ - Outputs a warning if there are db write querys cached and not flushed - before exiting programm! - """ - if cls._cached_update_data or cls._cached_insert_data: - jogobot.output( "Cached Database write querys not flushed!!! " + - "Data loss is possible!", "WARNING" ) - - -class MysqlRedPage( MysqlRed ): - """ - MySQL-db Interface for handling querys for RedPages - """ - - # Class variables for storing cached querys - # '{prefix}' will be replaced during super().__init__() - _cached_update_data = [] - _update_query = 'UPDATE `{prefix}_redpages` \ -SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;' - - _cached_insert_data = {} - _insert_query = 'INSERT INTO `{prefix}_redpages` \ -( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? 
);' - - def __init__( self, pageid ): - """ - Creates a new instance, runs __init__ of parent class - """ - - super().__init__( ) - - self.__pageid = int( pageid ) - - self.data = self.get_page() - - def __del__( self ): - """ - Needed to prevent descendant classes of MYSQL_RED from deleting - connection to db - """ - pass - - def get_page( self ): - """ - Retrieves a red page row from MySQL-Database for given page_id - - @param int pageid MediaWiki page_id for page to retrieve - - @returns tuple Tuple with data for given page_id - bool FALSE if none found - """ - - cursor = type( self ).connection.cursor(mysqldb.DictCursor) - - cursor.execute( - 'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format( - prefix=type(self).db_table_prefix), ( self.__pageid, ) ) - - res = cursor.fetchone() - - if res: - return res + # Individual params per article (from db), first one is a str + if isinstance( columns[0], str ) or \ + isinstance( columns[0], MutableSet ) or columns[0] is None: + super().__init__( columns ) + # Iterable articles list else: - return False + super().__init__( columns[0] ) - def add_page( self, pagetitle, revid, status=0 ): + def __setitem__(self, key, value): """ - Inserts a red page row in MySQL-Database for given pageid - - @param int revid MediaWiki current revid - @param str pagetitle MediaWiki new pagetitle - @param int status Page parsing status + The MutableComposite class needs to be noticed about changes in our + component. So we tweak the setitem process. 
""" - insert_data = { self.__pageid: ( self.__pageid, pagetitle, - revid, status ) } + # set the item + super().__setitem__( key, value) - type( self )._cached_insert_data.update( insert_data ) + # alert all parents to the change + self.changed() - # Manualy construct self.data dict - self.data = { 'pageid': self.__pageid, 'revid': revid, - 'pagetitle': pagetitle, 'status': status } - - def update_page( self, revid=None, pagetitle=None, status=0 ): + def __composite_values__(self): """ - Updates the red page row in MySQL-Database for given page_id - - @param int revid MediaWiki current rev_id - @param str pagetitle MediaWiki new page_title - @param int status Page parsing status + The Composite method needs to have this method to get the items for db. """ + return self - if not pagetitle: - pagetitle = self.data[ 'pagetitle' ] - if not revid: - revid = self.data[ 'revid' ] +class Status( types.TypeDecorator ): - type( self )._cached_update_data.append( ( pagetitle, revid, - status, self.__pageid ) ) + impl = types.String - -class MysqlRedFam( MysqlRed ): - """ - MySQL-db Interface for handling querys for RedFams - """ - - # Class variables for storing cached querys - _cached_update_data = [] - _update_query = 'UPDATE `{prefix}_redfams` \ -SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ -`status`= ? WHERE `famhash` = ?;' - - _cached_insert_data = {} - _insert_query = 'INSERT INTO `{prefix}_redfams` \ -( famhash, redpageid, beginning, ending, status, heading, \ -article0, article1, article2, article3, article4, article5, article6, \ -article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' - - def __init__( self, famhash=None ): + def process_bind_param(self, value, dialect): """ - Creates a new instance, runs __init__ of parent class + Returns status as commaseparated string (to save in DB) + + @returns Raw status string + @rtype str """ + if isinstance(value, MutableSet): + return ",".join( value ) + elif isinstance(value, String ) or value is None: + return value + else: + raise ProgrammingError - self.__famhash = famhash - super().__init__( ) - - def __del__( self ): + def process_result_value(self, value, dialect): """ - Needed to prevent descendant classes of MYSQL_RED from deleting - connection to db + Sets status based on comma separated list + + @param raw_status Commaseparated string of stati (from DB) + @type raw_status str """ - pass + if value: + return MutableSet( value.strip().split(",")) + else: + return MutableSet([]) - def get_fam( self, famhash ): + def copy(self, **kw): + return Status(self.impl.length) + + + +class MysqlRedFam( Mysql, Base ): + + famhash = Column( String(64), primary_key=True, unique=True ) + + __article0 = Column('article0', String(255), nullable=False ) + __article1 = Column('article1', String(255), nullable=False ) + __article2 = Column('article2', String(255), nullable=True ) + __article3 = Column('article3', String(255), nullable=True ) + __article4 = Column('article4', String(255), nullable=True ) + __article5 = Column('article5', String(255), nullable=True ) + __article6 = Column('article6', String(255), nullable=True ) + __article7 = Column('article7', String(255), nullable=True ) + __articlesList = composite( + ColumnList, __article0, __article1, __article2, __article3, + __article4, __article5, __article6, __article7 ) + + heading = Column( Text, nullable=False ) + redpageid = Column( + Integer, ForeignKey( "dewpbeta_redpages.pageid" ), nullable=False ) + beginning = Column( DateTime, nullable=False ) + ending = Column( DateTime, nullable=True ) + __status = Column( 'status', 
MutableSet.as_mutable(Status(255)), nullable=True ) + + __article0_status = Column( + 'article0_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article1_status = Column( + 'article1_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article2_status = Column( + 'article2_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article3_status = Column( + 'article3_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article4_status = Column( + 'article4_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article5_status = Column( + 'article5_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article6_status = Column( + 'article6_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __article7_status = Column( + 'article7_status', MutableSet.as_mutable(Status(64)), nullable=True ) + __articlesStatus = composite( + ColumnList, __article0_status, __article1_status, __article2_status, + __article3_status, __article4_status, __article5_status, + __article6_status, __article7_status ) + + redpage = relationship( "RedPage", back_populates="redfams" ) + + @property + def articlesList(self): """ - Retrieves a red family row from MySQL-Database for given fam_hash - - @returns dict Dictionairy with data for given fam hash - False if none found + List of articles belonging to the redfam """ - self.__famhash = famhash + return self.__articlesList - cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + @articlesList.setter + def articlesList(self, articlesList): + # Make sure to always have full length for complete overwrites + while( len(articlesList) < 8 ): + articlesList.append(None) + self.__articlesList = ColumnList(articlesList) - cursor.execute( - 'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'. 
- format( prefix=type(self).db_table_prefix), ( famhash, ) ) - - self.data = cursor.fetchone() - - def add_fam( self, articlesList, heading, redpageid, - beginning, ending=None, status=0 ): - - data = [ self.__famhash, redpageid, beginning, ending, - status, heading ] - - for article in articlesList: - data.append( str( article ) ) - - while len( data ) < 14: - data.append( None ) - - data = tuple( data ) - - insert_data = { self.__famhash: data } - type( self )._cached_insert_data.update( insert_data ) - - # Manualy construct self.data dict - data_keys = ( 'famhash', 'redpageid', 'beginning', 'ending', - 'status', 'heading', 'article0', 'article1', 'article2', - 'article3', 'article4', 'article5', 'article6', - 'article7' ) - self.data = dict( zip( data_keys, data ) ) - - def update_fam( self, redpageid, heading, beginning, ending, status ): + @property + def status( self ): """ - Updates the red fam row in MySQL-Database for given fam_hash - - @param int redpageid MediaWiki page_id - @param datetime beginning Timestamp of beginning - qparam datetime ending Timestamp of ending of - @param int status red_fam status + Current fam status """ + return self.__status - type( self )._cached_update_data.append( ( redpageid, heading, - beginning, ending, status, - self.__famhash ) ) + @status.setter + def status( self, status ): + if status: + self.__status = MutableSet( status ) + else: + self.__status = MutableSet() - def get_by_status( self, status ): + @property + def articlesStatus(self): """ - Generator witch fetches redFams with given status from DB + List of status strings/sets for the articles of the redfam """ + return self.__articlesStatus - cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + @articlesStatus.setter + def articlesStatus(self, articlesStatus): + self.__articlesStatus = ColumnList(articlesStatus) - cursor.execute( - 'SELECT * FROM `{prefix}_redfams` WHERE `status` = LIKE %?%;'. 
- format( prefix=type( self ).db_table_prefix), ( status, ) ) +class MysqlRedPage( Mysql, Base ): + pageid = Column( Integer, unique=True, primary_key=True ) + revid = Column( Integer, unique=True, nullable=False ) + pagetitle = Column( String(255), nullable=False ) + status = Column( MutableSet.as_mutable(Status(255)), nullable=True ) - while True: - res = cursor.fetchmany( 1000 ) - if not res: - break - for row in res: - yield row + redfams = relationship( + "MysqlRedFam", order_by=MysqlRedFam.famhash, back_populates="redpage", + collection_class=attribute_mapped_collection("famhash")) - def get_by_status_and_ending( self, status, ending ): - """ - Generator witch fetches redFams with given status from DB - """ - cursor = type( self ).connection.cursor( mysqldb.DictCursor ) +Base.metadata.create_all(engine) - cursor.execute( ( - 'SELECT * ' + - 'FROM `{prefix}_redfams` `F` ' + - 'INNER JOIN `{prefix}_redpages` `P` ' + - 'ON `F`.`status` = ? ' + - 'AND `F`.`ending` >= ? ' + - 'AND `F`.`redpageid` = `P`.`pageid`;').format( - prefix=type( self ).db_table_prefix), - ( status, ending ) ) +#~ class MysqlRed: + #~ """ + #~ Basic interface class, containing opening of connection - while True: - res = cursor.fetchmany( 1000 ) - if not res: - break - for row in res: - yield row + #~ Specific querys should be defined in descendant classes per data type + #~ """ + + #~ # Save mysqldb-connection as class attribute to use only one + #~ # in descendant classes + #~ connection = False + #~ db_hostname = config.db_hostname + #~ db_port = config.db_port + #~ db_username = config.db_username + #~ db_password = config.db_password + #~ db_name = config.db_username + jogobot.config['db_suffix'] + #~ db_table_prefix = False + + #~ # Class variables for storing cached querys + #~ _cached_update_data = [] + #~ _update_query = '' + #~ _cached_insert_data = {} + #~ _insert_query = '' + + #~ def __init__( self ): + #~ """ + #~ Opens a connection to MySQL-DB + + #~ @returns mysql-stream 
MySQL Connection + #~ """ + + #~ # Needs to be generated after Parsing of Args (not at import time) + #~ if not type(self).db_table_prefix: + #~ type(self).db_table_prefix = \ + #~ pywikibot.Site().family.dbName(pywikibot.Site().code) + + #~ # Now we can setup prepared queries + #~ self._prepare_queries() + + #~ # Connect to mysqldb only once + #~ if not type( self ).connection: + + #~ type( self ).connection = mysqldb.connect( + #~ host=type( self ).db_hostname, + #~ port=type( self ).db_port, + #~ user=type( self ).db_username, + #~ passwd=type( self ).db_password, + #~ db=type( self ).db_name ) + + #~ # Register callback for warnig if exit with cached db write querys + #~ atexit.register( type(self).warn_if_not_flushed ) + + #~ def __del__( self ): + #~ """ + #~ Before deleting class, close connection to MySQL-DB + #~ """ + + #~ type( self ).connection.close() + + #~ def _prepare_queries( self ): + #~ """ + #~ Used to replace placeholders in prepared queries + #~ """ + #~ type(self)._update_query = type(self)._update_query.format( + #~ prefix=type(self).db_table_prefix) + #~ type(self)._insert_query = type(self)._insert_query.format( + #~ prefix=type(self).db_table_prefix) + + #~ @classmethod + #~ def flush( cls ): + #~ """ + #~ Run cached querys + #~ """ + #~ if not cls.connection: + #~ raise MysqlRedConnectionError( "No connection exists!" 
) + + #~ cursor = cls.connection.cursor() + + #~ # Execute insert query + #~ if cls._cached_insert_data: + #~ # Since cls._cached_insert_data is a dict, we need to have a custom + #~ # Generator to iterate over it + #~ cursor.executemany( cls._insert_query, + #~ ( cls._cached_insert_data[ key ] + #~ for key in cls._cached_insert_data ) ) + #~ # Reset after writing + #~ cls._cached_insert_data = {} + + #~ # Execute update query + #~ # Use executemany since update could not be reduced to one query + #~ if cls._cached_update_data: + #~ cursor.executemany( cls._update_query, cls._cached_update_data ) + #~ # Reset after writing + #~ cls._cached_update_data = [] + + #~ # Commit db changes + #~ if cls._cached_insert_data or cls._cached_update_data: + #~ cls.connection.commit() + + #~ @classmethod + #~ def warn_if_not_flushed(cls): + #~ """ + #~ Outputs a warning if there are db write querys cached and not flushed + #~ before exiting programm! + #~ """ + #~ if cls._cached_update_data or cls._cached_insert_data: + #~ jogobot.output( "Cached Database write querys not flushed!!! " + + #~ "Data loss is possible!", "WARNING" ) + + +#~ class MysqlRedPage( MysqlRed ): + #~ """ + #~ MySQL-db Interface for handling querys for RedPages + #~ """ + + #~ # Class variables for storing cached querys + #~ # '{prefix}' will be replaced during super().__init__() + #~ _cached_update_data = [] + #~ _update_query = 'UPDATE `{prefix}_redpages` \ +#~ SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;' + + #~ _cached_insert_data = {} + #~ _insert_query = 'INSERT INTO `{prefix}_redpages` \ +#~ ( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? 
);' + + #~ def __init__( self, pageid ): + #~ """ + #~ Creates a new instance, runs __init__ of parent class + #~ """ + + #~ super().__init__( ) + + #~ self.__pageid = int( pageid ) + + #~ self.data = self.get_page() + + #~ def __del__( self ): + #~ """ + #~ Needed to prevent descendant classes of MYSQL_RED from deleting + #~ connection to db + #~ """ + #~ pass + + #~ def get_page( self ): + #~ """ + #~ Retrieves a red page row from MySQL-Database for given page_id + + #~ @param int pageid MediaWiki page_id for page to retrieve + + #~ @returns tuple Tuple with data for given page_id + #~ bool FALSE if none found + #~ """ + + #~ cursor = type( self ).connection.cursor(mysqldb.DictCursor) + + #~ cursor.execute( + #~ 'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format( + #~ prefix=type(self).db_table_prefix), ( self.__pageid, ) ) + + #~ res = cursor.fetchone() + + #~ if res: + #~ return res + #~ else: + #~ return False + + #~ def add_page( self, pagetitle, revid, status=0 ): + #~ """ + #~ Inserts a red page row in MySQL-Database for given pageid + + #~ @param int revid MediaWiki current revid + #~ @param str pagetitle MediaWiki new pagetitle + #~ @param int status Page parsing status + #~ """ + + #~ insert_data = { self.__pageid: ( self.__pageid, pagetitle, + #~ revid, status ) } + + #~ type( self )._cached_insert_data.update( insert_data ) + + #~ # Manualy construct self.data dict + #~ self.data = { 'pageid': self.__pageid, 'revid': revid, + #~ 'pagetitle': pagetitle, 'status': status } + + #~ def update_page( self, revid=None, pagetitle=None, status=0 ): + #~ """ + #~ Updates the red page row in MySQL-Database for given page_id + + #~ @param int revid MediaWiki current rev_id + #~ @param str pagetitle MediaWiki new page_title + #~ @param int status Page parsing status + #~ """ + + #~ if not pagetitle: + #~ pagetitle = self.data[ 'pagetitle' ] + #~ if not revid: + #~ revid = self.data[ 'revid' ] + + #~ type( self )._cached_update_data.append( ( pagetitle, 
revid, + #~ status, self.__pageid ) ) + + +#~ class MysqlRedFam( MysqlRed ): + #~ """ + #~ MySQL-db Interface for handling querys for RedFams + #~ """ + + #~ # Class variables for storing cached querys + #~ _cached_update_data = [] + #~ _update_query = 'UPDATE `{prefix}_redfams` \ +#~ SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ +#~ `status`= ? WHERE `famhash` = ?;' + + #~ _cached_insert_data = {} + #~ _insert_query = 'INSERT INTO `{prefix}_redfams` \ +#~ ( famhash, redpageid, beginning, ending, status, heading, \ +#~ article0, article1, article2, article3, article4, article5, article6, \ +#~ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' + + #~ def __init__( self, famhash=None ): + #~ """ + #~ Creates a new instance, runs __init__ of parent class + #~ """ + + #~ self.__famhash = famhash + + #~ super().__init__( ) + + #~ def __del__( self ): + #~ """ + #~ Needed to prevent descendant classes of MYSQL_RED from deleting + #~ connection to db + #~ """ + #~ pass + + #~ def get_fam( self, famhash ): + #~ """ + #~ Retrieves a red family row from MySQL-Database for given fam_hash + + #~ @returns dict Dictionairy with data for given fam hash + #~ False if none found + #~ """ + #~ self.__famhash = famhash + + #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + + #~ cursor.execute( + #~ 'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'. 
+ #~ format( prefix=type(self).db_table_prefix), ( famhash, ) ) + + #~ self.data = cursor.fetchone() + + #~ def add_fam( self, articlesList, heading, redpageid, + #~ beginning, ending=None, status=0 ): + + #~ data = [ self.__famhash, redpageid, beginning, ending, + #~ status, heading ] + + #~ for article in articlesList: + #~ data.append( str( article ) ) + + #~ while len( data ) < 14: + #~ data.append( None ) + + #~ data = tuple( data ) + + #~ insert_data = { self.__famhash: data } + #~ type( self )._cached_insert_data.update( insert_data ) + + #~ # Manualy construct self.data dict + #~ data_keys = ( 'famhash', 'redpageid', 'beginning', 'ending', + #~ 'status', 'heading', 'article0', 'article1', 'article2', + #~ 'article3', 'article4', 'article5', 'article6', + #~ 'article7' ) + #~ self.data = dict( zip( data_keys, data ) ) + + #~ def update_fam( self, redpageid, heading, beginning, ending, status ): + #~ """ + #~ Updates the red fam row in MySQL-Database for given fam_hash + + #~ @param int redpageid MediaWiki page_id + #~ @param datetime beginning Timestamp of beginning + #~ qparam datetime ending Timestamp of ending of + #~ @param int status red_fam status + #~ """ + + #~ type( self )._cached_update_data.append( ( redpageid, heading, + #~ beginning, ending, status, + #~ self.__famhash ) ) + + #~ def get_by_status( self, status ): + #~ """ + #~ Generator witch fetches redFams with given status from DB + #~ """ + + #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + + #~ cursor.execute( + #~ 'SELECT * FROM `{prefix}_redfams` WHERE `status` = LIKE %?%;'. 
+ #~ format( prefix=type( self ).db_table_prefix), ( status, ) ) + + #~ while True: + #~ res = cursor.fetchmany( 1000 ) + #~ if not res: + #~ break + #~ for row in res: + #~ yield row + + #~ def get_by_status_and_ending( self, status, ending ): + #~ """ + #~ Generator witch fetches redFams with given status from DB + #~ """ + + #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) + + #~ cursor.execute( ( + #~ 'SELECT * ' + + #~ 'FROM `{prefix}_redfams` `F` ' + + #~ 'INNER JOIN `{prefix}_redpages` `P` ' + + #~ 'ON `F`.`status` = ? ' + + #~ 'AND `F`.`ending` >= ? ' + + #~ 'AND `F`.`redpageid` = `P`.`pageid`;').format( + #~ prefix=type( self ).db_table_prefix), + #~ ( status, ending ) ) + + #~ while True: + #~ res = cursor.fetchmany( 1000 ) + #~ if not res: + #~ break + #~ for row in res: + #~ yield row class MysqlRedError(Exception): diff --git a/lib/redfam.py b/lib/redfam.py index 6e8b3d5..526f902 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -3,7 +3,7 @@ # # redfam.py # -# Copyright 2015 GOLDERWEB – Jonathan Golder +# Copyright 2017 GOLDERWEB – Jonathan Golder # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -35,16 +35,17 @@ import pywikibot # noqa from pywikibot.tools import deprecated # noqa import jogobot -from lib.mysqlred import MysqlRedFam +#~ from lib.mysqlred import Column, Integer, String, Text, DateTime, ForeignKey, ColumnList, Status +from lib.mysqlred import MysqlRedFam, MutableSet, ColumnList #, Mysql, Base, relationship, composite, -class RedFam: +class RedFam( MysqlRedFam ): """ Basic class for RedFams, containing the basic data structure """ def __init__( self, articlesList, beginning, ending=None, redpageid=None, - status=None, famhash=None, heading=None ): + status=MutableSet(), famhash=None, heading=None ): """ Generates a new RedFam object @@ -61,21 +62,32 @@ class RedFam: self.site = pywikibot.Site() # Database interface - 
self._mysql = MysqlRedFam( famhash ) + #self._mysql = MysqlRedFam( famhash ) # Initial attribute values - self._articlesList = articlesList - self._beginning = beginning - self._ending = ending - self._redpageid = redpageid - self._status = set() - self._status = self._parse_status(status) - self._famhash = famhash - self._heading = heading + #~ self.articlesList = articlesList + #~ self.beginning = beginning + #~ self.ending = ending + #~ self.redpageid = redpageid +#~ # self._status = set() +#~ # self._status = self._parse_status(status) + #~ self.famhash = famhash + #~ self.heading = heading + #self.status = status - # Calculates the sha1 hash over self._articlesList to - # rediscover known redundance families - self.calc_famhash() + #articlesStatus = ColumnList([ MutableSet() for x in range(0,8) ]) + + #~ # Calculates the sha1 hash over self._articlesList to + #~ # rediscover known redundance families + #~ self.calc_famhash() + + #~ if not status: + #~ status = MutableSet() + + super().__init__( articlesList=articlesList, beginning=beginning, ending=ending, redpageid=redpageid, + famhash=famhash, heading=heading, status=status, articlesStatus=None ) + + #super().__init__() def __repr__( self ): """ @@ -85,64 +97,75 @@ class RedFam: """ __repr = "RedFam( " + \ - "articlesList=" + repr( self._articlesList ) + \ - ", heading=" + repr( self._heading ) + \ - ", beginning=" + repr( self._beginning ) + \ - ", ending=" + repr( self._ending ) + \ - ", red_page_id=" + repr( self._redpageid ) + \ - ", status=" + repr( self._status ) + \ - ", fam_hash=" + repr( self._famhash ) + \ + "articlesList=" + repr( self.articlesList ) + \ + ", heading=" + repr( self.heading ) + \ + ", beginning=" + repr( self.beginning ) + \ + ", ending=" + repr( self.ending ) + \ + ", red_page_id=" + repr( self.redpageid ) + \ + ", status=" + repr( self.status ) + \ + ", fam_hash=" + repr( self.famhash ) + \ " )" return __repr - def calc_famhash( self ): + @classmethod + def calc_famhash(cls, 
articlesList ): + + h = hashlib.sha1() + # Since articlesList attr of RedFam will have always 8 Members we + # need to fill up smaller lists (longers will be cropped below). + while len( articlesList) < 8: + articlesList.append(None) + + h.update( str( articlesList[:8] ).encode('utf-8') ) + + return h.hexdigest() + + def c_famhash( self ): """ Calculates the SHA-1 hash for the articlesList of redundance family. Since we don't need security SHA-1 is just fine. @returns str String with the hexadecimal hash digest """ + print( type( self ) ) - h = hashlib.sha1() - h.update( str( self._articlesList[:8] ).encode('utf-8') ) - - if self._famhash and h.hexdigest() != self._famhash: - raise RedFamHashError( self._famhash, h.hexdigest() ) - - elif self._famhash: + if self.famhash and type(self).calc_famhash(self.articlesList) != self.famhash: + raise RedFamHashError( self.famhash, h.hexdigest() ) + elif self.famhash: return else: - self._famhash = h.hexdigest() + self.famhash = type(self).calc_famhash(self.articlesList) - def changed( self ): - """ - Checks wether anything has changed and maybe triggers db update - """ + #~ def changed( self ): + #~ """ + #~ Checks wether anything has changed and maybe triggers db update + #~ """ - # On archived redfams do not delete possibly existing ending - if( not self._ending and "archived" in self._status and - self._mysql.data[ 'ending' ] ): + #~ # On archived redfams do not delete possibly existing ending + #~ if( not self.ending and "archived" in self._status and + #~ self._mysql.data[ 'ending' ] ): - self._ending = self._mysql.data[ 'ending' ] + #~ self._ending = self._mysql.data[ 'ending' ] - # Since status change means something has changed, update database - if( self._raw_status != self._mysql.data[ 'status' ] or - self._beginning != self._mysql.data[ 'beginning' ] or - self._ending != self._mysql.data[ 'ending' ] or - self._red_page_id != self._mysql.data[ 'redpageid' ] or - self._heading != self._mysql.data[ 'heading' ]): + #~ 
# Since status change means something has changed, update database + #~ if( self._raw_status != self._mysql.data[ 'status' ] or + #~ self._beginning != self._mysql.data[ 'beginning' ] or + #~ self._ending != self._mysql.data[ 'ending' ] or + #~ self._red_page_id != self._mysql.data[ 'redpageid' ] or + #~ self._heading != self._mysql.data[ 'heading' ]): - self._mysql.update_fam( self._redpageid, self._heading, - self._beginning, self._ending, - self._raw_status() ) + #~ self._mysql.update_fam( self._redpageid, self._heading, + #~ self._beginning, self._ending, + #~ self._raw_status() ) @classmethod def flush_db_cache( cls ): """ Calls flush method of Mysql Interface class """ - MysqlRedFam.flush() + cls.session.commit() + #~ MysqlRedFam.flush() def add_status(self, status): """ @@ -151,7 +174,7 @@ class RedFam: @param status Statusstring to add @type status str """ - self._status.add(status) + self.status.add(status) def remove_status(self, status, weak=True): """ @@ -164,9 +187,9 @@ class RedFam: @type bool """ if weak: - self._status.discard(status) + self.status.discard(status) else: - self._status.remove(status) + self.status.remove(status) def has_status(self, status): """ @@ -176,28 +199,28 @@ class RedFam: @type status str @returns True if status is present else False """ - if status in self._status: + if status in self.status: return True else: return False - def _parse_status(self, raw_status ): - """ - Sets status based on comma separated list + #~ def _parse_status(self, raw_status ): + #~ """ + #~ Sets status based on comma separated list - @param raw_status Commaseparated string of stati (from DB) - @type raw_status str - """ - self._status = set( raw_status.strip().split(",")) + #~ @param raw_status Commaseparated string of stati (from DB) + #~ @type raw_status str + #~ """ + #~ self._status = set( raw_status.strip().split(",")) - def _raw_status( self ): - """ - Returns status as commaseparated string (to save in DB) + #~ def _raw_status( self ): + #~ 
""" + #~ Returns status as commaseparated string (to save in DB) - @returns Raw status string - @rtype str - """ - return ",".join( self._status ) + #~ @returns Raw status string + #~ @rtype str + #~ """ + #~ return ",".join( self._status ) def article_add_status(self, status, index=None, title=None ): """ @@ -331,7 +354,7 @@ class RedFamParser( RedFam ): wurde gewünscht von:" __done_notice2 = "{{Erledigt|" - def __init__( self, heading, redpage, redpagearchive, + def __init__( self, articlesList, heading, redpage, redpagearchive, beginning, ending=None ): """ Creates a RedFam object based on data collected while parsing red_pages @@ -346,57 +369,111 @@ class RedFamParser( RedFam ): str strptime parseable string """ - # Set object attributes: - self._redpageid = redpage._pageid - self._redpagearchive = redpagearchive - self._famhash = None - - # Method self.add_beginning sets self._beginning directly - self.add_beginning( beginning ) - - # Method self.add_ending sets self._ending directly - if( ending ): - self.add_ending( ending ) - else: - # If no ending was provided set to None - self._ending = None - - self._status = set() - # Parse the provided heading of redundance section # to set self._articlesList - self.heading_parser( heading ) + #~ self.heading = str(heading) + #~ self.articlesList = articlesList + + #~ # Catch sections with more then 8 articles, print error + #~ if len( self.articlesList ) > 8: + #~ # For repression in output we need to know the fam hash + #~ self.calc_famhash() + + #~ jogobot.output( + #~ ( "\03{{lightred}}" + + #~ "Maximum number of articles in red_fam exceeded, " + + #~ "maximum number is 8, {number:d} were given \n {repress}" + #~ ).format( datetime=datetime.now().strftime( + #~ "%Y-%m-%d %H:%M:%S" ), number=len( self._articlesList ), + #~ repress=repr( self ) ), + #~ "WARNING" ) + + #~ # Only save the first 8 articles +#~ # self.articlesList = self.articlesList[:8] # Calculates the sha1 hash over self._articlesList to # rediscover 
known redundance families + famhash = type(self).calc_famhash(articlesList) - self.calc_famhash() + #~ obj = self.session.query(RedFamParser).filter(RedFamParser.famhash == self.famhash ).one_or_none() + #~ if obj: + #~ self = obj - # Open database connection, ask for data if existing, - # otherwise create entry - self.__handle_db() + + # Set object attributes: + #~ self.redpageid = redpage._pageid + self._redpagearchive = redpagearchive +# self.famhash = None + + # Method self.add_beginning sets self._beginning directly + #~ self.add_beginning( beginning ) + + #~ # Method self.add_ending sets self._ending directly + #~ if( ending ): + #~ self.add_ending( ending ) + #~ else: + #~ # If no ending was provided set to None + #~ self.ending = None + + #~ self.status = MutableSet() + + beginning = self.__datetime(beginning) + if ending: + ending = self.__datetime(ending) + + + super().__init__( articlesList, beginning, ending=ending, redpageid=redpage._pageid, + famhash=famhash, heading=heading ) # Check status changes - self.status() + self.check_status() + + self.session.add(self) + # Open database connection, ask for data if existing, + # otherwise create entry +# self.__handle_db() + + # Triggers db update if anything changed - self.changed() +# self.changed() - def __handle_db( self ): - """ - Handles opening of db connection - """ - # We need a connection to our mysqldb - self._mysql = MysqlRedFam( ) - self._mysql.get_fam( self._famhash ) - if not self._mysql.data: - self._mysql.add_fam( self._articlesList, self._heading, - self._redpageid, self._beginning, - self._ending ) + #~ def __handle_db( self ): + #~ """ + #~ Handles opening of db connection + #~ """ - def heading_parser( self, heading ): + #~ # We need a connection to our mysqldb + #~ self._mysql = MysqlRedFam( ) + #~ self._mysql.get_fam( self._famhash ) + + #~ if not self._mysql.data: + #~ self._mysql.add_fam( self._articlesList, self._heading, + #~ self._redpageid, self._beginning, + #~ self._ending ) + 
+ def update( self, articlesList, heading, redpage, redpagearchive, + beginning, ending=None): + + self.articlesList = articlesList; + self.heading = heading; + self.redpage = redpage; + self.redpageid = redpage.pageid; + + self.add_beginning( beginning ) + + if( ending ): + self.add_ending( ending ) + + self._redpagearchive = redpagearchive + + # Check status changes + self.check_status() + + @classmethod + def heading_parser( cls, heading ): """ Parses given red_fam_heading string and saves articles list @@ -404,34 +481,16 @@ class RedFamParser( RedFam ): @type heading wikicode or mwparser-parseable """ - # Save heading as string - self._heading = str( heading ) - # Parse string heading with mwparse again everytime # In some cases the given wikicode is broken due to syntax errors # (Task FS#77) - heading = mwparser.parse( self._heading ) + heading = mwparser.parse( str( heading ) ) # Save destinations of wikilinks in headings - self._articlesList = [ str( link.title ) for link + return [ str( link.title ) for link in heading.ifilter_wikilinks() ] - # Catch sections with more then 8 articles, print error - if len( self._articlesList ) > 8: - # For repression in output we need to know the fam hash - self.calc_famhash() - jogobot.output( - ( "\03{{lightred}}" + - "Maximum number of articles in red_fam exceeded, " + - "maximum number is 8, {number:d} were given \n {repress}" - ).format( datetime=datetime.now().strftime( - "%Y-%m-%d %H:%M:%S" ), number=len( self._articlesList ), - repress=repr( self ) ), - "WARNING" ) - - # Only save the first 8 articles - self._articlesList = self._articlesList[:8] def add_beginning( self, beginning ): """ @@ -440,7 +499,7 @@ class RedFamParser( RedFam ): @param datetime datetime Beginning date """ - self._beginning = self.__datetime( beginning ) + self.beginning = self.__datetime( beginning ) def add_ending( self, ending ): """ @@ -449,7 +508,7 @@ class RedFamParser( RedFam ): @param datetime datetime Ending date """ - self._ending 
= self.__datetime( ending ) + self.ending = self.__datetime( ending ) def __datetime( self, timestamp ): """ @@ -473,7 +532,7 @@ class RedFamParser( RedFam ): type( self ).__timestamp_format ) return result - def status( self ): + def check_status( self ): """ Handles detection of correct status There are three possible stati: @@ -485,7 +544,7 @@ class RedFamParser( RedFam ): # No ending, discussion is running: # Sometimes archived discussions also have no detectable ending - if not self._ending and not self._redpagearchive: + if not self.ending and not self._redpagearchive: self.add_status("open") else: self.remove_status("open") @@ -513,7 +572,7 @@ class RedFamParser( RedFam ): return False @classmethod - def parser( cls, text, page, isarchive=False ): + def parser( cls, text, redpage, isarchive=False ): """ Handles parsing of redfam section @@ -536,16 +595,33 @@ class RedFamParser( RedFam ): if not beginning: match = re.search( jogobot.config["redundances"]["reddiscs_onlyinclude_re"], - page.title() ) + redpage.page.title() ) if match: beginning = datetime.strptime( "01. {month} {year}".format( month=match.group(1), year=match.group(2)), "%d. 
%B %Y" ) + articlesList = RedFamParser.heading_parser( heading ) + famhash = RedFamParser.calc_famhash( articlesList ) - # Create the RedFam object - RedFamParser( heading, page, isarchive, beginning, ending ) + # Check for existing objects in DB first in current redpage + redfam = redpage.redfams.get(famhash) + + with RedFamParser.session.no_autoflush: + if not redfam: + # Otherwise in db table + redfam = RedFamParser.session.query(RedFamParser).filter( + RedFamParser.famhash == famhash ).one_or_none() + + if redfam: + # Existing redfams need to be updated + redfam.update( articlesList, str(heading), redpage, isarchive, beginning, ending ) + + else: + # Create the RedFam object + redfam = RedFamParser( articlesList, str(heading).strip(), redpage.page, isarchive, beginning, ending ) + return redfam @classmethod def extract_dates( cls, text, isarchive=False ): @@ -615,16 +691,16 @@ class RedFamWorker( RedFam ): mysql_data[ 'status' ], mysql_data[ 'famhash' ], mysql_data[ 'heading' ] ) - self._mysql.data = mysql_data +# #~ self._mysql.data = mysql_data - # Set up article status - index = 0 - for article in self._articlesList: - raw_status = mysql_data[ "article" + str(index) + "_status" ] - if not raw_status: - raw_status = str() - self._article_parse_status( raw_status, index ) - index += 1 + #~ # Set up article status + #~ index = 0 + #~ for article in self.articlesList: + #~ raw_status = mysql_data[ "article" + str(index) + "_status" ] + #~ if not raw_status: + #~ raw_status = str() + #~ self._article_parse_status( raw_status, index ) + #~ index += 1 # Get related RedPage-Information self.redpageid = mysql_data[ 'pageid' ] diff --git a/lib/redpage.py b/lib/redpage.py index b4361b9..558cd8c 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -30,15 +30,23 @@ import mwparserfromhell as mwparser import jogobot # noqa -from lib.mysqlred import MysqlRedPage -from lib.redfam import RedFamParser +#~ from lib.mysqlred import Column, Integer, String, Text, DateTime, 
ForeignKey, ColumnList, Status +from lib.mysqlred import MysqlRedPage, relationship, MutableSet #MysqlRedFam, Base, composite, +from lib.redfam import RedFam, RedFamParser +from sqlalchemy.orm.collections import attribute_mapped_collection -class RedPage: +class RedPage( MysqlRedPage ): """ Class for handling redundance discussion pages and archives """ + #TODO POLYMORPHISM? of BASEClass + redfams = relationship( + "RedFamParser", order_by=RedFamParser.famhash, + back_populates="redpage", + collection_class=attribute_mapped_collection( "famhash" ) ) + def __init__( self, page=None, pageid=None, archive=False ): """ Generate a new RedPage object based on the given pywikibot page object @@ -49,57 +57,91 @@ class RedPage: @type pageid int """ - self._status = set() - # Safe the pywikibot page object - self.page = page - self.pageid = pageid - self._archive = archive + if page: + self._page = page + pageid = self._page.pageid - self.__handle_db( ) - self.is_page_changed() + super().__init__( + pageid=pageid, + revid=self.page._revid, + pagetitle=self.page.title(), + status=MutableSet() ) #TODO EMPTY MutableSet() necessary? + #~ self._status = set() - self._parsed = None + if archive: + self.status.add("archived") - def __handle_db( self ): - """ - Handles opening of db connection - """ + #~ self._archive = archive - # We need a connection to our mysqldb - if self.page: - self.__mysql = MysqlRedPage( self.page._pageid ) - self.pageid = self.page._pageid - elif self.pageid: - self.__mysql = MysqlRedPage( self.pageid ) - self.page = pywikibot.Page( pywikibot.Site(), - self.__mysql.data['pagetitle'] ) - self.page.exists() - else: - raise ValueError( "Page NOR pagid provided!" 
) + #~ self.pageid = pageid + #~ self.revid = self.page._revid + #~ self.p + #~ self.status = MutableSet() - if not self.__mysql.data: - self.__mysql.add_page( self.page.title(), self.page._revid ) +# self.__handle_db( ) + #~ self.is_page_changed() + + #~ self._parsed = None + + self.session.add(self) + + #~ def __handle_db( self ): + #~ """ + #~ Handles opening of db connection + #~ """ + + #~ # We need a connection to our mysqldb + #~ if self.page: + #~ self.__mysql = MysqlRedPage( self.page._pageid ) + #~ self.pageid = self.page._pageid + #~ elif self.pageid: + #~ self.__mysql = MysqlRedPage( self.pageid ) + #~ self.page = pywikibot.Page( pywikibot.Site(), + #~ self.pagetitle ) + #~ self.page.exists() + #~ else: + #~ raise ValueError( "Page NOR pagid provided!" ) + + #~ if not self.__mysql.data: + #~ self.__mysql.add_page( self.page.title(), self.page._revid ) + + def update( self, page ): + + self._page = page + self.revid = page._revid + self.pagetitle = page.title() + + @property + def page(self): + if not hasattr(self,"_page"): + self._page = pywikibot.Page( pywikibot.Site(), self.pagetitle ) + + return self._page + + @property + def archive(self): + return self.has_status("archived") def is_page_changed( self ): """ Check wether the page was changed since last run """ - - if( self.__mysql.data != { 'pageid': self.page._pageid, - 'revid': self.page._revid, - 'pagetitle': self.page.title(), - 'status': self.__mysql.data[ 'status' ] } ): - self._changed = True - else: - self._changed = False + self._changed = self.changedp() + #~ if( self.__mysql.data != { 'pageid': self.page._pageid, + #~ 'revid': self.page._revid, + #~ 'pagetitle': self.page.title(), + #~ 'status': self.__mysql.data[ 'status' ] } ): + #~ self._changed = True + #~ else: + #~ self._changed = False def is_archive( self ): """ Detects wether current page is an archive of discussions """ - if( self._archive or ( u"/Archiv" in self.page.title() ) or + if( self.archive or ( u"/Archiv" in 
self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ) ): @@ -111,8 +153,7 @@ class RedPage: """ Decides wether current RedPage needs to be parsed or not """ - - if( self._changed or self.__mysql.data[ 'status' ] == "" ): + if( self.changedp() or not self.has_status("parsed") ): return True else: return False @@ -140,31 +181,34 @@ class RedPage: yield fam else: + self.status.add("parsed") self._parsed = True - self.__update_db() + #~ self.__update_db() - def __update_db( self ): - """ - Updates the page meta data in mysql db - """ - if( self._parsed or not self._changed ): - self.add_status( "open" ) + #~ def __update_db( self ): + #~ """ + #~ Updates the page meta data in mysql db + #~ """ + #~ if( self._parsed or not self._changed ): + #~ self.add_status( "open" ) - if( self.is_archive() ): - self.remove_status( "open" ) - self.add_status( "archived" ) - else: - self._status = set() + #~ if( self.is_archive() ): + #~ self.remove_status( "open" ) + #~ self.add_status( "archived" ) + #~ else: + #~ pass + #~ self._status = set() - self.__mysql.update_page( self.page._revid, self.page.title(), - self._raw_status() ) + #~ self.__mysql.update_page( self.page._revid, self.page.title(), + #~ self._raw_status() ) @classmethod def flush_db_cache( cls ): """ Calls flush method of Mysql Interface class """ - MysqlRedPage.flush() + cls.session.commit() + #~ MysqlRedPage.flush() def add_status(self, status): """ @@ -173,7 +217,7 @@ class RedPage: @param status Statusstring to add @type status str """ - self._status.add(status) + self.status.add(status) def remove_status(self, status, weak=True): """ @@ -186,9 +230,9 @@ class RedPage: @type bool """ if weak: - self._status.discard(status) + self.status.discard(status) else: - self._status.remove(status) + self.status.remove(status) def has_status(self, status): """ @@ -198,25 +242,25 @@ class RedPage: @type status str @returns True if status is present else False """ - if status in 
self._status: + if status in self.status: return True else: return False - def _parse_status(self, raw_status ): - """ - Sets status based on comma separated list + #~ def _parse_status(self, raw_status ): + #~ """ + #~ Sets status based on comma separated list - @param raw_status Commaseparated string of stati (from DB) - @type raw_status str - """ - self._status = set( raw_status.strip().split(",")) + #~ @param raw_status Commaseparated string of stati (from DB) + #~ @type raw_status str + #~ """ + #~ self._status = set( raw_status.strip().split(",")) - def _raw_status( self ): - """ - Returns status as commaseparated string (to save in DB) + #~ def _raw_status( self ): + #~ """ + #~ Returns status as commaseparated string (to save in DB) - @returns Raw status string - @rtype str - """ - return ",".join( self._status ) + #~ @returns Raw status string + #~ @rtype str + #~ """ + #~ return ",".join( self._status ) From 467f829af2f8a24222a5da3f2823ad53b2de3166 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 7 Mar 2017 10:54:10 +0100 Subject: [PATCH 137/192] Some cleanups Remove old commented out code from manual mysql solution --- bots/reddiscparser.py | 2 +- lib/mysqlred.py | 336 +----------------------------------------- lib/redfam.py | 180 +--------------------- lib/redpage.py | 97 +----------- 4 files changed, 12 insertions(+), 603 deletions(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index c789d86..336cd9f 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -153,7 +153,7 @@ class DiscussionParserBot( else: redpage = RedPage( self.current_page ) - #~ # Check whether parsing is needed + # Check whether parsing is needed if redpage.is_parsing_needed(): # Count families for failure analysis fam_counter = 0 diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 8257822..3710219 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -67,6 +67,7 @@ session = Session() family = "dewpbeta" + class Mysql(object): session = session 
@declared_attr @@ -122,6 +123,7 @@ class ColumnList( list, MutableComposite ): """ return self + class Status( types.TypeDecorator ): impl = types.String @@ -157,7 +159,6 @@ class Status( types.TypeDecorator ): return Status(self.impl.length) - class MysqlRedFam( Mysql, Base ): famhash = Column( String(64), primary_key=True, unique=True ) @@ -243,6 +244,7 @@ class MysqlRedFam( Mysql, Base ): def articlesStatus(self, articlesStatus): self.__articlesStatus = ColumnList(articlesStatus) + class MysqlRedPage( Mysql, Base ): pageid = Column( Integer, unique=True, primary_key=True ) revid = Column( Integer, unique=True, nullable=False ) @@ -254,339 +256,9 @@ class MysqlRedPage( Mysql, Base ): collection_class=attribute_mapped_collection("famhash")) + Base.metadata.create_all(engine) -#~ class MysqlRed: - #~ """ - #~ Basic interface class, containing opening of connection - - #~ Specific querys should be defined in descendant classes per data type - #~ """ - - #~ # Save mysqldb-connection as class attribute to use only one - #~ # in descendant classes - #~ connection = False - #~ db_hostname = config.db_hostname - #~ db_port = config.db_port - #~ db_username = config.db_username - #~ db_password = config.db_password - #~ db_name = config.db_username + jogobot.config['db_suffix'] - #~ db_table_prefix = False - - #~ # Class variables for storing cached querys - #~ _cached_update_data = [] - #~ _update_query = '' - #~ _cached_insert_data = {} - #~ _insert_query = '' - - #~ def __init__( self ): - #~ """ - #~ Opens a connection to MySQL-DB - - #~ @returns mysql-stream MySQL Connection - #~ """ - - #~ # Needs to be generated after Parsing of Args (not at import time) - #~ if not type(self).db_table_prefix: - #~ type(self).db_table_prefix = \ - #~ pywikibot.Site().family.dbName(pywikibot.Site().code) - - #~ # Now we can setup prepared queries - #~ self._prepare_queries() - - #~ # Connect to mysqldb only once - #~ if not type( self ).connection: - - #~ type( self ).connection = 
mysqldb.connect( - #~ host=type( self ).db_hostname, - #~ port=type( self ).db_port, - #~ user=type( self ).db_username, - #~ passwd=type( self ).db_password, - #~ db=type( self ).db_name ) - - #~ # Register callback for warnig if exit with cached db write querys - #~ atexit.register( type(self).warn_if_not_flushed ) - - #~ def __del__( self ): - #~ """ - #~ Before deleting class, close connection to MySQL-DB - #~ """ - - #~ type( self ).connection.close() - - #~ def _prepare_queries( self ): - #~ """ - #~ Used to replace placeholders in prepared queries - #~ """ - #~ type(self)._update_query = type(self)._update_query.format( - #~ prefix=type(self).db_table_prefix) - #~ type(self)._insert_query = type(self)._insert_query.format( - #~ prefix=type(self).db_table_prefix) - - #~ @classmethod - #~ def flush( cls ): - #~ """ - #~ Run cached querys - #~ """ - #~ if not cls.connection: - #~ raise MysqlRedConnectionError( "No connection exists!" ) - - #~ cursor = cls.connection.cursor() - - #~ # Execute insert query - #~ if cls._cached_insert_data: - #~ # Since cls._cached_insert_data is a dict, we need to have a custom - #~ # Generator to iterate over it - #~ cursor.executemany( cls._insert_query, - #~ ( cls._cached_insert_data[ key ] - #~ for key in cls._cached_insert_data ) ) - #~ # Reset after writing - #~ cls._cached_insert_data = {} - - #~ # Execute update query - #~ # Use executemany since update could not be reduced to one query - #~ if cls._cached_update_data: - #~ cursor.executemany( cls._update_query, cls._cached_update_data ) - #~ # Reset after writing - #~ cls._cached_update_data = [] - - #~ # Commit db changes - #~ if cls._cached_insert_data or cls._cached_update_data: - #~ cls.connection.commit() - - #~ @classmethod - #~ def warn_if_not_flushed(cls): - #~ """ - #~ Outputs a warning if there are db write querys cached and not flushed - #~ before exiting programm! 
- #~ """ - #~ if cls._cached_update_data or cls._cached_insert_data: - #~ jogobot.output( "Cached Database write querys not flushed!!! " + - #~ "Data loss is possible!", "WARNING" ) - - -#~ class MysqlRedPage( MysqlRed ): - #~ """ - #~ MySQL-db Interface for handling querys for RedPages - #~ """ - - #~ # Class variables for storing cached querys - #~ # '{prefix}' will be replaced during super().__init__() - #~ _cached_update_data = [] - #~ _update_query = 'UPDATE `{prefix}_redpages` \ -#~ SET `pagetitle` = ?, `revid` = ?, `status`= ? WHERE `pageid` = ?;' - - #~ _cached_insert_data = {} - #~ _insert_query = 'INSERT INTO `{prefix}_redpages` \ -#~ ( pageid, pagetitle, revid, status ) VALUES ( ?, ?, ?, ? );' - - #~ def __init__( self, pageid ): - #~ """ - #~ Creates a new instance, runs __init__ of parent class - #~ """ - - #~ super().__init__( ) - - #~ self.__pageid = int( pageid ) - - #~ self.data = self.get_page() - - #~ def __del__( self ): - #~ """ - #~ Needed to prevent descendant classes of MYSQL_RED from deleting - #~ connection to db - #~ """ - #~ pass - - #~ def get_page( self ): - #~ """ - #~ Retrieves a red page row from MySQL-Database for given page_id - - #~ @param int pageid MediaWiki page_id for page to retrieve - - #~ @returns tuple Tuple with data for given page_id - #~ bool FALSE if none found - #~ """ - - #~ cursor = type( self ).connection.cursor(mysqldb.DictCursor) - - #~ cursor.execute( - #~ 'SELECT * FROM `{prefix}_redpages` WHERE `pageid` = ?;'.format( - #~ prefix=type(self).db_table_prefix), ( self.__pageid, ) ) - - #~ res = cursor.fetchone() - - #~ if res: - #~ return res - #~ else: - #~ return False - - #~ def add_page( self, pagetitle, revid, status=0 ): - #~ """ - #~ Inserts a red page row in MySQL-Database for given pageid - - #~ @param int revid MediaWiki current revid - #~ @param str pagetitle MediaWiki new pagetitle - #~ @param int status Page parsing status - #~ """ - - #~ insert_data = { self.__pageid: ( self.__pageid, pagetitle, - 
#~ revid, status ) } - - #~ type( self )._cached_insert_data.update( insert_data ) - - #~ # Manualy construct self.data dict - #~ self.data = { 'pageid': self.__pageid, 'revid': revid, - #~ 'pagetitle': pagetitle, 'status': status } - - #~ def update_page( self, revid=None, pagetitle=None, status=0 ): - #~ """ - #~ Updates the red page row in MySQL-Database for given page_id - - #~ @param int revid MediaWiki current rev_id - #~ @param str pagetitle MediaWiki new page_title - #~ @param int status Page parsing status - #~ """ - - #~ if not pagetitle: - #~ pagetitle = self.data[ 'pagetitle' ] - #~ if not revid: - #~ revid = self.data[ 'revid' ] - - #~ type( self )._cached_update_data.append( ( pagetitle, revid, - #~ status, self.__pageid ) ) - - -#~ class MysqlRedFam( MysqlRed ): - #~ """ - #~ MySQL-db Interface for handling querys for RedFams - #~ """ - - #~ # Class variables for storing cached querys - #~ _cached_update_data = [] - #~ _update_query = 'UPDATE `{prefix}_redfams` \ -#~ SET `redpageid` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ -#~ `status`= ? WHERE `famhash` = ?;' - - #~ _cached_insert_data = {} - #~ _insert_query = 'INSERT INTO `{prefix}_redfams` \ -#~ ( famhash, redpageid, beginning, ending, status, heading, \ -#~ article0, article1, article2, article3, article4, article5, article6, \ -#~ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
);' - - #~ def __init__( self, famhash=None ): - #~ """ - #~ Creates a new instance, runs __init__ of parent class - #~ """ - - #~ self.__famhash = famhash - - #~ super().__init__( ) - - #~ def __del__( self ): - #~ """ - #~ Needed to prevent descendant classes of MYSQL_RED from deleting - #~ connection to db - #~ """ - #~ pass - - #~ def get_fam( self, famhash ): - #~ """ - #~ Retrieves a red family row from MySQL-Database for given fam_hash - - #~ @returns dict Dictionairy with data for given fam hash - #~ False if none found - #~ """ - #~ self.__famhash = famhash - - #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - - #~ cursor.execute( - #~ 'SELECT * FROM `{prefix}_redfams` WHERE `famhash` = ?;'. - #~ format( prefix=type(self).db_table_prefix), ( famhash, ) ) - - #~ self.data = cursor.fetchone() - - #~ def add_fam( self, articlesList, heading, redpageid, - #~ beginning, ending=None, status=0 ): - - #~ data = [ self.__famhash, redpageid, beginning, ending, - #~ status, heading ] - - #~ for article in articlesList: - #~ data.append( str( article ) ) - - #~ while len( data ) < 14: - #~ data.append( None ) - - #~ data = tuple( data ) - - #~ insert_data = { self.__famhash: data } - #~ type( self )._cached_insert_data.update( insert_data ) - - #~ # Manualy construct self.data dict - #~ data_keys = ( 'famhash', 'redpageid', 'beginning', 'ending', - #~ 'status', 'heading', 'article0', 'article1', 'article2', - #~ 'article3', 'article4', 'article5', 'article6', - #~ 'article7' ) - #~ self.data = dict( zip( data_keys, data ) ) - - #~ def update_fam( self, redpageid, heading, beginning, ending, status ): - #~ """ - #~ Updates the red fam row in MySQL-Database for given fam_hash - - #~ @param int redpageid MediaWiki page_id - #~ @param datetime beginning Timestamp of beginning - #~ qparam datetime ending Timestamp of ending of - #~ @param int status red_fam status - #~ """ - - #~ type( self )._cached_update_data.append( ( redpageid, heading, - #~ 
beginning, ending, status, - #~ self.__famhash ) ) - - #~ def get_by_status( self, status ): - #~ """ - #~ Generator witch fetches redFams with given status from DB - #~ """ - - #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - - #~ cursor.execute( - #~ 'SELECT * FROM `{prefix}_redfams` WHERE `status` = LIKE %?%;'. - #~ format( prefix=type( self ).db_table_prefix), ( status, ) ) - - #~ while True: - #~ res = cursor.fetchmany( 1000 ) - #~ if not res: - #~ break - #~ for row in res: - #~ yield row - - #~ def get_by_status_and_ending( self, status, ending ): - #~ """ - #~ Generator witch fetches redFams with given status from DB - #~ """ - - #~ cursor = type( self ).connection.cursor( mysqldb.DictCursor ) - - #~ cursor.execute( ( - #~ 'SELECT * ' + - #~ 'FROM `{prefix}_redfams` `F` ' + - #~ 'INNER JOIN `{prefix}_redpages` `P` ' + - #~ 'ON `F`.`status` = ? ' + - #~ 'AND `F`.`ending` >= ? ' + - #~ 'AND `F`.`redpageid` = `P`.`pageid`;').format( - #~ prefix=type( self ).db_table_prefix), - #~ ( status, ending ) ) - - #~ while True: - #~ res = cursor.fetchmany( 1000 ) - #~ if not res: - #~ break - #~ for row in res: - #~ yield row - class MysqlRedError(Exception): """ diff --git a/lib/redfam.py b/lib/redfam.py index 526f902..d4f00be 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -35,8 +35,7 @@ import pywikibot # noqa from pywikibot.tools import deprecated # noqa import jogobot -#~ from lib.mysqlred import Column, Integer, String, Text, DateTime, ForeignKey, ColumnList, Status -from lib.mysqlred import MysqlRedFam, MutableSet, ColumnList #, Mysql, Base, relationship, composite, +from lib.mysqlred import MysqlRedFam class RedFam( MysqlRedFam ): @@ -45,7 +44,7 @@ class RedFam( MysqlRedFam ): """ def __init__( self, articlesList, beginning, ending=None, redpageid=None, - status=MutableSet(), famhash=None, heading=None ): + status=None, famhash=None, heading=None ): """ Generates a new RedFam object @@ -61,34 +60,9 @@ class RedFam( MysqlRedFam ): # Having 
pywikibot.Site() is a good idea most of the time self.site = pywikibot.Site() - # Database interface - #self._mysql = MysqlRedFam( famhash ) - - # Initial attribute values - #~ self.articlesList = articlesList - #~ self.beginning = beginning - #~ self.ending = ending - #~ self.redpageid = redpageid -#~ # self._status = set() -#~ # self._status = self._parse_status(status) - #~ self.famhash = famhash - #~ self.heading = heading - #self.status = status - - #articlesStatus = ColumnList([ MutableSet() for x in range(0,8) ]) - - #~ # Calculates the sha1 hash over self._articlesList to - #~ # rediscover known redundance families - #~ self.calc_famhash() - - #~ if not status: - #~ status = MutableSet() - super().__init__( articlesList=articlesList, beginning=beginning, ending=ending, redpageid=redpageid, famhash=famhash, heading=heading, status=status, articlesStatus=None ) - #super().__init__() - def __repr__( self ): """ Returns repression str of RedFam object @@ -137,35 +111,12 @@ class RedFam( MysqlRedFam ): else: self.famhash = type(self).calc_famhash(self.articlesList) - #~ def changed( self ): - #~ """ - #~ Checks wether anything has changed and maybe triggers db update - #~ """ - - #~ # On archived redfams do not delete possibly existing ending - #~ if( not self.ending and "archived" in self._status and - #~ self._mysql.data[ 'ending' ] ): - - #~ self._ending = self._mysql.data[ 'ending' ] - - #~ # Since status change means something has changed, update database - #~ if( self._raw_status != self._mysql.data[ 'status' ] or - #~ self._beginning != self._mysql.data[ 'beginning' ] or - #~ self._ending != self._mysql.data[ 'ending' ] or - #~ self._red_page_id != self._mysql.data[ 'redpageid' ] or - #~ self._heading != self._mysql.data[ 'heading' ]): - - #~ self._mysql.update_fam( self._redpageid, self._heading, - #~ self._beginning, self._ending, - #~ self._raw_status() ) - @classmethod def flush_db_cache( cls ): """ Calls flush method of Mysql Interface class """ 
cls.session.commit() - #~ MysqlRedFam.flush() def add_status(self, status): """ @@ -204,24 +155,6 @@ class RedFam( MysqlRedFam ): else: return False - #~ def _parse_status(self, raw_status ): - #~ """ - #~ Sets status based on comma separated list - - #~ @param raw_status Commaseparated string of stati (from DB) - #~ @type raw_status str - #~ """ - #~ self._status = set( raw_status.strip().split(",")) - - #~ def _raw_status( self ): - #~ """ - #~ Returns status as commaseparated string (to save in DB) - - #~ @returns Raw status string - #~ @rtype str - #~ """ - #~ return ",".join( self._status ) - def article_add_status(self, status, index=None, title=None ): """ Adds a status specified by status, to article (identified by title @@ -292,46 +225,6 @@ class RedFam( MysqlRedFam ): else: raise IndexError( "No index given or wrong format!") - def _article_parse_status(self, raw_status, index=None, title=None ): - """ - Sets status based on comma separated list to articles (identified by - title or index in articlesList) status set - - @param status Statusstring to set - @type status str - @param index Add to article with index in articlesList - @type index int - @param title Add to article with title in articlesList - @type title str - """ - if title and not index: - index = self._articlesList.index( title ) - - if isinstance( index, int ) and index < len(self._articlesList): - self._article_status[index] = set( raw_status.strip().split(",")) - else: - raise IndexError( "No index given or wrong format!") - - def _article_raw_status( self, index=None, title=None ): - """ - Returns status as commaseparated string (to save in DB) of article - (identified by title or index in articlesList) status set - - @param index Get from article with index in articlesList - @type index int - @param title Get from article with title in articlesList - @type title str - @returns Raw status string - @rtype str - """ - if title and not index: - index = self._articlesList.index( title ) - - 
if isinstance( index, int ) and index < len(self._articlesList): - return ",".join( self._article_status[index] ) - else: - raise IndexError( "No index given or wrong format!") - class RedFamParser( RedFam ): """ @@ -369,54 +262,14 @@ class RedFamParser( RedFam ): str strptime parseable string """ - # Parse the provided heading of redundance section - # to set self._articlesList - #~ self.heading = str(heading) - #~ self.articlesList = articlesList - - #~ # Catch sections with more then 8 articles, print error - #~ if len( self.articlesList ) > 8: - #~ # For repression in output we need to know the fam hash - #~ self.calc_famhash() - - #~ jogobot.output( - #~ ( "\03{{lightred}}" + - #~ "Maximum number of articles in red_fam exceeded, " + - #~ "maximum number is 8, {number:d} were given \n {repress}" - #~ ).format( datetime=datetime.now().strftime( - #~ "%Y-%m-%d %H:%M:%S" ), number=len( self._articlesList ), - #~ repress=repr( self ) ), - #~ "WARNING" ) - - #~ # Only save the first 8 articles -#~ # self.articlesList = self.articlesList[:8] - # Calculates the sha1 hash over self._articlesList to # rediscover known redundance families famhash = type(self).calc_famhash(articlesList) - #~ obj = self.session.query(RedFamParser).filter(RedFamParser.famhash == self.famhash ).one_or_none() - #~ if obj: - #~ self = obj - - # Set object attributes: - #~ self.redpageid = redpage._pageid self._redpagearchive = redpagearchive -# self.famhash = None - - # Method self.add_beginning sets self._beginning directly - #~ self.add_beginning( beginning ) - - #~ # Method self.add_ending sets self._ending directly - #~ if( ending ): - #~ self.add_ending( ending ) - #~ else: - #~ # If no ending was provided set to None - #~ self.ending = None - - #~ self.status = MutableSet() + # Parse Timestamps beginning = self.__datetime(beginning) if ending: ending = self.__datetime(ending) @@ -429,31 +282,8 @@ class RedFamParser( RedFam ): self.check_status() self.session.add(self) - # Open database 
connection, ask for data if existing, - # otherwise create entry -# self.__handle_db() - - # Triggers db update if anything changed -# self.changed() - - - - #~ def __handle_db( self ): - #~ """ - #~ Handles opening of db connection - #~ """ - - #~ # We need a connection to our mysqldb - #~ self._mysql = MysqlRedFam( ) - #~ self._mysql.get_fam( self._famhash ) - - #~ if not self._mysql.data: - #~ self._mysql.add_fam( self._articlesList, self._heading, - #~ self._redpageid, self._beginning, - #~ self._ending ) - def update( self, articlesList, heading, redpage, redpagearchive, beginning, ending=None): @@ -490,8 +320,6 @@ class RedFamParser( RedFam ): return [ str( link.title ) for link in heading.ifilter_wikilinks() ] - - def add_beginning( self, beginning ): """ Adds the beginning date of a redundance diskussion to the object @@ -780,8 +608,6 @@ class RedFamWorker( RedFam ): self._article_raw_status( index=index ) index += 1 - print( repr(self) ) - def get_disc_link( self ): """ Constructs and returns the link to Redundancy discussion diff --git a/lib/redpage.py b/lib/redpage.py index 558cd8c..fa1c695 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -30,8 +30,7 @@ import mwparserfromhell as mwparser import jogobot # noqa -#~ from lib.mysqlred import Column, Integer, String, Text, DateTime, ForeignKey, ColumnList, Status -from lib.mysqlred import MysqlRedPage, relationship, MutableSet #MysqlRedFam, Base, composite, +from lib.mysqlred import MysqlRedPage, relationship from lib.redfam import RedFam, RedFamParser from sqlalchemy.orm.collections import attribute_mapped_collection @@ -60,7 +59,6 @@ class RedPage( MysqlRedPage ): # Safe the pywikibot page object if page: self._page = page - pageid = self._page.pageid super().__init__( pageid=pageid, @@ -69,48 +67,15 @@ class RedPage( MysqlRedPage ): status=MutableSet() ) #TODO EMPTY MutableSet() necessary? 
#~ self._status = set() - if archive: - self.status.add("archived") - - #~ self._archive = archive - - #~ self.pageid = pageid - #~ self.revid = self.page._revid - #~ self.p - #~ self.status = MutableSet() - -# self.__handle_db( ) - #~ self.is_page_changed() - - #~ self._parsed = None + self.is_archive() self.session.add(self) - #~ def __handle_db( self ): - #~ """ - #~ Handles opening of db connection - #~ """ - - #~ # We need a connection to our mysqldb - #~ if self.page: - #~ self.__mysql = MysqlRedPage( self.page._pageid ) - #~ self.pageid = self.page._pageid - #~ elif self.pageid: - #~ self.__mysql = MysqlRedPage( self.pageid ) - #~ self.page = pywikibot.Page( pywikibot.Site(), - #~ self.pagetitle ) - #~ self.page.exists() - #~ else: - #~ raise ValueError( "Page NOR pagid provided!" ) - - #~ if not self.__mysql.data: - #~ self.__mysql.add_page( self.page.title(), self.page._revid ) - def update( self, page ): - self._page = page self.revid = page._revid self.pagetitle = page.title() + self.is_archive() @property def page(self): @@ -123,24 +88,10 @@ class RedPage( MysqlRedPage ): def archive(self): return self.has_status("archived") - def is_page_changed( self ): - """ - Check wether the page was changed since last run - """ - self._changed = self.changedp() - #~ if( self.__mysql.data != { 'pageid': self.page._pageid, - #~ 'revid': self.page._revid, - #~ 'pagetitle': self.page.title(), - #~ 'status': self.__mysql.data[ 'status' ] } ): - #~ self._changed = True - #~ else: - #~ self._changed = False - def is_archive( self ): """ Detects wether current page is an archive of discussions """ - if( self.archive or ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ) ): @@ -153,10 +104,7 @@ class RedPage( MysqlRedPage ): """ Decides wether current RedPage needs to be parsed or not """ - if( self.changedp() or not self.has_status("parsed") ): - return True - else: - return False + return self.changedp() or not 
self.has_status("parsed") def parse( self ): """ @@ -183,24 +131,6 @@ class RedPage( MysqlRedPage ): else: self.status.add("parsed") self._parsed = True - #~ self.__update_db() - - #~ def __update_db( self ): - #~ """ - #~ Updates the page meta data in mysql db - #~ """ - #~ if( self._parsed or not self._changed ): - #~ self.add_status( "open" ) - - #~ if( self.is_archive() ): - #~ self.remove_status( "open" ) - #~ self.add_status( "archived" ) - #~ else: - #~ pass - #~ self._status = set() - - #~ self.__mysql.update_page( self.page._revid, self.page.title(), - #~ self._raw_status() ) @classmethod def flush_db_cache( cls ): @@ -208,7 +138,6 @@ class RedPage( MysqlRedPage ): Calls flush method of Mysql Interface class """ cls.session.commit() - #~ MysqlRedPage.flush() def add_status(self, status): """ @@ -246,21 +175,3 @@ class RedPage( MysqlRedPage ): return True else: return False - - #~ def _parse_status(self, raw_status ): - #~ """ - #~ Sets status based on comma separated list - - #~ @param raw_status Commaseparated string of stati (from DB) - #~ @type raw_status str - #~ """ - #~ self._status = set( raw_status.strip().split(",")) - - #~ def _raw_status( self ): - #~ """ - #~ Returns status as commaseparated string (to save in DB) - - #~ @returns Raw status string - #~ @rtype str - #~ """ - #~ return ",".join( self._status ) From bf8e47f916ee632e5c4f56a6d1b1e2f69a84bb35 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 7 Mar 2017 10:55:44 +0100 Subject: [PATCH 138/192] Improve new status API Make sure state changes are only detected as such by sqlalchemy if they are real changes --- lib/mysqlred.py | 63 +++++++++++++++++++++++++++++++++++++++++++++++-- lib/redpage.py | 14 +++++------ 2 files changed, 68 insertions(+), 9 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 3710219..46fa811 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -85,7 +85,54 @@ class Mysql(object): suffix = "s" return cls._tableprefix + name + cls._tablesuffix 
def changedp(self): - return self in self.session.dirty + return self.session.is_modified(self) + + +class MutableSet(MutableSet): + """ + Extended version of the mutable set for our states + """ + + def has(self, item): + """ + Check if item is in set + + @param item Item to check + """ + return item in self + + def add(self, item): + """ + Extended add method, which only result in changed object if there is + really an item added. + + @param item Item to add + """ + if not item in self: + super().add(item) + + def discard(self, item): + """ + Wrapper for extended remove below + + @param item Item to discard + """ + self.remove(item) + + def remove(self, item, weak=True ): + """ + Extended remove method, which only results in changed object if there + is really an item removed. Additionally, combine remove and discard! + + @param item Item to remove/discard + @param weak Set to false to use remove, else discard behavior + """ + if item in self: + if weak: + super().discard(item) + else: + super().remove(item) + class ColumnList( list, MutableComposite ): """ @@ -249,13 +296,25 @@ class MysqlRedPage( Mysql, Base ): pageid = Column( Integer, unique=True, primary_key=True ) revid = Column( Integer, unique=True, nullable=False ) pagetitle = Column( String(255), nullable=False ) - status = Column( MutableSet.as_mutable(Status(255)), nullable=True ) + __status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) redfams = relationship( "MysqlRedFam", order_by=MysqlRedFam.famhash, back_populates="redpage", collection_class=attribute_mapped_collection("famhash")) + @property + def status( self ): + """ + Current fam status + """ + return self.__status + @status.setter + def status( self, status ): + if status: + self.__status = MutableSet( status ) + else: + self.__status = MutableSet() Base.metadata.create_all(engine) diff --git a/lib/redpage.py b/lib/redpage.py index fa1c695..cba4268 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -61,11 +61,11 @@ 
class RedPage( MysqlRedPage ): self._page = page super().__init__( - pageid=pageid, - revid=self.page._revid, - pagetitle=self.page.title(), - status=MutableSet() ) #TODO EMPTY MutableSet() necessary? - #~ self._status = set() + pageid=self._page.pageid, + revid=self._page._revid, + pagetitle=self._page.title(), + status=None + ) self.is_archive() @@ -95,9 +95,9 @@ class RedPage( MysqlRedPage ): if( self.archive or ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ) ): - - return True + self.status.add("archive") else: + self.status.discard("archive") return False def is_parsing_needed( self ): From 89b50e3312a59827fdb454335324c8735cac95c2 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 7 Mar 2017 12:06:11 +0100 Subject: [PATCH 139/192] Remove old status API Now we use the methods of status object directly --- bots/reddiscparser.py | 3 +- lib/redfam.py | 95 +++++++++++-------------------------------- lib/redpage.py | 45 ++------------------ 3 files changed, 29 insertions(+), 114 deletions(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 336cd9f..2e203ba 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -161,8 +161,7 @@ class DiscussionParserBot( # Iterate over returned generator with redfam sections for fam in redpage.parse(): # Run RedFamParser on section text - RedFamParser.parser( fam, redpage, - redpage.is_archive() ) + RedFamParser.parser( fam, redpage, redpage.archive ) fam_counter += 1 diff --git a/lib/redfam.py b/lib/redfam.py index d4f00be..763bfcc 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -84,6 +84,12 @@ class RedFam( MysqlRedFam ): @classmethod def calc_famhash(cls, articlesList ): + """ + Calculates the SHA-1 hash for the articlesList of redundance family. + Since we don't need security SHA-1 is just fine. 
+ + @returns str String with the hexadecimal hash digest + """ h = hashlib.sha1() # Since articlesList attr of RedFam will have always 8 Members we @@ -95,22 +101,6 @@ class RedFam( MysqlRedFam ): return h.hexdigest() - def c_famhash( self ): - """ - Calculates the SHA-1 hash for the articlesList of redundance family. - Since we don't need security SHA-1 is just fine. - - @returns str String with the hexadecimal hash digest - """ - print( type( self ) ) - - if self.famhash and type(self).calc_famhash(self.articlesList) != self.famhash: - raise RedFamHashError( self.famhash, h.hexdigest() ) - elif self.famhash: - return - else: - self.famhash = type(self).calc_famhash(self.articlesList) - @classmethod def flush_db_cache( cls ): """ @@ -118,43 +108,6 @@ class RedFam( MysqlRedFam ): """ cls.session.commit() - def add_status(self, status): - """ - Adds a status specified by status, to status set - - @param status Statusstring to add - @type status str - """ - self.status.add(status) - - def remove_status(self, status, weak=True): - """ - Removes a status, specified by status from set. If weak is set to - False it will throw a KeyError when trying to remove a status not set. 
- - @param status Statusstring to add - @type status str - @param weak Change behavior on missing status - @type bool - """ - if weak: - self.status.discard(status) - else: - self.status.remove(status) - - def has_status(self, status): - """ - Returns True, if redfam has given status - - @param status Statusstring to check - @type status str - @returns True if status is present else False - """ - if status in self.status: - return True - else: - return False - def article_add_status(self, status, index=None, title=None ): """ Adds a status specified by status, to article (identified by title @@ -267,7 +220,7 @@ class RedFamParser( RedFam ): famhash = type(self).calc_famhash(articlesList) # Set object attributes: - self._redpagearchive = redpagearchive + self.redpage = redpage # Parse Timestamps beginning = self.__datetime(beginning) @@ -275,7 +228,7 @@ class RedFamParser( RedFam ): ending = self.__datetime(ending) - super().__init__( articlesList, beginning, ending=ending, redpageid=redpage._pageid, + super().__init__( articlesList, beginning, ending=ending, redpageid=redpage.page._pageid, famhash=famhash, heading=heading ) # Check status changes @@ -294,7 +247,7 @@ class RedFamParser( RedFam ): self.add_beginning( beginning ) - if( ending ): + if ending: self.add_ending( ending ) self._redpagearchive = redpagearchive @@ -372,16 +325,16 @@ class RedFamParser( RedFam ): # No ending, discussion is running: # Sometimes archived discussions also have no detectable ending - if not self.ending and not self._redpagearchive: - self.add_status("open") + if not self.ending and not self.redpage.archive: + self.status.add("open") else: - self.remove_status("open") - if not self._redpagearchive: - self.add_status("done") + self.status.remove("open") + if not self.redpage.archive: + self.status.add("done") else: - self.remove_status("done") - self.remove_status("open") - self.add_status("archived") + self.status.remove("done") + self.status.remove("open") + 
self.status.add("archived") @classmethod def is_section_redfam_cb( cls, heading ): @@ -413,7 +366,7 @@ class RedFamParser( RedFam ): text = mwparser.parse( text ) # Extract heading text - heading = next( text.ifilter_headings() ).title + heading = next( text.ifilter_headings() ).title.strip() # Extract beginnig and maybe ending (beginning, ending) = RedFamParser.extract_dates( text, isarchive ) @@ -448,7 +401,7 @@ class RedFamParser( RedFam ): else: # Create the RedFam object - redfam = RedFamParser( articlesList, str(heading).strip(), redpage.page, isarchive, beginning, ending ) + redfam = RedFamParser( articlesList, str(heading), redpage, isarchive, beginning, ending ) return redfam @classmethod @@ -593,13 +546,13 @@ class RedFamWorker( RedFam ): """ for article in self._articlesList: if self.article_has_status( "note_rej", title=article ): - self.add_status( "note_rej" ) + self.status.add( "note_rej" ) if self.article_has_status( "sav_err", title=article ): - self.add_status( "sav_err" ) + self.status.add( "sav_err" ) - if not self.has_status( "sav_err" ) and \ - not self.has_status( "note_rej" ): - self.add_status( "marked" ) + if not self.status.has( "sav_err" ) and \ + not self.status.has( "note_rej" ): + self.status.add( "marked" ) self._mysql.data[ 'status' ] = self._raw_status() index = 0 diff --git a/lib/redpage.py b/lib/redpage.py index cba4268..f9f0aa8 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -86,25 +86,25 @@ class RedPage( MysqlRedPage ): @property def archive(self): - return self.has_status("archived") + self.is_archive() + return self.status.has("archive") def is_archive( self ): """ Detects wether current page is an archive of discussions """ - if( self.archive or ( u"/Archiv" in self.page.title() ) or + if( ( u"/Archiv" in self.page.title() ) or ( "{{Archiv}}" in self.page.text ) or ( "{{Archiv|" in self.page.text ) ): self.status.add("archive") else: self.status.discard("archive") - return False def is_parsing_needed( self ): """ 
Decides wether current RedPage needs to be parsed or not """ - return self.changedp() or not self.has_status("parsed") + return self.changedp() or not self.status.has("parsed") def parse( self ): """ @@ -138,40 +138,3 @@ class RedPage( MysqlRedPage ): Calls flush method of Mysql Interface class """ cls.session.commit() - - def add_status(self, status): - """ - Adds a status specified by status, to status set - - @param status Statusstring to add - @type status str - """ - self.status.add(status) - - def remove_status(self, status, weak=True): - """ - Removes a status, specified by status from set. If weak is set to - False it will throw a KeyError when trying to remove a status not set. - - @param status Statusstring to add - @type status str - @param weak Change behavior on missing status - @type bool - """ - if weak: - self.status.discard(status) - else: - self.status.remove(status) - - def has_status(self, status): - """ - Returns True, if redfam has given status - - @param status Statusstring to check - @type status str - @returns True if status is present else False - """ - if status in self.status: - return True - else: - return False From 43e31c108a408af39434572129208b63e4c178c9 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 7 Mar 2017 14:51:55 +0100 Subject: [PATCH 140/192] Working RedFamWorker query Modify RedfamWorker class to work with new DB API --- lib/mysqlred.py | 10 +++--- lib/redfam.py | 90 ++++++++++++++++++++++++++----------------------- 2 files changed, 53 insertions(+), 47 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 46fa811..1f92026 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -227,7 +227,7 @@ class MysqlRedFam( Mysql, Base ): Integer, ForeignKey( "dewpbeta_redpages.pageid" ), nullable=False ) beginning = Column( DateTime, nullable=False ) ending = Column( DateTime, nullable=True ) - __status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) + _status = Column( 'status', 
MutableSet.as_mutable(Status(255)), nullable=True ) __article0_status = Column( 'article0_status', MutableSet.as_mutable(Status(64)), nullable=True ) @@ -250,7 +250,7 @@ class MysqlRedFam( Mysql, Base ): __article3_status, __article4_status, __article5_status, __article6_status, __article7_status ) - redpage = relationship( "RedPage", back_populates="redfams" ) + redpage = relationship( "MysqlRedPage", back_populates="redfams" ) @property def articlesList(self): @@ -271,14 +271,14 @@ class MysqlRedFam( Mysql, Base ): """ Current fam status """ - return self.__status + return self._status @status.setter def status( self, status ): if status: - self.__status = MutableSet( status ) + self._status = MutableSet( status ) else: - self.__status = MutableSet() + self._status = MutableSet() @property def articlesStatus(self): diff --git a/lib/redfam.py b/lib/redfam.py index 763bfcc..69b68c7 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -121,10 +121,10 @@ class RedFam( MysqlRedFam ): @type title str """ if title and not index: - index = self._articlesList.index( title ) + index = self.articlesList.index( title ) - if isinstance( index, int ) and index < len(self._articlesList): - self._article_status[index].add(status) + if isinstance( index, int ) and index < len(self.articlesList): + self.articlesStatus[index].add(status) else: raise IndexError( "No index given or wrong format!") @@ -145,13 +145,13 @@ class RedFam( MysqlRedFam ): @type bool """ if title and not index: - index = self._articlesList.index( title ) + index = self.articlesList.index( title ) - if isinstance( index, int ) and index < len(self._articlesList): + if isinstance( index, int ) and index < len(self.articlesList): if weak: - self._article_status[index].discard(status) + self.articlesStatus[index].discard(status) else: - self._article_status[index].remove(status) + self.articlesStatus[index].remove(status) else: raise IndexError( "No index given or wrong format!") @@ -168,10 +168,10 @@ class RedFam( 
MysqlRedFam ): @type title str """ if title and not index: - index = self._articlesList.index( title ) + index = self.articlesList.index( title ) - if isinstance( index, int ) and index < len(self._articlesList): - if status in self._article_status[index]: + if isinstance( index, int ) and index < len(self.articlesList): + if status in self.articlesStatus[index]: return True else: return False @@ -458,19 +458,20 @@ class RedFamWorker( RedFam ): """ def __init__( self, mysql_data ): - articlesList = [] + #~ articlesList = [] - for key in sorted( mysql_data.keys() ): - if 'article' in key and 'status' not in key and mysql_data[ key ]: - articlesList.append( mysql_data[ key ] ) + #~ for key in sorted( mysql_data.keys() ): + #~ if 'article' in key and 'status' not in key and mysql_data[ key ]: + #~ articlesList.append( mysql_data[ key ] ) - # Preset article status list with empty sets for existing articles - self._article_status = [set() for x in range(0, len(articlesList))] + #~ # Preset article status list with empty sets for existing articles + #~ self._article_status = [set() for x in range(0, len(articlesList))] - super().__init__( articlesList, mysql_data[ 'beginning' ], - mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], - mysql_data[ 'status' ], mysql_data[ 'famhash' ], - mysql_data[ 'heading' ] ) + #~ super().__init__( articlesList, mysql_data[ 'beginning' ], + #~ mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], + #~ mysql_data[ 'status' ], mysql_data[ 'famhash' ], + #~ mysql_data[ 'heading' ] ) + super().__init__() # #~ self._mysql.data = mysql_data @@ -510,8 +511,12 @@ class RedFamWorker( RedFam ): """ # Iterate over articles in redfam - for article in self._articlesList: - page = pywikibot.Page(pywikibot.Link(article), self.site) + for article in self.articlesList: + # Not all list elements contain articles + if not article: + break + + page = pywikibot.Page(pywikibot.Link(article), pywikibot.Site()) # Exclude by article status for status in 
exclude_article_status: @@ -544,7 +549,10 @@ class RedFamWorker( RedFam ): """ Sets status to 3 when worked on """ - for article in self._articlesList: + for article in self.articlesList: + if not article: + break + if self.article_has_status( "note_rej", title=article ): self.status.add( "note_rej" ) if self.article_has_status( "sav_err", title=article ): @@ -554,13 +562,6 @@ class RedFamWorker( RedFam ): not self.status.has( "note_rej" ): self.status.add( "marked" ) - self._mysql.data[ 'status' ] = self._raw_status() - index = 0 - for article in self._articlesList: - self._mysql.data[ "article" + str(index) + 'status' ] = \ - self._article_raw_status( index=index ) - index += 1 - def get_disc_link( self ): """ Constructs and returns the link to Redundancy discussion @@ -570,7 +571,7 @@ class RedFamWorker( RedFam ): """ # We need to Replace Links with their linktext - anchor_code = mwparser.parse( self._mysql.data[ 'heading' ].strip() ) + anchor_code = mwparser.parse( self.heading.strip() ) for link in anchor_code.ifilter_wikilinks(): if link.text: text = link.text @@ -583,7 +584,7 @@ class RedFamWorker( RedFam ): anchor_code.replace( " ", "_" ) # We try it with out any more parsing as mw will do while parsing page - return ( self.redpagetitle + "#" + + return ( self.redpage.pagetitle + "#" + str(anchor_code).strip() ) def generate_disc_notice_template( self ): @@ -603,7 +604,9 @@ class RedFamWorker( RedFam ): param_cnt = 3 # Iterate over articles in redfam - for article in self._articlesList: + for article in self.articlesList: + if not article: + break # Make sure to only use 8 articles (max. 
param 10) if param_cnt > 10: break @@ -614,11 +617,11 @@ class RedFamWorker( RedFam ): param_cnt += 1 # Add begin - begin = self._mysql.data[ 'beginning' ].strftime( "%B %Y" ) + begin = self.beginning.strftime( "%B %Y" ) template.add( "Beginn", begin, True ) # Add end (if not same as begin) - end = self._mysql.data[ 'ending' ].strftime( "%B %Y" ) + end = self.ending.strftime( "%B %Y" ) if not end == begin: template.add( "Ende", end, True ) @@ -650,13 +653,16 @@ class RedFamWorker( RedFam ): Yield red_fams stored in db by given status which have an ending after given one """ - mysql = MysqlRedFam() - for fam in mysql.get_by_status_and_ending( status, ending ): - try: - yield cls( fam ) - except RedFamHashError: - print(fam) - raise + from sqlalchemy import text + + for redfam in RedFamWorker.session.query(RedFamWorker).filter( + #~ RedFamWorker._status.like('archived'), + #RedFamWorker._status.like("%{0:s}%".format(status)), + text("status LIKE '%archived%'"), + RedFamWorker.ending >= ending + ): + + yield redfam class RedFamError( Exception ): From 844fee52aec378bd9b16e649fd8e437ee93d939e Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Wed, 8 Mar 2017 00:01:36 +0100 Subject: [PATCH 141/192] Make markpages using new DB/Class structure Update markpages and RedFamWorker-Code to use the new sqlalchemy based DB ORM Interface --- bots/markpages.py | 5 ++++- lib/redfam.py | 31 ++----------------------------- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index b7b45c0..664f5d4 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -87,6 +87,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() for redfam in self.redfams: redfam.update_status() + RedFamWorker.flush_db_cache() + + @property def redfams(self): """ @@ -168,7 +171,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() save_ret = self.put_current( self.new_text, summary=summary ) # Status - if 
add_ret is None or add_ret and save_ret: + if add_ret is None or ( add_ret and save_ret ): self.current_page.redfam.article_add_status( "marked", title=self.current_page.title(withNamespace=False)) diff --git a/lib/redfam.py b/lib/redfam.py index 69b68c7..8dae7ec 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -78,6 +78,7 @@ class RedFam( MysqlRedFam ): ", red_page_id=" + repr( self.redpageid ) + \ ", status=" + repr( self.status ) + \ ", fam_hash=" + repr( self.famhash ) + \ + ", articlesStatus=" + repr( self.articlesStatus ) + \ " )" return __repr @@ -456,38 +457,10 @@ class RedFamWorker( RedFam ): Handles working with redundance families stored in database where discussion is finished """ - def __init__( self, mysql_data ): + def __init__( self ): - #~ articlesList = [] - - #~ for key in sorted( mysql_data.keys() ): - #~ if 'article' in key and 'status' not in key and mysql_data[ key ]: - #~ articlesList.append( mysql_data[ key ] ) - - #~ # Preset article status list with empty sets for existing articles - #~ self._article_status = [set() for x in range(0, len(articlesList))] - - #~ super().__init__( articlesList, mysql_data[ 'beginning' ], - #~ mysql_data[ 'ending' ], mysql_data[ 'redpageid' ], - #~ mysql_data[ 'status' ], mysql_data[ 'famhash' ], - #~ mysql_data[ 'heading' ] ) super().__init__() -# #~ self._mysql.data = mysql_data - - #~ # Set up article status - #~ index = 0 - #~ for article in self.articlesList: - #~ raw_status = mysql_data[ "article" + str(index) + "_status" ] - #~ if not raw_status: - #~ raw_status = str() - #~ self._article_parse_status( raw_status, index ) - #~ index += 1 - - # Get related RedPage-Information - self.redpageid = mysql_data[ 'pageid' ] - self.redpagetitle = mysql_data[ 'pagetitle' ] - # Make sure locale is set to 'de_DE.UTF-8' to prevent problems # with wrong month abreviations in strptime locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') From 9ba7d2e51755733cda9357d30832dfc40216af20 Mon Sep 17 00:00:00 2001 From: Jonathan 
Golder Date: Wed, 8 Mar 2017 00:04:15 +0100 Subject: [PATCH 142/192] Change redfam generator filters Change and clear up the filters in redfam generator to keep track of article status and use positive conditionals --- lib/redfam.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/lib/redfam.py b/lib/redfam.py index 8dae7ec..8be9cf3 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -491,6 +491,28 @@ class RedFamWorker( RedFam ): page = pywikibot.Page(pywikibot.Link(article), pywikibot.Site()) + # Filter existing pages if requested with filter_existing=False + if page.exists(): + self.article_remove_status( "deleted", title=article ) + if filter_existing is False: + continue + # Filter non existing Pages if requested with filter_existing=True + else: + self.article_add_status( "deleted", title=article ) + if filter_existing: + continue + + # Filter redirects if requested with filter_redirects=True + if page.isRedirectPage(): + self.article_add_status( "redirect", title=article ) + if filter_redirects: + continue + # Filter noredirects if requested with filter_redirects=False + else: + self.article_remove_status("redirect", title=article ) + if filter_redirects is False: + continue + # Exclude by article status for status in exclude_article_status: if self.article_has_status( status, title=article ): @@ -501,20 +523,6 @@ class RedFamWorker( RedFam ): if not self.article_has_status( status, title=article ): continue - # Filter non existing Pages if requested with filter_existing=True - if filter_existing and not page.exists(): - continue - # Filter existing pages if requested with filter_existing=False - elif filter_existing is False and page.exists(): - continue - - # Filter redirects if requested with filter_redirects=True - if filter_redirects and page.isRedirectPage(): - continue - # Filter noredirects if requested with filter_redirects=False - elif filter_redirects is False and not page.isRedirectPage(): - 
continue - # Yield filtered pages yield page From e16925197cb1a71e63e2d2604caa73abe274f2e5 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Wed, 8 Mar 2017 18:38:15 +0100 Subject: [PATCH 143/192] Fix pep8.. compliance To be concordant with the coding styles fix pep8 compliance --- bots/markpages.py | 1 - bots/reddiscparser.py | 3 ++- lib/mysqlred.py | 60 +++++++++++++++++++++---------------------- lib/redfam.py | 58 ++++++++++++++++++++++++----------------- lib/redpage.py | 8 +++--- 5 files changed, 70 insertions(+), 60 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 664f5d4..0fbaded 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -89,7 +89,6 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() RedFamWorker.flush_db_cache() - @property def redfams(self): """ diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 2e203ba..9179841 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -146,7 +146,8 @@ class DiscussionParserBot( return # Initiate RedPage object - redpage = RedPage.session.query(RedPage).filter(RedPage.pageid == self.current_page.pageid ).one_or_none() + redpage = RedPage.session.query(RedPage).filter( + RedPage.pageid == self.current_page.pageid ).one_or_none() if redpage: redpage.update( self.current_page ) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 1f92026..232dc7c 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -25,22 +25,27 @@ Provides interface classes for communication of redundances bot with mysql-db """ -# Prefere using oursql then MySQLdb -try: - import oursql as mysqldb -except ImportError: - import MySQLdb as mysqldb +import atexit # noqa -import atexit - -import pywikibot +import pywikibot # noqa from pywikibot import config import jogobot - -from sqlalchemy import create_engine +from sqlalchemy import ( + create_engine, Column, Integer, String, Text, DateTime, ForeignKey ) +from sqlalchemy import text # noqa from sqlalchemy.engine.url import 
URL +from sqlalchemy.ext.declarative import ( + declarative_base, declared_attr, has_inherited_table ) +from sqlalchemy.ext.mutable import MutableComposite, MutableSet +from sqlalchemy.orm import sessionmaker, relationship, composite +from sqlalchemy.orm.collections import attribute_mapped_collection +import sqlalchemy.types as types + + +Base = declarative_base() + url = URL( "mysql+oursql", username=config.db_username, password=config.db_password, @@ -50,18 +55,6 @@ url = URL( "mysql+oursql", engine = create_engine(url, echo=True) -from sqlalchemy.ext.declarative import ( - declarative_base, declared_attr, has_inherited_table ) -Base = declarative_base() - -from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey - -from sqlalchemy.orm import sessionmaker, relationship, composite -from sqlalchemy.ext.mutable import MutableComposite, MutableSet -from sqlalchemy.orm.collections import attribute_mapped_collection -import sqlalchemy.types as types - - Session = sessionmaker(bind=engine) session = Session() @@ -70,20 +63,22 @@ family = "dewpbeta" class Mysql(object): session = session + @declared_attr def _tableprefix(cls): return family + "_" + @declared_attr def _tablesuffix(cls): return "s" + @declared_attr def __tablename__(cls): if has_inherited_table(cls): return None - prefix = family + "_" name = cls.__name__[len("Mysql"):].lower() - suffix = "s" return cls._tableprefix + name + cls._tablesuffix + def changedp(self): return self.session.is_modified(self) @@ -108,7 +103,7 @@ class MutableSet(MutableSet): @param item Item to add """ - if not item in self: + if item not in self: super().add(item) def discard(self, item): @@ -187,8 +182,11 @@ class Status( types.TypeDecorator ): elif isinstance(value, String ) or value is None: return value else: - raise ProgrammingError - + raise TypeError( + "Value should be an instance of one of {0:s},".format( + str( [type(MutableSet()), type(String()), type(None)] ) ) + + "given value was an instance of 
{1:s}".format( + str(type(value))) ) def process_result_value(self, value, dialect): """ @@ -226,8 +224,9 @@ class MysqlRedFam( Mysql, Base ): redpageid = Column( Integer, ForeignKey( "dewpbeta_redpages.pageid" ), nullable=False ) beginning = Column( DateTime, nullable=False ) - ending = Column( DateTime, nullable=True ) - _status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) + ending = Column( DateTime, nullable=True ) + _status = Column( 'status', MutableSet.as_mutable(Status(255)), + nullable=True ) __article0_status = Column( 'article0_status', MutableSet.as_mutable(Status(64)), nullable=True ) @@ -296,7 +295,8 @@ class MysqlRedPage( Mysql, Base ): pageid = Column( Integer, unique=True, primary_key=True ) revid = Column( Integer, unique=True, nullable=False ) pagetitle = Column( String(255), nullable=False ) - __status = Column( 'status', MutableSet.as_mutable(Status(255)), nullable=True ) + __status = Column( 'status', MutableSet.as_mutable(Status(255)), + nullable=True ) redfams = relationship( "MysqlRedFam", order_by=MysqlRedFam.famhash, back_populates="redpage", diff --git a/lib/redfam.py b/lib/redfam.py index 8be9cf3..5c31364 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -35,7 +35,7 @@ import pywikibot # noqa from pywikibot.tools import deprecated # noqa import jogobot -from lib.mysqlred import MysqlRedFam +from lib.mysqlred import MysqlRedFam, text class RedFam( MysqlRedFam ): @@ -60,8 +60,16 @@ class RedFam( MysqlRedFam ): # Having pywikibot.Site() is a good idea most of the time self.site = pywikibot.Site() - super().__init__( articlesList=articlesList, beginning=beginning, ending=ending, redpageid=redpageid, - famhash=famhash, heading=heading, status=status, articlesStatus=None ) + super().__init__( + articlesList=articlesList, + beginning=beginning, + ending=ending, + redpageid=redpageid, + famhash=famhash, + heading=heading, + status=status, + articlesStatus=None + ) def __repr__( self ): """ @@ -228,23 +236,25 @@ class 
RedFamParser( RedFam ): if ending: ending = self.__datetime(ending) - - super().__init__( articlesList, beginning, ending=ending, redpageid=redpage.page._pageid, - famhash=famhash, heading=heading ) + super().__init__( articlesList, + beginning, + ending=ending, + redpageid=redpage.page._pageid, + famhash=famhash, + heading=heading ) # Check status changes self.check_status() self.session.add(self) - def update( self, articlesList, heading, redpage, redpagearchive, - beginning, ending=None): + beginning, ending=None ): - self.articlesList = articlesList; - self.heading = heading; - self.redpage = redpage; - self.redpageid = redpage.pageid; + self.articlesList = articlesList + self.heading = heading + self.redpage = redpage + self.redpageid = redpage.pageid self.add_beginning( beginning ) @@ -271,8 +281,7 @@ class RedFamParser( RedFam ): heading = mwparser.parse( str( heading ) ) # Save destinations of wikilinks in headings - return [ str( link.title ) for link - in heading.ifilter_wikilinks() ] + return [ str( link.title ) for link in heading.ifilter_wikilinks() ] def add_beginning( self, beginning ): """ @@ -398,11 +407,13 @@ class RedFamParser( RedFam ): if redfam: # Existing redfams need to be updated - redfam.update( articlesList, str(heading), redpage, isarchive, beginning, ending ) + redfam.update( articlesList, str(heading), redpage, isarchive, + beginning, ending ) else: # Create the RedFam object - redfam = RedFamParser( articlesList, str(heading), redpage, isarchive, beginning, ending ) + redfam = RedFamParser( articlesList, str(heading), + redpage, isarchive, beginning, ending ) return redfam @classmethod @@ -465,7 +476,8 @@ class RedFamWorker( RedFam ): # with wrong month abreviations in strptime locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') - def article_generator(self, filter_existing=None, filter_redirects=None, + def article_generator(self, # noqa + filter_existing=None, filter_redirects=None, exclude_article_status=[], 
onlyinclude_article_status=[] ): """ @@ -602,7 +614,7 @@ class RedFamWorker( RedFam ): template.add( "Beginn", begin, True ) # Add end (if not same as begin) - end = self.ending.strftime( "%B %Y" ) + end = self.ending.strftime( "%B %Y" ) if not end == begin: template.add( "Ende", end, True ) @@ -634,14 +646,12 @@ class RedFamWorker( RedFam ): Yield red_fams stored in db by given status which have an ending after given one """ - from sqlalchemy import text - for redfam in RedFamWorker.session.query(RedFamWorker).filter( - #~ RedFamWorker._status.like('archived'), - #RedFamWorker._status.like("%{0:s}%".format(status)), + # NOT WORKING WITH OBJECT NOTATION + # RedFamWorker._status.like('archived'), + # RedFamWorker._status.like("%{0:s}%".format(status)), text("status LIKE '%archived%'"), - RedFamWorker.ending >= ending - ): + RedFamWorker.ending >= ending ): yield redfam diff --git a/lib/redpage.py b/lib/redpage.py index f9f0aa8..3678111 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -31,8 +31,8 @@ import mwparserfromhell as mwparser import jogobot # noqa from lib.mysqlred import MysqlRedPage, relationship -from lib.redfam import RedFam, RedFamParser from sqlalchemy.orm.collections import attribute_mapped_collection +from lib.redfam import RedFamParser class RedPage( MysqlRedPage ): @@ -40,7 +40,7 @@ class RedPage( MysqlRedPage ): Class for handling redundance discussion pages and archives """ - #TODO POLYMORPHISM? of BASEClass + # TODO POLYMORPHISM? 
of BASEClass redfams = relationship( "RedFamParser", order_by=RedFamParser.famhash, back_populates="redpage", @@ -65,7 +65,7 @@ class RedPage( MysqlRedPage ): revid=self._page._revid, pagetitle=self._page.title(), status=None - ) + ) self.is_archive() @@ -79,7 +79,7 @@ class RedPage( MysqlRedPage ): @property def page(self): - if not hasattr(self,"_page"): + if not hasattr(self, "_page"): self._page = pywikibot.Page( pywikibot.Site(), self.pagetitle ) return self._page From 3fe47e666f9db09be9b1eab4a11620e8ea71ea65 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Wed, 8 Mar 2017 18:41:02 +0100 Subject: [PATCH 144/192] Fix polymorphism problem with relationships Since we are using subclasses of the ORM mapped classes, disable typechecks for ORM relations --- lib/mysqlred.py | 8 +++++--- lib/redpage.py | 9 +-------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 232dc7c..4f6101e 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -249,7 +249,8 @@ class MysqlRedFam( Mysql, Base ): __article3_status, __article4_status, __article5_status, __article6_status, __article7_status ) - redpage = relationship( "MysqlRedPage", back_populates="redfams" ) + redpage = relationship( "MysqlRedPage", enable_typechecks=False, + back_populates="redfams" ) @property def articlesList(self): @@ -299,8 +300,9 @@ class MysqlRedPage( Mysql, Base ): nullable=True ) redfams = relationship( - "MysqlRedFam", order_by=MysqlRedFam.famhash, back_populates="redpage", - collection_class=attribute_mapped_collection("famhash")) + "MysqlRedFam", enable_typechecks=False, + back_populates="redpage", order_by=MysqlRedFam.famhash, + collection_class=attribute_mapped_collection("famhash") ) @property def status( self ): diff --git a/lib/redpage.py b/lib/redpage.py index 3678111..69f02b8 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -30,8 +30,7 @@ import mwparserfromhell as mwparser import jogobot # noqa -from lib.mysqlred import MysqlRedPage, 
relationship -from sqlalchemy.orm.collections import attribute_mapped_collection +from lib.mysqlred import MysqlRedPage from lib.redfam import RedFamParser @@ -40,12 +39,6 @@ class RedPage( MysqlRedPage ): Class for handling redundance discussion pages and archives """ - # TODO POLYMORPHISM? of BASEClass - redfams = relationship( - "RedFamParser", order_by=RedFamParser.famhash, - back_populates="redpage", - collection_class=attribute_mapped_collection( "famhash" ) ) - def __init__( self, page=None, pageid=None, archive=False ): """ Generate a new RedPage object based on the given pywikibot page object From 281f1c49a8f13349084107389128ad2714e8b089 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Thu, 9 Mar 2017 00:00:17 +0100 Subject: [PATCH 145/192] mysqlred: Set family via pywikibot Get family/language part of table names from PyWikiBot Site --- lib/mysqlred.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 4f6101e..1760fda 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -58,7 +58,7 @@ engine = create_engine(url, echo=True) Session = sessionmaker(bind=engine) session = Session() -family = "dewpbeta" +family = pywikibot.Site().family.dbName(pywikibot.Site().code) class Mysql(object): @@ -222,7 +222,7 @@ class MysqlRedFam( Mysql, Base ): heading = Column( Text, nullable=False ) redpageid = Column( - Integer, ForeignKey( "dewpbeta_redpages.pageid" ), nullable=False ) + Integer, ForeignKey( family + "_redpages.pageid" ), nullable=False ) beginning = Column( DateTime, nullable=False ) ending = Column( DateTime, nullable=True ) _status = Column( 'status', MutableSet.as_mutable(Status(255)), From 4aaacf144314cbc5c83eed566e38fc428525fdd4 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Thu, 9 Mar 2017 10:13:56 +0100 Subject: [PATCH 146/192] Add redfams to redpage-obj after parsing To have redfams available for updates immediately after parsing. Double redfams then will be seen as Update. 
Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=108 FS#108] --- lib/redfam.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/redfam.py b/lib/redfam.py index 5c31364..ca10e87 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -414,7 +414,9 @@ class RedFamParser( RedFam ): # Create the RedFam object redfam = RedFamParser( articlesList, str(heading), redpage, isarchive, beginning, ending ) - return redfam + + # Add redfam to redpage object + redpage.redfams.set( redfam ) @classmethod def extract_dates( cls, text, isarchive=False ): From 147e96d388cc43e153ce3b538b24b62e880347da Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Thu, 9 Mar 2017 15:30:51 +0100 Subject: [PATCH 147/192] Add Wrapperclass for Parser to RedPage Add a wrapper class to overwrite type of Items returned by RedPage.redfams relationship Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=109 FS#109] --- bots/reddiscparser.py | 10 +++++----- lib/redpage.py | 12 +++++++++++- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 9179841..44f2aba 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -33,7 +33,7 @@ from pywikibot.bot import ExistingPageBot, NoRedirectPageBot import jogobot -from lib.redpage import RedPage +from lib.redpage import RedPageParser from lib.redfam import RedFamParser @@ -127,7 +127,7 @@ class DiscussionParserBot( else: # If successfully parsed all pages in cat, flush db write cache - RedPage.flush_db_cache() + RedPageParser.flush_db_cache() def treat_page( self ): """ @@ -146,13 +146,13 @@ class DiscussionParserBot( return # Initiate RedPage object - redpage = RedPage.session.query(RedPage).filter( - RedPage.pageid == self.current_page.pageid ).one_or_none() + redpage = RedPageParser.session.query(RedPageParser).filter( + RedPageParser.pageid == self.current_page.pageid ).one_or_none() if redpage: redpage.update( self.current_page ) else: - 
redpage = RedPage( self.current_page ) + redpage = RedPageParser( self.current_page ) # Check whether parsing is needed if redpage.is_parsing_needed(): diff --git a/lib/redpage.py b/lib/redpage.py index 69f02b8..1c535ad 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -30,7 +30,8 @@ import mwparserfromhell as mwparser import jogobot # noqa -from lib.mysqlred import MysqlRedPage +from lib.mysqlred import ( + MysqlRedPage, relationship, attribute_mapped_collection ) from lib.redfam import RedFamParser @@ -131,3 +132,12 @@ class RedPage( MysqlRedPage ): Calls flush method of Mysql Interface class """ cls.session.commit() + + +class RedPageParser( RedPage ): + """ + Wrapper class to change the type of redfams collection elements in parser + """ + redfams = relationship( + "RedFamParser", enable_typechecks=False, back_populates="redpage", + collection_class=attribute_mapped_collection("famhash") ) From 65de6decb2b03440b331ed76fb795127b26e1ff3 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Fri, 10 Mar 2017 21:51:59 +0100 Subject: [PATCH 148/192] markpages: Filter redirects Do not mark redirects discussion pages --- bots/markpages.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bots/markpages.py b/bots/markpages.py index 0fbaded..8ace79a 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -129,6 +129,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() for talkpage in pagegenerators.PageWithTalkPageGenerator( redfam.article_generator( filter_existing=True, + filter_redirects=True, exclude_article_status=["marked"] ), return_talk_only=True ): From 868894a38b9bacc3bc0e40c67ee966967eaa8047 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Fri, 10 Mar 2017 23:28:24 +0100 Subject: [PATCH 149/192] Format fixes Set locale to de_DE.utf-8 for whole Task Make sure Template is added in own source line --- bots/markpages.py | 3 +++ red.py | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/bots/markpages.py b/bots/markpages.py 
index 8ace79a..7637993 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -224,6 +224,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() else: self.current_wikicode.insert( 0, self.disc_notice ) + # To have it in its own line we need to add a linbreak after it + self.current_wikicode.insert_after(self.disc_notice, "\n" ) + # Notice was added return True diff --git a/red.py b/red.py index 81388d6..f4be812 100644 --- a/red.py +++ b/red.py @@ -26,6 +26,7 @@ Wrapper script to invoke all redundances bot tasks """ import os +import locale import pywikibot @@ -92,6 +93,10 @@ def main(*args): @type args: list of unicode """ + # Make sure locale is set to 'de_DE.UTF-8' to prevent problems + # with wrong month abreviations in strptime + locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') + # Process global arguments to determine desired site local_args = pywikibot.handle_args(args) From 56f326b568b730448be809b60bd2c78124875a4d Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Fri, 10 Mar 2017 23:43:16 +0100 Subject: [PATCH 150/192] Fix error all current redfams marked when quit Restructure update_status to make sure, marked is only set when all articles are marked or gone (means deleted or redirect) [https://fs.golderweb.de/index.php?do=details&task_id=111 FS#111] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=110 FS#110] --- lib/redfam.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/lib/redfam.py b/lib/redfam.py index ca10e87..90fd8a2 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -548,14 +548,19 @@ class RedFamWorker( RedFam ): if not article: break - if self.article_has_status( "note_rej", title=article ): - self.status.add( "note_rej" ) if self.article_has_status( "sav_err", title=article ): self.status.add( "sav_err" ) + return + elif self.article_has_status( "note_rej", title=article ): + self.status.add( "note_rej" ) + return - if not self.status.has( "sav_err" ) and \ - 
not self.status.has( "note_rej" ): - self.status.add( "marked" ) + elif not self.article_has_status("deleted", title=article ) and \ + not self.article_has_status("redirect", title=article) and\ + not self.article_has_status("marked", title=article): + return + + self.status.add( "marked" ) def get_disc_link( self ): """ From 3e69a1c77e84f93589afca05d97d84aa30710639 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Fri, 10 Mar 2017 23:58:48 +0100 Subject: [PATCH 151/192] Remove problem indicating stati when set marked Remove states which are indicating problems in previous runs if successfully marked article and also whole RedFam [https://fs.golderweb.de/index.php?do=details&task_id=112 FS#112] Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=110 FS#110] --- bots/markpages.py | 6 ++++++ lib/redfam.py | 2 ++ 2 files changed, 8 insertions(+) diff --git a/bots/markpages.py b/bots/markpages.py index 7637993..bdf4034 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -172,6 +172,12 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Status if add_ret is None or ( add_ret and save_ret ): + self.current_page.redfam.article_remove_status( + "note_rej", + title=self.current_page.title(withNamespace=False)) + self.current_page.redfam.article_remove_status( + "sav_err", + title=self.current_page.title(withNamespace=False)) self.current_page.redfam.article_add_status( "marked", title=self.current_page.title(withNamespace=False)) diff --git a/lib/redfam.py b/lib/redfam.py index 90fd8a2..dc1535d 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -560,6 +560,8 @@ class RedFamWorker( RedFam ): not self.article_has_status("marked", title=article): return + self.status.remove("sav_err") + self.status.remove("note_rej") self.status.add( "marked" ) def get_disc_link( self ): From 37704c66610dfc7b9729b32e4d1f61bb952d3c31 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 11 Mar 2017 10:39:31 +0100 Subject: [PATCH 152/192] 
Replace pywikibot.showDiff with patched version Pywikibot.bot.userPut does not support setting the value of diff context so it is always zero. Therefore we need to patch either userPut or showDiff to get some context. Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=113 FS#113] --- bots/markpages.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/bots/markpages.py b/bots/markpages.py index bdf4034..23a6aa2 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -28,8 +28,10 @@ with templates from datetime import datetime +import pywikibot from pywikibot import pagegenerators from pywikibot.bot import CurrentPageBot +from pywikibot.diff import PatchManager import mwparserfromhell as mwparser @@ -280,6 +282,10 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() @param kwargs: Additional parameters directly given to L{Bot.userPut}. @type kwargs: dict """ + + # Monkey patch pywikibot.showDiff + pywikibot.showDiff = showDiff + if ignore_save_related_errors is None: ignore_save_related_errors = self.ignore_save_related_errors if ignore_server_errors is None: @@ -289,3 +295,15 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() ignore_save_related_errors=ignore_save_related_errors, ignore_server_errors=ignore_server_errors, **kwargs) + + +# We need to have a patched version to set context param to value greater 0 as +# pywikibot.bot.userPut() currently does not support this value +def showDiff(oldtext, newtext, context=3): + """ + Output a string showing the differences between oldtext and newtext. + + The differences are highlighted (only on compatible systems) to show which + changes were made. 
+ """ + PatchManager(oldtext, newtext, context=context).print_hunks() From 80c94ccf4f001f91bdef4ee4b9efed954bd2be85 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 11 Mar 2017 11:30:19 +0100 Subject: [PATCH 153/192] Replace underscores in article titles Remove underscores in article titles and replace with spaces to have canonical state for all articles Therefore we need to split title and posible anchors in heading parser Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=114 FS#114] --- lib/redfam.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/lib/redfam.py b/lib/redfam.py index dc1535d..6abb7ae 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -280,8 +280,21 @@ class RedFamParser( RedFam ): # (Task FS#77) heading = mwparser.parse( str( heading ) ) - # Save destinations of wikilinks in headings - return [ str( link.title ) for link in heading.ifilter_wikilinks() ] + articlesList = [] + for link in heading.ifilter_wikilinks(): + article = str( link.title ) + + # Split in title and anchor part + article = article.split("#", 1) + # Replace underscores in title with spaces + article[0] = article[0].replace("_", " ") + # Rejoin title and anchor + article = "#".join(article) + + # Add to list + articlesList.append(article) + + return articlesList def add_beginning( self, beginning ): """ From 0f930082b4e4a77731557691d7ee4d727a781750 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 11 Mar 2017 11:40:41 +0100 Subject: [PATCH 154/192] Also canonicalise anchor parts of articles Replace spaces in anchors with underscores as spaces are not correct there Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=114 FS#114] --- lib/redfam.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/redfam.py b/lib/redfam.py index 6abb7ae..6c045c6 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -288,6 +288,11 @@ class RedFamParser( RedFam ): article = article.split("#", 1) # Replace 
underscores in title with spaces article[0] = article[0].replace("_", " ") + + if len(article) > 1: + # other way round, replace spaces with underscores in anchors + article[1] = article[1].replace(" ", "_") + # Rejoin title and anchor article = "#".join(article) From 34e7e0d3beb7b00f0c2408aabac415df49061e36 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 11 Mar 2017 12:19:29 +0100 Subject: [PATCH 155/192] Prevent index Error if no template in leadsec Check if there is a template in leadsec before accessing list item to prevent IndexErrors Related Task: [https://fs.golderweb.de/index.php?do=details&task_id=115 FS#115] --- bots/markpages.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 23a6aa2..268d2a6 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -214,11 +214,12 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # There is none on empty pages, so we need to check if leadsec: # Get the last template in leadsec - ltemplate = leadsec.filter_templates()[-1] + ltemplates = leadsec.filter_templates() # If there is one, add notice after this - if ltemplate: - self.current_wikicode.insert_after(ltemplate, self.disc_notice) + if ltemplates: + self.current_wikicode.insert_after( ltemplates[-1], + self.disc_notice ) # To have it in its own line we need to add a linbreak before self.current_wikicode.insert_before(self.disc_notice, "\n" ) From 8422d08cb6508ec5b6af1902fe5fdbdb33b22d86 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Mon, 21 Aug 2017 13:49:34 +0200 Subject: [PATCH 156/192] Keep comments and leading templates together Prevent spliting up existing comments and templates as often those are documenting archiv templates behaviour Related Task: [FS#141](https://fs.golderweb.de/index.php?do=details&task_id=141) --- bots/markpages.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/bots/markpages.py 
b/bots/markpages.py index 268d2a6..354532d 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -218,8 +218,29 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # If there is one, add notice after this if ltemplates: - self.current_wikicode.insert_after( ltemplates[-1], - self.disc_notice ) + + # Make sure not separate template and maybe following comment + insert_after_index = self.current_wikicode.index( + ltemplates[-1] ) + + # Filter one linebreak + if isinstance( self.current_wikicode.get( + insert_after_index + 1), mwparser.nodes.text.Text ) and \ + self.current_wikicode.get( + insert_after_index + 1 ).value is "\n": + + insert_after_index += 1 + + while isinstance( + self.current_wikicode.get(insert_after_index + 1), + mwparser.nodes.comment.Comment ): + + insert_after_index += 1 + else: + + self.current_wikicode.insert_after( + self.current_wikicode.get(insert_after_index), + self.disc_notice ) # To have it in its own line we need to add a linbreak before self.current_wikicode.insert_before(self.disc_notice, "\n" ) @@ -233,8 +254,8 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() else: self.current_wikicode.insert( 0, self.disc_notice ) - # To have it in its own line we need to add a linbreak after it - self.current_wikicode.insert_after(self.disc_notice, "\n" ) + # To have it in its own line we need to add a linbreak after it + self.current_wikicode.insert_after(self.disc_notice, "\n" ) # Notice was added return True From 30de2a2e12e33bfddaebe25b3469324a40b9cdda Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Mon, 21 Aug 2017 13:55:33 +0200 Subject: [PATCH 157/192] Replace oursql with PyMySQL Since this is prefered on toolsforge and works out of the box after installing via pip, replace oursql which caused some problems. Especially oursql was not able to connect to db via ssh tunnel. 
Related Task: [FS#144](https://fs.golderweb.de/index.php?do=details&task_id=144) --- lib/mysqlred.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 1760fda..19b77e2 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -46,12 +46,13 @@ import sqlalchemy.types as types Base = declarative_base() -url = URL( "mysql+oursql", +url = URL( "mysql+pymysql", username=config.db_username, password=config.db_password, host=config.db_hostname, port=config.db_port, database=config.db_username + jogobot.config['db_suffix'] ) + engine = create_engine(url, echo=True) From 47b85a0b5eb7521544e47cb4e4d4e630b037ed91 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Mon, 21 Aug 2017 22:09:59 +0200 Subject: [PATCH 158/192] Add missing line break if there is no template To make sure our notice template resides in its own line in every case Related Task: [FS#141](https://fs.golderweb.de/index.php?do=details&task_id=141) --- bots/markpages.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bots/markpages.py b/bots/markpages.py index 354532d..4e12620 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -236,6 +236,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() mwparser.nodes.comment.Comment ): insert_after_index += 1 + else: self.current_wikicode.insert_after( @@ -249,6 +250,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() else: self.current_wikicode.insert( 0, self.disc_notice ) + # To have it in its own line we need to add a linbreak after it + self.current_wikicode.insert_after(self.disc_notice, "\n" ) + # If there is no leadsec (and therefore no template in it, we will add # before the first element else: From cd87d1c2bb1f4f04da240c31c412a41fb481b401 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 22 Aug 2017 21:45:58 +0200 Subject: [PATCH 159/192] Fix already marked articles was reshown bug Since we search for matching states for articles 
to include or exclude in a loop, we could not control the outer loop via default break/ continue. Python docs recommend using Exceptions and try/except structures to realise that most conveniently. https://docs.python.org/3/faq/design.html#why-is-there-no-goto Related Task: [FS#138](https://fs.golderweb.de/index.php?do=details&task_id=138) --- lib/redfam.py | 93 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 36 deletions(-) diff --git a/lib/redfam.py b/lib/redfam.py index 6c045c6..36ff4d2 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -515,46 +515,67 @@ class RedFamWorker( RedFam ): @type filter_redirects bool/None """ + + # Helper to leave multidimensional loop + # https://docs.python.org/3/faq/design.html#why-is-there-no-goto + class Continue(Exception): + pass + + class Break(Exception): + pass + # Iterate over articles in redfam for article in self.articlesList: - # Not all list elements contain articles - if not article: + + # To be able to control outer loop from inside child loops + try: + + # Not all list elements contain articles + if not article: + raise Break() + + page = pywikibot.Page( pywikibot.Link(article), + pywikibot.Site() ) + + # Filter existing pages if requested with filter_existing=False + if page.exists(): + self.article_remove_status( "deleted", title=article ) + if filter_existing is False: + raise Continue() + # Filter non existing Pages if requested with + # filter_existing=True + else: + self.article_add_status( "deleted", title=article ) + if filter_existing: + raise Continue() + + # Filter redirects if requested with filter_redirects=True + if page.isRedirectPage(): + self.article_add_status( "redirect", title=article ) + if filter_redirects: + raise Continue() + # Filter noredirects if requested with filter_redirects=False + else: + self.article_remove_status("redirect", title=article ) + if filter_redirects is False: + raise Continue() + + # Exclude by article status + for status in 
exclude_article_status: + if self.article_has_status( status, title=article ): + raise Continue() + + # Only include by article status + for status in onlyinclude_article_status: + if not self.article_has_status( status, title=article ): + raise Continue() + + # Proxy loop control to outer loop + except Continue: + continue + except Break: break - page = pywikibot.Page(pywikibot.Link(article), pywikibot.Site()) - - # Filter existing pages if requested with filter_existing=False - if page.exists(): - self.article_remove_status( "deleted", title=article ) - if filter_existing is False: - continue - # Filter non existing Pages if requested with filter_existing=True - else: - self.article_add_status( "deleted", title=article ) - if filter_existing: - continue - - # Filter redirects if requested with filter_redirects=True - if page.isRedirectPage(): - self.article_add_status( "redirect", title=article ) - if filter_redirects: - continue - # Filter noredirects if requested with filter_redirects=False - else: - self.article_remove_status("redirect", title=article ) - if filter_redirects is False: - continue - - # Exclude by article status - for status in exclude_article_status: - if self.article_has_status( status, title=article ): - continue - - # Only include by article status - for status in onlyinclude_article_status: - if not self.article_has_status( status, title=article ): - continue - # Yield filtered pages yield page From 4137d7246830610ef68466987b74e7dfcbbc090a Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 22 Aug 2017 21:56:43 +0200 Subject: [PATCH 160/192] Look for existing notice by simple in-check To detect maybe uncommented notices already present, check for them using just a simple python x in y check over whole wikicode Related Task: [FS#138](https://fs.golderweb.de/index.php?do=details&task_id=138) --- bots/markpages.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bots/markpages.py b/bots/markpages.py index 4e12620..ffef940 100644 --- 
a/bots/markpages.py +++ b/bots/markpages.py @@ -268,6 +268,10 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() """ Checks if disc notice which shall be added is already present. """ + + if self.disc_notice in self.current_wikicode: + return True + # Iterate over Templates with same name (if any) to search equal # Link to decide if they are the same for present_notice in self.current_wikicode.ifilter_templates( From cc02006fd2bb62b833e657ba3ca46a12a1886d4b Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Tue, 22 Aug 2017 21:51:57 +0200 Subject: [PATCH 161/192] Do not exclude redirects from beeing marked In accordance with Zulu55 redirect discussion pages should also get a notice, therefore do not exclude redirects. Related Task: [FS#140](https://fs.golderweb.de/index.php?do=details&task_id=140) --- bots/markpages.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bots/markpages.py b/bots/markpages.py index ffef940..d47b5c1 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -131,7 +131,6 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() for talkpage in pagegenerators.PageWithTalkPageGenerator( redfam.article_generator( filter_existing=True, - filter_redirects=True, exclude_article_status=["marked"] ), return_talk_only=True ): From 14ec71dd093701ff679bb68020a966dbd3bd79ad Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Wed, 23 Aug 2017 14:53:22 +0200 Subject: [PATCH 162/192] Rewrite get_disc_link to handle special cases Use methods of pywikibot site-object and mwparser to get rid of any special elements like templates or links in headings for construction of our disc link. 
Replace   by hand as it otherwise will occur as normal space and wont work Related Task: [FS#147](https://fs.golderweb.de/index.php?do=details&task_id=147) --- lib/redfam.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/redfam.py b/lib/redfam.py index 36ff4d2..f645ae7 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -611,22 +611,22 @@ class RedFamWorker( RedFam ): @rtype str """ - # We need to Replace Links with their linktext - anchor_code = mwparser.parse( self.heading.strip() ) - for link in anchor_code.ifilter_wikilinks(): - if link.text: - text = link.text - else: - text = link.title + # Expand templates using pwb site object + site = pywikibot.Site() + anchor_code = site.expand_text(self.heading.strip()) - anchor_code.replace( link, text ) + # Remove possibly embbeded files + anchor_code = re.sub( r"\[\[\w+:[^\|]+(?:\|.+){2,}\]\]", "", + anchor_code ) - # Whitespace is replaced with underscores - anchor_code.replace( " ", "_" ) + # Replace non-breaking-space by correct urlencoded value + anchor_code = anchor_code.replace( " ", ".C2.A0" ) - # We try it with out any more parsing as mw will do while parsing page - return ( self.redpage.pagetitle + "#" + - str(anchor_code).strip() ) + # Use mwparser to strip and normalize + anchor_code = mwparser.parse( anchor_code ).strip_code() + + # We try it without any more parsing as mw will do while parsing page + return ( self.redpage.pagetitle + "#" + anchor_code.strip() ) def generate_disc_notice_template( self ): """ From 9b9d50c4d2a37051b815929a44232fb5a0cdf12e Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Thu, 24 Aug 2017 12:04:45 +0200 Subject: [PATCH 163/192] Improve detection of empty lines Search with RegEx as empty lines could also contain spaces Related Task: [FS#141](https://fs.golderweb.de/index.php?do=details&task_id=141) --- bots/markpages.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bots/markpages.py 
b/bots/markpages.py index 4e12620..81891c7 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -26,6 +26,7 @@ Bot to mark pages which were/are subjects of redundance discussions with templates """ +import re from datetime import datetime import pywikibot @@ -226,8 +227,8 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Filter one linebreak if isinstance( self.current_wikicode.get( insert_after_index + 1), mwparser.nodes.text.Text ) and \ - self.current_wikicode.get( - insert_after_index + 1 ).value is "\n": + re.search( r"^\n\s+$", self.current_wikicode.get( + insert_after_index + 1 ).value ): insert_after_index += 1 From 3b2cb95f366233b4265aba3e923c87ce1ecefb7c Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Thu, 24 Aug 2017 12:09:43 +0200 Subject: [PATCH 164/192] Do not fetch marked redfams from db Exclude marked Redfams from DB-Query to prevent marking them again Related Task: [FS#138](https://fs.golderweb.de/index.php?do=details&task_id=138) --- lib/redfam.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/redfam.py b/lib/redfam.py index 36ff4d2..afc66e0 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -699,6 +699,7 @@ class RedFamWorker( RedFam ): # RedFamWorker._status.like('archived'), # RedFamWorker._status.like("%{0:s}%".format(status)), text("status LIKE '%archived%'"), + text("status NOT LIKE '%marked%'"), RedFamWorker.ending >= ending ): yield redfam From 3aa6c5fb1cf8193b3e1a6d033cf28e72b6763248 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Thu, 24 Aug 2017 12:23:17 +0200 Subject: [PATCH 165/192] Disable PreloadingGenerator temporarily PreloadingGenerator mixes up yielded Pages. This is very unconvenient for semi-automatic workflow with manual checks as the articles of the RedFams were not following each other. 
Related Task: [FS#148](https://fs.golderweb.de/index.php?do=details&task_id=148) --- bots/markpages.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 91df1ab..971e9c2 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -115,8 +115,12 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() self.genFactory.gens.append( self.redfam_talkpages_generator() ) # Set generator to pass to super class - self.gen = pagegenerators.PreloadingGenerator( - self.genFactory.getCombinedGenerator() ) + # Since PreloadingGenerator mixis up the Pages, do not use it right now + # (FS#148) + # We can do so for automatic runs (FS#150) + # self.gen = pagegenerators.PreloadingGenerator( + # self.genFactory.getCombinedGenerator() ) + self.gen = self.genFactory.getCombinedGenerator() def redfam_talkpages_generator( self ): """ From b6d7268a7f232db65b521580d63b60f12fc4ee53 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Mon, 21 Aug 2017 22:05:06 +0200 Subject: [PATCH 166/192] select by famhash: Add methods to get param in bot We need a method as callback to get bot specific params passed through to our bot class. 
Introduce -famhash parameter to work on specific famhash Related Task:[FS#146](https://fs.golderweb.de/index.php?do=details&task_id=146) --- red.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/red.py b/red.py index f4be812..8ea4f7b 100644 --- a/red.py +++ b/red.py @@ -60,7 +60,7 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ): @rtype tuple """ # kwargs are passed to selected bot as **kwargs - kwargs = dict() + kwargs = subtask_args if not subtask or subtask == "discparser": # Default case: discparser @@ -83,6 +83,25 @@ def prepare_bot( task_slug, subtask, genFactory, subtask_args ): return ( subtask, Bot, genFactory, kwargs ) +def parse_red_args( argkey, value ): + """ + Process additional args for red.py + + @param argkey The arguments key + @type argkey str + @param value The arguments value + @type value str + + @return Tuple with (key, value) if given pair is relevant, else None + @rtype tuple or None + """ + + if argkey.startswith("-famhash"): + return ( "famhash", value ) + + return None + + def main(*args): """ Process command line arguments and invoke bot. 
@@ -110,7 +129,7 @@ def main(*args): # Parse local Args to get information about subtask ( subtask, genFactory, subtask_args ) = jogobot.bot.parse_local_args( - local_args ) + local_args, parse_red_args ) # select subtask and prepare args ( subtask, Bot, genFactory, kwargs ) = prepare_bot( From 024be69fe122b50691b425c4e98ba89a9ed6d1c5 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Mon, 21 Aug 2017 22:07:18 +0200 Subject: [PATCH 167/192] Use famhash as generator If famhash is defined, fetch explicitly that redfam from db and work only on this Related Task: [FS#146](https://fs.golderweb.de/index.php?do=details&task_id=146) --- bots/markpages.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 971e9c2..d2e8d7f 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -62,6 +62,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Init attribute self.__redfams = None # Will hold a generator with our redfams + if "famhash" in kwargs: + self.famhash = kwargs["famhash"] + # We do not use predefined genFactory as there is no sensefull case to # give a generator via cmd-line for this right now self.genFactory = pagegenerators.GeneratorFactory() @@ -102,8 +105,15 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() end_after = datetime.strptime( jogobot.config["red.markpages"]["mark_done_after"], "%Y-%m-%d" ) - self.__redfams = list( RedFamWorker.gen_by_status_and_ending( - "archived", end_after) ) + + if hasattr(self, "famhash"): + self.__redfams = list( + RedFamWorker.session.query(RedFamWorker).filter( + RedFamWorker.famhash == self.famhash ) ) + + else: + self.__redfams = list( RedFamWorker.gen_by_status_and_ending( + "archived", end_after) ) return self.__redfams From 642a29b022ade48b8e9e11d93d3ff380b0de737b Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Thu, 24 Aug 2017 18:47:18 +0200 Subject: [PATCH 168/192] Improve regex for 
blank lines Do not match consecutive linebreaks as one Related Task: [FS#141](https://fs.golderweb.de/index.php?do=details&task_id=141) --- bots/markpages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bots/markpages.py b/bots/markpages.py index 81891c7..c6b5e18 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -227,7 +227,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # Filter one linebreak if isinstance( self.current_wikicode.get( insert_after_index + 1), mwparser.nodes.text.Text ) and \ - re.search( r"^\n\s+$", self.current_wikicode.get( + re.search( r"^\n[^\n\S]+$", self.current_wikicode.get( insert_after_index + 1 ).value ): insert_after_index += 1 From 31c10073a2313a1d337d7bde51229765d78da3b9 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Fri, 25 Aug 2017 17:09:38 +0200 Subject: [PATCH 169/192] Prevent index errors searching for comments Make sure not to exceed existing indexes of wikicode object while trying to search for comments Related Task: [FS#141](https://fs.golderweb.de/index.php?do=details&task_id=141) --- bots/markpages.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index c6b5e18..5fa7f36 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -224,25 +224,28 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() insert_after_index = self.current_wikicode.index( ltemplates[-1] ) - # Filter one linebreak - if isinstance( self.current_wikicode.get( - insert_after_index + 1), mwparser.nodes.text.Text ) and \ - re.search( r"^\n[^\n\S]+$", self.current_wikicode.get( - insert_after_index + 1 ).value ): + # If there is more content + if len(self.current_wikicode.nodes) > (insert_after_index + 1): + # Filter one linebreak + if isinstance( self.current_wikicode.get( + insert_after_index + 1), + mwparser.nodes.text.Text) and \ + re.search( r"^\n[^\n\S]+$", 
self.current_wikicode.get( + insert_after_index + 1 ).value ): - insert_after_index += 1 + insert_after_index += 1 - while isinstance( - self.current_wikicode.get(insert_after_index + 1), - mwparser.nodes.comment.Comment ): + while len(self.current_wikicode.nodes) > \ + (insert_after_index + 1) and \ + isinstance( + self.current_wikicode.get(insert_after_index + 1), + mwparser.nodes.comment.Comment ): - insert_after_index += 1 + insert_after_index += 1 - else: - - self.current_wikicode.insert_after( - self.current_wikicode.get(insert_after_index), - self.disc_notice ) + self.current_wikicode.insert_after( + self.current_wikicode.get(insert_after_index), + self.disc_notice ) # To have it in its own line we need to add a linbreak before self.current_wikicode.insert_before(self.disc_notice, "\n" ) From 8a26b6d92a10c48d7b8fc8685a70d90bc7706a6b Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Fri, 25 Aug 2017 18:11:41 +0200 Subject: [PATCH 170/192] Normalize article titles with anchors In our db article titles with anchors are stored with underscores in anchor string. Therefore we need to replace spaces in anchor string given by pywikibot.Page.title(). 
Related Task: [FS#151](https://fs.golderweb.de/index.php?do=details&task_id=151) --- bots/markpages.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 5fdd701..63f6d23 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -186,25 +186,32 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # None if change was not accepted by user save_ret = self.put_current( self.new_text, summary=summary ) + # Normalize title with anchor (replace spaces in anchor) + article = self.current_page.title(withNamespace=False) + article_parts = article.split("#", 1) + if len(article_parts) == 2: + article_parts[1] = article_parts[1].replace(" ", "_") + article = "#".join(article_parts) + # Status if add_ret is None or ( add_ret and save_ret ): self.current_page.redfam.article_remove_status( "note_rej", - title=self.current_page.title(withNamespace=False)) + title=article) self.current_page.redfam.article_remove_status( "sav_err", - title=self.current_page.title(withNamespace=False)) + title=article) self.current_page.redfam.article_add_status( "marked", - title=self.current_page.title(withNamespace=False)) + title=article) elif save_ret is None: self.current_page.redfam.article_add_status( "note_rej", - title=self.current_page.title(withNamespace=False)) + title=article) else: self.current_page.redfam.article_add_status( "sav_err", - title=self.current_page.title(withNamespace=False)) + title=article) def add_disc_notice_template( self ): """ From f8002c85da5539948e6413a844c2444b7057fe10 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 2 Sep 2017 14:23:25 +0200 Subject: [PATCH 171/192] Do not search for templates recursively Since nested templates did not get an index in global wikicode object searching for index of a nested template results in ValueError Related Task: [FS#153](https://fs.golderweb.de/index.php?do=details&task_id=153) --- bots/markpages.py | 2 +- 1
file changed, 1 insertion(+), 1 deletion(-) diff --git a/bots/markpages.py b/bots/markpages.py index 63f6d23..00e33ba 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -235,7 +235,7 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # There is none on empty pages, so we need to check if leadsec: # Get the last template in leadsec - ltemplates = leadsec.filter_templates() + ltemplates = leadsec.filter_templates(recursive=False) # If there is one, add notice after this if ltemplates: From b3cfcdc25907fe016e1ca85c4de5bec8bba1d9a1 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 2 Sep 2017 15:59:34 +0200 Subject: [PATCH 172/192] Improve title detection to get correct behaviour Make sure that category links are starting with colon and non article pages are returned with namespace. Related Task: [FS#154](https://fs.golderweb.de/index.php?do=details&task_id=154) --- bots/markpages.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bots/markpages.py b/bots/markpages.py index 00e33ba..d8e1545 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -187,7 +187,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() save_ret = self.put_current( self.new_text, summary=summary ) # Normalize title with anchor (replace spaces in anchor) - article = self.current_page.title(withNamespace=False) + article = self.current_page.toggleTalkPage().title( + asLink=True, textlink=True) + article = article.strip("[]") article_parts = article.split("#", 1) if len(article_parts) == 2: article_parts[1] = article_parts[1].replace(" ", "_") From d9b4fcc0bdf1489cf3b3203354d0d0866fb0ac99 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 2 Sep 2017 22:06:30 +0200 Subject: [PATCH 173/192] Strip spaces before adding articles to redfam Some article links have surrounding spaces in their linktext.
Remove them before adding article to RedFam to have a canonical title Related Task: [FS#155](https://fs.golderweb.de/index.php?do=details&task_id=155) --- lib/redfam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/redfam.py b/lib/redfam.py index 6a44d40..6a9402c 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -282,7 +282,7 @@ class RedFamParser( RedFam ): articlesList = [] for link in heading.ifilter_wikilinks(): - article = str( link.title ) + article = str( link.title ).strip() # Split in title and anchor part article = article.split("#", 1) From ff03ca8f131775b5f6e337d54130f459e263cfa7 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 2 Sep 2017 22:10:25 +0200 Subject: [PATCH 174/192] Explicitly set charset for PyMySQL-Connection Since PyMySQL-Connection otherwise uses charset 'latin-1', explicitly set connection charset to 'utf8' http://docs.sqlalchemy.org/en/rel_1_0/dialects/mysql.html#charset-selection http://docs.sqlalchemy.org/en/rel_1_0/core/engines.html?highlight=url#sqlalchemy.engine.url.URL Related Task: [FS#156](https://fs.golderweb.de/index.php?do=details&task_id=156) --- lib/mysqlred.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 19b77e2..b4529d7 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -51,7 +51,8 @@ url = URL( "mysql+pymysql", password=config.db_password, host=config.db_hostname, port=config.db_port, - database=config.db_username + jogobot.config['db_suffix'] ) + database=config.db_username + jogobot.config['db_suffix'], + query={'charset': 'utf8'} ) engine = create_engine(url, echo=True) From 02e53475f1091f8eba58c47ba1af703e2f1e3292 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 9 Sep 2017 21:35:36 +0200 Subject: [PATCH 175/192] Prevent lowercase article titles in Parser Since real lowercase article titles are not allowed, make sure to convert all first letters of article titles to uppercase.
This is necessary since pywikibot will return article titles like this. Related Task: [FS#157](https://fs.golderweb.de/index.php?do=details&task_id=157) --- lib/redfam.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/redfam.py b/lib/redfam.py index 6a9402c..f36d1bc 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -284,6 +284,13 @@ class RedFamParser( RedFam ): for link in heading.ifilter_wikilinks(): article = str( link.title ).strip() + # Short circuit empty links + if not article: + continue + + # Make sure first letter is uppercase + article = article[0].upper() + article[1:] + # Split in title and anchor part article = article.split("#", 1) # Replace underscores in title with spaces From 88848cb084f11092d90459413540d8cb6e5e66f5 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 23 Sep 2017 20:32:13 +0200 Subject: [PATCH 176/192] Prepare Version test-v4 for release Add a README.md file for this project --- README.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..964dd07 --- /dev/null +++ b/README.md @@ -0,0 +1,36 @@ +jogobot-red +=========== + +Versions +-------- + +* test-v4 + + - Feature _markpages_ working in semi-automatic mode using command + + python red.py -task:markpages -family:wikipedia + + - Work on specific redfam using param + + -famhash:[sha1-famhash] + + - Use _PyMySQL_ instead of _OurSQL_ + + - Correctly parse redfams with articles with leading small character or spaces in wikilink + +* test-v3 + +* test-v2 + +* test-v1 + +License +------- +GPLv3 + +Author Information +------------------ + +Copyright 2017 Jonathan Golder jonathan@golderweb.de https://golderweb.de/ + +alias Wikipedia.org-User _Jogo.obb_ (https://de.wikipedia.org/Benutzer:Jogo.obb) From ec2b84df2af2fd11ef6580141be9453f4116152b Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 23 Sep 2017 21:09:58 +0200 Subject: [PATCH
177/192] Add requirements To make setup of environment for this module easier --- README.md | 15 +++++++++++++++ requirements.txt | 23 +++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 requirements.txt diff --git a/README.md b/README.md index 964dd07..1fac5a6 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,21 @@ jogobot-red =========== +Dependencies +------------ + +* pywikibot-core +* mwparserfromhell + +The libraries above need to be installed and configured manualy considering [documentation of pywikibot-core](https://www.mediawiki.org/wiki/Manual:Pywikibot). + +* SQLAlchemy +* PyMySQL + +Those can be installed using pip and the _requirements.txt_ file provided with this packet + + pip install -r requirements.txt + Versions -------- diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..45f37b6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,23 @@ +# This is a PIP 6+ requirements file for using jogobot-red +# +# All dependencies can be installed using: +# $ sudo pip install -r requirements.txt +# +# It is good practise to install packages using the system +# package manager if it has a packaged version. If you are +# unsure, please use pip as described at the top of the file. 
+# +# To get a list of potential matches, use +# +# $ awk -F '[#>=]' '{print $1}' requirements.txt | xargs yum search +# or +# $ awk -F '[#>=]' '{print $1}' requirements.txt | xargs apt-cache search + +# Needed for Database-Connection +# SQLAlchemy Python ORM-Framework +SQLAlchemy>=1.1 +# PyMySQL DB-Connector +PyMySQL>=0.7 + +# Also needed, but not covered here, is a working copy of pywikibot-core +# which also brings mwparserfromhell From bdccc8417ce943a6f830de8b0c137ce5836fe367 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 2 Sep 2017 15:53:51 +0200 Subject: [PATCH 178/192] Set always in Pywikibot.Basebot If cmdline param -always is set, set the related option in Pywikibot.Basebot Object for automatic edits without further requests Related Task: [FS#152](https://fs.golderweb.de/index.php?do=details&task_id=152) --- bots/markpages.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bots/markpages.py b/bots/markpages.py index d8e1545..5dcf527 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -73,7 +73,9 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() self.build_generator() # Run super class init with builded generator - super( MarkPagesBot, self ).__init__(generator=self.gen) + super( MarkPagesBot, self ).__init__( + generator=self.gen, + always=True if "always" in kwargs else False ) def run(self): """ From 0c135ef1bb4306e5a91a3b61d3cd9c14b50082d6 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 23 Sep 2017 23:50:42 +0200 Subject: [PATCH 179/192] Describe version test-v5 --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 1fac5a6..dea3bbf 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,11 @@ Those can be installed using pip and the _requirements.txt_ file provided with t Versions -------- +* test-v5 + - Feature _markpages_ working in full-automatic mode with _always_-flag + + python red.py -task:markpages -family:wikipedia -always + *
test-v4 - Feature _markpages_ working in semi-automatic mode using command From 84802cf52161d9a8e1380c67d83071de59f3e2ed Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 28 Oct 2017 18:41:06 +0200 Subject: [PATCH 180/192] Remove leading or trailing spaces in articles Some articles contain spaces between title and anchor part which will be stripped now Related Task: [FS#159](https://fs.golderweb.de/index.php?do=details&task_id=159) --- lib/redfam.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/redfam.py b/lib/redfam.py index f36d1bc..6f73a5c 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -297,6 +297,10 @@ class RedFamParser( RedFam ): article[0] = article[0].replace("_", " ") if len(article) > 1: + # Strip both parts to prevent leading/trailing spaces + article[0] = article[0].strip() + article[1] = article[1].strip() + # other way round, replace spaces with underscores in anchors article[1] = article[1].replace(" ", "_") From 614f288bb9d9ae7de1ea89df485cab2f30f777ad Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 28 Oct 2017 18:44:05 +0200 Subject: [PATCH 181/192] Activate jogobot status api for onwiki disabling Related Task: [FS#86](https://fs.golderweb.de/index.php?do=details&task_id=86) --- red.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/red.py b/red.py index 8ea4f7b..8abffa3 100644 --- a/red.py +++ b/red.py @@ -124,8 +124,8 @@ def main(*args): # Disabled until [FS#86] is done # Before run, we need to check wether we are currently active or not - # if not jogobot.bot.active( task_slug ): - # return + if not jogobot.bot.active( task_slug ): + return # Parse local Args to get information about subtask ( subtask, genFactory, subtask_args ) = jogobot.bot.parse_local_args( From 108b7aa33144941777e3ceb5eb3b9ad36441885a Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 28 Oct 2017 18:46:30 +0200 Subject: [PATCH 182/192] Describe version test-v6 --- README.md | 4 ++++ 1 file changed, 4 insertions(+) 
diff --git a/README.md b/README.md index dea3bbf..cf72408 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,10 @@ Those can be installed using pip and the _requirements.txt_ file provided with t Versions -------- +* test-v6 + - jogobot status API enabled (Bot can be disabled onwiki) + - Fixed Problem with space between article title and anchor + * test-v5 - Feature _markpages_ working in full-automatic mode with _always_-flag From 5f4640d5ff1aa42c0048d25cea34b5cba46b74c1 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 28 Oct 2017 22:35:25 +0200 Subject: [PATCH 183/192] Replace urlencoded chars with unicode equivalent Otherwise we get value errors while marking since pwb replaces those Related Task: [FS#160](https://fs.golderweb.de/index.php?do=details&task_id=160) --- lib/redfam.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/redfam.py b/lib/redfam.py index 6f73a5c..191a895 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -28,6 +28,7 @@ Provides classes for working with RedFams import hashlib import locale import re +import urllib.parse from datetime import datetime import mwparserfromhell as mwparser # noqa @@ -291,6 +292,9 @@ class RedFamParser( RedFam ): # Make sure first letter is uppercase article = article[0].upper() + article[1:] + # Unquote possible url encoded special chars + article = urllib.parse.unquote( article ) + # Split in title and anchor part article = article.split("#", 1) # Replace underscores in title with spaces From 33b2e47312a0902ba11b6de275e3cd04ba078f62 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 28 Oct 2017 22:43:53 +0200 Subject: [PATCH 184/192] Describe version test-v7 --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index cf72408..0074db1 100644 --- a/README.md +++ b/README.md @@ -18,10 +18,12 @@ Those can be installed using pip and the _requirements.txt_ file provided with t Versions -------- +* test-v7 + - Fixed problem with url encoded 
chars in article title * test-v6 - jogobot status API enabled (Bot can be disabled onwiki) - - Fixed Problem with space between article title and anchor + - Fixed problem with space between article title and anchor * test-v5 - Feature _markpages_ working in full-automatic mode with _always_-flag From e18aa96a84c0f074de7689184e2b9ff663195453 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sun, 5 Nov 2017 11:15:04 +0100 Subject: [PATCH 185/192] redfam: article_generator can return talkpage To make pywikibot.pagegenerators.PageWithTalkPageGenerators unnecessary so we can manipulate talkpage object directly Related Task: [FS#161](https://fs.golderweb.de/index.php?do=details&task_id=161) --- lib/redfam.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/redfam.py b/lib/redfam.py index 191a895..0d6c4fe 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -514,7 +514,8 @@ class RedFamWorker( RedFam ): def article_generator(self, # noqa filter_existing=None, filter_redirects=None, exclude_article_status=[], - onlyinclude_article_status=[] ): + onlyinclude_article_status=[], + talkpages=None ): """ Yields pywikibot pageobjects for articles belonging to this redfams in a generator @@ -528,6 +529,8 @@ set to False to get only redirectpages, unset/None results in not filtering @type filter_redirects bool/None + @param talkpages Set to True to get Talkpages instead of article page + @type talkpages bool/None """ @@ -591,6 +594,10 @@ except Break: break + # Toggle talkpage + if talkpages: + page = page.toggleTalkPage() + # Yield filtered pages yield page From 20103d589d137bc2632a889665a15d8930f03cfc Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sun, 5 Nov 2017 11:18:53 +0100 Subject: [PATCH 186/192] redfam: article_generator add redfam info to page Add reference to redfam object and article title from db to Page object since Page.title() may differ (short Namespaces, anchors,
special chars) Related Task: [FS#161](https://fs.golderweb.de/index.php?do=details&task_id=161) --- lib/redfam.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/redfam.py b/lib/redfam.py index 0d6c4fe..8248a7e 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -598,6 +598,12 @@ class RedFamWorker( RedFam ): if talkpages: page = page.toggleTalkPage() + # Add reference to redfam to pages + page.redfam = self + + # Keep article title from db with page object + page.redarticle = article + # Yield filtered pages yield page From bfec2abf9822aea6705259bf2003aebdbfacebb5 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sun, 5 Nov 2017 11:20:55 +0100 Subject: [PATCH 187/192] markpages: Get rid of PageWithTalkPageGenerator Since redfam.article_generator can yield talkpage with additional information about redfam and current article from db, we do not need it anymore. Related Task: [FS#161](https://fs.golderweb.de/index.php?do=details&task_id=161) --- bots/markpages.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index 5dcf527..e2f587f 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -145,14 +145,10 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() for redfam in self.redfams: # We need the talkpage (and only this) of each existing page - for talkpage in pagegenerators.PageWithTalkPageGenerator( - redfam.article_generator( - filter_existing=True, - exclude_article_status=["marked"] ), - return_talk_only=True ): - - # Add reference to redfam to talkpages - talkpage.redfam = redfam + for talkpage in redfam.article_generator( + filter_existing=True, + exclude_article_status=["marked"], + talkpages=True ): yield talkpage From 9640467f6930572c57dc8a3df9b17cf1adbaaea0 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sun, 5 Nov 2017 11:22:43 +0100 Subject: [PATCH 188/192] markpages: Use redarticle attribute of Page Instead of trying to reconstruct our db 
article title, use the one added to Page-object by redfam.article_generator Related Task: [FS#161](https://fs.golderweb.de/index.php?do=details&task_id=161) --- bots/markpages.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/bots/markpages.py b/bots/markpages.py index e2f587f..f395af2 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -184,14 +184,8 @@ class MarkPagesBot( CurrentPageBot ): # sets 'current_page' on each treat() # None if change was not accepted by user save_ret = self.put_current( self.new_text, summary=summary ) - # Normalize title with anchor (replace spaces in anchor) - article = self.current_page.toggleTalkPage().title( - asLink=True, textlink=True) - article = article.strip("[]") - article_parts = article.split("#", 1) - if len(article_parts) == 2: - article_parts[1] = article_parts[1].replace(" ", "_") - article = "#".join(article_parts) + # Get article as named in db + article = self.current_page.redarticle # Status if add_ret is None or ( add_ret and save_ret ): From 788a3df0cd9368b07c541db74de9db4fa2b2a430 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sun, 5 Nov 2017 12:00:28 +0100 Subject: [PATCH 189/192] Update jogobot-submodule to v0.1 --- jogobot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jogobot b/jogobot index 49ada29..d69d873 160000 --- a/jogobot +++ b/jogobot @@ -1 +1 @@ -Subproject commit 49ada2993e345600523c161c5e2516ec65625684 +Subproject commit d69d873624abb70a25a0aef711a635cfc88aa7e9 From b4c193eedc56f221ff83a8d531370a9c93ffc804 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sun, 5 Nov 2017 12:07:38 +0100 Subject: [PATCH 190/192] Disable echoing of SQLAlchemy Egine We don't need this extensive output for production --- lib/mysqlred.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index b4529d7..8ab083e 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -54,7 +54,7 @@ url = URL( "mysql+pymysql", 
database=config.db_username + jogobot.config['db_suffix'], query={'charset': 'utf8'} ) -engine = create_engine(url, echo=True) +engine = create_engine(url, echo=False) Session = sessionmaker(bind=engine) From 1b6faf9e53edf3b2c144bafd0b513a56a2ca5322 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sun, 5 Nov 2017 12:17:05 +0100 Subject: [PATCH 191/192] Use own db for red-task Since we have several tables and sometimes need to create a copy on replication servers. --- lib/mysqlred.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/mysqlred.py b/lib/mysqlred.py index 8ab083e..aef9440 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -51,7 +51,8 @@ url = URL( "mysql+pymysql", password=config.db_password, host=config.db_hostname, port=config.db_port, - database=config.db_username + jogobot.config['db_suffix'], + database=( config.db_username + + jogobot.config['redundances']['db_suffix'] ), query={'charset': 'utf8'} ) engine = create_engine(url, echo=False) From 93447d8dc65abc6f1ad2a1df8d936721b2edc702 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sun, 5 Nov 2017 12:25:13 +0100 Subject: [PATCH 192/192] Prepare release v1.0 Update Copyright Notices Version information --- README.md | 4 ++++ bots/markpages.py | 2 +- bots/reddiscparser.py | 2 +- lib/mysqlred.py | 2 +- lib/redfam.py | 2 +- lib/redpage.py | 2 +- red.py | 2 +- 7 files changed, 10 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 0074db1..18d7d0f 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,10 @@ Those can be installed using pip and the _requirements.txt_ file provided with t Versions -------- +* v1.0 + - first stable release + - less debug output + - fixed problems with article title * test-v7 - Fixed problem with url encoded chars in article title diff --git a/bots/markpages.py b/bots/markpages.py index f395af2..ca99406 100644 --- a/bots/markpages.py +++ b/bots/markpages.py @@ -3,7 +3,7 @@ # # markpages.py # -# Copyright 2016 GOLDERWEB – 
Jonathan Golder +# Copyright 2017 Jonathan Golder # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/bots/reddiscparser.py b/bots/reddiscparser.py index 44f2aba..839db7d 100644 --- a/bots/reddiscparser.py +++ b/bots/reddiscparser.py @@ -3,7 +3,7 @@ # # reddiscparser.py # -# Copyright 2016 GOLDERWEB – Jonathan Golder +# Copyright 2017 Jonathan Golder # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/lib/mysqlred.py b/lib/mysqlred.py index aef9440..9b0c2e5 100644 --- a/lib/mysqlred.py +++ b/lib/mysqlred.py @@ -3,7 +3,7 @@ # # mysqlred.py # -# Copyright 2015 GOLDERWEB – Jonathan Golder +# Copyright 2017 Jonathan Golder # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/lib/redfam.py b/lib/redfam.py index 8248a7e..7241366 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -3,7 +3,7 @@ # # redfam.py # -# Copyright 2017 GOLDERWEB – Jonathan Golder +# Copyright 2017 Jonathan Golder # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/lib/redpage.py b/lib/redpage.py index 1c535ad..ad296dc 100644 --- a/lib/redpage.py +++ b/lib/redpage.py @@ -3,7 +3,7 @@ # # redpage.py # -# Copyright 2015 GOLDERWEB – Jonathan Golder +# Copyright 2017 Jonathan Golder # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/red.py b/red.py index 8abffa3..c7a23ae 100644 --- a/red.py +++ b/red.py @@ -3,7 +3,7 @@ # # reddiscparser.py # -# Copyright 2016 GOLDERWEB – Jonathan Golder +# Copyright 2017 Jonathan Golder # # This program is free software; you can redistribute it and/or 
modify # it under the terms of the GNU General Public License as published by