From e186f2f22b9ac86c3d0946d26378cb8f394b4aca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?GOLDERWEB=20=E2=80=93=20Jonathan=20Golder?= Date: Sun, 20 Sep 2015 17:45:07 +0200 Subject: [PATCH] Use dictionary with page_id / fam_hash as key for cached_insert_data to prevent duplicate entries --- mysqlred.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/mysqlred.py b/mysqlred.py index cb9754d..589c648 100644 --- a/mysqlred.py +++ b/mysqlred.py @@ -54,7 +54,7 @@ class MysqlRed: # Class variables for storing cached querys _cached_update_data = [] _update_query = '' - _cached_insert_data = [] + _cached_insert_data = {} _insert_query = '' def __init__( self ): @@ -89,8 +89,9 @@ class MysqlRed: # Execute insert query if cls._cached_insert_data: - cursor.executemany( cls._insert_query, cls._cached_insert_data ) - cls._cached_insert_data = [] + print( cls._cached_insert_data ) + cursor.executemany( cls._insert_query, ( cls._cached_insert_data[ key ] for key in cls._cached_insert_data ) ) + cls._cached_insert_data = {} # Execute update query # Use executemany since update could not be reduced to one query @@ -113,7 +114,7 @@ class MysqlRedPage( MysqlRed ): _update_query = 'UPDATE `red_pages` \ SET `page_title` = ?, `rev_id` = ?, `status`= ? WHERE `page_id` = ?;' - _cached_insert_data = [] + _cached_insert_data = {} _insert_query = 'INSERT INTO `red_pages` \ ( page_id, page_title, rev_id, status ) VALUES ( ?, ?, ?, ? );' @@ -161,8 +162,10 @@ SET `page_title` = ?, `rev_id` = ?, `status`= ? 
WHERE `page_id` = ?;' @param int status Page parsing status """ - type( self )._cached_insert_data.append( ( self.__page_id, page_title, - rev_id, status ) ) + insert_data = { self.__page_id: ( self.__page_id, page_title, + rev_id, status ) } + + type( self )._cached_insert_data.update( insert_data ) # Manualy construct self.data dict self.data = { 'page_id': self.__page_id, 'rev_id': rev_id, @@ -197,7 +200,7 @@ class MysqlRedFam( MysqlRed ): SET `red_page_id` = ?, `heading` = ?, `beginning` = ?, `ending` = ?, \ `status`= ? WHERE `fam_hash` = ?;' - _cached_insert_data = [] + _cached_insert_data = {} _insert_query = 'INSERT INTO `red_families` \ ( fam_hash, red_page_id, beginning, ending, status, heading, \ article0, article1, article2, article3, article4, article5, article6, \ @@ -242,7 +245,8 @@ article7 ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? );' data = tuple( data ) - type( self )._cached_insert_data.append( data ) + insert_data = { self.__fam_hash: data } + type( self )._cached_insert_data.update( insert_data ) # Manualy construct self.data dict data_keys = ( 'fam_hash', 'red_page_id', 'beginning', 'ending',