#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Handling of redundance discussion pages and their archives."""

from mysql_red import MYSQL_RED_PAGE
from red_fam import RED_FAM_PARSER


class RED_PAGE:
    """Class for handling redundance discussion pages and archives"""

    def __init__(self, page, archive=False):
        """
        Generate a new RED_PAGE object based on the given pywikibot page object

        The page is looked up in the database (and added if there is no
        record yet). It is parsed, and its database record updated, only if
        its meta data differs from the stored record or the stored status
        is 0.

        @param page     pywikibot page object of the discussion page
        @param archive  set to True to treat the page as an archive
                        regardless of its title
        """
        # Save the pywikibot page object
        self.page = page
        self._archive = archive

        self.__handle_db()
        self.is_page_changed()

        # Set to True by parse() once the whole page has been processed
        self.__parsed = None
        if self.__changed or self.__mysql.data['status'] == 0:
            self.parse()
            self.__update_db()

    def __handle_db(self):
        """
        Handles opening of db connection

        Creates the MYSQL_RED_PAGE object for this page id and adds the page
        (title and revision id) if that object holds no data for it.
        """
        # We need a connection to our mysqldb
        self.__mysql = MYSQL_RED_PAGE(self.page._pageid)

        if not self.__mysql.data:
            self.__mysql.add_page(self.page.title(), self.page._revid)

    def is_page_changed(self):
        """
        Check whether the page was changed since last run

        Compares the stored database record with the current page id,
        revision id and title. The stored status is copied into the
        comparison value, so it never counts as a difference.

        @return True if the stored record differs from the current page
                meta data, False otherwise
        """
        current = {
            'page_id': self.page._pageid,
            'rev_id': self.page._revid,
            'page_title': self.page.title(),
            'status': self.__mysql.data['status'],
        }
        self.__changed = self.__mysql.data != current
        return self.__changed

    def is_archive(self):
        """
        Detects whether current page is an archive of discussions

        @return True if the page was flagged as archive on construction or
                its title contains "/Archiv", False otherwise
        """
        return bool(self._archive or (u"/Archiv" in self.page.title()))

    def parse(self):
        """
        Handles the parsing process

        Walks through the page text line by line. Every line recognised by
        RED_FAM_PARSER.is_sectionheading() starts a new "Redundance-Family";
        the lines following it are searched for a beginning date first and,
        once that was found, for an ending date. For every family with a
        heading and a beginning date a RED_FAM_PARSER object is created.

        After a complete pass the page is marked as parsed, so that
        __update_db() can store a status other than 0.
        """
        fam_heading = None
        beginning = None
        ending = None

        # Since the page text is a string we need to split it in lines
        for line in self.page.text.split("\n"):

            # Check whether we have a "Redundance-Family" section heading
            if RED_FAM_PARSER.is_sectionheading(line):

                # Before working with the next red_fam create the object
                # for the one before (if there is one)
                if fam_heading and beginning:
                    try:
                        RED_FAM_PARSER(fam_heading, self.page._pageid,
                                       self.is_archive(), beginning, ending)
                    except Exception:
                        # Deliberately best-effort: skip this family and
                        # keep parsing the remaining sections. Only
                        # Exception is caught so that KeyboardInterrupt and
                        # SystemExit still get through.
                        pass

                # Save heading and (re)initialise the dates
                fam_heading = line
                beginning = None
                ending = None

            # Otherwise we are in a section body as soon as any heading was
            # seen (this also holds for a heading on the very first line)
            elif fam_heading is not None:

                # Look for the beginning date first; only once that is known
                # look for the ending date (done-notice)
                if not beginning:
                    beginning = RED_FAM_PARSER.is_beginning(line)
                elif not ending:
                    ending = RED_FAM_PARSER.is_ending(line)

        # For the last red_fam create the object. Errors raised here are not
        # suppressed and propagate to the caller.
        if fam_heading and beginning:
            RED_FAM_PARSER(fam_heading, self.page._pageid,
                           self.is_archive(), beginning, ending)

        # Whole page processed: mark it as parsed
        self.__parsed = True

    def __update_db(self):
        """
        Updates the page meta data in mysql db

        Writes the current revision id and title together with a status:
        0 if the page was not parsed, 1 if it was parsed, 2 if it was parsed
        and is an archive.
        """
        if not self.__parsed:
            status = 0
        elif self.is_archive():
            status = 2
        else:
            status = 1

        self.__mysql.update_page(self.page._revid, self.page.title(), status)