Merge branch 'restucture-parsers' into test-v3
107
parse-pages.py
Normal file
@@ -0,0 +1,107 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# parse-pages.py
#
# Copyright 2016 GOLDERWEB – Jonathan Golder <jonathan@golderweb.de>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
"""
Script to parse all redpages in configured categories
"""

import pywikibot
from pywikibot import pagegenerators

import jogobot

import redpage
import redfam


def get_cat_pages( cat ):
    """
    Generates an iterable generator object with all pages listed in the given
    category

    @param cat Category to request
    @type cat str

    @returns generator Iterable object with pages of the given category
    """

    # Get site to work on from pywikibot config
    site = pywikibot.Site()

    # Retrieve the content of the given category
    category = pywikibot.Category( site, cat )

    # Build an iterable generator object with page objects for given category
    generator = pagegenerators.CategorizedPageGenerator( category )

    return generator


def main(*args):
    """
    Handles the parsing process
    """

    try:
        jogobot.output( "BEGIN – parse-pages.py" )

        # Iterate over configured categories
        for cat in ( jogobot.config["redundances"]["redpage_cats"] ):

            # Iterate over pages in current cat
            for page in get_cat_pages( cat ):

                # For pages configured to be excluded, go on with next page
                if page.title() in (
                        jogobot.config["redundances"]["redpage_exclude"] ):
                    continue

                # Initiate RedPage object
                red_page = redpage.RedPage( page )

                # Check whether parsing is needed
                if red_page.is_parsing_needed():

                    # Iterate over returned generator with redfam sections
                    for fam in red_page.parse():

                        # Run RedFamParser on section text
                        redfam.RedFamParser.parser( fam, red_page.page._pageid,
                                                    red_page.is_archive() )
                    else:
                        # If whole page was parsed successfully, flush
                        # db write cache
                        redfam.RedFamParser.flush_db_cache()
                        jogobot.output( "Page '%s' parsed" %
                                        red_page.page.title() )
            else:
                # If all pages in cat were parsed successfully,
                # flush db write cache
                redpage.RedPage.flush_db_cache()

    finally:
        jogobot.output( "END – parse-pages.py" )
        pywikibot.stopme()


if( __name__ == "__main__" ):
    main()
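The new entry point simply chains generators: configured categories → pages → RedPage.parse() → RedFamParser. For a quick smoke test, the same generator chain can be driven by hand; a minimal sketch, assuming a configured pywikibot site (the category name is a made-up example, not part of this commit):

import pywikibot
from pywikibot import pagegenerators

site = pywikibot.Site()

# "Kategorie:Wikipedia:Redundanz" is an illustrative category name
category = pywikibot.Category( site, "Kategorie:Wikipedia:Redundanz" )

# Same pattern as get_cat_pages(): wrap the category in a page generator
for page in pagegenerators.CategorizedPageGenerator( category ):
    print( page.title() )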
321
redfam.py
@@ -30,7 +30,9 @@ import locale
import re
from datetime import datetime

import pywikibot
import mwparserfromhell as mwparser  # noqa
import pywikibot  # noqa
from pywikibot.tools import deprecated  # noqa

import jogobot
from mysqlred import MysqlRedFam
@@ -40,12 +42,12 @@ class RedFam:
    """
    Basic class for RedFams, containing the basic data structure
    """

    def __init__( self, articlesList, beginning, ending=None, red_page_id=None,
                  status=0, fam_hash=None, heading=None ):
        """
        Generates a new RedFam object

        @param articlesList list List of articles
        @param beginning datetime Beginning date
        @param ending datetime Ending date
@@ -54,7 +56,7 @@ class RedFam:
        @param fam_hash str SHA1 hash of articlesList
        @param heading str Original heading of RedFam (Link)
        """

        # Initial attribute values
        self._articlesList = articlesList
        self._beginning = beginning
@@ -63,18 +65,18 @@ class RedFam:
        self._status = status
        self._fam_hash = fam_hash
        self._heading = heading

        # Calculates the sha1 hash over self._articlesList to
        # rediscover known redundance families
        self.calc_fam_hash()

    def __repr__( self ):
        """
        Returns the representation string of the RedFam object

        @returns str repr() string
        """

        __repr = "RedFam( " + \
            "articlesList=" + repr( self._articlesList ) + \
            ", heading=" + repr( self._heading ) + \
@@ -84,28 +86,28 @@ class RedFam:
            ", status=" + repr( self._status ) + \
            ", fam_hash=" + repr( self._fam_hash ) + \
            " )"

        return __repr

    def calc_fam_hash( self ):
        """
        Calculates the SHA-1 hash for the articlesList of a redundance family.
        Since we don't need security, SHA-1 is just fine.

        @returns str String with the hexadecimal hash digest
        """

        h = hashlib.sha1()
        h.update( str( self._articlesList[:8] ).encode('utf-8') )

        if self._fam_hash and h.hexdigest() != self._fam_hash:
            raise RedFamHashError( self._fam_hash, h.hexdigest() )

        elif self._fam_hash:
            return
        else:
            self._fam_hash = h.hexdigest()

    @classmethod
    def flush_db_cache( cls ):
        """
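calc_fam_hash() uses SHA-1 purely as a stable lookup key for rediscovering known redundance families, not for security. A self-contained sketch of the same idea (the function name is illustrative, not part of the module):

import hashlib

def fam_hash( articles ):
    # Hash str() of the first eight article titles, as calc_fam_hash() does;
    # the digest only serves as a stable db key, so SHA-1 is sufficient
    h = hashlib.sha1()
    h.update( str( articles[:8] ).encode( 'utf-8' ) )
    return h.hexdigest()

# The same articles in the same order always map to the same key
assert fam_hash( [ "Foo", "Bar" ] ) == fam_hash( [ "Foo", "Bar" ] )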
@@ -119,28 +121,28 @@ class RedFamParser( RedFam ):
    Provides an interface to RedFam for adding/updating redundance families
    while parsing redundance pages
    """

    # Define the timestamp format
    __timestamp_format = jogobot.config['redundances']['timestamp_format']

    # Define section heading re.pattern
    __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" )

    __sectionhead_pat = re.compile( r"^(.*\[\[.+\]\].*\[\[.+\]\].*)" )

    # Define timestamp re.pattern
    __timestamp_pat = re.compile( jogobot.config['redundances']
                                  ['timestamp_regex'] )

    # Text pattern for recognition of done-notices
    __done_notice = ":<small>Archivierung dieses Abschnittes \
wurde gewünscht von:"
    __done_notice2 = "{{Erledigt|"

    def __init__( self, heading, red_page_id, red_page_archive,
                  beginning, ending=None ):
        """
        Creates a RedFam object based on data collected while parsing red_pages,
        combined with possibly former known data from db

        @param red_fam_heading str Wikitext heading of section
        @param red_page_id int MediaWiki page_id
        @param red_page_archive bool Is red_page an archive
@@ -149,132 +151,132 @@ class RedFamParser( RedFam ):
        @param ending datetime Timestamp of ending
                      str strptime parseable string
        """

        # Set object attributes:
        self._red_page_id = red_page_id
        self._red_page_archive = red_page_archive
        self._fam_hash = None

        # Method self.add_beginning sets self._beginning directly
        self.add_beginning( beginning )

        # Method self.add_ending sets self._ending directly
        if( ending ):
            self.add_ending( ending )
        else:
            # If no ending was provided set to None
            self._ending = None

        self._status = None

        # Parse the provided heading of redundance section
        # to set self._articlesList
        self.heading_parser( heading )

        # Calculates the sha1 hash over self._articlesList to
        # rediscover known redundance families

        self.calc_fam_hash()

        # Open database connection, ask for data if existing,
        # otherwise create entry
        self.__handle_db()

        # Check status changes
        self.status()

        # Triggers db update if anything changed
        self.changed()

    def __handle_db( self ):
        """
        Handles opening of db connection
        """

        # We need a connection to our mysqldb
        self.__mysql = MysqlRedFam( )
        self.__mysql.get_fam( self._fam_hash )

        if not self.__mysql.data:
            self.__mysql.add_fam( self._articlesList, self._heading,
                                  self._red_page_id, self._beginning,
                                  self._ending )

    def heading_parser( self, heading ):
        """
        Parses the given red_fam_heading string and saves the articles list

        @param heading Heading of RedFam-Section
        @type heading wikicode or mwparser-parseable
        """

        # Predefine a pattern for wikilinks' destination
        wikilink_pat = re.compile( r"\[\[([^\[\]\|]+)(?:\]\]|\|)" )

        # Parse content of heading for generating section links later
        match = type( self ).__sectionhead_pat.search( heading )
        if match:
            self._heading = match.group(2).strip()
        else:
            raise RedFamHeadingError( heading )

        # We get the pages in first [0] element iterating over
        # wikilink_pat.findall( line )
        # Strip leading and trailing whitespace in links to prevent wrong
        # fam_hashes (when receiving redfam from db) since MySQL drops it
        self._articlesList = [ link.strip() for link
                               in wikilink_pat.findall( self._heading ) ]

        # Parse heading with mwparser if needed
        if not isinstance( heading, mwparser.wikicode.Wikicode ):
            heading = mwparser.parse( heading )

        # Save heading as string
        self._heading = str( heading )

        # Save destinations of wikilinks in headings
        self._articlesList = [ str( link.title ) for link
                               in heading.ifilter_wikilinks() ]

        # Catch sections with more than 8 articles, print error
        if len( self._articlesList ) > 8:
            # For representation in output we need to know the fam hash
            self.calc_fam_hash()
            pywikibot.output( "\
{datetime} – \03{{lightred}}[WARNING] – \
Maximum number of articles in red_fam exceeded, maximum number is 8, \
{number:d} were given \n {repress}".format(
                datetime=datetime.now().strftime( "%Y-%m-%d %H:%M:%S" ),
                number=len( self._articlesList ), repress=repr( self ) ) )

            jogobot.output(
                ( "\03{{lightred}}" +
                  "Maximum number of articles in red_fam exceeded, " +
                  "maximum number is 8, {number:d} were given \n {repress}"
                  ).format( datetime=datetime.now().strftime(
                      "%Y-%m-%d %H:%M:%S" ), number=len( self._articlesList ),
                      repress=repr( self ) ),
                "WARNING" )

            # Only save the first 8 articles
            self._articlesList = self._articlesList[:8]

    def add_beginning( self, beginning ):
        """
        Adds the beginning date of a redundance discussion to the object

        @param beginning datetime Beginning date
        """

        self._beginning = self.__datetime( beginning )

    def add_ending( self, ending ):
        """
        Adds the ending date of a redundance discussion to the object.

        @param ending datetime Ending date
        """

        self._ending = self.__datetime( ending )

    def __datetime( self, timestamp ):
        """
        Decides whether the given timestamp is a parseable string or a
        datetime object and returns a datetime object in both cases

        @param timestamp datetime Datetime object
                         str Parseable string with timestamp

        @returns datetime Datetime object
        """

        # Make sure locale is set to 'de_DE.UTF-8' to prevent problems
        # with wrong month abbreviations in strptime
        locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

        if( isinstance( timestamp, datetime ) ):
            return timestamp
        else:
            result = datetime.strptime( timestamp,
                                        type( self ).__timestamp_format )
            return result

    def status( self ):
        """
        Handles detection of correct status
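The new heading_parser() extracts the article names from the wikilinks of a section heading via mwparserfromhell instead of the hand-rolled regex it replaces. A standalone sketch of that extraction, with a made-up heading for illustration:

import mwparserfromhell as mwparser

# Hypothetical section heading of a redundance family
heading = "[[Foo (Begriff)|Foo]] - [[Bar]]"

wikicode = mwparser.parse( heading )

# Wikilink destinations become the articles list, as in heading_parser()
articles = [ str( link.title ) for link in wikicode.ifilter_wikilinks() ]

print( articles )  # ['Foo (Begriff)', 'Bar']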
@@ -284,10 +286,10 @@ Maximum number of articles in red_fam exceeded, maximum number is 8, \
        - 2 Discussion archived --> ending (normally) present, page is archive
        - 3 and greater: status was set by worker script, do not change it
        """

        # Do not change stati set by worker script etc.
        if not self.__mysql.data['status'] > 2:

            # No ending, discussion is running:
            # Sometimes archived discussions also have no detectable ending
            if not self._ending and not self._red_page_archive:
@@ -299,101 +301,150 @@ Maximum number of articles in red_fam exceeded, maximum number is 8, \
                self._status = 2
        else:
            self._status = self.__mysql.data[ 'status' ]

    def changed( self ):
        """
        Checks whether anything has changed and maybe triggers a db update
        """

        # On archived red_fams do not delete possibly existing ending
        if( not self._ending and self._status > 1
                and self.__mysql.data[ 'ending' ] ):

            self._ending = self.__mysql.data[ 'ending' ]

        # Since status change means something has changed, update database
        if( self._status != self.__mysql.data[ 'status' ] or
                self._beginning != self.__mysql.data[ 'beginning' ] or
                self._ending != self.__mysql.data[ 'ending' ] or
                self._red_page_id != self.__mysql.data[ 'red_page_id' ] or
                self._heading != self.__mysql.data[ 'heading' ]):

            self.__mysql.update_fam( self._red_page_id, self._heading,
                                     self._beginning, self._ending,
                                     self._status )

    @classmethod
    @deprecated
    def is_sectionheading( cls, line ):
        """
        Checks whether the given line is a red_fam section heading

        @param str line String to check

        @returns bool Returns True if it is a section heading
        """

        if cls.__sectionhead_pat.search( line ):

        if cls.__sectionhead_pat.search( str(line) ):
            return True
        else:
            return False

    @classmethod
    def parser( cls, text, pageid, isarchive=False ):
        """
        Handles parsing of a redfam section

        @param text Text of RedFam-Section
        @type text wikicode or mwparser-parseable
        """

        # Parse text with mwparser if needed
        if not isinstance( text, mwparser.wikicode.Wikicode ):
            text = mwparser.parse( text )

        # Extract heading text
        heading = next( text.ifilter_headings() ).title

        # Extract beginning and maybe ending
        (beginning, ending) = RedFamParser.extract_dates( text, isarchive )

        # Create the RedFam object
        RedFamParser( heading, pageid, isarchive, beginning, ending )

    @classmethod
    def extract_dates( cls, text, isarchive=False ):
        """
        Returns a tuple of the first and maybe the last timestamp of a section.
        The last timestamp is only returned if there is a done notice or param
        *isarchive* is set to 'True'

        @param text Text to search in
        @type text Any type castable to str
        @param isarchive If true, skip searching done notice (on archive pages)
        @type isarchive bool

        @returns Timestamps, otherwise None
        @returntype tuple of strs
        """

        # Match all timestamps
        matches = cls.__timestamp_pat.findall( str( text ) )
        if matches:

            # First one is the beginning
            # Since some timestamps are broken we need to reconstruct them
            # by regex match groups
            beginning = ( matches[0][0] + ", " + matches[0][1] + ". " +
                          matches[0][2] + ". " + matches[0][3] )

            # Last one maybe is the ending
            # Done notice format 1
            # Done notice format 2
            # Or on archive pages
            if ( cls.__done_notice in text or
                 cls.__done_notice2 in text or
                 isarchive ):

                ending = ( matches[-1][0] + ", " + matches[-1][1] + ". " +
                           matches[-1][2] + ". " + matches[-1][3] )

            else:
                ending = None

        return (beginning, ending)

    @classmethod
    @deprecated( extract_dates )
    def is_beginning( cls, line ):
        """
        Returns the first timestamp found in line, otherwise None

        @param str line String to search in

        @returns str Timestamp, otherwise None
        """

        match = cls.__timestamp_pat.search( line )
        if match:
            # Since some timestamps are broken we need to reconstruct them
            # by regex match groups
            result = match.group(1) + ", " + match.group(2) + ". " +\
                match.group(3) + ". " + match.group(4)
            return result
        else:
            return None

        return cls.extract_dates( line )[0]

    @classmethod
    def is_ending( cls, line ):
    @deprecated( extract_dates )
    def is_ending( cls, line, isarchive=False ):
        """
        Returns the timestamp of the done notice ( if one ), otherwise None
        @param str line String to search in

        @returns str Timestamp, otherwise None

        @param line String to search in
        @type line str
        @param isarchive If true, skip searching done notice (on archive pages)
        @type isarchive bool

        @returns Timestamp, otherwise None
        @returntype str
        """

        if ( cls.__done_notice in line ) or ( cls.__done_notice2 in line ):
            match = cls.__timestamp_pat.search( line )
            if match:
                # Since some timestamps are broken we need to reconstruct them
                # by regex match groups
                result = match.group(1) + ", " + match.group(2) + ". " +\
                    match.group(3) + ". " + match.group(4)
                return result
        return None

        return cls.extract_dates( line )[1]

    @classmethod
    @deprecated( extract_dates )
    def is_ending2( cls, line ):
        """
        Returns the last timestamp found in line, otherwise None
        @param str line String to search in

        @returns str Timestamp, otherwise None
        """

        matches = cls.__timestamp_pat.findall( line )
        if matches:
            # Since some timestamps are broken we need to reconstruct them
            # by regex match groups
            result = matches[-1][0] + ", " + matches[-1][1] + ". " +\
                matches[-1][2] + ". " + matches[-1][3]
            return result
        else:
            return None

        return cls.extract_dates( line, True )[1]


class RedFamWorker( RedFam ):
@@ -402,17 +453,17 @@ class RedFamWorker( RedFam ):
    where discussion is finished
    """
    def __init__( self, mysql_data ):

        articlesList = []
        for key in sorted( mysql_data.keys() ):
            if 'article' in key and mysql_data[ key ]:
                articlesList.append( mysql_data[ key ] )

        super().__init__( articlesList, mysql_data[ 'beginning' ],
                          mysql_data[ 'ending' ], mysql_data[ 'red_page_id' ],
                          mysql_data[ 'status' ], mysql_data[ 'fam_hash' ],
                          mysql_data[ 'heading' ] )

    @classmethod
    def list_by_status( cls, status ):
        """
@@ -426,12 +477,12 @@ class RedFamWorker( RedFam ):
            print(fam)
            raise


class RedFamError( Exception ):
    """
    Base class for all errors of the RedFam module
    """

    def __init__( self, message=None ):
        """
        Handles instantiation of RedFamError objects
@@ -440,12 +491,12 @@ class RedFamError( Exception ):
            self.message = "An error occurred while executing a RedFam action"
        else:
            self.message = message

    def __str__( self ):
        """
        Output of error message
        """

        return self.message
@@ -453,12 +504,12 @@ class RedFamHashError( RedFamError ):
    """
    Raised when the given RedFam hash does not match the calculated one
    """

    def __init__( self, givenHash, calculatedHash ):

        message = "Given fam_hash ('{given}') does not match with \
calculated ('{calc}')".format( given=givenHash, calc=calculatedHash )

        super().__init__( message )
@@ -467,8 +518,8 @@ class RedFamHeadingError ( RedFamError ):
    Raised when a given RedFam heading does not match the __sectionhead_pat regex
    """
    def __init__( self, heading ):

        message = "Error while trying to parse section heading. Given heading \
'{heading}' does not match RegEx".format( heading=heading )

        super().__init__( message )
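extract_dates() replaces the deprecated per-line helpers (is_beginning, is_ending, is_ending2) with a single findall pass over the whole section text. A simplified sketch of the approach; the regex here is a stand-in for the configured jogobot.config['redundances']['timestamp_regex'] and reconstructs timestamps from match groups the same way:

import re

# Simplified stand-in for the configured timestamp regex
timestamp_pat = re.compile( r"(\d{2}:\d{2}), (\d{1,2})\. (\w+)\. (\d{4})" )

def extract_dates( text, isarchive=False ):
    # First match is the beginning; the last one counts as the ending
    # on archive pages (or after a done notice in the real code)
    matches = timestamp_pat.findall( str( text ) )
    if not matches:
        return ( None, None )
    beginning = "%s, %s. %s. %s" % matches[0]
    ending = "%s, %s. %s. %s" % matches[-1] if isarchive else None
    return ( beginning, ending )

section = "... 14:05, 3. Jan. 2016 (CET) ... 09:12, 7. Feb. 2016 (CET) ..."
print( extract_dates( section, isarchive=True ) )
# ('14:05, 3. Jan. 2016', '09:12, 7. Feb. 2016')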
137
redpage.py
@@ -26,53 +26,51 @@ Provides a class for handling redundance discussion pages and archives
"""

import pywikibot  # noqa
import mwparserfromhell as mwparser

import jogobot

from mysqlred import MysqlRedPage
from redfam import RedFamParser


class RedPage:
    """
    Class for handling redundance discussion pages and archives
    """

    def __init__( self, page, archive=False ):
        """
        Generate a new RedPage object based on the given pywikibot page object

        @param page page Pywikibot/MediaWiki page object for page
        """

        # Save the pywikibot page object
        self.page = page
        self._archive = archive

        self.__handle_db( )

        self.is_page_changed()

        self._parsed = None
        if( self._changed or self.__mysql.data[ 'status' ] == 0 ):
            self.parse()

        self.__update_db()

    def __handle_db( self ):
        """
        Handles opening of db connection
        """

        # We need a connection to our mysqldb
        self.__mysql = MysqlRedPage( self.page._pageid )

        if not self.__mysql.data:
            self.__mysql.add_page( self.page.title(), self.page._revid )

    def is_page_changed( self ):
        """
        Check whether the page was changed since the last run
        """

        if( self.__mysql.data != { 'page_id': self.page._pageid,
                                   'rev_id': self.page._revid,
                                   'page_title': self.page.title(),
@@ -85,102 +83,65 @@ class RedPage:
        """
        Detects whether the current page is an archive of discussions
        """

        if( self._archive or ( u"/Archiv" in self.page.title() ) or
                ( "{{Archiv}}" in self.page.text ) or
                ( "{{Archiv|" in self.page.text ) ):

            return True
        else:
            return False

    def parse( self ):  # noqa

    def is_parsing_needed( self ):
        """
        Decides whether the current RedPage needs to be parsed or not
        """

        if( self._changed or self.__mysql.data[ 'status' ] == 0 ):
            return True
        else:
            return False

    def parse( self ):
        """
        Handles the parsing process
        """

        # Since the page text is a string we need to split it into lines
        text_lines = self.page.text.split( "\n" )
        length = len( text_lines )

        # Initialise line counter
        i = 0
        fam_heading = None
        beginning = None
        ending = None

        # Set line of last detected Redundance-Family to 0
        last_fam = 0

        # Iterate over the lines of the page
        for line in text_lines:

            # Check whether we have a "Redundance-Family" section heading
            if RedFamParser.is_sectionheading( line ):

                # Save line number of last detected Redundance-Family
                last_fam = i
                # Save heading
                fam_heading = line

                # Defined (re)initialisation of dates
                beginning = None
                ending = None

            # Check whether we are currently in a "Redundance-Family" section
            if i > last_fam and last_fam > 0:

                # Check if we have already recognized the beginning date of the
                # discussion (in a former iteration) or if we have a done notice
                if not beginning:
                    beginning = RedFamParser.is_beginning( line )
                elif not ending:
                    ending = RedFamParser.is_ending( line )

            # Detect end of red_fam section (next line is a new section heading)
            # or end of file
            # Prevent running out of index
            if i < (length - 1):
                test = RedFamParser.is_sectionheading( text_lines[ i + 1 ] )
            else:
                test = False
            if ( test or ( length == ( i + 1 ) ) ):

                # Create the red_fam object
                if( fam_heading and beginning ):

                    # Maybe we can find an ending by searching backwards
                    # if we have none yet (no done notice on archive pages)
                    if not ending and self.is_archive():
                        j = i
                        while (j > last_fam) and not ending:
                            j -= 1
                            ending = RedFamParser.is_ending2( text_lines[ j ] )

                    # Create the RedFam object
                    RedFamParser( fam_heading, self.page._pageid,
                                  self.is_archive(), beginning, ending )

            # Increment line counter
            i += 1

        # Generate Wikicode object
        self.wikicode = mwparser.parse( self.page.text )

        # Select RedFam sections
        # matches = regexp or
        #           function( gets heading content as wikicode as param 1 )
        # include_lead = if true include first section (intro)
        # include_headings = if true include heading
        fams = self.wikicode.get_sections(
            matches=jogobot.config["redundances"]["section_heading_regex"],
            include_lead=False, include_headings=True )

        # Iterate over RedFams
        for fam in fams:

            yield fam

        else:
            RedFamParser.flush_db_cache()
            self._parsed = True

        self.__update_db()

    def __update_db( self ):
        """
        Updates the page meta data in mysql db
        """
        if( self._parsed or not self._changed ):
            status = 1

            if( self.is_archive() ):
                status = 2
        else:
            status = 0

        self.__mysql.update_page( self.page._revid, self.page.title(), status )

    @classmethod
    def flush_db_cache( cls ):
        """
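The rewritten RedPage.parse() no longer walks the page line by line; it lets mwparserfromhell select the RedFam sections by heading regex and yields them to the caller. A minimal sketch of that selection, with an illustrative regex in place of the configured section_heading_regex:

import mwparserfromhell as mwparser

text = """intro text
== [[A]] - [[B]] ==
discussion with timestamps ...
== Unrelated section ==
more text ...
"""

wikicode = mwparser.parse( text )

# Illustrative heading regex instead of the configured
# jogobot.config["redundances"]["section_heading_regex"]:
# keep only sections whose heading contains at least two wikilinks
fams = wikicode.get_sections(
    matches=r"\[\[.+\]\].*\[\[.+\]\]",
    include_lead=False, include_headings=True )

for fam in fams:
    print( fam.filter_headings()[0] )  # == [[A]] - [[B]] ==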