Rewrite RedPage.parse using mwparserfromhell to make it simpler

2016-03-03 17:37:46 +01:00
parent b81694c6d3
commit 7422307985
2 changed files with 51 additions and 87 deletions
--- a/redfam.py
+++ b/redfam.py
@@ -126,7 +126,7 @@ class RedFamParser( RedFam ):
    __timestamp_format = jogobot.config['redundances']['timestamp_format']
    # Define section heading re.pattern
-    __sectionhead_pat = re.compile( r"^(=+)(.*\[\[.+\]\].*\[\[.+\]\].*)\1" )
+    __sectionhead_pat = re.compile( r"^(.*\[\[.+\]\].*\[\[.+\]\].*)" )
    # Define timestamp re.pattern
    __timestamp_pat = re.compile( jogobot.config['redundances']
@@ -334,7 +334,7 @@ class RedFamParser( RedFam ):
        @returns    bool            Returns True if it is a section heading
        """
-        if cls.__sectionhead_pat.search( line ):
+        if cls.__sectionhead_pat.search( str(line) ):
            return True
        else:
            return False
--- a/redpage.py
+++ b/redpage.py
@@ -26,6 +26,7 @@ Provides a class for handling redundance discussion pages and archives
 """
 import pywikibot  # noqa
 import mwparserfromhell as mwparser
 from mysqlred import MysqlRedPage
 from redfam import RedFamParser
@@ -94,75 +95,38 @@ class RedPage:
        else:
                return False
-    def parse( self ):  # noqa
+    def parse( self ):
        """
        Handles the parsing process
        """
-        # Since @param text is a string we need to split it in lines
+        # Generate Wikicode object
-        text_lines = self.page.text.split( "\n" )
+        self.wikicode = mwparser.parse( self.page.text )
        length = len( text_lines )
-        # Initialise line counter
+        # Select RedFam-sections
-        i = 0
+        # matches=Regexp or
-        fam_heading = None
+        #         function( gets heading content as wikicode as param 1)
-        beginning = None
+        # include_lead = if true include first section (intro)
-        ending = None
+        # include_headings = if true include heading
        fams = self.wikicode.get_sections(
            matches=RedFamParser.is_sectionheading,
            include_lead=False, include_headings=True )
-        # Set line for last detected Redundance-Family to 0
+        # Iterate over RedFam
-        last_fam = 0
+        for fam in fams:
-        # Iterate over the lines of the page
+            # Extract heading text
-        for line in text_lines:
+            heading = next( fam.ifilter_headings() ).title
-            # Check wether we have an "Redundance-Family"-Section heading
+            # Extract beginning and maybe ending
-            if RedFamParser.is_sectionheading( line ):
+            (beginning, ending) = RedFamParser.extract_dates( fam,
-                
+                                                              self.is_archive()
-                # Save line number for last detected Redundance-Family
+                                                              )
                last_fam = i
                # Save heading
                fam_heading = line
                # Defined (re)initialisation of dates
                beginning = None
                ending = None
            # Check wether we are currently in an "Redundance-Family"-Section
            if i > last_fam and last_fam > 0:
                # Check if we have alredy recognized the beginning date of the
                # discussion (in former iteration) or if we have a done-notice
                if not beginning:
                    beginning = RedFamParser.is_beginning( line )
                elif not ending:
                    ending = RedFamParser.is_ending( line )
            # Detect end of red_fam section (next line is new sectionheading)
            # or end of file
            # Prevent from running out of index
            if i < (length - 1):
                test = RedFamParser.is_sectionheading( text_lines[ i + 1 ] )
            else:
                test = False
            if ( test or ( length == ( i + 1 ) ) ):
                # Create the red_fam object
                if( fam_heading and beginning ):
                    # Maybe we can find a ending by feed if we have None yet
                    # (No done notice on archive pages)
                    if not ending and self.is_archive():
                        j = i
                        while (j > last_fam) and not ending:
                            j -= 1
                            ending = RedFamParser.is_ending2( text_lines[ j ] )
            # Create the RedFam object
-                    RedFamParser( fam_heading, self.page._pageid,
+            RedFamParser( heading, self.page._pageid,
                          self.is_archive(), beginning, ending )
            # Increment line counter
            i += 1
        else:
            RedFamParser.flush_db_cache()
            self._parsed = True