SQLAlchemy working for parser

Needs some testing; presumably contains some bugs.
This commit is contained in:
2016-11-26 22:26:55 +01:00
parent 0ebf307bb8
commit 6e973369cd
4 changed files with 857 additions and 516 deletions

View File

@@ -33,8 +33,8 @@ from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
import jogobot
from lib import redpage
from lib import redfam
from lib.redpage import RedPage
from lib.redfam import RedFamParser
class DiscussionParserBot(
@@ -127,7 +127,7 @@ class DiscussionParserBot(
else:
# If successfully parsed all pages in cat, flush db write cache
redpage.RedPage.flush_db_cache()
RedPage.flush_db_cache()
def treat_page( self ):
"""
@@ -146,20 +146,23 @@ class DiscussionParserBot(
return
# Initiate RedPage object
red_page = redpage.RedPage( self.current_page )
redpage = RedPage.session.query(RedPage).filter(RedPage.pageid == self.current_page.pageid ).one_or_none()
# Check whether parsing is needed
if red_page.is_parsing_needed():
if redpage:
redpage.update( self.current_page )
else:
redpage = RedPage( self.current_page )
#~ # Check whether parsing is needed
if redpage.is_parsing_needed():
# Count families for failure analysis
fam_counter = 0
# Iterate over returned generator with redfam sections
for fam in red_page.parse():
for fam in redpage.parse():
# Run RedFamParser on section text
redfam.RedFamParser.parser( fam, red_page.page,
red_page.is_archive() )
RedFamParser.parser( fam, redpage,
redpage.is_archive() )
fam_counter += 1
@@ -167,12 +170,13 @@ class DiscussionParserBot(
# If successfully parsed whole page, flush
# db write cache
if( fam_counter ):
redfam.RedFamParser.flush_db_cache()
RedFamParser.flush_db_cache()
jogobot.output( "Page [[{reddisc}]] parsed".format(
reddisc=red_page.page.title() ) )
reddisc=redpage.page.title() ) )
else:
jogobot.output(
"\03{red}" + "Page [[{reddisc}]], ".format(
reddisc=red_page.page.title() ) +
reddisc=redpage.page.title() ) +
"containing no redfam, parsed!",
"WARNING" )