#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
#  parse-pages.py
#
#  Copyright 2016 GOLDERWEB – Jonathan Golder
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#
"""
Script to parse all redpages in configured categories
"""

import pywikibot
from pywikibot import pagegenerators

import jogobot

import redpage
import redfam


def get_cat_pages(cat):
    """
    Build an iterable generator object with all pages listed in the given
    category

    @param cat  Category to request
    @type  cat  str

    @returns generator  Iterable object with the pages of the given category
    """

    # Called without arguments, so the site comes from the pywikibot config
    site = pywikibot.Site()

    # Category object for the requested title on that site
    category = pywikibot.Category(site, cat)

    # Generator of page objects for the members of the category
    return pagegenerators.CategorizedPageGenerator(category)


def main(*args):
    """
    Parse the redpages of all configured categories

    Iterates over the categories in
    jogobot.config["redundances"]["redpage_cats"], skips every page whose
    title is listed in jogobot.config["redundances"]["redpage_exclude"] and
    passes each section yielded by RedPage.parse() to RedFamParser.parser().
    The RedFamParser db write cache is flushed after each parsed page, the
    RedPage db write cache after each category.

    The END message is written and pywikibot.stopme() is called in any case,
    also when an exception aborts the run (the exception still propagates).

    @param args  Accepted but not used
    """

    try:
        # NOTE(review): the label reads "parser-pages.py" while the script is
        # named parse-pages.py. Kept unchanged because log consumers may match
        # on it -- confirm before renaming.
        jogobot.output("BEGINN – parser-pages.py")

        # Section of the bot config which holds the settings used below
        config = jogobot.config["redundances"]

        # Iterate over configured categories
        for cat in config["redpage_cats"]:

            # Iterate over pages in current cat
            for page in get_cat_pages(cat):

                # For pages configured to exclude, go on with next page
                if page.title() in config["redpage_exclude"]:
                    continue

                # Initiate RedPage object
                red_page = redpage.RedPage(page)

                # Nothing to do for pages which do not need parsing
                if not red_page.is_parsing_needed():
                    continue

                # Iterate over returned generator with redfam sections
                for fam in red_page.parse():

                    # Run RedFamParser on section text.
                    # Uses the public ``pageid`` property instead of the
                    # private ``_pageid`` attribute; presumably
                    # ``red_page.page`` is the pywikibot page passed in
                    # above -- verify against redpage.RedPage.
                    redfam.RedFamParser.parser(
                        fam, red_page.page.pageid, red_page.is_archive())

                # Only reached if the whole page was parsed without an
                # exception, so flush the db write cache now
                redfam.RedFamParser.flush_db_cache()
                jogobot.output("Page '%s' parsed" % red_page.page.title())

            # All pages of this cat were handled without an exception, so
            # flush the db write cache
            redpage.RedPage.flush_db_cache()

    finally:
        jogobot.output("END – parser-pages.py")
        pywikibot.stopme()


if __name__ == "__main__":
    main()