#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
#  parse-pages.py
#
#  Copyright 2016 GOLDERWEB – Jonathan Golder
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#
"""
Script to parse all redpages in configured categories
"""

import pywikibot
from pywikibot import pagegenerators

import jogobot

import redpage
import redfam


def get_cat_pages(cat):
    """
    Build an iterable generator over all pages listed in a category

    The site to work on is taken from the pywikibot configuration.

    @param cat Category to request
    @type cat str

    @returns generator Iterable object with pages of given category
    """
    return pagegenerators.CategorizedPageGenerator(
        pywikibot.Category(pywikibot.Site(), cat))


def main(*args):
    """
    Run the parsing process over every configured category

    Pages named in the "redpage_exclude" setting are skipped, as are
    pages for which RedPage reports that no parsing is needed. The
    RedFamParser write cache is flushed after each parsed page and the
    RedPage write cache after each category. If an exception is raised,
    the pending flushes are skipped, but the END message and
    pywikibot.stopme() still run.

    @param args Positional arguments; accepted but not used here
    """
    try:
        jogobot.output("BEGINN – parser-pages.py")

        # Both settings used below live in the same config section
        settings = jogobot.config["redundances"]

        for category_name in settings["redpage_cats"]:

            for wiki_page in get_cat_pages(category_name):

                # Pages configured to be excluded are not touched at all
                if wiki_page.title() in settings["redpage_exclude"]:
                    continue

                current = redpage.RedPage(wiki_page)

                # Nothing to do for pages which do not need parsing
                if not current.is_parsing_needed():
                    continue

                # Hand every redfam section of the page to the parser
                # NOTE(review): reads the private _pageid attribute of
                # the page object
                for section in current.parse():
                    redfam.RedFamParser.parser(
                        section,
                        current.page._pageid,
                        current.is_archive())

                # Only reached when the whole page was parsed without an
                # exception, so the db write cache can be flushed now
                redfam.RedFamParser.flush_db_cache()
                jogobot.output("Page '%s' parsed" % current.page.title())

            # All pages of this category were handled without an
            # exception, so flush the db write cache
            redpage.RedPage.flush_db_cache()

    finally:
        jogobot.output("END – parser-pages.py")
        pywikibot.stopme()


if __name__ == "__main__":
    main()