From 5f4640d5ff1aa42c0048d25cea34b5cba46b74c1 Mon Sep 17 00:00:00 2001 From: Jonathan Golder Date: Sat, 28 Oct 2017 22:35:25 +0200 Subject: [PATCH] Replace urlencoded chars with unicode equivalent Otherwise we get value errors while marking since pwb replaces those Related Task: [FS#160](https://fs.golderweb.de/index.php?do=details&task_id=160) --- lib/redfam.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/redfam.py b/lib/redfam.py index 6f73a5c..191a895 100644 --- a/lib/redfam.py +++ b/lib/redfam.py @@ -28,6 +28,7 @@ Provides classes for working with RedFams import hashlib import locale import re +import urllib.parse from datetime import datetime import mwparserfromhell as mwparser # noqa @@ -291,6 +292,9 @@ class RedFamParser( RedFam ): # Make sure first letter is uppercase article = article[0].upper() + article[1:] + # Unquote possible url encoded special chars + article = urllib.parse.unquote( article ) + # Split in title and anchor part article = article.split("#", 1) # Replace underscores in title with spaces