wmllint: handle HelpWML markup correctly while spellchecking

This fixes bug #22780, as well as several false positives when checking the /data/core/editor/help.cfg file.
2016-06-09 21:41:50 +02:00 · 2016-06-09 21:41:50 +02:00 · 2b3d54d5f4
commit 2b3d54d5f4
parent d5fdc951c1
1 changed file with 4 additions and 3 deletions
--- a/data/tools/wmllint
+++ b/data/tools/wmllint
@@ -2626,9 +2626,10 @@ def inner_spellcheck(nav, value, spelldict):
        value = value.replace(old, new)

    if '<' in value:
-        value = re.sub("<ref>.*< ref>", "", value)
-        value = re.sub("<[^>]+>text='([^']*)'<[^>]+>", r"\1", value)
-        value = re.sub("<[0-9,]+>", "", value)
+        # remove HelpWML markup and extract its text content where needed
+        value = re.sub(r"<(ref|format)>.*?text='(.*?)'.*?< \1>", r"\2", value)
+        value = re.sub(r"<(jump|img)>.*?< \1>", "", value)
+        value = re.sub(r"<(italic|bold|header)>text='(.*?)'< \1>", r"\2", value)
    # Fold continued lines
    value = re.sub(r'" *\+\s*_? *"', "", value)
    # It would be nice to use pyenchant's tokenizer here, but we can't