multilang: bad language translation check (#30783)

* compare bad against list

* use web

* Update test_translations.py

* uncomment

* override

* wrap

* AssertionError

* better

* dedent

* selfish

* check numerusforms

* already checked

* use name

* not again

* combined

* sets

* assume available

* fix assume

* check regardless of other tests

* assert not print

* raise for status

* better

* done

* useless

* happy ruff

* better set

* quiet

* clean

* obvious

* clearer

Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>

* IGNORED_WORDS

* assert match

* direct assert

* show bad word

* fix numerus empty string checks

* fix IGNORED_WORDS

---------

Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
old-commit-hash: fd88990006
chrysler-long2
royjr 1 year ago committed by GitHub
parent 0312d3d0f3
commit b2112b81af
  1. 29
      selfdrive/ui/tests/test_translations.py

@ -5,6 +5,8 @@ import re
import shutil
import unittest
import xml.etree.ElementTree as ET
import string
import requests
from openpilot.selfdrive.ui.update_translations import TRANSLATIONS_DIR, LANGUAGES_FILE, update_translations
@ -121,6 +123,33 @@ class TestTranslations(unittest.TestCase):
matches = re.findall(r'@(\w+);', cur_translations)
self.assertEqual(len(matches), 0, f"The string(s) {matches} were found with '@' instead of '&'")
def test_bad_language(self):
    """Fail if a finished translation contains a word from a public bad-word list.

    For every entry in ``self.translation_files`` the language code is parsed
    from the file name, the word list for that language is downloaded from the
    LDNOOBW repository, and every finished translation in the matching ``.ts``
    file is compared word by word against that list.

    Raises:
        AssertionError: if no language code can be parsed from a file name, or
            if a banned word (not in ``IGNORED_WORDS``) is found.
        requests.RequestException: if the word list cannot be downloaded
            (timeout, connection failure or HTTP error status).
    """
    IGNORED_WORDS = {'pédale'}
    # str.maketrans' third argument is a set of characters to delete, so it
    # must contain punctuation only. Appending '%n' here would also delete
    # every letter 'n' from the text and corrupt the words being compared.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    for name, file in self.translation_files.items():
        match = re.search(r'_([a-zA-Z]{2,3})', file)
        assert match, f"{name} - could not parse language"

        # A timeout keeps a stalled connection from hanging the whole test run.
        response = requests.get(
            f"https://raw.githubusercontent.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words/master/{match.group(1)}",
            timeout=10,
        )
        response.raise_for_status()

        # Loop-invariant for this language: compute the effective list once.
        banned_words = {line.strip() for line in response.text.splitlines()} - IGNORED_WORDS

        for context in ET.parse(os.path.join(TRANSLATIONS_DIR, f"{file}.ts")).getroot():
            for message in context.iterfind("message"):
                translation = message.find("translation")
                # Nothing to check without a translation element; unfinished
                # translations are skipped as well.
                if translation is None or translation.get("type") == "unfinished":
                    continue

                if message.get("numerus") == "yes":
                    # An empty <numerusform/> has text None; treat it as "".
                    translation_text = " ".join(t.text or "" for t in translation.findall("numerusform"))
                else:
                    translation_text = translation.text

                if not translation_text:
                    continue

                # Drop the '%n' placeholder as a unit, then strip punctuation.
                cleaned = translation_text.replace('%n', ' ').translate(strip_punctuation)
                words = set(cleaned.lower().split())

                bad_words_found = words & banned_words
                assert not bad_words_found, f"Bad language found in {name}: '{translation_text}'. Bad word(s): {', '.join(bad_words_found)}"
# Run the unittest runner when this file is executed directly as a script.
if __name__ == "__main__":
unittest.main()

Loading…
Cancel
Save