openpilot_comma/selfdrive/ui/tests/test_translations.py

import pytest
import json
import os
import re
import xml.etree.ElementTree as ET
import string
import requests
from parameterized import parameterized_class

from openpilot.selfdrive.ui.update_translations import TRANSLATIONS_DIR, LANGUAGES_FILE

# Load the language list once at import time; the test class below is
# parameterized over its (name, file) items.
# JSON text is UTF-8 and language names may be non-ASCII, so decode explicitly
# rather than relying on the locale's default encoding.
with open(LANGUAGES_FILE, encoding="utf-8") as f:
  translation_files = json.load(f)

UNFINISHED_TRANSLATION_TAG = "<translation type=\"unfinished\""  # non-empty translations can be marked unfinished
LOCATION_TAG = "<location "  # start of a source-location line, which must not appear in the .ts files
FORMAT_ARG = re.compile("%[0-9]+")  # numbered format arguments: %1, %2, ...


@parameterized_class(("name", "file"), translation_files.items())
class TestTranslations:
  """
    Sanity checks for one translation file.

    parameterized_class generates one copy of this class per (name, file)
    item of translation_files.
  """
  name: str  # key from the languages file, used in failure messages
  file: str  # translation file stem; "<file>.ts" is looked up in TRANSLATIONS_DIR

  @staticmethod
  def _read_translation_file(path, file):
    """Return the whole text of <path>/<file>.ts."""
    tr_file = os.path.join(path, f"{file}.ts")
    # Translation files hold non-ASCII text; read them as UTF-8 instead of
    # whatever the locale default happens to be.
    with open(tr_file, encoding="utf-8") as f:
      return f.read()

  def _finished_messages(self):
    """
      Parse this language's .ts file and yield (message, translation) element
      pairs for every message whose translation is not marked "unfinished".
    """
    tr_xml = ET.parse(os.path.join(TRANSLATIONS_DIR, f"{self.file}.ts"))

    for context in tr_xml.getroot():
      for message in context.iterfind("message"):
        translation = message.find("translation")

        # Do not test unfinished translations
        if translation.get("type") == "unfinished":
          continue

        yield message, translation

  def test_missing_translation_files(self):
    """Every language listed in the languages file must have a .ts file."""
    assert os.path.exists(os.path.join(TRANSLATIONS_DIR, f"{self.file}.ts")), \
                    f"{self.name} has no XML translation file, run selfdrive/ui/update_translations.py"

  @pytest.mark.skip("Only test unfinished translations before going to release")
  def test_unfinished_translations(self):
    """No translation may still be marked unfinished (skipped by default)."""
    cur_translations = self._read_translation_file(TRANSLATIONS_DIR, self.file)
    assert UNFINISHED_TRANSLATION_TAG not in cur_translations, \
                    f"{self.file} ({self.name}) translation file has unfinished translations. Finish translations or mark them as completed in Qt Linguist"

  def test_vanished_translations(self):
    """No translation may be marked vanished (obsolete)."""
    cur_translations = self._read_translation_file(TRANSLATIONS_DIR, self.file)
    assert "<translation type=\"vanished\">" not in cur_translations, \
                    f"{self.file} ({self.name}) translation file has obsolete translations. Run selfdrive/ui/update_translations.py --vanish to remove them"

  def test_finished_translations(self):
    """
      Checks run on each translation that is not marked "unfinished"
      Plural:
      - that any numerus (plural) translations have all plural forms non-empty
      - that the correct format specifier is used (%n)
      Non-plural:
      - that translation is not empty
      - that translation format arguments are consistent
    """
    for message, translation in self._finished_messages():
      # An empty <source/> element has text None; treat it as an empty string
      # so the regex and the failure messages below still work.
      source_text = message.find("source").text or ""

      if message.get("numerus") == "yes":
        numerusform = [t.text for t in translation.findall("numerusform")]

        for nf in numerusform:
          assert nf is not None, f"Ensure all plural translation forms are completed: {source_text}"
          assert "%n" in nf, "Ensure numerus argument (%n) exists in translation."
          assert FORMAT_ARG.search(nf) is None, f"Plural translations must use %n, not %1, %2, etc.: {numerusform}"

      else:
        assert translation.text is not None, f"Ensure translation is completed: {source_text}"

        # Order may differ between languages, so compare the sorted arguments
        source_args = FORMAT_ARG.findall(source_text)
        translation_args = FORMAT_ARG.findall(translation.text)
        assert sorted(source_args) == sorted(translation_args), \
                         f"Ensure format arguments are consistent: `{source_text}` vs. `{translation.text}`"

  def test_no_locations(self):
    """The .ts file must not contain <location> (line number) tags."""
    for line in self._read_translation_file(TRANSLATIONS_DIR, self.file).splitlines():
      assert not line.strip().startswith(LOCATION_TAG), \
                       f"Line contains location tag: {line.strip()}, remove all line numbers."

  def test_entities_error(self):
    """Catch XML entities mistyped with '@' instead of '&' (e.g. @apos;)."""
    cur_translations = self._read_translation_file(TRANSLATIONS_DIR, self.file)
    matches = re.findall(r'@(\w+);', cur_translations)
    assert len(matches) == 0, f"The string(s) {matches} were found with '@' instead of '&'"

  def test_bad_language(self):
    """
      Checks finished translations against a downloaded bad-word list for the
      language parsed from the file name. Skipped when the download is rate
      limited (HTTP 429); any other request failure fails the test.
    """
    IGNORED_WORDS = {'pédale'}

    match = re.search(r'_([a-zA-Z]{2,3})', self.file)
    assert match, f"{self.name} - could not parse language"

    try:
      # Bound the request so an unresponsive server fails the test instead of
      # hanging the whole run.
      response = requests.get(
        f"https://raw.githubusercontent.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words/master/{match.group(1)}",
        timeout=10,
      )
      response.raise_for_status()
    except requests.exceptions.HTTPError as e:
      if e.response is not None and e.response.status_code == 429:
        pytest.skip("word list rate limited")
      raise

    # Both of these are loop invariant, build them once
    banned_words = {line.strip() for line in response.text.splitlines()} - IGNORED_WORDS
    strip_punctuation = str.maketrans('', '', string.punctuation)

    for message, translation in self._finished_messages():
      if message.get("numerus") == "yes":
        # Empty plural forms have text None; leave them out of the join
        translation_text = " ".join(t.text for t in translation.findall("numerusform") if t.text)
      else:
        translation_text = translation.text

      if not translation_text:
        continue

      # Remove the %n placeholder as a unit. Putting '%n' into the maketrans
      # delete set would strip every 'n' from the text and hide bad words
      # that contain that letter.
      cleaned_text = translation_text.replace("%n", " ").translate(strip_punctuation)
      words = set(cleaned_text.lower().split())
      bad_words_found = words & banned_words
      assert not bad_words_found, f"Bad language found in {self.name}: '{translation_text}'. Bad word(s): {', '.join(sorted(bad_words_found))}"