From fc941b87a3b77bbc2acb5db2b9353863969dc3c4 Mon Sep 17 00:00:00 2001 From: Meir Date: Sat, 6 Jan 2024 08:28:56 +0200 Subject: [PATCH] Auto translate with OpenAI (#30899) * auto translate with OpenAI * change model * fix mypy typing errors * update * fix lint errors * check api key after args parser * fix input args * use languages.json * just print --------- Co-authored-by: Adeeb Shihadeh old-commit-hash: 2378f311d2955ed0983fc0b7f2b93b938a3256fa --- selfdrive/ui/translations/auto_translate.py | 136 ++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100755 selfdrive/ui/translations/auto_translate.py diff --git a/selfdrive/ui/translations/auto_translate.py b/selfdrive/ui/translations/auto_translate.py new file mode 100755 index 0000000000..431e6d0905 --- /dev/null +++ b/selfdrive/ui/translations/auto_translate.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 + +import argparse +import json +import os +import pathlib +import xml.etree.ElementTree as ET +from typing import cast + +import requests + +TRANSLATIONS_DIR = pathlib.Path(__file__).resolve().parent +TRANSLATIONS_LANGUAGES = TRANSLATIONS_DIR / "languages.json" + +OPENAI_MODEL = "gpt-4" +OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") +OPENAI_PROMPT = "You are a professional translator from English to {language} (ISO 639 language code)." + \ + "The following sentence or word is in the GUI of a software called openpilot, translate it accordingly." + + +def get_language_files(languages: list[str] | None = None) -> dict[str, pathlib.Path]: + files = {} + + with open(TRANSLATIONS_LANGUAGES) as fp: + language_dict = json.load(fp) + + for filename in language_dict.values(): + path = TRANSLATIONS_DIR / f"{filename}.ts" + language = path.stem.split("main_")[1] + + if languages is None or language in languages: + files[language] = path + + return files + + +def translate_phrase(text: str, language: str) -> str: + response = requests.post( + "https://api.openai.com/v1/chat/completions", + json={ + "model": OPENAI_MODEL, + "messages": [ + { + "role": "system", + "content": OPENAI_PROMPT.format(language=language), + }, + { + "role": "user", + "content": text, + }, + ], + "temperature": 0.8, + "max_tokens": 1024, + "top_p": 1, + }, + headers={ + "Authorization": f"Bearer {OPENAI_API_KEY}", + }, + ) + + response.raise_for_status() + + data = response.json() + + return cast(str, data["choices"][0]["message"]["content"]) + + +def translate_file(path: pathlib.Path, language: str, all_: bool) -> None: + tree = ET.parse(path) + + root = tree.getroot() + + for context in root.findall("./context"): + name = context.find("name") + if name is None: + raise ValueError("name not found") + + print(f"Context: {name.text}") + + for message in context.findall("./message"): + source = message.find("source") + translation = message.find("translation") + + if source is None or translation is None: + raise ValueError("source or translation not found") + + if not all_ and translation.attrib.get("type") != "unfinished": + continue + + llm_translation = translate_phrase(cast(str, source.text), language) + + print(f"Source: {source.text}\n" + \ + f"Current translation: {translation.text}\n" + \ + f"LLM translation: {llm_translation}") + + translation.text = llm_translation + + with path.open("w", encoding="utf-8") as fp: + fp.write('\n' + + '\n' + + ET.tostring(root, encoding="utf-8").decode()) + + +def main(): + arg_parser = argparse.ArgumentParser("Auto translate") + + group = arg_parser.add_mutually_exclusive_group(required=True) + group.add_argument("-a", "--all-files", action="store_true", help="Translate all files") + group.add_argument("-f", "--file", nargs="+", help="Translate the selected files. (Example: -f fr de)") + + arg_parser.add_argument("-t", "--all-translations", action="store_true", default=False, help="Translate all sections. (Default: only unfinished)") + + args = arg_parser.parse_args() + + if OPENAI_API_KEY is None: + print("OpenAI API key is missing. (Hint: use `export OPENAI_API_KEY=YOUR-KEY` before you run the script).\n" + \ + "If you don't have one go to: https://beta.openai.com/account/api-keys.") + exit(1) + + files = get_language_files(None if args.all_files else args.file) + + if args.file: + missing_files = set(args.file) - set(files) + if len(missing_files): + print(f"No language files found: {missing_files}") + exit(1) + + print(f"Translation mode: {'all' if args.all_translations else 'only unfinished'}. Files: {list(files)}") + + for lang, path in files.items(): + print(f"Translate {lang} ({path})") + translate_file(path, lang, args.all_translations) + + +if __name__ == "__main__": + main()