Auto translate with OpenAI (#30899)
* auto translate with OpenAI
* change model
* fix mypy typing errors
* update
* fix lint errors
* check api key after args parser
* fix input args
* use languages.json
* just print
---------
Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
old-commit-hash: 2378f311d2
chrysler-long2
parent
090d3d2e41
commit
fc941b87a3
1 changed files with 136 additions and 0 deletions
@ -0,0 +1,136 @@ |
||||
#!/usr/bin/env python3 |
||||
|
||||
import argparse |
||||
import json |
||||
import os |
||||
import pathlib |
||||
import xml.etree.ElementTree as ET |
||||
from typing import cast |
||||
|
||||
import requests |
||||
|
||||
TRANSLATIONS_DIR = pathlib.Path(__file__).resolve().parent |
||||
TRANSLATIONS_LANGUAGES = TRANSLATIONS_DIR / "languages.json" |
||||
|
||||
OPENAI_MODEL = "gpt-4" |
||||
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") |
||||
OPENAI_PROMPT = "You are a professional translator from English to {language} (ISO 639 language code)." + \ |
||||
"The following sentence or word is in the GUI of a software called openpilot, translate it accordingly." |
||||
|
||||
|
||||
def get_language_files(languages: list[str] | None = None) -> dict[str, pathlib.Path]: |
||||
files = {} |
||||
|
||||
with open(TRANSLATIONS_LANGUAGES) as fp: |
||||
language_dict = json.load(fp) |
||||
|
||||
for filename in language_dict.values(): |
||||
path = TRANSLATIONS_DIR / f"{filename}.ts" |
||||
language = path.stem.split("main_")[1] |
||||
|
||||
if languages is None or language in languages: |
||||
files[language] = path |
||||
|
||||
return files |
||||
|
||||
|
||||
def translate_phrase(text: str, language: str) -> str: |
||||
response = requests.post( |
||||
"https://api.openai.com/v1/chat/completions", |
||||
json={ |
||||
"model": OPENAI_MODEL, |
||||
"messages": [ |
||||
{ |
||||
"role": "system", |
||||
"content": OPENAI_PROMPT.format(language=language), |
||||
}, |
||||
{ |
||||
"role": "user", |
||||
"content": text, |
||||
}, |
||||
], |
||||
"temperature": 0.8, |
||||
"max_tokens": 1024, |
||||
"top_p": 1, |
||||
}, |
||||
headers={ |
||||
"Authorization": f"Bearer {OPENAI_API_KEY}", |
||||
}, |
||||
) |
||||
|
||||
response.raise_for_status() |
||||
|
||||
data = response.json() |
||||
|
||||
return cast(str, data["choices"][0]["message"]["content"]) |
||||
|
||||
|
||||
def translate_file(path: pathlib.Path, language: str, all_: bool) -> None: |
||||
tree = ET.parse(path) |
||||
|
||||
root = tree.getroot() |
||||
|
||||
for context in root.findall("./context"): |
||||
name = context.find("name") |
||||
if name is None: |
||||
raise ValueError("name not found") |
||||
|
||||
print(f"Context: {name.text}") |
||||
|
||||
for message in context.findall("./message"): |
||||
source = message.find("source") |
||||
translation = message.find("translation") |
||||
|
||||
if source is None or translation is None: |
||||
raise ValueError("source or translation not found") |
||||
|
||||
if not all_ and translation.attrib.get("type") != "unfinished": |
||||
continue |
||||
|
||||
llm_translation = translate_phrase(cast(str, source.text), language) |
||||
|
||||
print(f"Source: {source.text}\n" + \ |
||||
f"Current translation: {translation.text}\n" + \ |
||||
f"LLM translation: {llm_translation}") |
||||
|
||||
translation.text = llm_translation |
||||
|
||||
with path.open("w", encoding="utf-8") as fp: |
||||
fp.write('<?xml version="1.0" encoding="utf-8"?>\n' + |
||||
'<!DOCTYPE TS>\n' + |
||||
ET.tostring(root, encoding="utf-8").decode()) |
||||
|
||||
|
||||
def main(): |
||||
arg_parser = argparse.ArgumentParser("Auto translate") |
||||
|
||||
group = arg_parser.add_mutually_exclusive_group(required=True) |
||||
group.add_argument("-a", "--all-files", action="store_true", help="Translate all files") |
||||
group.add_argument("-f", "--file", nargs="+", help="Translate the selected files. (Example: -f fr de)") |
||||
|
||||
arg_parser.add_argument("-t", "--all-translations", action="store_true", default=False, help="Translate all sections. (Default: only unfinished)") |
||||
|
||||
args = arg_parser.parse_args() |
||||
|
||||
if OPENAI_API_KEY is None: |
||||
print("OpenAI API key is missing. (Hint: use `export OPENAI_API_KEY=YOUR-KEY` before you run the script).\n" + \ |
||||
"If you don't have one go to: https://beta.openai.com/account/api-keys.") |
||||
exit(1) |
||||
|
||||
files = get_language_files(None if args.all_files else args.file) |
||||
|
||||
if args.file: |
||||
missing_files = set(args.file) - set(files) |
||||
if len(missing_files): |
||||
print(f"No language files found: {missing_files}") |
||||
exit(1) |
||||
|
||||
print(f"Translation mode: {'all' if args.all_translations else 'only unfinished'}. Files: {list(files)}") |
||||
|
||||
for lang, path in files.items(): |
||||
print(f"Translate {lang} ({path})") |
||||
translate_file(path, lang, args.all_translations) |
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
main() |
Loading…
Reference in new issue