Skip to content

Commit be41182

Browse files
committed
feat: setup translations using LLM
1 parent fa0947d commit be41182

23 files changed

+1124
-860
lines changed

.github/scripts/translate.py

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
#!/usr/bin/env python3
2+
3+
import json
4+
import subprocess
5+
import sys
6+
from pathlib import Path
7+
from typing import Dict, Any, Set
8+
9+
10+
def load_json(file_path: Path) -> Dict[str, Any]:
    """Read the UTF-8 encoded JSON document at *file_path* and return the decoded value.

    The decoded value is whatever the document's top level holds; `main` uses
    this both for the localization objects and for the language index, which
    it iterates as a sequence of entries.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
13+
14+
15+
def save_json(file_path: Path, data: Dict[str, Any]) -> None:
    """Write *data* to *file_path* as pretty-printed UTF-8 JSON with a trailing newline.

    Non-ASCII characters are written verbatim (``ensure_ascii=False``) and the
    document is indented by two spaces.

    Raises:
        TypeError / ValueError: if *data* cannot be serialized; in that case
            the destination file is left untouched.
        OSError: if the file cannot be opened or written.
    """
    # Serialize BEFORE opening the file: opening in 'w' mode truncates it, so a
    # serialization error after that point would leave an empty or partial
    # localization file behind.
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(serialized)
        f.write('\n')
19+
20+
21+
def _normalize_diff_entry(text: str) -> str:
    """Strip indentation and one trailing comma so equal entries compare equal."""
    entry = text.strip()
    if entry.endswith(','):
        entry = entry[:-1].rstrip()
    return entry


def _diff_entry_key(entry: str):
    """Return the decoded JSON key if *entry* looks like `"key": ...`, else None."""
    if not entry.startswith('"'):
        return None
    try:
        # raw_decode parses the leading JSON string and reports where it ends,
        # so keys containing escaped quotes are handled correctly.
        key, end = json.JSONDecoder().raw_decode(entry)
    except json.JSONDecodeError:
        return None
    if isinstance(key, str) and entry[end:].lstrip().startswith(':'):
        return key
    return None


def get_changed_keys(en_file: Path, diff_text: 'str | None' = None) -> Set[str]:
    """Return the keys that were added or modified in the English localization file.

    Args:
        en_file: Path to ``en_US.json``. ``git diff HEAD~1 HEAD`` is run for
            this file with the working directory set to ``en_file.parent.parent``.
        diff_text: Optional pre-computed unified diff. When given, git is not
            invoked; this lets a caller supply a diff for a different commit
            range than the last commit.

    Returns:
        The set of JSON keys found on added (``+``) lines whose content really
        changed. An entry whose only difference to a removed (``-``) line is
        indentation or a trailing comma is NOT reported: appending a key to the
        end of the file adds a comma to the previous last entry, and that must
        not cause the previous entry to be re-translated.

    Exits the process with status 1 if git cannot be run or reports an error.
    """
    if diff_text is None:
        print("Getting git diff...", flush=True)

        try:
            result = subprocess.run(
                ['git', 'diff', 'HEAD~1', 'HEAD', '--', str(en_file)],
                capture_output=True,
                text=True,
                encoding='utf-8',  # JSON files are UTF-8; do not rely on the locale.
                check=False,
                cwd=en_file.parent.parent,
            )
        except (OSError, subprocess.SubprocessError) as e:
            print(f"Exception in get_changed_keys: {e}", flush=True)
            sys.exit(1)

        print(f"Git diff return code: {result.returncode}", flush=True)

        if result.returncode != 0:
            print(f"Git diff error: {result.stderr}", flush=True)
            sys.exit(1)

        diff_text = result.stdout

    if not diff_text.strip():
        print("No diff found - file unchanged", flush=True)
        return set()

    # Collect added and removed entries, skipping the '+++' / '---' file headers.
    removed_entries = set()
    added_entries = []
    for line in diff_text.splitlines():
        if line.startswith('+') and not line.startswith('+++'):
            added_entries.append(_normalize_diff_entry(line[1:]))
        elif line.startswith('-') and not line.startswith('---'):
            removed_entries.add(_normalize_diff_entry(line[1:]))

    changed_keys = set()
    for entry in added_entries:
        # Same entry on a removed line => only comma/whitespace changed or the
        # line merely moved; the value itself is unchanged.
        if entry in removed_entries:
            continue
        key = _diff_entry_key(entry)
        if key is not None:
            changed_keys.add(key)

    return changed_keys
61+
62+
63+
def _parse_translation(raw_output: str, keys_dict: Dict[str, str]) -> Dict[str, str]:
    """Decode and validate the LLM reply; fall back to the source text where needed."""
    content = raw_output.strip()

    if not content:
        print("Empty response from LLM", flush=True)
        return keys_dict

    # Take the outermost {...} span. This drops markdown code fences and any
    # prose the model put around the JSON object.
    start, end = content.find('{'), content.rfind('}')
    candidate = content[start:end + 1] if 0 <= start < end else content

    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError as e:
        print(f"JSON error: {e}", flush=True)
        print(f"Content: {content[:200]}...", flush=True)
        return keys_dict

    if not isinstance(parsed, dict):
        print(f"JSON error: expected an object, got {type(parsed).__name__}", flush=True)
        return keys_dict

    # Keep exactly the requested keys: ignore keys the model invented and fall
    # back to the English text for keys it dropped or returned as non-strings.
    validated = {}
    for key, source_text in keys_dict.items():
        value = parsed.get(key)
        if isinstance(value, str):
            validated[key] = value
        else:
            print(f"Missing or invalid translation for '{key}' - keeping source text", flush=True)
            validated[key] = source_text
    return validated


def translate_keys(keys_dict: Dict[str, str], target_language: str) -> Dict[str, str]:
    """Translate the values of *keys_dict* from English to *target_language* via the `llm` CLI.

    Args:
        keys_dict: Mapping of localization key to English text.
        target_language: Name of the target language, interpolated into the prompt.

    Returns:
        A mapping with exactly the keys of *keys_dict*. On any failure (CLI
        missing, non-zero exit status, timeout, empty or non-JSON reply) the
        untranslated *keys_dict* is returned; individual keys that the model
        omitted or returned as non-strings keep their English text. An empty
        *keys_dict* yields an empty dict without calling the LLM.
    """
    if not keys_dict:
        return {}

    prompt = f"""You are a professional translator. Translate the following JSON object from English to {target_language}.

IMPORTANT RULES:
1. Keep all JSON keys EXACTLY the same (do not translate keys)
2. Only translate the VALUES
3. Preserve any special formatting like quotes (\"\"), placeholders (\"M\", \"N\", \"X\", \"ENTRY\", \"PLAYLIST\", etc.)
4. Maintain the same meaning, punctuation, capitalization, structure and formatting
5. Return ONLY the translated JSON object, no additional text
6. Ensure the output is valid JSON
7. Try to keep the same string length as the original string (if possible)

Input JSON:
{json.dumps(keys_dict, ensure_ascii=False, indent=2)}"""

    print("Calling LLM...", flush=True)

    try:
        result = subprocess.run(
            ['llm', '-m', 'github/gpt-5'],
            input=prompt,
            capture_output=True,
            text=True,
            check=False,
            timeout=120,
        )
    except subprocess.TimeoutExpired:
        print("LLM call timed out after 120 seconds", flush=True)
        return keys_dict
    except Exception as e:
        # Deliberate best-effort boundary: a broken LLM call must not abort the
        # whole run, the caller simply keeps the English text.
        print(f"Exception calling LLM: {e}", flush=True)
        return keys_dict

    print(f"LLM returned with code {result.returncode}", flush=True)

    if result.returncode != 0:
        print(f"Error: {result.stderr}", flush=True)
        return keys_dict

    return _parse_translation(result.stdout, keys_dict)
123+
124+
125+
def main():
    """Translate the keys changed in en_US.json into every language listed in index.json.

    Steps:
      1. Locate the project root (two levels above this script's directory).
      2. Load ``localizations/en_US.json`` and determine which keys changed.
      3. For each entry of ``index.json`` other than ``en_US``, translate the
         changed keys in batches and write ``localizations/<code>.json`` with
         the same key order as ``en_US.json``.

    Exits with status 1 if ``en_US.json`` or ``index.json`` is missing, and
    with status 0 if no changed keys are found.
    """
    print("Starting translation script...", flush=True)

    # Setup paths. Resolve first so `.parent.parent` is correct even when
    # `__file__` is a relative path.
    script_dir = Path(__file__).resolve().parent
    project_root = script_dir.parent.parent
    localizations_dir = project_root / "localizations"
    index_file = project_root / "index.json"
    en_file = localizations_dir / "en_US.json"

    print("Paths:", flush=True)
    print(f"  project_root: {project_root}", flush=True)
    print(f"  en_file: {en_file}", flush=True)

    if not en_file.exists():
        print(f"Error: {en_file} not found", flush=True)
        sys.exit(1)

    # Load English localization file.
    en_data = load_json(en_file)
    print(f"Loaded {len(en_data)} keys from en_US.json", flush=True)

    # Get keys that were changed in the latest commit.
    changed_keys = get_changed_keys(en_file)

    if not changed_keys:
        print("No changed keys found - nothing to translate", flush=True)
        sys.exit(0)

    print(f"Found {len(changed_keys)} changed keys: {', '.join(sorted(changed_keys))}", flush=True)

    # Load list of available languages from index.json.
    if not index_file.exists():
        print(f"Error: {index_file} not found", flush=True)
        sys.exit(1)

    languages = load_json(index_file)
    print(f"Loaded {len(languages)} languages", flush=True)

    # The set of keys to translate and its batching are identical for every
    # language, so compute them once. Iterating en_data (not the set) keeps the
    # batches in en_US.json order, which makes the prompts deterministic.
    keys_to_translate = {k: v for k, v in en_data.items() if k in changed_keys}
    batch_size = 50  # Translate in batches to avoid overwhelming the LLM.
    keys = list(keys_to_translate)
    batches = [keys[i:i + batch_size] for i in range(0, len(keys), batch_size)]
    total_batches = len(batches)

    # Translate changed keys for each language.
    for lang_info in languages:
        lang_code = lang_info['code']
        lang_name = lang_info['name']

        # Skip English since it's the source language.
        if lang_code == 'en_US':
            continue

        print(f"\n[{lang_code}] {lang_name}", flush=True)

        # Changed keys that no longer exist in en_US.json leave nothing to do.
        if not keys_to_translate:
            print("Up to date", flush=True)
            continue

        target_file = localizations_dir / f"{lang_code}.json"
        existing_data = load_json(target_file) if target_file.exists() else {}

        print(f"Translating {len(keys_to_translate)} keys...", flush=True)

        translated = {}
        for batch_num, batch_keys in enumerate(batches, start=1):
            batch_dict = {k: keys_to_translate[k] for k in batch_keys}
            print(f"Batch {batch_num}/{total_batches}", flush=True)
            translated.update(translate_keys(batch_dict, lang_name))

        # Merge translations with existing data and maintain key order from
        # en_US.json; keys missing everywhere fall back to the English text and
        # keys no longer present in en_US.json are dropped.
        final_data = {**existing_data, **translated}
        ordered_data = {k: final_data.get(k, en_data[k]) for k in en_data}

        save_json(target_file, ordered_data)
        print("✓ Saved", flush=True)

    print("\n✓ Done", flush=True)
211+
212+
213+
if __name__ == "__main__":
    # Run the translation pipeline only when executed as a script, so that
    # importing this module has no side effects.
    main()

.github/workflows/translate.yml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Regenerates the non-English localization files with an LLM whenever
# localizations/en_US.json changes on the default branch, and opens a pull
# request with the result.
name: Translate

on:
  push:
    branches: ["main", "master"]
    paths:
      - "localizations/en_US.json"

jobs:
  translate:
    # Only run for pushes made by the repository owner.
    if: github.event.pusher.name == 'alexmercerind' && github.event.pusher.email == 'saini123hitesh@gmail.com'
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # The script diffs HEAD~1..HEAD, so the parent commit must be available.
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install llm-github-models CLI
        run: |
          pip install llm-github-models
      - name: Configure llm
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          llm keys set github --value "$GITHUB_TOKEN"
      - name: Generate translations
        run: |
          python3 .github/scripts/translate.py
      - name: Check for changes
        id: check_changes
        run: |
          # `git diff --quiet` ignores untracked files, so a localization file
          # created for the first time would go unnoticed; `git status
          # --porcelain` reports both modified and new files.
          if [ -z "$(git status --porcelain)" ]; then
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
          else
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
          fi
      - name: Create Pull Request
        if: steps.check_changes.outputs.has_changes == 'true'
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          commit-message: "chore: auto-translate localizations"
          title: "Auto-Translate Localizations"
          body: "This pull request contains automatically generated translations based on changes to `en_US.json`."
          branch: auto-translate-${{ github.run_number }}
          delete-branch: true
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: CI
1+
name: Validate
22

33
on:
44
push:
@@ -14,4 +14,4 @@ jobs:
1414
- name: Validate JSON syntax
1515
run: for file in `ls | grep '.json' `; do python -mjson.tool "$file" > /dev/null ; done
1616
- name: Validate index entries & localization values
17-
run: python3 .github/ci.py
17+
run: python3 .github/scripts/validate.py

0 commit comments

Comments
 (0)