Skip to content

Commit 1d65719

Browse files
committed
feat: setup translations using LLM
1 parent fa0947d commit 1d65719

23 files changed

+1163
-861
lines changed

.github/scripts/translate.py

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
#!/usr/bin/env python3
2+
3+
import json
4+
import subprocess
5+
import sys
6+
from pathlib import Path
7+
from typing import Dict, Any, Set
8+
9+
10+
def load_json(file_path: Path) -> Dict[str, Any]:
    """Parse the UTF-8 encoded JSON file at ``file_path`` and return its content."""
    raw_text = Path(file_path).read_text(encoding='utf-8')
    return json.loads(raw_text)
13+
14+
15+
def save_json(file_path: Path, data: Dict[str, Any]) -> None:
    """Write ``data`` to ``file_path`` as UTF-8 JSON.

    The output uses a 2-space indent, keeps non-ASCII characters unescaped
    and ends with a single trailing newline.
    """
    with Path(file_path).open('w', encoding='utf-8') as handle:
        json.dump(data, handle, indent=2, ensure_ascii=False)
        handle.write('\n')
19+
20+
21+
def _parse_changed_keys(diff_text: str) -> Set[str]:
    """Collect the JSON object keys that appear on added ('+') lines of a unified diff."""
    decoder = json.JSONDecoder()
    changed_keys: Set[str] = set()

    for line in diff_text.splitlines():
        # '+++' is the new-file header of the diff, not an added line.
        if not line.startswith('+') or line.startswith('+++'):
            continue

        content = line[1:].strip()
        if not content.startswith('"'):
            continue

        # Decode the leading JSON string properly instead of splitting on '"',
        # so keys containing escaped quotes or \uXXXX escapes are not mangled.
        try:
            key, end = decoder.raw_decode(content)
        except json.JSONDecodeError:
            continue

        # Only a string immediately followed by ':' is an object key; this
        # skips added lines that merely start with a quoted value.
        if isinstance(key, str) and content[end:].lstrip().startswith(':'):
            changed_keys.add(key)

    return changed_keys


def get_changed_keys(en_file: Path) -> Set[str]:
    """Return the keys added or modified in ``en_file`` by the latest commit.

    Runs ``git diff HEAD~1 HEAD -- <en_file>`` with the parent of the file's
    directory as working directory and extracts the JSON keys found on the
    added lines of the diff.

    Args:
        en_file: Path to the English localization JSON file.

    Returns:
        The set of changed keys; an empty set when the diff is empty.

    Raises:
        SystemExit: With status 1 when git cannot be run or exits non-zero.
    """
    print("Getting git diff...", flush=True)

    try:
        result = subprocess.run(
            ['git', 'diff', 'HEAD~1', 'HEAD', '--', str(en_file)],
            capture_output=True,
            text=True,
            # The localization files are written as UTF-8; do not depend on
            # the locale of the environment to decode the diff.
            encoding='utf-8',
            check=False,
            cwd=en_file.parent.parent,
        )
    except Exception as e:
        # Script boundary: any failure to obtain the diff aborts the run.
        print(f"Exception in get_changed_keys: {e}", flush=True)
        sys.exit(1)

    print(f"Git diff return code: {result.returncode}", flush=True)

    if result.returncode != 0:
        print(f"Git diff error: {result.stderr}", flush=True)
        sys.exit(1)

    if not result.stdout.strip():
        print("No diff found - file unchanged", flush=True)
        return set()

    return _parse_changed_keys(result.stdout)
61+
62+
63+
def _strip_code_fence(text: str) -> str:
    """Remove a surrounding Markdown code fence (``` or ```json) from an LLM reply."""
    payload = text.strip()
    if payload.startswith('```'):
        # Keep what sits between the opening fence and the next fence.
        payload = payload.split('```')[1]
        if payload.startswith('json'):
            payload = payload[4:]
    return payload.strip()


def _select_translations(requested: Dict[str, str], response: Dict[str, Any]) -> Dict[str, str]:
    """Build a result with exactly the keys of ``requested`` from an LLM response.

    A key takes the value from ``response`` when that value is a string;
    otherwise the English source string is kept. Keys in ``response`` that
    were not requested are ignored.
    """
    selected: Dict[str, str] = {}
    rejected = []

    for key, english in requested.items():
        value = response.get(key)
        if isinstance(value, str):
            selected[key] = value
        else:
            selected[key] = english
            rejected.append(key)

    if rejected:
        print(f"Missing or invalid translations (kept English): {', '.join(rejected)}", flush=True)

    return selected


def translate_keys(keys_dict: Dict[str, str], target_language: str, full_en_data: Dict[str, str]) -> Dict[str, str]:
    """Translate English UI strings into ``target_language`` with the ``llm`` CLI.

    The prompt is sent on stdin to ``llm -m github/gpt-4o`` and the reply is
    parsed as a JSON object.

    Args:
        keys_dict: Mapping of localization key to English string to translate.
        target_language: Name of the language to translate into.
        full_en_data: All English strings, embedded in the prompt as context.

    Returns:
        A dict with exactly the keys of ``keys_dict``. When the command fails,
        replies with nothing, or replies with something that is not a JSON
        object, ``keys_dict`` itself is returned (English fallback). Keys the
        model omitted or answered with a non-string value keep their English
        string.
    """
    prompt = f"""You are a professional translator working on localization for Harmonoid, a music player application. Translate the following JSON object from English to {target_language}.

CONTEXT: These strings are UI text for a music player app. They include terms related to music playback, playlists, albums, artists, audio settings, and media library management.

FULL APPLICATION CONTEXT (all English strings for reference):
{json.dumps(full_en_data, ensure_ascii=False, indent=2)}

IMPORTANT RULES:
1. Keep all JSON keys EXACTLY the same (do not translate keys)
2. Only translate the VALUES
3. Preserve any special formatting like quotes (\"\"), placeholders (\"M\", \"N\", \"X\", \"ENTRY\", \"PLAYLIST\", etc.)
4. Maintain the same meaning, punctuation, capitalization, structure and formatting
5. Use appropriate music/audio terminology for the target language
6. Return ONLY the translated JSON object, no additional text
7. Ensure the output is valid JSON
8. Try to keep the same string length as the original string (if possible)

STRINGS TO TRANSLATE:
{json.dumps(keys_dict, ensure_ascii=False, indent=2)}"""

    print("Calling LLM...", flush=True)

    try:
        process = subprocess.Popen(
            ['llm', '-m', 'github/gpt-4o'],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        # communicate() writes the prompt, closes stdin (EOF) and waits.
        stdout, stderr = process.communicate(input=prompt)
    except Exception as e:
        # Best effort: a failure to run the CLI must not abort the whole run.
        print(f"Exception calling LLM: {e}", flush=True)
        return keys_dict

    returncode = process.returncode
    print(f"LLM returned with code {returncode}", flush=True)

    if returncode != 0:
        print(f"Error: {stderr}", flush=True)
        return keys_dict

    content = stdout.strip()
    if not content:
        print("Empty response from LLM", flush=True)
        return keys_dict

    content = _strip_code_fence(content)

    try:
        parsed = json.loads(content)
    except json.JSONDecodeError as e:
        print(f"JSON error: {e}", flush=True)
        print(f"Content: {content[:200]}...", flush=True)
        return keys_dict

    # Valid JSON is not enough: the caller merges the result into the locale
    # file, so it must be an object limited to the requested keys.
    if not isinstance(parsed, dict):
        print(f"Unexpected response type: {type(parsed).__name__}", flush=True)
        return keys_dict

    return _select_translations(keys_dict, parsed)
129+
130+
131+
def main():
    """Translate the keys changed in ``en_US.json`` into every other listed language.

    Paths are resolved relative to this script: the project root is two
    directories above the script's directory. For each entry of
    ``index.json`` except ``en_US`` the changed keys are translated in
    batches, merged over the existing locale data and written back in the
    key order of ``en_US.json``; keys without a value fall back to English.
    Exits with status 1 when ``en_US.json`` or ``index.json`` is missing and
    with status 0 when no key changed.
    """
    print("Starting translation script...", flush=True)

    # Setup paths.
    script_dir = Path(__file__).parent
    project_root = script_dir.parent.parent
    localizations_dir = project_root / "localizations"
    index_file = project_root / "index.json"
    en_file = localizations_dir / "en_US.json"

    print("Paths:", flush=True)
    print(f" project_root: {project_root}", flush=True)
    print(f" en_file: {en_file}", flush=True)

    if not en_file.exists():
        print(f"Error: {en_file} not found", flush=True)
        sys.exit(1)

    # Load the English source strings.
    en_data = load_json(en_file)
    print(f"Loaded {len(en_data)} keys from en_US.json", flush=True)

    # Keys touched by the latest commit.
    changed_keys = get_changed_keys(en_file)
    if not changed_keys:
        print("No changed keys found - nothing to translate", flush=True)
        sys.exit(0)

    print(f"Found {len(changed_keys)} changed keys: {', '.join(sorted(changed_keys))}", flush=True)

    # The language list lives in index.json.
    if not index_file.exists():
        print(f"Error: {index_file} not found", flush=True)
        sys.exit(1)

    languages = load_json(index_file)
    print(f"Loaded {len(languages)} languages", flush=True)

    # The work list is the same for every language: changed keys that still
    # exist in the English file.
    keys_to_translate = {key: en_data[key] for key in changed_keys if key in en_data}
    pending_keys = list(keys_to_translate)
    batch_size = 50
    total_batches = (len(pending_keys) + batch_size - 1) // batch_size

    for lang_info in languages:
        lang_code = lang_info['code']
        lang_name = lang_info['name']

        # English is the source language.
        if lang_code == 'en_US':
            continue

        print(f"\n[{lang_code}] {lang_name}", flush=True)

        target_file = localizations_dir / f"{lang_code}.json"
        if target_file.exists():
            existing_data = load_json(target_file)
        else:
            existing_data = {}

        if not keys_to_translate:
            print("Up to date", flush=True)
            continue

        print(f"Translating {len(keys_to_translate)} keys...", flush=True)

        # Translate in batches to avoid overwhelming the LLM.
        translated = {}
        for batch_num, start in enumerate(range(0, len(pending_keys), batch_size), start=1):
            print(f"Batch {batch_num}/{total_batches}", flush=True)
            batch_dict = {
                key: keys_to_translate[key]
                for key in pending_keys[start:start + batch_size]
            }
            translated.update(translate_keys(batch_dict, lang_name, en_data))

        # New translations win over existing ones; the output follows the key
        # order of en_US.json and falls back to English for missing keys.
        merged = {**existing_data}
        merged.update(translated)
        ordered_data = {key: merged.get(key, english) for key, english in en_data.items()}

        save_json(target_file, ordered_data)
        print("✓ Saved", flush=True)

    print("\n✓ Done", flush=True)


if __name__ == "__main__":
    main()

.github/workflows/translate.yml

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Generates machine translations whenever the English localization file
# changes on the default branch and proposes them as a pull request.
name: Translate

on:
  push:
    branches: ["main", "master"]
    paths:
      - "localizations/en_US.json"

jobs:
  translate:
    # Only run for pushes made by the maintainer.
    if: github.event.pusher.name == 'alexmercerind' && github.event.pusher.email == 'saini123hitesh@gmail.com'
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write
      # Listing permissions explicitly sets every unlisted scope to "none";
      # the job token needs this scope to call GitHub Models.
      models: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # The script diffs HEAD~1..HEAD, so history must be available.
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install llm-github-models CLI
        run: |
          pip install llm-github-models
      - name: Configure llm
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          llm keys set github --value "$GITHUB_TOKEN"
      - name: Generate translations
        run: |
          python3 .github/scripts/translate.py
      - name: Check for changes
        id: check_changes
        run: |
          # `git diff --quiet` ignores untracked files, so a locale file newly
          # created by the script would go unnoticed; `git status --porcelain`
          # reports both modified and untracked files.
          if [ -z "$(git status --porcelain)" ]; then
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
          else
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
          fi
      - name: Create Pull Request
        if: steps.check_changes.outputs.has_changes == 'true'
        id: create_pr
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          commit-message: "chore: auto-translate localizations"
          title: "Auto-Translate Localizations"
          body: "This pull request contains automatically generated translations based on changes to `en_US.json`."
          branch: auto-translate-${{ github.run_number }}
          delete-branch: true
      - name: Comment on PR
        if: steps.check_changes.outputs.has_changes == 'true' && steps.create_pr.outputs.pull-request-number
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: ${{ steps.create_pr.outputs.pull-request-number }},
              body: '@gemini-code-assist review'
            });
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: CI
1+
name: Validate
22

33
on:
44
push:
@@ -14,4 +14,4 @@ jobs:
1414
- name: Validate JSON syntax
1515
run: for file in `ls | grep '.json' `; do python -mjson.tool "$file" > /dev/null ; done
1616
- name: Validate index entries & localization values
17-
run: python3 .github/ci.py
17+
run: python3 .github/scripts/validate.py

0 commit comments

Comments
 (0)