Coverage for src / ezqt_app / services / translation / auto_translator.py: 63.64%

288 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-06 13:12 +0000

1# /////////////////////////////////////////////////////////////// 

2# SERVICES.TRANSLATION.AUTO_TRANSLATOR - Automatic translation providers 

3# Project: ezqt_app 

4# /////////////////////////////////////////////////////////////// 

5 

6"""Automatic translation via external providers (disabled by default).""" 

7 

8from __future__ import annotations 

9 

10# /////////////////////////////////////////////////////////////// 

11# IMPORTS 

12# /////////////////////////////////////////////////////////////// 

13# Standard library imports 

14import hashlib 

15import json 

16import threading 

17import time 

18from abc import ABC, abstractmethod 

19from datetime import datetime, timedelta 

20from pathlib import Path 

21from typing import Any 

22 

23# Third-party imports 

24import requests 

25from pydantic import BaseModel, ConfigDict, RootModel, ValidationError 

26from PySide6.QtCore import QObject, Signal 

27 

28# Local imports 

29from ...utils.diagnostics import warn_tech, warn_user 

30from ...utils.printer import get_printer 

31 

32 

33# /////////////////////////////////////////////////////////////// 

34# PYDANTIC RESPONSE SCHEMAS 

35# /////////////////////////////////////////////////////////////// 

class _LibreTranslateResponseSchema(BaseModel):
    """Expected response payload for the LibreTranslate provider.

    Only ``translatedText`` is consumed by the provider. Unknown keys are
    ignored instead of rejected: a LibreTranslate server may attach optional
    fields to a successful reply (for example ``detectedLanguage`` when the
    source language is auto-detected), and rejecting those would turn a valid
    translation into an "invalid payload" failure.
    """

    # Tolerate additional keys; the required field below is still enforced.
    model_config = ConfigDict(extra="ignore")

    # The translated string returned by the server (required).
    translatedText: str

42 

43 

class _GoogleTranslateResponseSchema(RootModel[list[Any]]):
    """Root payload of the unofficial Google Translate endpoint.

    Validation only guarantees that the top-level JSON value is an array; the
    nested structure is inspected by the provider that consumes ``root``.
    """

46 

47 

class _MyMemoryResponseDataSchema(BaseModel):
    """Nested ``responseData`` object of a MyMemory reply.

    Only ``translatedText`` is read. Other keys that MyMemory places next to
    it (e.g. ``match``) are ignored rather than rejected, otherwise every
    real-world reply would fail validation.
    """

    # Tolerate additional keys; only the field below is consumed.
    model_config = ConfigDict(extra="ignore")

    # Translated string; may be absent/null in the payload.
    translatedText: str | None = None


class _MyMemoryResponseSchema(BaseModel):
    """Expected response payload for the MyMemory provider.

    Only ``responseStatus`` and ``responseData`` are read. MyMemory replies
    carry further top-level keys (e.g. ``matches``, ``quotaFinished``,
    ``responseDetails``); these are ignored so that a successful reply is not
    discarded as an invalid payload.
    """

    # Tolerate additional keys; the fields below are still validated.
    model_config = ConfigDict(extra="ignore")

    # API-level status code reported inside the JSON body (required).
    responseStatus: int
    # Nested translation data; may be missing on API-level errors.
    responseData: _MyMemoryResponseDataSchema | None = None

63 

64 

class _TranslationCacheEntrySchema(BaseModel):
    """Strict schema describing a single record of the translation cache.

    Unknown keys are rejected so that a corrupted or foreign cache record is
    detected instead of being silently accepted.
    """

    model_config = ConfigDict(extra="forbid")

    # Source text that was translated.
    original: str
    # Translated text returned by the provider.
    translation: str
    # Language code of ``original``.
    source_lang: str
    # Language code of ``translation``.
    target_lang: str
    # Name of the provider that produced the translation.
    provider: str
    # Creation timestamp of the record.
    created: datetime

76 

77 

class _TranslationCacheFileSchema(RootModel[dict[str, _TranslationCacheEntrySchema]]):
    """Strict schema for the whole cache file: a mapping of key to cache entry."""

80 

81 

82# /////////////////////////////////////////////////////////////// 

83# CLASSES 

84# /////////////////////////////////////////////////////////////// 

class TranslationProvider(ABC):
    """Abstract base class shared by all translation providers.

    Attributes:
        name: Human-readable provider name.
        base_url: Root URL of the provider's HTTP service.
        timeout: Timeout, in seconds, for translation requests (default 10).
        rate_limit_delay: Pause, in seconds, associated with this provider
            (default 1.0).
    """

    def __init__(self, name: str, base_url: str):
        self.name, self.base_url = name, base_url
        self.timeout = 10
        self.rate_limit_delay = 1.0

    @abstractmethod
    def translate(
        self, text: str, source_lang: str, target_lang: str
    ) -> str | None:
        """Translate *text* and return the result, or ``None`` on failure."""

    def is_available(self) -> bool:
        """Return ``True`` when a GET on ``base_url`` answers with HTTP 200.

        Any exception raised while issuing the request is treated as
        "not available".
        """
        try:
            status = requests.get(self.base_url, timeout=5).status_code
        except Exception:
            return False
        return status == 200

105 

106 

class LibreTranslateProvider(TranslationProvider):
    """Provider that talks to a LibreTranslate server.

    Args:
        api_key: Optional API key sent with each request when set.
        custom_server: Optional server root URL; defaults to
            ``https://libretranslate.com``.
    """

    def __init__(self, api_key: str | None = None, custom_server: str | None = None):
        super().__init__(
            "LibreTranslate", custom_server or "https://libretranslate.com"
        )
        self.api_key = api_key
        self.rate_limit_delay = 1.0

    def translate(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """POST *text* to ``/translate`` and return the translated string.

        Returns ``None`` (after emitting a technical warning) on a non-200
        status, an invalid payload, or any other exception.
        """
        try:
            body: dict[str, Any] = dict(
                q=text,
                source=source_lang,
                target=target_lang,
                format="text",
            )
            if self.api_key:
                body["api_key"] = self.api_key

            reply = requests.post(
                f"{self.base_url}/translate",
                json=body,
                headers={"Content-Type": "application/json"},
                timeout=self.timeout,
            )

            # Guard clause: anything other than HTTP 200 is reported and dropped.
            if reply.status_code != 200:
                warn_tech(
                    code="translation.provider.libretranslate.http_error",
                    message=f"LibreTranslate error: {reply.status_code}",
                )
                return None

            parsed = _LibreTranslateResponseSchema.model_validate(reply.json())
            return parsed.translatedText
        except ValidationError as e:
            warn_tech(
                code="translation.provider.libretranslate.invalid_payload",
                message="LibreTranslate returned an invalid payload",
                error=e,
            )
            return None
        except Exception as e:
            warn_tech(
                code="translation.provider.libretranslate.exception",
                message="LibreTranslate exception",
                error=e,
            )
            return None

156 

157 

class GoogleTranslateProvider(TranslationProvider):
    """Google Translate Web provider (unofficial ``translate_a/single`` endpoint)."""

    def __init__(self):
        super().__init__("Google Translate", "https://translate.googleapis.com")
        self.rate_limit_delay = 0.5

    def translate(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """Translate *text* and return the full translated string.

        The endpoint returns the translation split into segments (one per
        sentence/chunk) under the first element of the root array. All
        segments are concatenated so that multi-sentence input is not
        truncated to its first segment.

        Returns:
            The translated text, or ``None`` (after a technical warning) on a
            non-200 status, an unexpected payload shape, or any exception.
        """
        try:
            response = requests.get(
                f"{self.base_url}/translate_a/single",
                params={
                    "client": "gtx",
                    "sl": source_lang,
                    "tl": target_lang,
                    "dt": "t",
                    "q": text,
                },
                timeout=self.timeout,
            )
            if response.status_code != 200:
                warn_tech(
                    code="translation.provider.google.http_error",
                    message=f"Google Translate error: {response.status_code}",
                )
                return None

            payload = _GoogleTranslateResponseSchema.model_validate(response.json())
            translated = self._join_segments(payload.root)
            if translated is not None:
                return translated

            warn_tech(
                code="translation.provider.google.invalid_payload",
                message="Google Translate returned an unexpected payload shape",
            )
        except ValidationError as e:
            warn_tech(
                code="translation.provider.google.invalid_payload",
                message="Google Translate returned an invalid payload",
                error=e,
            )
        except Exception as e:
            warn_tech(
                code="translation.provider.google.exception",
                message="Google Translate exception",
                error=e,
            )
        return None

    @staticmethod
    def _join_segments(data: list[Any]) -> str | None:
        """Concatenate the translated part of every segment in ``data[0]``.

        Each usable segment is a non-empty list whose first item is a string.
        Returns ``None`` when no usable segment is found.
        """
        if not data or not isinstance(data[0], list):
            return None
        parts = [
            segment[0]
            for segment in data[0]
            if isinstance(segment, list) and segment and isinstance(segment[0], str)
        ]
        return "".join(parts) if parts else None

215 

216 

class MyMemoryProvider(TranslationProvider):
    """MyMemory provider (free, no API key required)."""

    def __init__(self):
        super().__init__("MyMemory", "https://api.mymemory.translated.net")

    def translate(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """Query the ``/get`` endpoint and return the translated string.

        Returns:
            The translated text, or ``None`` on an HTTP error, an API-level
            error reported in the JSON body, an invalid payload, or any
            exception. Every failure path emits a technical warning.
        """
        try:
            response = requests.get(
                f"{self.base_url}/get",
                params={"q": text, "langpair": f"{source_lang}|{target_lang}"},
                timeout=self.timeout,
            )
            if response.status_code != 200:
                warn_tech(
                    code="translation.provider.mymemory.http_error",
                    message=f"MyMemory error: {response.status_code}",
                )
                return None

            payload = _MyMemoryResponseSchema.model_validate(response.json())
            if payload.responseStatus == 200 and payload.responseData is not None:
                return payload.responseData.translatedText

            # HTTP succeeded but the body reports a failure: surface it instead
            # of returning None without any diagnostic.
            warn_tech(
                code="translation.provider.mymemory.api_error",
                message=f"MyMemory API error: {payload.responseStatus}",
            )
        except ValidationError as e:
            warn_tech(
                code="translation.provider.mymemory.invalid_payload",
                message="MyMemory returned an invalid payload",
                error=e,
            )
        except Exception as e:
            warn_tech(
                code="translation.provider.mymemory.exception",
                message="MyMemory exception",
                error=e,
            )
        return None

252 

253 

class TranslationCache:
    """JSON-file backed translation cache with age-based expiry.

    The cache is read by the caller's thread and written by translation
    worker threads, so every access to ``cache_data`` and to the cache file
    is serialised through a re-entrant lock.

    Attributes:
        cache_file: Path of the JSON file holding the cache.
        cache_data: In-memory mapping of cache key to raw entry dict.
        max_age_days: Entries older than this many days are considered expired.
    """

    def __init__(self, cache_file: Path):
        self.cache_file = cache_file
        self.cache_data: dict[str, Any] = {}
        self.max_age_days = 30
        # Re-entrant because set()/clear_expired() call save_cache() while
        # already holding the lock.
        self._lock = threading.RLock()
        self.load_cache()

    def _get_cache_key(self, text: str, source_lang: str, target_lang: str) -> str:
        """Return the MD5 hex digest of ``text|source_lang|target_lang``."""
        return hashlib.md5(
            f"{text}|{source_lang}|{target_lang}".encode(), usedforsecurity=False
        ).hexdigest()

    def get(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """Return the cached translation, or ``None`` if absent/expired/invalid.

        Invalid and expired entries are removed from the in-memory cache.
        """
        key = self._get_cache_key(text, source_lang, target_lang)
        with self._lock:
            entry_raw = self.cache_data.get(key)
            if not entry_raw:
                return None

            try:
                entry = _TranslationCacheEntrySchema.model_validate(entry_raw)
            except ValidationError:
                del self.cache_data[key]
                return None

            if datetime.now() - entry.created < timedelta(days=self.max_age_days):
                return entry.translation
            del self.cache_data[key]
            return None

    def set(
        self,
        text: str,
        source_lang: str,
        target_lang: str,
        translation: str,
        provider: str,
    ) -> None:
        """Store a translation and persist the cache to disk."""
        key = self._get_cache_key(text, source_lang, target_lang)
        with self._lock:
            self.cache_data[key] = {
                "original": text,
                "translation": translation,
                "source_lang": source_lang,
                "target_lang": target_lang,
                "provider": provider,
                "created": datetime.now().isoformat(),
            }
            self.save_cache()

    def load_cache(self) -> None:
        """Load and validate the cache file; fall back to an empty cache on error."""
        with self._lock:
            try:
                if self.cache_file.exists():
                    with open(self.cache_file, encoding="utf-8") as f:
                        raw_data = json.load(f)
                    validated = _TranslationCacheFileSchema.model_validate(raw_data)
                    self.cache_data = {
                        key: entry.model_dump(mode="json")
                        for key, entry in validated.root.items()
                    }
            except Exception as e:
                warn_tech(
                    code="translation.cache.load_failed",
                    message="Error loading cache",
                    error=e,
                )
                self.cache_data = {}

    def save_cache(self) -> None:
        """Validate the in-memory cache and write it to ``cache_file``.

        Failures are reported through a technical warning and never raised.
        """
        with self._lock:
            try:
                self.cache_file.parent.mkdir(parents=True, exist_ok=True)
                # Validate before opening so an invalid cache never truncates
                # the existing file.
                validated = _TranslationCacheFileSchema.model_validate(
                    self.cache_data
                )
                with open(self.cache_file, "w", encoding="utf-8") as f:
                    json.dump(
                        validated.model_dump(mode="json"),
                        f,
                        indent=2,
                        ensure_ascii=False,
                    )
            except Exception as e:
                warn_tech(
                    code="translation.cache.save_failed",
                    message="Error saving cache",
                    error=e,
                )

    def clear_expired(self) -> None:
        """Drop expired or invalid entries and persist if anything was removed."""
        with self._lock:
            current_time = datetime.now()
            max_age = timedelta(days=self.max_age_days)
            expired_keys: list[str] = []
            for key, entry_raw in self.cache_data.items():
                try:
                    entry = _TranslationCacheEntrySchema.model_validate(entry_raw)
                except ValidationError:
                    expired_keys.append(key)
                    continue

                if current_time - entry.created > max_age:
                    expired_keys.append(key)

            for key in expired_keys:
                del self.cache_data[key]
            if expired_keys:
                self.save_cache()

352 

353 

class AutoTranslator(QObject):
    """Automatic translation manager (disabled by default).

    Each call to :meth:`translate` spawns a lightweight daemon thread that
    performs the HTTP round-trip in the background. Signals are emitted from
    that thread; Qt automatically delivers them via a queued connection to
    slots that live in the main thread, so the UI is never blocked.

    A ``_pending`` set (guarded by ``_lock``) deduplicates in-flight requests:
    if the same source string is requested while a thread for it is still
    running, no second thread is spawned.

    Signals:
        translation_ready(str, str): source text, translated text.
        translation_error(str, str): source text, error message.
    """

    translation_ready = Signal(str, str)
    translation_error = Signal(str, str)

    def __init__(self, cache_dir: Path | None = None):
        """Create the translator with its on-disk cache and default providers.

        Args:
            cache_dir: Directory holding ``translations.json``. Defaults to
                ``~/.ezqt/cache``. The directory is created if missing.
        """
        super().__init__()
        if cache_dir is None:
            cache_dir = Path.home() / ".ezqt" / "cache"
        cache_dir.mkdir(parents=True, exist_ok=True)
        self.cache = TranslationCache(cache_dir / "translations.json")
        self.providers: list[TranslationProvider] = []
        self._pending: set[str] = set()
        self._lock = threading.Lock()
        self._setup_providers()
        self.enabled = False

    def _setup_providers(self) -> None:
        """Install the default provider chain, tried in list order."""
        # Order matters: fastest/unofficial first, then free fallback providers.
        # is_available() performs a synchronous HTTP GET; calling it at setup time
        # would block the Qt main thread. Provider availability checking requires
        # a dedicated health-check mechanism before it can be integrated.
        self.providers = [
            GoogleTranslateProvider(),
            MyMemoryProvider(),
            LibreTranslateProvider(),
        ]

    def add_provider(self, provider: TranslationProvider) -> None:
        """Append *provider* to the end of the provider chain."""
        self.providers.append(provider)

    def remove_provider(self, provider_name: str) -> None:
        """Remove every provider whose ``name`` equals *provider_name*."""
        self.providers = [p for p in self.providers if p.name != provider_name]

    def translate(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """Schedule an async translation and return ``None`` immediately.

        If *source_lang* equals *target_lang* the method returns *text*
        immediately (identity translation — no HTTP request is made).
        If *text* is already cached the cached value is returned immediately.
        Otherwise a daemon thread is started and ``None`` is returned; the
        caller receives the result via :attr:`translation_ready`.

        Args:
            text: The source text to translate.
            source_lang: Language code of *text*.
            target_lang: Language code of the desired output.

        Returns:
            *text* for an identity translation, the cached translation when
            available, otherwise ``None``.
        """
        # NOTE(review): unlike translate_sync(), this method does not consult
        # ``self.enabled`` — presumably the caller gates on it; confirm.
        if source_lang == target_lang:
            return text

        cached = self.cache.get(text, source_lang, target_lang)
        if cached:
            return cached

        # NOTE(review): in-flight requests are deduplicated by source text
        # only, so a concurrent request for the same text with a different
        # language pair is dropped — confirm this is intended.
        with self._lock:
            if text in self._pending:
                return None
            self._pending.add(text)

        t = threading.Thread(
            target=self._do_translate,
            args=(text, source_lang, target_lang),
            daemon=True,
            name=f"ez-translate:{text[:30]}",
        )
        t.start()
        return None

    def _do_translate(self, text: str, source_lang: str, target_lang: str) -> None:
        """Blocking translation worker — runs in a background daemon thread.

        Tries each provider in order; the first truthy result is cached and
        emitted through :attr:`translation_ready`. When every provider fails,
        a user warning is raised and :attr:`translation_error` is emitted.
        *text* is always removed from ``_pending`` on exit.
        """
        try:
            for provider in self.providers:
                try:
                    translation = provider.translate(text, source_lang, target_lang)
                    if translation:
                        self.cache.set(
                            text, source_lang, target_lang, translation, provider.name
                        )
                        get_printer().debug_msg(
                            "[TranslationService] Automatic translation "
                            f"({provider.name}): '{text}' -> '{translation}'"
                        )
                        # Signal is delivered to the main thread via queued connection.
                        self.translation_ready.emit(text, translation)
                        return
                    # Pause before falling back to the next provider.
                    time.sleep(provider.rate_limit_delay)
                except Exception as e:
                    warn_tech(
                        code="translation.worker.provider_failed",
                        message=f"Translation error with {provider.name}",
                        error=e,
                    )

            warn_user(
                code="translation.auto.failed",
                user_message=f"Automatic translation failed: '{text}'",
                log_message=f"All providers failed for '{text}'",
            )
            self.translation_error.emit(text, "No translation found")
        finally:
            with self._lock:
                self._pending.discard(text)

    def translate_sync(
        self, text: str, source_lang: str, target_lang: str
    ) -> str | None:
        """Translate text synchronously, blocking until a result is obtained.

        Intended for use in CLI scripts, test helpers, and offline batch-processing
        tools that run outside the Qt event loop. Each provider call is a blocking
        HTTP request; the total wait time can reach ``len(providers) × timeout``
        seconds if all providers fail.

        Warning:
            **Never call this method from the Qt main (UI) thread.** Doing so
            blocks the event loop for the entire duration of the HTTP round-trips,
            freezing the application UI. For in-app translation use
            :meth:`translate` instead, which runs the request in a daemon thread.

        Example::

            # Appropriate usage — called from a CLI script, not from a Qt slot:
            translator = get_auto_translator()
            translator.enabled = True
            result = translator.translate_sync("Hello", "en", "fr")
            print(result)  # "Bonjour"

        Args:
            text: The source text to translate.
            source_lang: BCP-47 language code of the source text (e.g. ``"en"``).
            target_lang: BCP-47 language code of the desired output (e.g. ``"fr"``).

        Returns:
            The translated string, or ``None`` if the translator is disabled or
            all providers fail.
        """
        if not self.enabled:
            return None

        cached = self.cache.get(text, source_lang, target_lang)
        if cached:
            return cached

        for provider in self.providers:
            try:
                translation = provider.translate(text, source_lang, target_lang)
                if translation:
                    self.cache.set(
                        text, source_lang, target_lang, translation, provider.name
                    )
                    return translation
                time.sleep(provider.rate_limit_delay)
            except Exception as e:
                warn_tech(
                    code="translation.sync.provider_failed",
                    message=f"Translation error with {provider.name}",
                    error=e,
                )

        return None

    def save_translation_to_ts(
        self, original: str, translated: str, target_lang: str, ts_file_path: Path
    ) -> None:
        """Insert or update one translation entry in a Qt Linguist .ts XML file.

        The file is created when missing and rebuilt from scratch when it
        cannot be parsed. An existing ``<message>`` whose ``<source>`` equals
        *original* is updated in place (its ``<translation>`` child is created
        if absent); otherwise a new ``<message>`` is appended to the first
        ``<context>``. The file is always written as UTF-8. Failures are
        reported through a technical warning and never raised.

        Args:
            original: Source string of the entry.
            translated: Translated string to store.
            target_lang: Language code written to the ``language`` attribute
                of a newly created ``<TS>`` root.
            ts_file_path: Destination .ts file.
        """
        from xml.etree.ElementTree import Element, ElementTree, SubElement  # nosec B405

        import defusedxml.ElementTree as ET  # type: ignore[import-untyped]

        try:
            if ts_file_path.exists():
                try:
                    tree = ET.parse(ts_file_path)
                    root = tree.getroot()
                    if root is None:
                        raise ET.ParseError("Empty document")
                except ET.ParseError:
                    root = Element("TS", {"language": target_lang, "version": "2.1"})
                    tree = ElementTree(root)
            else:
                root = Element("TS", {"language": target_lang, "version": "2.1"})
                tree = ElementTree(root)

            context = root.find("context")
            if context is None:
                context = SubElement(root, "context")
                SubElement(context, "name").text = "ezqt_app"

            # Update existing entry if source already present, otherwise append.
            for msg in context.findall("message"):
                src = msg.find("source")
                if src is not None and src.text == original:
                    trans = msg.find("translation")
                    if trans is None:
                        # A message without a <translation> child must not
                        # cause the new translation to be dropped.
                        trans = SubElement(msg, "translation")
                    trans.text = translated
                    break
            else:
                msg = SubElement(context, "message")
                SubElement(msg, "source").text = original
                SubElement(msg, "translation").text = translated

            ts_file_path.parent.mkdir(parents=True, exist_ok=True)
            # Explicit UTF-8 keeps the on-disk encoding (and the XML
            # declaration) independent of the platform locale.
            tree.write(ts_file_path, encoding="utf-8", xml_declaration=True)
            get_printer().debug_msg(
                f"[TranslationService] Translation saved to {ts_file_path}"
            )
        except Exception as e:
            warn_tech(
                code="translation.ts.save_failed",
                message="Error saving translation to .ts file",
                error=e,
            )

    def clear_cache(self) -> None:
        """Empty the in-memory cache and persist the empty cache to disk."""
        self.cache.cache_data.clear()
        self.cache.save_cache()
        get_printer().debug_msg("[TranslationService] Translation cache cleared")

    def get_cache_stats(self) -> dict[str, Any]:
        """Return cache statistics.

        Returns:
            A dict with ``total_entries``, ``cache_file``, ``max_age_days`` and
            ``by_provider`` (entry count per provider name; entries failing
            validation are counted under ``"invalid"``).
        """
        stats: dict[str, Any] = {
            "total_entries": len(self.cache.cache_data),
            "cache_file": str(self.cache.cache_file),
            "max_age_days": self.cache.max_age_days,
        }
        provider_stats: dict[str, int] = {}
        for entry_raw in self.cache.cache_data.values():
            try:
                entry = _TranslationCacheEntrySchema.model_validate(entry_raw)
                p = entry.provider
            except ValidationError:
                p = "invalid"
            provider_stats[p] = provider_stats.get(p, 0) + 1
        stats["by_provider"] = provider_stats
        return stats

    def cleanup(self) -> None:
        """Purge expired cache entries (persisted if any were removed)."""
        # Background threads are daemon threads — they exit automatically when
        # the process exits. We only need to flush the on-disk cache.
        self.cache.clear_expired()

600 

601 

602# /////////////////////////////////////////////////////////////// 

603# FUNCTIONS 

604# /////////////////////////////////////////////////////////////// 

def get_auto_translator() -> AutoTranslator:
    """Return the global AutoTranslator singleton.

    The instance is obtained from ``ServiceRegistry``; the registry is
    imported lazily inside the function.
    """
    from .._registry import ServiceRegistry

    translator = ServiceRegistry.get(AutoTranslator, AutoTranslator)
    return translator

607 from .._registry import ServiceRegistry 

608 

609 return ServiceRegistry.get(AutoTranslator, AutoTranslator)