Coverage for src / ezqt_app / services / translation / auto_translator.py: 63.64%
288 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-06 13:12 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-06 13:12 +0000
1# ///////////////////////////////////////////////////////////////
2# SERVICES.TRANSLATION.AUTO_TRANSLATOR - Automatic translation providers
3# Project: ezqt_app
4# ///////////////////////////////////////////////////////////////
6"""Automatic translation via external providers (disabled by default)."""
8from __future__ import annotations
10# ///////////////////////////////////////////////////////////////
11# IMPORTS
12# ///////////////////////////////////////////////////////////////
13# Standard library imports
14import hashlib
15import json
16import threading
17import time
18from abc import ABC, abstractmethod
19from datetime import datetime, timedelta
20from pathlib import Path
21from typing import Any
23# Third-party imports
24import requests
25from pydantic import BaseModel, ConfigDict, RootModel, ValidationError
26from PySide6.QtCore import QObject, Signal
28# Local imports
29from ...utils.diagnostics import warn_tech, warn_user
30from ...utils.printer import get_printer
33# ///////////////////////////////////////////////////////////////
34# PYDANTIC RESPONSE SCHEMAS
35# ///////////////////////////////////////////////////////////////
class _LibreTranslateResponseSchema(BaseModel):
    """Response payload returned by the LibreTranslate ``/translate`` endpoint.

    Only ``translatedText`` is consumed by the provider. Unknown keys are
    ignored instead of rejected: the server may add fields such as
    ``detectedLanguage`` (when the source language is ``"auto"``) or
    ``alternatives``, and forbidding them would turn an otherwise valid
    translation into a validation failure.
    """

    model_config = ConfigDict(extra="ignore")

    # The translated string; required, so a payload without it is invalid.
    translatedText: str
class _GoogleTranslateResponseSchema(RootModel[list[Any]]):
    """Top-level payload of the unofficial Google Translate endpoint.

    Validation only guarantees that the JSON document is an array; the items
    are typed ``Any`` and their nested layout is not constrained here.
    """
class _MyMemoryResponseDataSchema(BaseModel):
    """``responseData`` object nested inside a MyMemory response.

    Only ``translatedText`` is read. Extra keys are ignored because the API
    returns additional members next to it (for example ``match``); rejecting
    them would make every real response fail validation.
    """

    model_config = ConfigDict(extra="ignore")

    # May be absent or null when the service could not translate the text.
    translatedText: str | None = None


class _MyMemoryResponseSchema(BaseModel):
    """Top-level payload returned by the MyMemory ``/get`` endpoint.

    Only the fields the provider needs are declared. The API also sends
    bookkeeping members such as ``quotaFinished``, ``responseDetails`` and
    ``matches``; they are ignored rather than treated as schema violations.
    """

    model_config = ConfigDict(extra="ignore")

    # API-level status code, reported independently of the HTTP status.
    responseStatus: int
    # Container for the translated text; may be missing on API errors.
    responseData: _MyMemoryResponseDataSchema | None = None
class _TranslationCacheEntrySchema(BaseModel):
    """One record of the on-disk translation cache.

    The schema is strict: any key other than the six declared below makes
    validation fail (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Source text that was translated.
    original: str
    # Translated text returned by the provider.
    translation: str
    # Language code of ``original``.
    source_lang: str
    # Language code of ``translation``.
    target_lang: str
    # Name of the provider that produced the translation.
    provider: str
    # Creation timestamp; used to decide when the entry has expired.
    created: datetime
class _TranslationCacheFileSchema(RootModel[dict[str, _TranslationCacheEntrySchema]]):
    """Whole-file schema of the translation cache.

    The JSON document is a single object mapping a cache key to one
    ``_TranslationCacheEntrySchema`` record.
    """
82# ///////////////////////////////////////////////////////////////
83# CLASSES
84# ///////////////////////////////////////////////////////////////
85class TranslationProvider(ABC):
86 """Base translation provider class"""
88 def __init__(self, name: str, base_url: str):
89 self.name = name
90 self.base_url = base_url
91 self.timeout = 10
92 self.rate_limit_delay = 1.0
94 @abstractmethod
95 def translate(
96 self, text: str, source_lang: str, target_lang: str
97 ) -> str | None: ...
99 def is_available(self) -> bool:
100 try:
101 response = requests.get(self.base_url, timeout=5)
102 return response.status_code == 200
103 except Exception:
104 return False
class LibreTranslateProvider(TranslationProvider):
    """Provider backed by a LibreTranslate server (public or self-hosted)."""

    def __init__(self, api_key: str | None = None, custom_server: str | None = None):
        """Configure the provider.

        Args:
            api_key: Optional API key sent with every request.
            custom_server: Optional base URL of a self-hosted instance; the
                public ``https://libretranslate.com`` server is used otherwise.
        """
        # Drop trailing slashes so "http://host:5000/" does not end up as
        # "http://host:5000//translate" once the endpoint path is appended.
        server = (custom_server or "").rstrip("/") or "https://libretranslate.com"
        super().__init__("LibreTranslate", server)
        self.api_key = api_key
        self.rate_limit_delay = 1.0

    def translate(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """Translate *text* with a POST to ``<base_url>/translate``.

        Args:
            text: Text to translate.
            source_lang: Language code of *text*.
            target_lang: Language code of the desired translation.

        Returns:
            The translated text, or ``None`` when the server answers with a
            non-200 status, the payload fails validation, or any exception is
            raised. Every failure is reported through ``warn_tech``; nothing
            is raised to the caller.
        """
        try:
            url = f"{self.base_url}/translate"
            data: dict[str, Any] = {
                "q": text,
                "source": source_lang,
                "target": target_lang,
                "format": "text",
            }
            if self.api_key:
                data["api_key"] = self.api_key

            response = requests.post(
                url,
                json=data,
                headers={"Content-Type": "application/json"},
                timeout=self.timeout,
            )
            if response.status_code == 200:
                payload = _LibreTranslateResponseSchema.model_validate(response.json())
                return payload.translatedText
            warn_tech(
                code="translation.provider.libretranslate.http_error",
                message=f"LibreTranslate error: {response.status_code}",
            )
            return None
        except ValidationError as e:
            warn_tech(
                code="translation.provider.libretranslate.invalid_payload",
                message="LibreTranslate returned an invalid payload",
                error=e,
            )
            return None
        except Exception as e:
            # Best-effort provider: network errors, malformed JSON, etc. are
            # reported and turned into "no translation".
            warn_tech(
                code="translation.provider.libretranslate.exception",
                message="LibreTranslate exception",
                error=e,
            )
            return None
class GoogleTranslateProvider(TranslationProvider):
    """Provider using the unofficial Google Translate web endpoint."""

    def __init__(self):
        super().__init__("Google Translate", "https://translate.googleapis.com")
        self.rate_limit_delay = 0.5

    @staticmethod
    def _extract_translation(data: list[Any]) -> str | None:
        """Join the translated segments found in a decoded response.

        ``data[0]`` is expected to be a list of segments, each a list whose
        first item is the translated text. The endpoint splits longer input
        (several sentences) into several segments, so all of them are
        concatenated instead of returning only the first one.

        Returns:
            The concatenated translation, or ``None`` when no segment with a
            string in first position is present.
        """
        if not data or not isinstance(data[0], list):
            return None
        pieces = [
            segment[0]
            for segment in data[0]
            if isinstance(segment, list) and segment and isinstance(segment[0], str)
        ]
        return "".join(pieces) if pieces else None

    def translate(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """Translate *text* with a GET on ``translate_a/single``.

        Args:
            text: Text to translate.
            source_lang: Language code of *text*.
            target_lang: Language code of the desired translation.

        Returns:
            The translated text, or ``None`` on an HTTP error, an unexpected
            payload shape, or any exception. Failures are reported through
            ``warn_tech``; nothing is raised to the caller.
        """
        try:
            response = requests.get(
                f"{self.base_url}/translate_a/single",
                params={
                    "client": "gtx",
                    "sl": source_lang,
                    "tl": target_lang,
                    "dt": "t",
                    "q": text,
                },
                timeout=self.timeout,
            )
            if response.status_code == 200:
                payload = _GoogleTranslateResponseSchema.model_validate(response.json())
                translated = self._extract_translation(payload.root)
                if translated is not None:
                    return translated

                warn_tech(
                    code="translation.provider.google.invalid_payload",
                    message="Google Translate returned an unexpected payload shape",
                )
            else:
                warn_tech(
                    code="translation.provider.google.http_error",
                    message=f"Google Translate error: {response.status_code}",
                )
        except ValidationError as e:
            warn_tech(
                code="translation.provider.google.invalid_payload",
                message="Google Translate returned an invalid payload",
                error=e,
            )
        except Exception as e:
            # Best-effort provider: any other failure is reported and turned
            # into "no translation".
            warn_tech(
                code="translation.provider.google.exception",
                message="Google Translate exception",
                error=e,
            )
        return None
class MyMemoryProvider(TranslationProvider):
    """MyMemory provider (free, no API key required)."""

    def __init__(self):
        super().__init__("MyMemory", "https://api.mymemory.translated.net")

    def translate(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """Translate *text* with a GET on ``<base_url>/get``.

        Args:
            text: Text to translate.
            source_lang: Language code of *text*.
            target_lang: Language code of the desired translation.

        Returns:
            The translated text, or ``None`` on an HTTP error, an API-level
            error, an invalid payload, or any exception. Failures are reported
            through ``warn_tech``; nothing is raised to the caller.
        """
        try:
            response = requests.get(
                f"{self.base_url}/get",
                params={"q": text, "langpair": f"{source_lang}|{target_lang}"},
                timeout=self.timeout,
            )
            if response.status_code == 200:
                payload = _MyMemoryResponseSchema.model_validate(response.json())
                if payload.responseStatus == 200 and payload.responseData is not None:
                    return payload.responseData.translatedText
                # HTTP succeeded but the API itself reported a failure (or sent
                # no data). Report it instead of failing silently so the
                # reason for the missing translation is visible.
                warn_tech(
                    code="translation.provider.mymemory.api_error",
                    message=(
                        "MyMemory returned no translation "
                        f"(responseStatus={payload.responseStatus})"
                    ),
                )
            else:
                warn_tech(
                    code="translation.provider.mymemory.http_error",
                    message=f"MyMemory error: {response.status_code}",
                )
        except ValidationError as e:
            warn_tech(
                code="translation.provider.mymemory.invalid_payload",
                message="MyMemory returned an invalid payload",
                error=e,
            )
        except Exception as e:
            # Best-effort provider: any other failure is reported and turned
            # into "no translation".
            warn_tech(
                code="translation.provider.mymemory.exception",
                message="MyMemory exception",
                error=e,
            )
        return None
254class TranslationCache:
255 """Translation cache manager"""
257 def __init__(self, cache_file: Path):
258 self.cache_file = cache_file
259 self.cache_data: dict[str, Any] = {}
260 self.max_age_days = 30
261 self.load_cache()
263 def _get_cache_key(self, text: str, source_lang: str, target_lang: str) -> str:
264 return hashlib.md5(
265 f"{text}|{source_lang}|{target_lang}".encode(), usedforsecurity=False
266 ).hexdigest()
268 def get(self, text: str, source_lang: str, target_lang: str) -> str | None:
269 key = self._get_cache_key(text, source_lang, target_lang)
270 entry_raw = self.cache_data.get(key)
271 if entry_raw:
272 try:
273 entry = _TranslationCacheEntrySchema.model_validate(entry_raw)
274 except ValidationError:
275 del self.cache_data[key]
276 return None
278 if datetime.now() - entry.created < timedelta(days=self.max_age_days):
279 return entry.translation
280 del self.cache_data[key]
281 return None
283 def set(
284 self,
285 text: str,
286 source_lang: str,
287 target_lang: str,
288 translation: str,
289 provider: str,
290 ) -> None:
291 key = self._get_cache_key(text, source_lang, target_lang)
292 self.cache_data[key] = {
293 "original": text,
294 "translation": translation,
295 "source_lang": source_lang,
296 "target_lang": target_lang,
297 "provider": provider,
298 "created": datetime.now().isoformat(),
299 }
300 self.save_cache()
302 def load_cache(self) -> None:
303 try:
304 if self.cache_file.exists():
305 with open(self.cache_file, encoding="utf-8") as f:
306 raw_data = json.load(f)
307 validated = _TranslationCacheFileSchema.model_validate(raw_data)
308 self.cache_data = {
309 key: entry.model_dump(mode="json")
310 for key, entry in validated.root.items()
311 }
312 except Exception as e:
313 warn_tech(
314 code="translation.cache.load_failed",
315 message="Error loading cache",
316 error=e,
317 )
318 self.cache_data = {}
320 def save_cache(self) -> None:
321 try:
322 self.cache_file.parent.mkdir(parents=True, exist_ok=True)
323 validated = _TranslationCacheFileSchema.model_validate(self.cache_data)
324 with open(self.cache_file, "w", encoding="utf-8") as f:
325 json.dump(
326 validated.model_dump(mode="json"), f, indent=2, ensure_ascii=False
327 )
328 except Exception as e:
329 warn_tech(
330 code="translation.cache.save_failed",
331 message="Error saving cache",
332 error=e,
333 )
335 def clear_expired(self) -> None:
336 current_time = datetime.now()
337 expired_keys: list[str] = []
338 for key, entry_raw in self.cache_data.items():
339 try:
340 entry = _TranslationCacheEntrySchema.model_validate(entry_raw)
341 except ValidationError:
342 expired_keys.append(key)
343 continue
345 if current_time - entry.created > timedelta(days=self.max_age_days):
346 expired_keys.append(key)
348 for key in expired_keys:
349 del self.cache_data[key]
350 if expired_keys:
351 self.save_cache()
class AutoTranslator(QObject):
    """Automatic translation manager (disabled by default).

    Each call to :meth:`translate` that misses the cache spawns a lightweight
    daemon thread that performs the HTTP round-trip in the background.
    Signals are emitted from that thread; Qt automatically delivers them via a
    queued connection to slots that live in the main thread, so the UI is
    never blocked.

    A ``_pending`` set (guarded by ``_lock``) deduplicates in-flight requests.
    It is keyed on ``(text, source_lang, target_lang)``: an identical request
    made while its thread is still running spawns no second thread, whereas
    the same text for a different language pair is still translated.
    """

    # Emitted with (source text, translated text) on success.
    translation_ready = Signal(str, str)
    # Emitted with (source text, error message) when every provider failed.
    translation_error = Signal(str, str)

    def __init__(self, cache_dir: Path | None = None):
        """Create the translator, its cache and the default provider list.

        Args:
            cache_dir: Directory holding ``translations.json``; defaults to
                ``~/.ezqt/cache``.
        """
        super().__init__()
        if cache_dir is None:
            cache_dir = Path.home() / ".ezqt" / "cache"
        cache_dir.mkdir(parents=True, exist_ok=True)
        self.cache = TranslationCache(cache_dir / "translations.json")
        self.providers: list[TranslationProvider] = []
        self._pending: set[tuple[str, str, str]] = set()
        self._lock = threading.Lock()
        self._setup_providers()
        self.enabled = False

    def _setup_providers(self) -> None:
        """Install the default providers, in fallback order."""
        # Order matters: fastest/unofficial first, then free fallback providers.
        # is_available() performs a synchronous HTTP GET; calling it at setup time
        # would block the Qt main thread. Provider availability checking requires
        # a dedicated health-check mechanism before it can be integrated.
        self.providers = [
            GoogleTranslateProvider(),
            MyMemoryProvider(),
            LibreTranslateProvider(),
        ]

    def add_provider(self, provider: TranslationProvider) -> None:
        """Append *provider* at the end of the fallback chain."""
        self.providers.append(provider)

    def remove_provider(self, provider_name: str) -> None:
        """Remove every provider whose ``name`` equals *provider_name*."""
        self.providers = [p for p in self.providers if p.name != provider_name]

    def translate(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """Schedule an async translation and return ``None`` immediately.

        If *source_lang* equals *target_lang* the method returns *text*
        immediately (identity translation — no HTTP request is made).
        If *text* is already cached the cached value is returned immediately.
        Otherwise a daemon thread is started and ``None`` is returned; the
        caller receives the result via :attr:`translation_ready`.
        """
        # NOTE(review): unlike translate_sync(), this method does not consult
        # self.enabled — presumably callers gate on it; confirm.
        if source_lang == target_lang:
            return text

        cached = self.cache.get(text, source_lang, target_lang)
        if cached:
            return cached

        # Key on the whole request: keying on the text alone would silently
        # drop a request for the same text in another language pair.
        request_key = (text, source_lang, target_lang)
        with self._lock:
            if request_key in self._pending:
                return None
            self._pending.add(request_key)

        worker = threading.Thread(
            target=self._do_translate,
            args=(text, source_lang, target_lang),
            daemon=True,
            name=f"ez-translate:{text[:30]}",
        )
        try:
            worker.start()
        except Exception:
            # The worker never ran, so its ``finally`` will not release the
            # key; release it here or the request could never be retried.
            with self._lock:
                self._pending.discard(request_key)
            raise
        return None

    def _do_translate(self, text: str, source_lang: str, target_lang: str) -> None:
        """Blocking translation worker — runs in a background daemon thread.

        Tries each provider in order. The first non-empty translation is
        cached and emitted through :attr:`translation_ready`; if none
        succeeds, :attr:`translation_error` is emitted. The pending marker for
        the request is always released on exit.
        """
        try:
            for provider in self.providers:
                try:
                    translation = provider.translate(text, source_lang, target_lang)
                    if translation:
                        self.cache.set(
                            text, source_lang, target_lang, translation, provider.name
                        )
                        get_printer().debug_msg(
                            "[TranslationService] Automatic translation "
                            f"({provider.name}): '{text}' -> '{translation}'"
                        )
                        # Signal is delivered to the main thread via queued connection.
                        self.translation_ready.emit(text, translation)
                        return
                    time.sleep(provider.rate_limit_delay)
                except Exception as e:
                    warn_tech(
                        code="translation.worker.provider_failed",
                        message=f"Translation error with {provider.name}",
                        error=e,
                    )

            warn_user(
                code="translation.auto.failed",
                user_message=f"Automatic translation failed: '{text}'",
                log_message=f"All providers failed for '{text}'",
            )
            self.translation_error.emit(text, "No translation found")
        finally:
            with self._lock:
                self._pending.discard((text, source_lang, target_lang))

    def translate_sync(
        self, text: str, source_lang: str, target_lang: str
    ) -> str | None:
        """Translate text synchronously, blocking until a result is obtained.

        Intended for use in CLI scripts, test helpers, and offline batch-processing
        tools that run outside the Qt event loop. Each provider call is a blocking
        HTTP request; the total wait time can reach ``len(providers) × timeout``
        seconds if all providers fail.

        Warning:
            **Never call this method from the Qt main (UI) thread.** Doing so
            blocks the event loop for the entire duration of the HTTP round-trips,
            freezing the application UI. For in-app translation use
            :meth:`translate` instead, which runs the request in a daemon thread.

        Example::

            # Appropriate usage — called from a CLI script, not from a Qt slot:
            translator = get_auto_translator()
            translator.enabled = True
            result = translator.translate_sync("Hello", "en", "fr")
            print(result)  # "Bonjour"

        Args:
            text: The source text to translate.
            source_lang: BCP-47 language code of the source text (e.g. ``"en"``).
            target_lang: BCP-47 language code of the desired output (e.g. ``"fr"``).

        Returns:
            The translated string, *text* itself when both language codes are
            equal, or ``None`` if the translator is disabled or all providers
            fail.
        """
        if not self.enabled:
            return None

        # Same shortcut as translate(): an identity translation needs no
        # provider round-trip.
        if source_lang == target_lang:
            return text

        cached = self.cache.get(text, source_lang, target_lang)
        if cached:
            return cached

        for provider in self.providers:
            try:
                translation = provider.translate(text, source_lang, target_lang)
                if translation:
                    self.cache.set(
                        text, source_lang, target_lang, translation, provider.name
                    )
                    return translation
                time.sleep(provider.rate_limit_delay)
            except Exception as e:
                warn_tech(
                    code="translation.sync.provider_failed",
                    message=f"Translation error with {provider.name}",
                    error=e,
                )

        return None

    def save_translation_to_ts(
        self, original: str, translated: str, target_lang: str, ts_file_path: Path
    ) -> None:
        """Insert or update one translation entry in a Qt Linguist .ts XML file.

        If the file does not exist, or cannot be parsed, a fresh ``<TS>``
        document is created. The entry is written into the first ``<context>``
        element (created and named ``ezqt_app`` when missing). Failures are
        reported through ``warn_tech`` and never raised.

        Args:
            original: Source string (``<source>`` text).
            translated: Translated string (``<translation>`` text).
            target_lang: Language code stored on a newly created ``<TS>`` root.
            ts_file_path: Path of the .ts file to create or update.
        """
        from xml.etree.ElementTree import Element, ElementTree, SubElement  # nosec B405

        import defusedxml.ElementTree as ET  # type: ignore[import-untyped]

        try:
            if ts_file_path.exists():
                try:
                    tree = ET.parse(ts_file_path)
                    root = tree.getroot()
                    if root is None:
                        raise ET.ParseError("Empty document")
                except ET.ParseError:
                    root = Element("TS", {"language": target_lang, "version": "2.1"})
                    tree = ElementTree(root)
            else:
                root = Element("TS", {"language": target_lang, "version": "2.1"})
                tree = ElementTree(root)

            context = root.find("context")
            if context is None:
                context = SubElement(root, "context")
                SubElement(context, "name").text = "ezqt_app"

            # Update existing entry if source already present, otherwise append.
            for msg in context.findall("message"):
                src = msg.find("source")
                if src is None or src.text != original:
                    continue
                trans = msg.find("translation")
                if trans is None:
                    # A message without <translation> must still receive the
                    # text; otherwise the new translation is silently lost.
                    trans = SubElement(msg, "translation")
                trans.text = translated
                break
            else:
                msg = SubElement(context, "message")
                SubElement(msg, "source").text = original
                SubElement(msg, "translation").text = translated

            ts_file_path.parent.mkdir(parents=True, exist_ok=True)
            # Explicit UTF-8: with encoding="unicode" the file encoding (and
            # the encoding named in the XML declaration) follows the locale on
            # some Python versions, which makes the output platform-dependent.
            tree.write(ts_file_path, encoding="utf-8", xml_declaration=True)
            get_printer().debug_msg(
                f"[TranslationService] Translation saved to {ts_file_path}"
            )
        except Exception as e:
            warn_tech(
                code="translation.ts.save_failed",
                message="Error saving translation to .ts file",
                error=e,
            )

    def clear_cache(self) -> None:
        """Empty the translation cache and persist the empty state."""
        self.cache.cache_data.clear()
        self.cache.save_cache()
        get_printer().debug_msg("[TranslationService] Translation cache cleared")

    def get_cache_stats(self) -> dict[str, Any]:
        """Return cache statistics.

        Returns:
            A dict with ``total_entries``, ``cache_file``, ``max_age_days`` and
            ``by_provider`` (entry count per provider name; entries failing
            validation are counted under ``"invalid"``).
        """
        # Snapshot the values: worker threads may add entries concurrently and
        # iterating the live dict could raise "dictionary changed size".
        entries = list(self.cache.cache_data.values())
        stats: dict[str, Any] = {
            "total_entries": len(self.cache.cache_data),
            "cache_file": str(self.cache.cache_file),
            "max_age_days": self.cache.max_age_days,
        }
        provider_stats: dict[str, int] = {}
        for entry_raw in entries:
            try:
                entry = _TranslationCacheEntrySchema.model_validate(entry_raw)
                p = entry.provider
            except ValidationError:
                p = "invalid"
            provider_stats[p] = provider_stats.get(p, 0) + 1
        stats["by_provider"] = provider_stats
        return stats

    def cleanup(self) -> None:
        """Purge expired cache entries (persisted only if any were removed)."""
        # Background threads are daemon threads — they exit automatically when
        # the process exits. We only need to flush the on-disk cache.
        self.cache.clear_expired()
602# ///////////////////////////////////////////////////////////////
603# FUNCTIONS
604# ///////////////////////////////////////////////////////////////
def get_auto_translator() -> AutoTranslator:
    """Fetch the shared :class:`AutoTranslator` from the service registry.

    The lookup is delegated to ``ServiceRegistry.get`` with ``AutoTranslator``
    passed both as the key and as the second argument (presumably the factory
    used on first access — confirm against ``ServiceRegistry``).
    """
    # Imported inside the function, as the module does not import the
    # registry at top level.
    from .._registry import ServiceRegistry

    translator = ServiceRegistry.get(AutoTranslator, AutoTranslator)
    return translator