Coverage for src/ezqt_app/services/translation/auto_translator.py

1# ///////////////////////////////////////////////////////////////

2# SERVICES.TRANSLATION.AUTO_TRANSLATOR - Automatic translation providers

3# Project: ezqt_app

4# ///////////////////////////////////////////////////////////////

6"""Automatic translation via external providers (disabled by default)."""

8from __future__ import annotations

10# ///////////////////////////////////////////////////////////////

11# IMPORTS

12# ///////////////////////////////////////////////////////////////

13# Standard library imports

14import hashlib

15import json

16import threading

17import time

18from abc import ABC, abstractmethod

19from datetime import datetime, timedelta

20from pathlib import Path

21from typing import Any

23# Third-party imports

24import requests

25from pydantic import BaseModel, ConfigDict, RootModel, ValidationError

26from PySide6.QtCore import QObject, Signal

28# Local imports

29from ...utils.diagnostics import warn_tech, warn_user

30from ...utils.printer import get_printer

33# ///////////////////////////////////////////////////////////////

34# PYDANTIC RESPONSE SCHEMAS

35# ///////////////////////////////////////////////////////////////

class _LibreTranslateResponseSchema(BaseModel):
    """Response payload of the LibreTranslate ``/translate`` endpoint.

    Only ``translatedText`` is consumed by the provider. Unknown keys are
    ignored instead of rejected so that optional fields added by the server
    (NOTE(review): e.g. ``detectedLanguage`` when the source language is
    auto-detected -- confirm against the deployed server version) do not cause
    an otherwise usable translation to be discarded as an invalid payload.
    """

    model_config = ConfigDict(extra="ignore")

    # Translated string; validation fails if it is missing or not a string.
    translatedText: str

class _GoogleTranslateResponseSchema(RootModel[list[Any]]):
    """Root response payload for Google Translate unofficial endpoint.

    The schema only requires the top-level JSON value to be a list; the
    element types are left as ``Any``, so the nested structure is not
    validated at this level.
    """

class _MyMemoryResponseDataSchema(BaseModel):
    """Nested ``responseData`` object of a MyMemory response.

    Only ``translatedText`` is consumed. Unknown keys are ignored because the
    service returns additional fields next to it (NOTE(review): e.g. a
    ``match`` score -- confirm against the MyMemory API specification);
    rejecting them would make every real response fail validation.
    """

    model_config = ConfigDict(extra="ignore")

    # Translated string; may be absent or null.
    translatedText: str | None = None


class _MyMemoryResponseSchema(BaseModel):
    """Top-level payload of a MyMemory ``/get`` response.

    Only ``responseStatus`` and ``responseData`` are consumed. Unknown
    top-level keys are ignored for the same reason as in the nested schema
    (NOTE(review): the service is expected to send keys such as ``matches``
    or ``quotaFinished`` -- confirm against the MyMemory API specification).
    """

    model_config = ConfigDict(extra="ignore")

    # API-level status code reported inside the JSON body.
    responseStatus: int
    # Nested translation data; may be absent or null.
    responseData: _MyMemoryResponseDataSchema | None = None

class _TranslationCacheEntrySchema(BaseModel):
    """Strict schema for one translation cache entry.

    Unknown keys are rejected (``extra="forbid"``), so an entry carrying
    unexpected keys fails validation.
    """

    model_config = ConfigDict(extra="forbid")

    # Source text that was translated.
    original: str
    # Translated text.
    translation: str
    # Language code of the source text.
    source_lang: str
    # Language code of the translation.
    target_lang: str
    # Name of the provider that produced the translation.
    provider: str
    # Creation timestamp of the entry.
    created: datetime

class _TranslationCacheFileSchema(RootModel[dict[str, _TranslationCacheEntrySchema]]):
    """Strict schema for translation cache file content.

    The root value is a mapping from a string key to one validated
    ``_TranslationCacheEntrySchema`` entry.
    """

82# ///////////////////////////////////////////////////////////////

83# CLASSES

84# ///////////////////////////////////////////////////////////////

class TranslationProvider(ABC):
    """Abstract base class shared by all translation providers.

    Attributes:
        name: Human-readable provider name.
        base_url: Root URL of the provider's HTTP service.
        timeout: Request timeout in seconds (10 by default).
        rate_limit_delay: Pause in seconds associated with this provider
            (1.0 by default).
    """

    def __init__(self, name: str, base_url: str):
        self.name, self.base_url = name, base_url
        self.timeout = 10
        self.rate_limit_delay = 1.0

    @abstractmethod
    def translate(
        self, text: str, source_lang: str, target_lang: str
    ) -> str | None:
        """Translate *text*; concrete providers return the result or ``None``."""

    def is_available(self) -> bool:
        """Return ``True`` when a GET on ``base_url`` answers with HTTP 200.

        Any exception raised while probing is treated as "not available".
        """
        try:
            reachable = requests.get(self.base_url, timeout=5).status_code == 200
        except Exception:
            reachable = False
        return reachable

105

106

class LibreTranslateProvider(TranslationProvider):
    """LibreTranslate provider.

    Args:
        api_key: Optional API key; when set it is sent as ``api_key`` in the
            request body.
        custom_server: Optional base URL of a self-hosted instance. Defaults
            to ``https://libretranslate.com``.
    """

    def __init__(self, api_key: str | None = None, custom_server: str | None = None):
        server = custom_server or "https://libretranslate.com"
        super().__init__("LibreTranslate", server)
        self.api_key = api_key
        self.rate_limit_delay = 1.0

    def translate(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """Translate *text* with a POST to ``<base_url>/translate``.

        Returns:
            The translated text, or ``None`` on a non-200 HTTP status, an
            invalid payload, or any exception (each case is reported through
            ``warn_tech``).
        """
        try:
            # A custom server configured with a trailing "/" would otherwise
            # produce ".../" + "/translate" (a double slash in the path).
            url = f"{self.base_url.rstrip('/')}/translate"
            data: dict[str, Any] = {
                "q": text,
                "source": source_lang,
                "target": target_lang,
                "format": "text",
            }
            if self.api_key:
                data["api_key"] = self.api_key

            response = requests.post(
                url,
                json=data,
                headers={"Content-Type": "application/json"},
                timeout=self.timeout,
            )
            if response.status_code == 200:
                payload = _LibreTranslateResponseSchema.model_validate(response.json())
                return payload.translatedText
            warn_tech(
                code="translation.provider.libretranslate.http_error",
                message=f"LibreTranslate error: {response.status_code}",
            )
            return None
        except ValidationError as e:
            warn_tech(
                code="translation.provider.libretranslate.invalid_payload",
                message="LibreTranslate returned an invalid payload",
                error=e,
            )
            return None
        except Exception as e:
            # Best-effort provider: network and decoding errors are reported
            # and turned into "no translation".
            warn_tech(
                code="translation.provider.libretranslate.exception",
                message="LibreTranslate exception",
                error=e,
            )
            return None

156

157

class GoogleTranslateProvider(TranslationProvider):
    """Google Translate Web provider (unofficial)."""

    def __init__(self):
        super().__init__("Google Translate", "https://translate.googleapis.com")
        self.rate_limit_delay = 0.5

    def translate(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """Translate *text* via ``<base_url>/translate_a/single``.

        The first element of the response is treated as a list of segments,
        each a list whose first item is a translated string. All such strings
        are concatenated, so a response made of several segments is returned
        in full rather than cut after the first one.

        Returns:
            The translated text, or ``None`` on a non-200 HTTP status, an
            unexpected payload shape, or any exception (each case is reported
            through ``warn_tech``).
        """
        try:
            response = requests.get(
                f"{self.base_url}/translate_a/single",
                params={
                    "client": "gtx",
                    "sl": source_lang,
                    "tl": target_lang,
                    "dt": "t",
                    "q": text,
                },
                timeout=self.timeout,
            )
            if response.status_code != 200:
                warn_tech(
                    code="translation.provider.google.http_error",
                    message=f"Google Translate error: {response.status_code}",
                )
                return None

            payload = _GoogleTranslateResponseSchema.model_validate(response.json())
            data = payload.root
            segments = data[0] if data and isinstance(data[0], list) else []
            # NOTE(review): the unofficial endpoint appears to split longer
            # input into several segments (roughly one per sentence) --
            # confirm; joining them keeps the whole translation.
            parts = [
                segment[0]
                for segment in segments
                if isinstance(segment, list) and segment and isinstance(segment[0], str)
            ]
            if parts:
                return "".join(parts)

            warn_tech(
                code="translation.provider.google.invalid_payload",
                message="Google Translate returned an unexpected payload shape",
            )
        except ValidationError as e:
            warn_tech(
                code="translation.provider.google.invalid_payload",
                message="Google Translate returned an invalid payload",
                error=e,
            )
        except Exception as e:
            # Best-effort provider: network and decoding errors are reported
            # and turned into "no translation".
            warn_tech(
                code="translation.provider.google.exception",
                message="Google Translate exception",
                error=e,
            )
        return None

215

216

class MyMemoryProvider(TranslationProvider):
    """MyMemory provider (free, no API key required)."""

    def __init__(self):
        super().__init__("MyMemory", "https://api.mymemory.translated.net")

    def translate(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """Translate *text* via ``<base_url>/get``.

        Returns:
            The translated text, or ``None`` when the HTTP status or the
            ``responseStatus`` reported in the body is not 200, when the
            payload is invalid, or when any exception occurs. Every failure
            path is reported through ``warn_tech``.
        """
        try:
            response = requests.get(
                f"{self.base_url}/get",
                params={"q": text, "langpair": f"{source_lang}|{target_lang}"},
                timeout=self.timeout,
            )
            if response.status_code == 200:
                payload = _MyMemoryResponseSchema.model_validate(response.json())
                if payload.responseStatus == 200 and payload.responseData is not None:
                    return payload.responseData.translatedText
                # HTTP succeeded but the body reports a failure (or carries no
                # data): report it instead of failing silently.
                warn_tech(
                    code="translation.provider.mymemory.api_error",
                    message=(
                        "MyMemory returned no translation "
                        f"(responseStatus={payload.responseStatus})"
                    ),
                )
            else:
                warn_tech(
                    code="translation.provider.mymemory.http_error",
                    message=f"MyMemory error: {response.status_code}",
                )
        except ValidationError as e:
            warn_tech(
                code="translation.provider.mymemory.invalid_payload",
                message="MyMemory returned an invalid payload",
                error=e,
            )
        except Exception as e:
            # Best-effort provider: network and decoding errors are reported
            # and turned into "no translation".
            warn_tech(
                code="translation.provider.mymemory.exception",
                message="MyMemory exception",
                error=e,
            )
        return None

252

253

class TranslationCache:
    """JSON-file-backed translation cache with age-based expiry.

    Entries live in ``cache_data`` (key -> plain dict) and are validated
    against ``_TranslationCacheEntrySchema`` whenever they are read. All
    public operations take an internal reentrant lock, because the cache is
    written from background translation threads while other threads read it.

    Attributes:
        cache_file: Path of the JSON file holding the cache.
        cache_data: In-memory mapping of cache key to raw entry dict.
        max_age_days: Entries older than this many days are treated as expired.
    """

    def __init__(self, cache_file: Path):
        self.cache_file = cache_file
        self.cache_data: dict[str, Any] = {}
        self.max_age_days = 30
        # Reentrant: set() and clear_expired() call save_cache() while
        # already holding the lock.
        self._lock = threading.RLock()
        self.load_cache()

    def _get_cache_key(self, text: str, source_lang: str, target_lang: str) -> str:
        """Return the MD5 hex digest of ``"text|source_lang|target_lang"``."""
        return hashlib.md5(
            f"{text}|{source_lang}|{target_lang}".encode(), usedforsecurity=False
        ).hexdigest()

    @staticmethod
    def _age(created: datetime) -> timedelta:
        """Return how long ago *created* was.

        ``now`` is taken with the same tzinfo as *created* (``None`` for naive
        values), so subtracting never mixes naive and aware datetimes.
        """
        return datetime.now(tz=created.tzinfo) - created

    def get(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """Return the cached translation, or ``None`` if absent/invalid/expired.

        Invalid and expired entries are removed from ``cache_data`` (the file
        is not rewritten here).
        """
        key = self._get_cache_key(text, source_lang, target_lang)
        with self._lock:
            entry_raw = self.cache_data.get(key)
            if not entry_raw:
                return None
            try:
                entry = _TranslationCacheEntrySchema.model_validate(entry_raw)
            except ValidationError:
                self.cache_data.pop(key, None)
                return None

            if self._age(entry.created) < timedelta(days=self.max_age_days):
                return entry.translation
            self.cache_data.pop(key, None)
            return None

    def set(
        self,
        text: str,
        source_lang: str,
        target_lang: str,
        translation: str,
        provider: str,
    ) -> None:
        """Store a translation and write the cache file."""
        key = self._get_cache_key(text, source_lang, target_lang)
        with self._lock:
            self.cache_data[key] = {
                "original": text,
                "translation": translation,
                "source_lang": source_lang,
                "target_lang": target_lang,
                "provider": provider,
                "created": datetime.now().isoformat(),
            }
            self.save_cache()

    def load_cache(self) -> None:
        """Load and validate the cache file; fall back to an empty cache on error."""
        with self._lock:
            try:
                if self.cache_file.exists():
                    with open(self.cache_file, encoding="utf-8") as f:
                        raw_data = json.load(f)
                    validated = _TranslationCacheFileSchema.model_validate(raw_data)
                    self.cache_data = {
                        key: entry.model_dump(mode="json")
                        for key, entry in validated.root.items()
                    }
            except Exception as e:
                # An unreadable or invalid file must not prevent start-up.
                warn_tech(
                    code="translation.cache.load_failed",
                    message="Error loading cache",
                    error=e,
                )
                self.cache_data = {}

    def save_cache(self) -> None:
        """Validate ``cache_data`` and write it to ``cache_file`` as JSON.

        Failures are reported through ``warn_tech`` and otherwise ignored.
        """
        with self._lock:
            try:
                self.cache_file.parent.mkdir(parents=True, exist_ok=True)
                validated = _TranslationCacheFileSchema.model_validate(self.cache_data)
                with open(self.cache_file, "w", encoding="utf-8") as f:
                    json.dump(
                        validated.model_dump(mode="json"), f, indent=2, ensure_ascii=False
                    )
            except Exception as e:
                warn_tech(
                    code="translation.cache.save_failed",
                    message="Error saving cache",
                    error=e,
                )

    def clear_expired(self) -> None:
        """Drop invalid and expired entries; rewrite the file if any were removed."""
        max_age = timedelta(days=self.max_age_days)
        with self._lock:
            expired_keys: list[str] = []
            for key, entry_raw in self.cache_data.items():
                try:
                    entry = _TranslationCacheEntrySchema.model_validate(entry_raw)
                except ValidationError:
                    expired_keys.append(key)
                    continue

                if self._age(entry.created) > max_age:
                    expired_keys.append(key)

            for key in expired_keys:
                del self.cache_data[key]
            if expired_keys:
                self.save_cache()

352

353

class AutoTranslator(QObject):
    """Automatic translation manager (disabled by default).

    Each call to :meth:`translate` that cannot be answered immediately spawns
    a lightweight daemon thread that performs the HTTP round-trip in the
    background. Signals are emitted from that thread; Qt automatically
    delivers them via a queued connection to slots that live in the main
    thread, so the UI is never blocked.

    A ``_pending`` set (guarded by ``_lock``) deduplicates in-flight requests.
    Its entries are ``(text, source_lang, target_lang)`` tuples: if the same
    text is requested for the same language pair while a thread for it is
    still running, no second thread is spawned, while a request for the same
    text in a different language pair is still processed.

    Signals:
        translation_ready(str, str): source text and its translation.
        translation_error(str, str): source text and an error message.
    """

    translation_ready = Signal(str, str)
    translation_error = Signal(str, str)

    def __init__(self, cache_dir: Path | None = None):
        super().__init__()
        if cache_dir is None:
            cache_dir = Path.home() / ".ezqt" / "cache"
        cache_dir.mkdir(parents=True, exist_ok=True)
        self.cache = TranslationCache(cache_dir / "translations.json")
        self.providers: list[TranslationProvider] = []
        self._pending: set[tuple[str, str, str]] = set()
        self._lock = threading.Lock()
        self._setup_providers()
        self.enabled = False

    def _setup_providers(self) -> None:
        """Install the default provider chain."""
        # Order matters: fastest/unofficial first, then free fallback providers.
        # is_available() performs a synchronous HTTP GET; calling it at setup time
        # would block the Qt main thread. Provider availability checking requires
        # a dedicated health-check mechanism before it can be integrated.
        self.providers = [
            GoogleTranslateProvider(),
            MyMemoryProvider(),
            LibreTranslateProvider(),
        ]

    def add_provider(self, provider: TranslationProvider) -> None:
        """Append *provider* to the end of the provider chain."""
        self.providers.append(provider)

    def remove_provider(self, provider_name: str) -> None:
        """Remove every provider whose ``name`` equals *provider_name*."""
        self.providers = [p for p in self.providers if p.name != provider_name]

    def translate(self, text: str, source_lang: str, target_lang: str) -> str | None:
        """Return an immediate result if one exists, else schedule a translation.

        If *source_lang* equals *target_lang* the method returns *text*
        immediately (identity translation -- no HTTP request is made).
        If *text* is already cached the cached value is returned immediately.
        Otherwise a daemon thread is started and ``None`` is returned; the
        caller receives the result via :attr:`translation_ready`.

        Raises:
            RuntimeError: If the worker thread cannot be started; the request
                is removed from the pending set before re-raising.
        """
        if source_lang == target_lang:
            return text

        cached = self.cache.get(text, source_lang, target_lang)
        if cached:
            return cached

        # Key on the language pair as well, so the same text requested for a
        # different pair is not swallowed by an unrelated in-flight request.
        request_key = (text, source_lang, target_lang)
        with self._lock:
            if request_key in self._pending:
                return None
            self._pending.add(request_key)

        worker = threading.Thread(
            target=self._do_translate,
            args=(text, source_lang, target_lang),
            daemon=True,
            name=f"ez-translate:{text[:30]}",
        )
        try:
            worker.start()
        except RuntimeError:
            # The worker's ``finally`` will never run, so release the key here;
            # otherwise this request could never be scheduled again.
            with self._lock:
                self._pending.discard(request_key)
            raise
        return None

    def _do_translate(self, text: str, source_lang: str, target_lang: str) -> None:
        """Blocking translation worker -- runs in a background daemon thread.

        Tries each provider in order. On the first non-empty result the
        translation is cached and :attr:`translation_ready` is emitted; if
        every provider fails, :attr:`translation_error` is emitted. The
        request is always removed from the pending set on exit.
        """
        try:
            for provider in self.providers:
                try:
                    translation = provider.translate(text, source_lang, target_lang)
                    if translation:
                        self.cache.set(
                            text, source_lang, target_lang, translation, provider.name
                        )
                        get_printer().debug_msg(
                            "[TranslationService] Automatic translation "
                            f"({provider.name}): '{text}' -> '{translation}'"
                        )
                        # Signal is delivered to the main thread via queued connection.
                        self.translation_ready.emit(text, translation)
                        return
                    time.sleep(provider.rate_limit_delay)
                except Exception as e:
                    # One failing provider must not stop the fallback chain.
                    warn_tech(
                        code="translation.worker.provider_failed",
                        message=f"Translation error with {provider.name}",
                        error=e,
                    )

            warn_user(
                code="translation.auto.failed",
                user_message=f"Automatic translation failed: '{text}'",
                log_message=f"All providers failed for '{text}'",
            )
            self.translation_error.emit(text, "No translation found")
        finally:
            with self._lock:
                self._pending.discard((text, source_lang, target_lang))

    def translate_sync(
        self, text: str, source_lang: str, target_lang: str
    ) -> str | None:
        """Translate text synchronously, blocking until a result is obtained.

        Intended for use in CLI scripts, test helpers, and offline batch-processing
        tools that run outside the Qt event loop. Each provider call is a blocking
        HTTP request; the total wait time can reach ``len(providers) x timeout``
        seconds if all providers fail.

        Warning:
            **Never call this method from the Qt main (UI) thread.** Doing so
            blocks the event loop for the entire duration of the HTTP round-trips,
            freezing the application UI. For in-app translation use
            :meth:`translate` instead, which runs the request in a daemon thread.

        Example::

            # Appropriate usage -- called from a CLI script, not from a Qt slot:
            translator = get_auto_translator()
            translator.enabled = True
            result = translator.translate_sync("Hello", "en", "fr")
            print(result)  # "Bonjour"

        Args:
            text: The source text to translate.
            source_lang: BCP-47 language code of the source text (e.g. ``"en"``).
            target_lang: BCP-47 language code of the desired output (e.g. ``"fr"``).

        Returns:
            The translated string, or ``None`` if the translator is disabled or
            all providers fail.
        """
        if not self.enabled:
            return None

        cached = self.cache.get(text, source_lang, target_lang)
        if cached:
            return cached

        for provider in self.providers:
            try:
                translation = provider.translate(text, source_lang, target_lang)
                if translation:
                    self.cache.set(
                        text, source_lang, target_lang, translation, provider.name
                    )
                    return translation
                time.sleep(provider.rate_limit_delay)
            except Exception as e:
                # One failing provider must not stop the fallback chain.
                warn_tech(
                    code="translation.sync.provider_failed",
                    message=f"Translation error with {provider.name}",
                    error=e,
                )

        return None

    def save_translation_to_ts(
        self, original: str, translated: str, target_lang: str, ts_file_path: Path
    ) -> None:
        """Add or update a single translation entry in a Qt Linguist .ts XML file.

        An existing file is parsed with ``defusedxml``; a missing or
        unparsable file is replaced by a fresh ``<TS>`` document. The entry is
        stored in the first ``<context>`` element (created with the name
        ``ezqt_app`` if none exists). If a ``<message>`` with the same
        ``<source>`` exists its ``<translation>`` is updated -- and created if
        missing; otherwise a new ``<message>`` is appended. Errors are
        reported through ``warn_tech`` and not raised.
        """
        from xml.etree.ElementTree import Element, ElementTree, SubElement  # nosec B405

        import defusedxml.ElementTree as ET  # type: ignore[import-untyped]

        try:
            if ts_file_path.exists():
                try:
                    tree = ET.parse(ts_file_path)
                    root = tree.getroot()
                    if root is None:
                        raise ET.ParseError("Empty document")
                except ET.ParseError:
                    root = Element("TS", {"language": target_lang, "version": "2.1"})
                    tree = ElementTree(root)
            else:
                root = Element("TS", {"language": target_lang, "version": "2.1"})
                tree = ElementTree(root)

            context = root.find("context")
            if context is None:
                context = SubElement(root, "context")
                SubElement(context, "name").text = "ezqt_app"

            # Update existing entry if source already present, otherwise append.
            for msg in context.findall("message"):
                src = msg.find("source")
                if src is not None and src.text == original:
                    trans = msg.find("translation")
                    if trans is None:
                        # A message without a <translation> child would
                        # otherwise lose the new text (or be duplicated).
                        trans = SubElement(msg, "translation")
                    trans.text = translated
                    break
            else:
                msg = SubElement(context, "message")
                SubElement(msg, "source").text = original
                SubElement(msg, "translation").text = translated

            ts_file_path.parent.mkdir(parents=True, exist_ok=True)
            tree.write(ts_file_path, encoding="unicode", xml_declaration=True)
            get_printer().debug_msg(
                f"[TranslationService] Translation saved to {ts_file_path}"
            )
        except Exception as e:
            warn_tech(
                code="translation.ts.save_failed",
                message="Error saving translation to .ts file",
                error=e,
            )

    def clear_cache(self) -> None:
        """Remove every cache entry and rewrite the cache file."""
        self.cache.cache_data.clear()
        self.cache.save_cache()
        get_printer().debug_msg("[TranslationService] Translation cache cleared")

    def get_cache_stats(self) -> dict[str, Any]:
        """Return cache statistics.

        Returns:
            A dict with ``total_entries``, ``cache_file``, ``max_age_days`` and
            ``by_provider`` (entry count per provider name; entries failing
            validation are counted under ``"invalid"``).
        """
        stats: dict[str, Any] = {
            "total_entries": len(self.cache.cache_data),
            "cache_file": str(self.cache.cache_file),
            "max_age_days": self.cache.max_age_days,
        }
        provider_stats: dict[str, int] = {}
        # Iterate over a snapshot: worker threads may add entries meanwhile,
        # and iterating the live dict could fail when it changes size.
        for entry_raw in list(self.cache.cache_data.values()):
            try:
                entry = _TranslationCacheEntrySchema.model_validate(entry_raw)
                p = entry.provider
            except ValidationError:
                p = "invalid"
            provider_stats[p] = provider_stats.get(p, 0) + 1
        stats["by_provider"] = provider_stats
        return stats

    def cleanup(self) -> None:
        """Purge expired cache entries."""
        # Background threads are daemon threads -- they exit automatically when
        # the process exits. We only need to flush the on-disk cache.
        self.cache.clear_expired()

600

601

602# ///////////////////////////////////////////////////////////////

603# FUNCTIONS

604# ///////////////////////////////////////////////////////////////

def get_auto_translator() -> AutoTranslator:
    """Fetch the shared :class:`AutoTranslator` from the service registry.

    The registry is asked for the instance registered under ``AutoTranslator``,
    with ``AutoTranslator`` itself passed as the second argument.
    """
    # Imported inside the function, as in the original implementation.
    from .._registry import ServiceRegistry

    translator = ServiceRegistry.get(AutoTranslator, AutoTranslator)
    return translator

Coverage for src / ezqt_app / services / translation / auto_translator.py: 63.64%

288 statements