Coverage for src / ezqt_app / services / translation / string_collector.py: 76.84%
158 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-06 13:12 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-06 13:12 +0000
1# ///////////////////////////////////////////////////////////////
2# SERVICES.TRANSLATION.STRING_COLLECTOR - UI string collector
3# Project: ezqt_app
4# ///////////////////////////////////////////////////////////////
6"""Collects translatable strings from Qt widgets for translation workflows."""
8from __future__ import annotations
10# ///////////////////////////////////////////////////////////////
11# IMPORTS
12# ///////////////////////////////////////////////////////////////
13# Standard library imports
14import json
15from datetime import datetime
16from pathlib import Path
17from typing import Any
19# Third-party imports
20from pydantic import RootModel, ValidationError
22# Local imports
23from ...domain.models.translation import SUPPORTED_LANGUAGES
24from ...utils.diagnostics import warn_tech, warn_user
25from ...utils.printer import get_printer
26from ._scanner import is_translatable, scan_widget
29# ///////////////////////////////////////////////////////////////
30# PYDANTIC SCHEMAS
31# ///////////////////////////////////////////////////////////////
class _TranslationTasksSchema(RootModel[dict[str, dict[str, list[str]]]]):
    """Schema used to validate the ``translation_tasks.json`` payload.

    The root value is a two-level mapping of string keys whose leaves are
    lists of strings (``dict[str, dict[str, list[str]]]``).
    """
36# ///////////////////////////////////////////////////////////////
37# CLASSES
38# ///////////////////////////////////////////////////////////////
39class StringCollector:
40 """String collector with language detection and task generation."""
42 def __init__(self, user_dir: Path | None = None):
43 if user_dir is None:
44 user_dir = Path.home() / ".ezqt"
46 self.user_dir = user_dir
47 self.user_dir.mkdir(parents=True, exist_ok=True)
49 self.translations_dir = self.user_dir / "translations"
50 self.cache_dir = self.user_dir / "cache"
52 self.translations_dir.mkdir(exist_ok=True)
53 self.cache_dir.mkdir(exist_ok=True)
55 self.pending_file = self.translations_dir / "pending_strings.txt"
56 self.processed_file = self.translations_dir / "processed_strings.txt"
57 self.language_detected_file = self.translations_dir / "language_detected.txt"
58 self.translation_tasks_file = self.translations_dir / "translation_tasks.json"
60 self._collected_strings: set[str] = set()
61 self._new_strings: set[str] = set()
62 self._language_detected_strings: list[tuple[str, str]] = []
64 def collect_strings_from_widget(
65 self, widget: Any, recursive: bool = True
66 ) -> set[str]:
67 collected = {
68 entry.original_text
69 for _, entry in scan_widget(widget, recursive=recursive)
70 if is_translatable(entry.original_text)
71 }
72 self._collected_strings.update(collected)
73 return collected
75 def _detect_language(self, text: str) -> str:
76 try:
77 from langdetect import (
78 DetectorFactory,
79 detect,
80 )
82 DetectorFactory.seed = 0
83 return detect(text)
84 except ImportError:
85 return self._simple_language_detection(text)
86 except Exception as e:
87 warn_tech(
88 code="translation.collector.language_detection_failed",
89 message="Language detection error",
90 error=e,
91 )
92 return "en"
94 def _simple_language_detection(self, text: str) -> str:
95 french_chars = "àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞŸ"
96 german_chars = "äöüßÄÖÜ"
97 spanish_chars = "ñáéíóúüÑÁÉÍÓÚÜ"
98 if any(c in french_chars for c in text): 98 ↛ 100line 98 didn't jump to line 100 because the condition on line 98 was always true
99 return "fr"
100 if any(c in german_chars for c in text):
101 return "de"
102 if any(c in spanish_chars for c in text):
103 return "es"
104 return "en"
106 def save_pending_strings(self, strings: set[str]) -> None:
107 try:
108 sorted_strings = sorted(strings)
109 with open(self.pending_file, "w", encoding="utf-8") as f:
110 f.write(f"# Pending strings - {datetime.now().isoformat()}\n")
111 f.write(f"# Total: {len(sorted_strings)} strings\n\n")
112 for s in sorted_strings: 112 ↛ 113line 112 didn't jump to line 113 because the loop on line 112 never started
113 f.write(f"{s}\n")
114 get_printer().debug_msg(
115 f"[TranslationService] {len(strings)} pending strings saved"
116 )
117 except Exception as e:
118 warn_tech(
119 code="translation.collector.save_pending_failed",
120 message="Error saving strings",
121 error=e,
122 )
124 def detect_languages_and_save(self, strings: set[str]) -> list[tuple[str, str]]:
125 language_detected: list[tuple[str, str]] = []
126 for text in strings: 126 ↛ 127line 126 didn't jump to line 127 because the loop on line 126 never started
127 try:
128 lang = self._detect_language(text)
129 language_detected.append((lang, text))
130 except Exception as e:
131 warn_tech(
132 code="translation.collector.detect_languages_failed",
133 message="Language detection error",
134 error=e,
135 )
136 language_detected.append(("en", text))
138 try:
139 sorted_results = sorted(language_detected, key=lambda x: x[0])
140 with open(self.language_detected_file, "w", encoding="utf-8") as f:
141 f.write(
142 f"# Strings with detected language - {datetime.now().isoformat()}\n"
143 )
144 f.write("# Format: language_code|text\n\n")
145 for lang, text in sorted_results: 145 ↛ 146line 145 didn't jump to line 146 because the loop on line 145 never started
146 f.write(f"{lang}|{text}\n")
147 self._language_detected_strings = language_detected
148 except Exception as e:
149 warn_tech(
150 code="translation.collector.save_language_results_failed",
151 message="Error saving language detection results",
152 error=e,
153 )
155 return language_detected
157 def load_processed_strings(self) -> set[str]:
158 processed: set[str] = set()
159 try:
160 if self.processed_file.exists():
161 with open(self.processed_file, encoding="utf-8") as f:
162 for line in f:
163 line = line.strip()
164 if line and not line.startswith("#"):
165 processed.add(line)
166 get_printer().debug_msg(
167 f"[TranslationService] {len(processed)} processed strings loaded"
168 )
169 else:
170 get_printer().debug_msg(
171 "[TranslationService] No processed strings file found"
172 )
173 except Exception as e:
174 warn_tech(
175 code="translation.collector.load_processed_failed",
176 message="Error loading processed strings",
177 error=e,
178 )
179 return processed
181 def get_supported_languages(self) -> list[str]:
182 return list(SUPPORTED_LANGUAGES.keys())
184 def generate_translation_tasks(
185 self, language_detected: list[tuple[str, str]]
186 ) -> dict[str, Any]:
187 tasks: dict[str, dict[str, list[str]]] = {}
188 supported = self.get_supported_languages()
189 for source_lang, text in language_detected:
190 tasks.setdefault(source_lang, {})
191 for target_lang in (lang for lang in supported if lang != source_lang):
192 tasks[source_lang].setdefault(target_lang, []).append(text)
193 try:
194 validated = _TranslationTasksSchema.model_validate(tasks)
195 with open(self.translation_tasks_file, "w", encoding="utf-8") as f:
196 json.dump(
197 validated.model_dump(mode="json"), f, indent=2, ensure_ascii=False
198 )
199 get_printer().debug_msg(
200 f"[TranslationService] {len(tasks)} translation tasks generated"
201 )
202 except ValidationError as e:
203 warn_tech(
204 code="translation.collector.save_tasks_validation_failed",
205 message="Invalid translation tasks payload",
206 error=e,
207 )
208 return {}
209 except Exception as e:
210 warn_tech(
211 code="translation.collector.save_tasks_failed",
212 message="Error saving tasks",
213 error=e,
214 )
215 return tasks
217 def get_new_strings(self) -> set[str]:
218 return self._new_strings.copy()
220 def collect_and_compare(
221 self, widget: Any, recursive: bool = True
222 ) -> dict[str, Any]:
223 collected = self.collect_strings_from_widget(widget, recursive)
224 self.save_pending_strings(collected)
225 language_detected = self.detect_languages_and_save(collected)
226 tasks = self.generate_translation_tasks(language_detected)
227 processed = self.load_processed_strings()
228 self._new_strings = collected - processed
230 stats: dict[str, Any] = {
231 "total_collected": len(collected),
232 "total_processed": len(processed),
233 "new_strings": len(self._new_strings),
234 "languages_detected": len({lang for lang, _ in language_detected}),
235 "translation_tasks": len(tasks),
236 }
237 get_printer().debug_msg("[TranslationService] Collection summary:")
238 for key, val in stats.items():
239 get_printer().debug_msg(f" - {key}: {val}")
240 return stats
242 def mark_strings_as_processed(self, strings: set[str] | None = None) -> None:
243 if strings is None: 243 ↛ 244line 243 didn't jump to line 244 because the condition on line 243 was never true
244 strings = self._new_strings
245 if not strings:
246 warn_user(
247 code="translation.collector.mark_processed_empty",
248 user_message="No strings to mark as processed",
249 )
250 return
251 try:
252 processed = self.load_processed_strings()
253 processed.update(strings)
254 sorted_strings = sorted(processed)
255 with open(self.processed_file, "w", encoding="utf-8") as f:
256 f.write(f"# Processed strings - {datetime.now().isoformat()}\n")
257 f.write(f"# Total: {len(sorted_strings)} strings\n\n")
258 for s in sorted_strings:
259 f.write(f"{s}\n")
260 get_printer().debug_msg(
261 f"[TranslationService] {len(strings)} strings marked as processed"
262 )
263 except Exception as e:
264 warn_user(
265 code="translation.collector.mark_processed_failed",
266 user_message="Error marking strings",
267 log_message="Error marking strings",
268 error=e,
269 )
271 def get_stats(self) -> dict[str, Any]:
272 return {
273 "collected_strings": len(self._collected_strings),
274 "new_strings": len(self._new_strings),
275 "language_detected": len(self._language_detected_strings),
276 "pending_file": str(self.pending_file),
277 "processed_file": str(self.processed_file),
278 "language_detected_file": str(self.language_detected_file),
279 "translation_tasks_file": str(self.translation_tasks_file),
280 }
282 def clear_cache(self) -> None:
283 self._collected_strings.clear()
284 self._new_strings.clear()
285 self._language_detected_strings.clear()
286 get_printer().debug_msg("[TranslationService] Collector cache cleared")
289# ///////////////////////////////////////////////////////////////
290# FUNCTIONS
291# ///////////////////////////////////////////////////////////////
def get_string_collector(user_dir: Path | None = None) -> StringCollector:
    """Fetch the shared ``StringCollector`` from the service registry.

    Args:
        user_dir: Directory handed to ``StringCollector`` by the factory
            passed to ``ServiceRegistry.get``.
            NOTE(review): presumably the factory only runs when no instance
            is registered yet, so later calls would ignore ``user_dir`` -
            confirm against ``ServiceRegistry.get``.

    Returns:
        Whatever ``ServiceRegistry.get`` returns for ``StringCollector``.
    """
    # Function-scope import, kept as in the original.
    from .._registry import ServiceRegistry

    def _build_collector() -> StringCollector:
        return StringCollector(user_dir)

    return ServiceRegistry.get(StringCollector, _build_collector)