Coverage for src / ezqt_app / services / translation / string_collector.py: 24.46%
152 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-26 07:07 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-26 07:07 +0000
1# ///////////////////////////////////////////////////////////////
2# SERVICES.TRANSLATION.STRING_COLLECTOR - UI string collector
3# Project: ezqt_app
4# ///////////////////////////////////////////////////////////////
6"""Collects translatable strings from Qt widgets for translation workflows."""
8from __future__ import annotations
10# ///////////////////////////////////////////////////////////////
11# IMPORTS
12# ///////////////////////////////////////////////////////////////
13# Standard library imports
14import json
15from datetime import datetime
16from pathlib import Path
17from typing import Any
19# Local imports
20from ...domain.models.translation import SUPPORTED_LANGUAGES
21from ...utils.diagnostics import warn_tech, warn_user
22from ...utils.printer import get_printer
23from ._scanner import is_translatable, scan_widget
26# ///////////////////////////////////////////////////////////////
27# CLASSES
28# ///////////////////////////////////////////////////////////////
class StringCollector:
    """String collector with language detection and task generation.

    Strings are gathered from widgets through ``scan_widget``, written to
    plain-text / JSON files below ``<user_dir>/translations`` and compared
    with the strings that were already marked as processed.
    """

    # Characters that, among French/German/Spanish, point to one language only.
    # Used by the fallback detector when ``langdetect`` is not installed.
    _FRENCH_MARKERS = frozenset("àâæçèêëîïôœùûÿÀÂÆÇÈÊËÎÏÔŒÙÛŸ")
    _GERMAN_MARKERS = frozenset("äöüßÄÖÜẞ")
    _SPANISH_MARKERS = frozenset("ñáíóúÑÁÍÓÚ¿¡")
    # Broad accented-letter set the fallback detector historically mapped to
    # "fr"; kept so ambiguous input (e.g. "é") still yields the same answer.
    _LEGACY_ACCENTED = frozenset(
        "àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞŸ"
    )

    def __init__(self, user_dir: Path | None = None):
        """Create the working directories and initialise the in-memory caches.

        Args:
            user_dir: Base directory for the collector files. Defaults to
                ``~/.ezqt`` when ``None``.
        """
        if user_dir is None:
            user_dir = Path.home() / ".ezqt"

        self.user_dir = user_dir
        self.user_dir.mkdir(parents=True, exist_ok=True)

        self.translations_dir = self.user_dir / "translations"
        self.cache_dir = self.user_dir / "cache"

        self.translations_dir.mkdir(exist_ok=True)
        self.cache_dir.mkdir(exist_ok=True)

        self.pending_file = self.translations_dir / "pending_strings.txt"
        self.processed_file = self.translations_dir / "processed_strings.txt"
        self.language_detected_file = self.translations_dir / "language_detected.txt"
        self.translation_tasks_file = self.translations_dir / "translation_tasks.json"

        self._collected_strings: set[str] = set()
        self._new_strings: set[str] = set()
        self._language_detected_strings: list[tuple[str, str]] = []

    def collect_strings_from_widget(
        self, widget: Any, recursive: bool = True
    ) -> set[str]:
        """Return the translatable texts found on *widget*.

        Every entry yielded by ``scan_widget`` whose ``original_text`` passes
        ``is_translatable`` is kept; the result is also merged into the
        collector's running set of collected strings.

        Args:
            widget: Object handed to ``scan_widget``.
            recursive: Forwarded to ``scan_widget`` as ``recursive``.

        Returns:
            The set of strings collected by this call.
        """
        collected = {
            entry.original_text
            for _, entry in scan_widget(widget, recursive=recursive)
            if is_translatable(entry.original_text)
        }
        self._collected_strings.update(collected)
        return collected

    def _detect_language(self, text: str) -> str:
        """Return a language code for *text*.

        Uses ``langdetect`` when it can be imported and falls back to
        ``_simple_language_detection`` otherwise. Any other failure is
        reported through ``warn_tech`` and ``"en"`` is returned.
        """
        try:
            from langdetect import (
                DetectorFactory,
                detect,
            )

            # Fixed seed: langdetect documents this as the way to get
            # reproducible results.
            DetectorFactory.seed = 0
            return detect(text)
        except ImportError:
            return self._simple_language_detection(text)
        except Exception as e:
            warn_tech(
                code="translation.collector.language_detection_failed",
                message="Language detection error",
                error=e,
            )
            return "en"

    def _simple_language_detection(self, text: str) -> str:
        """Guess ``"fr"``, ``"de"``, ``"es"`` or ``"en"`` from accented characters.

        Each language gets one vote per character of *text* found in its
        distinctive marker set; the language with the most votes wins, ties
        being resolved in the order French, German, Spanish. When no marker
        matches, any other accented letter from the legacy set yields
        ``"fr"`` and plain text yields ``"en"``.

        The previous implementation tested one broad "French" set first; that
        set contained every Spanish character and every German one except
        ``ß``, so Spanish was never returned and German almost never.
        """
        marker_sets = (
            ("fr", self._FRENCH_MARKERS),
            ("de", self._GERMAN_MARKERS),
            ("es", self._SPANISH_MARKERS),
        )
        best_lang, best_score = "en", 0
        for lang, markers in marker_sets:
            score = sum(1 for ch in text if ch in markers)
            # Strict '>' keeps the earlier language when scores are tied.
            if score > best_score:
                best_lang, best_score = lang, score
        if best_score:
            return best_lang
        # Ambiguous accents only (e.g. "é"): keep the historical answer.
        if any(ch in self._LEGACY_ACCENTED for ch in text):
            return "fr"
        return "en"

    def _write_string_file(self, path: Path, title: str, strings: list[str]) -> None:
        """Write *strings* to *path*, one per line, below a two-line header.

        The header holds *title* with the current timestamp and the number of
        strings. Errors are not handled here; callers decide how to report.
        """
        with open(path, "w", encoding="utf-8") as f:
            f.write(f"# {title} - {datetime.now().isoformat()}\n")
            f.write(f"# Total: {len(strings)} strings\n\n")
            f.writelines(f"{s}\n" for s in strings)

    def save_pending_strings(self, strings: set[str]) -> None:
        """Write *strings*, sorted, to the pending-strings file.

        Failures are reported through ``warn_tech`` and not re-raised.
        """
        try:
            self._write_string_file(
                self.pending_file, "Pending strings", sorted(strings)
            )
            get_printer().debug_msg(
                f"[TranslationService] {len(strings)} pending strings saved"
            )
        except Exception as e:
            warn_tech(
                code="translation.collector.save_pending_failed",
                message="Error saving strings",
                error=e,
            )

    def detect_languages_and_save(self, strings: set[str]) -> list[tuple[str, str]]:
        """Detect the language of every string and persist the result.

        The pairs are written to the language-detected file as
        ``language_code|text`` lines ordered by language code. Text is written
        as-is (no escaping of ``|`` or newlines).

        Args:
            strings: Texts to analyse.

        Returns:
            A list of ``(language_code, text)`` tuples, one per input string.
            It is returned even when writing the file fails.
        """
        language_detected: list[tuple[str, str]] = []
        for text in strings:
            try:
                lang = self._detect_language(text)
            except Exception as e:
                warn_tech(
                    code="translation.collector.detect_languages_failed",
                    message="Language detection error",
                    error=e,
                )
                lang = "en"
            language_detected.append((lang, text))

        try:
            sorted_results = sorted(language_detected, key=lambda x: x[0])
            with open(self.language_detected_file, "w", encoding="utf-8") as f:
                f.write(
                    f"# Strings with detected language - {datetime.now().isoformat()}\n"
                )
                f.write("# Format: language_code|text\n\n")
                for lang, text in sorted_results:
                    f.write(f"{lang}|{text}\n")
            # Keep a private copy: clear_cache() empties this list in place and
            # must not wipe the list handed back to the caller.
            self._language_detected_strings = list(language_detected)
        except Exception as e:
            warn_tech(
                code="translation.collector.save_language_results_failed",
                message="Error saving language detection results",
                error=e,
            )

        return language_detected

    def load_processed_strings(self) -> set[str]:
        """Read the processed-strings file into a set.

        Each line is stripped of surrounding whitespace; empty lines and lines
        starting with ``#`` are skipped. A missing file or a read error yields
        whatever was collected so far (an empty set when nothing was read);
        errors are reported through ``warn_tech``.
        """
        processed: set[str] = set()
        try:
            if self.processed_file.exists():
                with open(self.processed_file, encoding="utf-8") as f:
                    for line in f:
                        line = line.strip()
                        if line and not line.startswith("#"):
                            processed.add(line)
                get_printer().debug_msg(
                    f"[TranslationService] {len(processed)} processed strings loaded"
                )
            else:
                get_printer().debug_msg(
                    "[TranslationService] No processed strings file found"
                )
        except Exception as e:
            warn_tech(
                code="translation.collector.load_processed_failed",
                message="Error loading processed strings",
                error=e,
            )
        return processed

    def get_supported_languages(self) -> list[str]:
        """Return the keys of ``SUPPORTED_LANGUAGES`` as a list."""
        return list(SUPPORTED_LANGUAGES.keys())

    def generate_translation_tasks(
        self, language_detected: list[tuple[str, str]]
    ) -> dict[str, Any]:
        """Build and save the translation tasks for the detected strings.

        The result maps ``source_lang -> target_lang -> [texts]`` where the
        targets are all supported languages other than the source language.
        It is dumped as JSON to the translation-tasks file; a write failure is
        reported through ``warn_tech`` and the mapping is still returned.

        Args:
            language_detected: ``(language_code, text)`` pairs.

        Returns:
            The nested task mapping.
        """
        tasks: dict[str, Any] = {}
        supported = self.get_supported_languages()
        for source_lang, text in language_detected:
            # The source language gets an entry even when it has no targets.
            per_target = tasks.setdefault(source_lang, {})
            for target_lang in supported:
                if target_lang != source_lang:
                    per_target.setdefault(target_lang, []).append(text)
        try:
            with open(self.translation_tasks_file, "w", encoding="utf-8") as f:
                json.dump(tasks, f, indent=2, ensure_ascii=False)
            get_printer().debug_msg(
                f"[TranslationService] {len(tasks)} translation tasks generated"
            )
        except Exception as e:
            warn_tech(
                code="translation.collector.save_tasks_failed",
                message="Error saving tasks",
                error=e,
            )
        return tasks

    def get_new_strings(self) -> set[str]:
        """Return a copy of the strings found new by the last comparison."""
        return self._new_strings.copy()

    def collect_and_compare(
        self, widget: Any, recursive: bool = True
    ) -> dict[str, Any]:
        """Run the full pipeline for *widget* and return summary counters.

        Collects the strings, saves them as pending, detects their languages,
        generates the translation tasks, then records as "new" every collected
        string that is absent from the processed-strings file.

        Returns:
            A dict with the keys ``total_collected``, ``total_processed``,
            ``new_strings``, ``languages_detected`` and ``translation_tasks``.
        """
        collected = self.collect_strings_from_widget(widget, recursive)
        self.save_pending_strings(collected)
        language_detected = self.detect_languages_and_save(collected)
        tasks = self.generate_translation_tasks(language_detected)
        processed = self.load_processed_strings()
        self._new_strings = collected - processed

        stats: dict[str, Any] = {
            "total_collected": len(collected),
            "total_processed": len(processed),
            "new_strings": len(self._new_strings),
            "languages_detected": len({lang for lang, _ in language_detected}),
            "translation_tasks": len(tasks),
        }
        printer = get_printer()
        printer.debug_msg("[TranslationService] Collection summary:")
        for key, val in stats.items():
            printer.debug_msg(f"  - {key}: {val}")
        return stats

    def mark_strings_as_processed(self, strings: set[str] | None = None) -> None:
        """Add *strings* to the processed-strings file.

        Args:
            strings: Strings to mark. When ``None``, the strings found new by
                the last comparison are used. An empty set only triggers a
                user warning.

        The file is rewritten with the union of its previous content and
        *strings*, sorted. Failures are reported through ``warn_user``.
        """
        if strings is None:
            strings = self._new_strings
        if not strings:
            warn_user(
                code="translation.collector.mark_processed_empty",
                user_message="No strings to mark as processed",
            )
            return
        try:
            processed = self.load_processed_strings()
            processed.update(strings)
            self._write_string_file(
                self.processed_file, "Processed strings", sorted(processed)
            )
            get_printer().debug_msg(
                f"[TranslationService] {len(strings)} strings marked as processed"
            )
        except Exception as e:
            warn_user(
                code="translation.collector.mark_processed_failed",
                user_message="Error marking strings",
                log_message="Error marking strings",
                error=e,
            )

    def get_stats(self) -> dict[str, Any]:
        """Return the cache sizes and the paths of the collector files."""
        return {
            "collected_strings": len(self._collected_strings),
            "new_strings": len(self._new_strings),
            "language_detected": len(self._language_detected_strings),
            "pending_file": str(self.pending_file),
            "processed_file": str(self.processed_file),
            "language_detected_file": str(self.language_detected_file),
            "translation_tasks_file": str(self.translation_tasks_file),
        }

    def clear_cache(self) -> None:
        """Empty the in-memory caches; files on disk are left untouched."""
        self._collected_strings.clear()
        self._new_strings.clear()
        self._language_detected_strings.clear()
        get_printer().debug_msg("[TranslationService] Collector cache cleared")
269# ///////////////////////////////////////////////////////////////
270# FUNCTIONS
271# ///////////////////////////////////////////////////////////////
def get_string_collector(user_dir: Path | None = None) -> StringCollector:
    """Return the StringCollector held by the service registry.

    Args:
        user_dir: Passed to ``StringCollector`` by the factory given to
            ``ServiceRegistry.get``. Presumably only used when the registry
            has to build the instance - verify against ``ServiceRegistry.get``.

    Returns:
        The ``StringCollector`` returned by ``ServiceRegistry.get``.
    """
    # Imported here rather than at module level, as in the original code.
    from .._registry import ServiceRegistry

    def _build_collector() -> StringCollector:
        return StringCollector(user_dir)

    return ServiceRegistry.get(StringCollector, _build_collector)