Coverage for src / ezqt_app / services / translation / string_collector.py: 76.84%

158 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-06 13:12 +0000

1# /////////////////////////////////////////////////////////////// 

2# SERVICES.TRANSLATION.STRING_COLLECTOR - UI string collector 

3# Project: ezqt_app 

4# /////////////////////////////////////////////////////////////// 

5 

6"""Collects translatable strings from Qt widgets for translation workflows.""" 

7 

8from __future__ import annotations 

9 

10# /////////////////////////////////////////////////////////////// 

11# IMPORTS 

12# /////////////////////////////////////////////////////////////// 

13# Standard library imports 

14import json 

15from datetime import datetime 

16from pathlib import Path 

17from typing import Any 

18 

19# Third-party imports 

20from pydantic import RootModel, ValidationError 

21 

22# Local imports 

23from ...domain.models.translation import SUPPORTED_LANGUAGES 

24from ...utils.diagnostics import warn_tech, warn_user 

25from ...utils.printer import get_printer 

26from ._scanner import is_translatable, scan_widget 

27 

28 

29# /////////////////////////////////////////////////////////////// 

30# PYDANTIC SCHEMAS 

31# /////////////////////////////////////////////////////////////// 

class _TranslationTasksSchema(RootModel[dict[str, dict[str, list[str]]]]):
    """Root model for the ``translation_tasks.json`` payload.

    The validated value is a two-level mapping whose leaves are lists of
    strings: ``{outer_key: {inner_key: [text, ...]}}``.
    """

34 

35 

36# /////////////////////////////////////////////////////////////// 

37# CLASSES 

38# /////////////////////////////////////////////////////////////// 

39class StringCollector: 

40 """String collector with language detection and task generation.""" 

41 

42 def __init__(self, user_dir: Path | None = None): 

43 if user_dir is None: 

44 user_dir = Path.home() / ".ezqt" 

45 

46 self.user_dir = user_dir 

47 self.user_dir.mkdir(parents=True, exist_ok=True) 

48 

49 self.translations_dir = self.user_dir / "translations" 

50 self.cache_dir = self.user_dir / "cache" 

51 

52 self.translations_dir.mkdir(exist_ok=True) 

53 self.cache_dir.mkdir(exist_ok=True) 

54 

55 self.pending_file = self.translations_dir / "pending_strings.txt" 

56 self.processed_file = self.translations_dir / "processed_strings.txt" 

57 self.language_detected_file = self.translations_dir / "language_detected.txt" 

58 self.translation_tasks_file = self.translations_dir / "translation_tasks.json" 

59 

60 self._collected_strings: set[str] = set() 

61 self._new_strings: set[str] = set() 

62 self._language_detected_strings: list[tuple[str, str]] = [] 

63 

64 def collect_strings_from_widget( 

65 self, widget: Any, recursive: bool = True 

66 ) -> set[str]: 

67 collected = { 

68 entry.original_text 

69 for _, entry in scan_widget(widget, recursive=recursive) 

70 if is_translatable(entry.original_text) 

71 } 

72 self._collected_strings.update(collected) 

73 return collected 

74 

75 def _detect_language(self, text: str) -> str: 

76 try: 

77 from langdetect import ( 

78 DetectorFactory, 

79 detect, 

80 ) 

81 

82 DetectorFactory.seed = 0 

83 return detect(text) 

84 except ImportError: 

85 return self._simple_language_detection(text) 

86 except Exception as e: 

87 warn_tech( 

88 code="translation.collector.language_detection_failed", 

89 message="Language detection error", 

90 error=e, 

91 ) 

92 return "en" 

93 

94 def _simple_language_detection(self, text: str) -> str: 

95 french_chars = "àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞŸ" 

96 german_chars = "äöüßÄÖÜ" 

97 spanish_chars = "ñáéíóúüÑÁÉÍÓÚÜ" 

98 if any(c in french_chars for c in text): 98 ↛ 100line 98 didn't jump to line 100 because the condition on line 98 was always true

99 return "fr" 

100 if any(c in german_chars for c in text): 

101 return "de" 

102 if any(c in spanish_chars for c in text): 

103 return "es" 

104 return "en" 

105 

106 def save_pending_strings(self, strings: set[str]) -> None: 

107 try: 

108 sorted_strings = sorted(strings) 

109 with open(self.pending_file, "w", encoding="utf-8") as f: 

110 f.write(f"# Pending strings - {datetime.now().isoformat()}\n") 

111 f.write(f"# Total: {len(sorted_strings)} strings\n\n") 

112 for s in sorted_strings: 112 ↛ 113line 112 didn't jump to line 113 because the loop on line 112 never started

113 f.write(f"{s}\n") 

114 get_printer().debug_msg( 

115 f"[TranslationService] {len(strings)} pending strings saved" 

116 ) 

117 except Exception as e: 

118 warn_tech( 

119 code="translation.collector.save_pending_failed", 

120 message="Error saving strings", 

121 error=e, 

122 ) 

123 

124 def detect_languages_and_save(self, strings: set[str]) -> list[tuple[str, str]]: 

125 language_detected: list[tuple[str, str]] = [] 

126 for text in strings: 126 ↛ 127line 126 didn't jump to line 127 because the loop on line 126 never started

127 try: 

128 lang = self._detect_language(text) 

129 language_detected.append((lang, text)) 

130 except Exception as e: 

131 warn_tech( 

132 code="translation.collector.detect_languages_failed", 

133 message="Language detection error", 

134 error=e, 

135 ) 

136 language_detected.append(("en", text)) 

137 

138 try: 

139 sorted_results = sorted(language_detected, key=lambda x: x[0]) 

140 with open(self.language_detected_file, "w", encoding="utf-8") as f: 

141 f.write( 

142 f"# Strings with detected language - {datetime.now().isoformat()}\n" 

143 ) 

144 f.write("# Format: language_code|text\n\n") 

145 for lang, text in sorted_results: 145 ↛ 146line 145 didn't jump to line 146 because the loop on line 145 never started

146 f.write(f"{lang}|{text}\n") 

147 self._language_detected_strings = language_detected 

148 except Exception as e: 

149 warn_tech( 

150 code="translation.collector.save_language_results_failed", 

151 message="Error saving language detection results", 

152 error=e, 

153 ) 

154 

155 return language_detected 

156 

157 def load_processed_strings(self) -> set[str]: 

158 processed: set[str] = set() 

159 try: 

160 if self.processed_file.exists(): 

161 with open(self.processed_file, encoding="utf-8") as f: 

162 for line in f: 

163 line = line.strip() 

164 if line and not line.startswith("#"): 

165 processed.add(line) 

166 get_printer().debug_msg( 

167 f"[TranslationService] {len(processed)} processed strings loaded" 

168 ) 

169 else: 

170 get_printer().debug_msg( 

171 "[TranslationService] No processed strings file found" 

172 ) 

173 except Exception as e: 

174 warn_tech( 

175 code="translation.collector.load_processed_failed", 

176 message="Error loading processed strings", 

177 error=e, 

178 ) 

179 return processed 

180 

181 def get_supported_languages(self) -> list[str]: 

182 return list(SUPPORTED_LANGUAGES.keys()) 

183 

184 def generate_translation_tasks( 

185 self, language_detected: list[tuple[str, str]] 

186 ) -> dict[str, Any]: 

187 tasks: dict[str, dict[str, list[str]]] = {} 

188 supported = self.get_supported_languages() 

189 for source_lang, text in language_detected: 

190 tasks.setdefault(source_lang, {}) 

191 for target_lang in (lang for lang in supported if lang != source_lang): 

192 tasks[source_lang].setdefault(target_lang, []).append(text) 

193 try: 

194 validated = _TranslationTasksSchema.model_validate(tasks) 

195 with open(self.translation_tasks_file, "w", encoding="utf-8") as f: 

196 json.dump( 

197 validated.model_dump(mode="json"), f, indent=2, ensure_ascii=False 

198 ) 

199 get_printer().debug_msg( 

200 f"[TranslationService] {len(tasks)} translation tasks generated" 

201 ) 

202 except ValidationError as e: 

203 warn_tech( 

204 code="translation.collector.save_tasks_validation_failed", 

205 message="Invalid translation tasks payload", 

206 error=e, 

207 ) 

208 return {} 

209 except Exception as e: 

210 warn_tech( 

211 code="translation.collector.save_tasks_failed", 

212 message="Error saving tasks", 

213 error=e, 

214 ) 

215 return tasks 

216 

217 def get_new_strings(self) -> set[str]: 

218 return self._new_strings.copy() 

219 

220 def collect_and_compare( 

221 self, widget: Any, recursive: bool = True 

222 ) -> dict[str, Any]: 

223 collected = self.collect_strings_from_widget(widget, recursive) 

224 self.save_pending_strings(collected) 

225 language_detected = self.detect_languages_and_save(collected) 

226 tasks = self.generate_translation_tasks(language_detected) 

227 processed = self.load_processed_strings() 

228 self._new_strings = collected - processed 

229 

230 stats: dict[str, Any] = { 

231 "total_collected": len(collected), 

232 "total_processed": len(processed), 

233 "new_strings": len(self._new_strings), 

234 "languages_detected": len({lang for lang, _ in language_detected}), 

235 "translation_tasks": len(tasks), 

236 } 

237 get_printer().debug_msg("[TranslationService] Collection summary:") 

238 for key, val in stats.items(): 

239 get_printer().debug_msg(f" - {key}: {val}") 

240 return stats 

241 

242 def mark_strings_as_processed(self, strings: set[str] | None = None) -> None: 

243 if strings is None: 243 ↛ 244line 243 didn't jump to line 244 because the condition on line 243 was never true

244 strings = self._new_strings 

245 if not strings: 

246 warn_user( 

247 code="translation.collector.mark_processed_empty", 

248 user_message="No strings to mark as processed", 

249 ) 

250 return 

251 try: 

252 processed = self.load_processed_strings() 

253 processed.update(strings) 

254 sorted_strings = sorted(processed) 

255 with open(self.processed_file, "w", encoding="utf-8") as f: 

256 f.write(f"# Processed strings - {datetime.now().isoformat()}\n") 

257 f.write(f"# Total: {len(sorted_strings)} strings\n\n") 

258 for s in sorted_strings: 

259 f.write(f"{s}\n") 

260 get_printer().debug_msg( 

261 f"[TranslationService] {len(strings)} strings marked as processed" 

262 ) 

263 except Exception as e: 

264 warn_user( 

265 code="translation.collector.mark_processed_failed", 

266 user_message="Error marking strings", 

267 log_message="Error marking strings", 

268 error=e, 

269 ) 

270 

271 def get_stats(self) -> dict[str, Any]: 

272 return { 

273 "collected_strings": len(self._collected_strings), 

274 "new_strings": len(self._new_strings), 

275 "language_detected": len(self._language_detected_strings), 

276 "pending_file": str(self.pending_file), 

277 "processed_file": str(self.processed_file), 

278 "language_detected_file": str(self.language_detected_file), 

279 "translation_tasks_file": str(self.translation_tasks_file), 

280 } 

281 

282 def clear_cache(self) -> None: 

283 self._collected_strings.clear() 

284 self._new_strings.clear() 

285 self._language_detected_strings.clear() 

286 get_printer().debug_msg("[TranslationService] Collector cache cleared") 

287 

288 

289# /////////////////////////////////////////////////////////////// 

290# FUNCTIONS 

291# /////////////////////////////////////////////////////////////// 

def get_string_collector(user_dir: Path | None = None) -> StringCollector:
    """Return the StringCollector held by the service registry.

    Args:
        user_dir: Passed to ``StringCollector`` by the factory given to
            ``ServiceRegistry.get``.
            NOTE(review): presumably ignored once an instance is already
            registered - confirm against ``ServiceRegistry.get``.
    """
    from .._registry import ServiceRegistry

    def _factory() -> StringCollector:
        # Captures user_dir, so the registry can call this without arguments.
        return StringCollector(user_dir)

    return ServiceRegistry.get(StringCollector, _factory)
296 return ServiceRegistry.get(StringCollector, lambda: StringCollector(user_dir))