Coverage for src / ezqt_app / services / translation / string_collector.py: 24.46%

152 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-26 07:07 +0000

1# /////////////////////////////////////////////////////////////// 

2# SERVICES.TRANSLATION.STRING_COLLECTOR - UI string collector 

3# Project: ezqt_app 

4# /////////////////////////////////////////////////////////////// 

5 

6"""Collects translatable strings from Qt widgets for translation workflows.""" 

7 

8from __future__ import annotations 

9 

10# /////////////////////////////////////////////////////////////// 

11# IMPORTS 

12# /////////////////////////////////////////////////////////////// 

13# Standard library imports 

14import json 

15from datetime import datetime 

16from pathlib import Path 

17from typing import Any 

18 

19# Local imports 

20from ...domain.models.translation import SUPPORTED_LANGUAGES 

21from ...utils.diagnostics import warn_tech, warn_user 

22from ...utils.printer import get_printer 

23from ._scanner import is_translatable, scan_widget 

24 

25 

26# /////////////////////////////////////////////////////////////// 

27# CLASSES 

28# /////////////////////////////////////////////////////////////// 

29class StringCollector: 

30 """String collector with language detection and task generation.""" 

31 

32 def __init__(self, user_dir: Path | None = None): 

33 if user_dir is None: 

34 user_dir = Path.home() / ".ezqt" 

35 

36 self.user_dir = user_dir 

37 self.user_dir.mkdir(parents=True, exist_ok=True) 

38 

39 self.translations_dir = self.user_dir / "translations" 

40 self.cache_dir = self.user_dir / "cache" 

41 

42 self.translations_dir.mkdir(exist_ok=True) 

43 self.cache_dir.mkdir(exist_ok=True) 

44 

45 self.pending_file = self.translations_dir / "pending_strings.txt" 

46 self.processed_file = self.translations_dir / "processed_strings.txt" 

47 self.language_detected_file = self.translations_dir / "language_detected.txt" 

48 self.translation_tasks_file = self.translations_dir / "translation_tasks.json" 

49 

50 self._collected_strings: set[str] = set() 

51 self._new_strings: set[str] = set() 

52 self._language_detected_strings: list[tuple[str, str]] = [] 

53 

54 def collect_strings_from_widget( 

55 self, widget: Any, recursive: bool = True 

56 ) -> set[str]: 

57 collected = { 

58 entry.original_text 

59 for _, entry in scan_widget(widget, recursive=recursive) 

60 if is_translatable(entry.original_text) 

61 } 

62 self._collected_strings.update(collected) 

63 return collected 

64 

65 def _detect_language(self, text: str) -> str: 

66 try: 

67 from langdetect import ( 

68 DetectorFactory, 

69 detect, 

70 ) 

71 

72 DetectorFactory.seed = 0 

73 return detect(text) 

74 except ImportError: 

75 return self._simple_language_detection(text) 

76 except Exception as e: 

77 warn_tech( 

78 code="translation.collector.language_detection_failed", 

79 message="Language detection error", 

80 error=e, 

81 ) 

82 return "en" 

83 

84 def _simple_language_detection(self, text: str) -> str: 

85 french_chars = "àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞŸ" 

86 german_chars = "äöüßÄÖÜ" 

87 spanish_chars = "ñáéíóúüÑÁÉÍÓÚÜ" 

88 if any(c in french_chars for c in text): 

89 return "fr" 

90 if any(c in german_chars for c in text): 

91 return "de" 

92 if any(c in spanish_chars for c in text): 

93 return "es" 

94 return "en" 

95 

96 def save_pending_strings(self, strings: set[str]) -> None: 

97 try: 

98 sorted_strings = sorted(strings) 

99 with open(self.pending_file, "w", encoding="utf-8") as f: 

100 f.write(f"# Pending strings - {datetime.now().isoformat()}\n") 

101 f.write(f"# Total: {len(sorted_strings)} strings\n\n") 

102 for s in sorted_strings: 

103 f.write(f"{s}\n") 

104 get_printer().debug_msg( 

105 f"[TranslationService] {len(strings)} pending strings saved" 

106 ) 

107 except Exception as e: 

108 warn_tech( 

109 code="translation.collector.save_pending_failed", 

110 message="Error saving strings", 

111 error=e, 

112 ) 

113 

114 def detect_languages_and_save(self, strings: set[str]) -> list[tuple[str, str]]: 

115 language_detected: list[tuple[str, str]] = [] 

116 for text in strings: 

117 try: 

118 lang = self._detect_language(text) 

119 language_detected.append((lang, text)) 

120 except Exception as e: 

121 warn_tech( 

122 code="translation.collector.detect_languages_failed", 

123 message="Language detection error", 

124 error=e, 

125 ) 

126 language_detected.append(("en", text)) 

127 

128 try: 

129 sorted_results = sorted(language_detected, key=lambda x: x[0]) 

130 with open(self.language_detected_file, "w", encoding="utf-8") as f: 

131 f.write( 

132 f"# Strings with detected language - {datetime.now().isoformat()}\n" 

133 ) 

134 f.write("# Format: language_code|text\n\n") 

135 for lang, text in sorted_results: 

136 f.write(f"{lang}|{text}\n") 

137 self._language_detected_strings = language_detected 

138 except Exception as e: 

139 warn_tech( 

140 code="translation.collector.save_language_results_failed", 

141 message="Error saving language detection results", 

142 error=e, 

143 ) 

144 

145 return language_detected 

146 

147 def load_processed_strings(self) -> set[str]: 

148 processed: set[str] = set() 

149 try: 

150 if self.processed_file.exists(): 

151 with open(self.processed_file, encoding="utf-8") as f: 

152 for line in f: 

153 line = line.strip() 

154 if line and not line.startswith("#"): 

155 processed.add(line) 

156 get_printer().debug_msg( 

157 f"[TranslationService] {len(processed)} processed strings loaded" 

158 ) 

159 else: 

160 get_printer().debug_msg( 

161 "[TranslationService] No processed strings file found" 

162 ) 

163 except Exception as e: 

164 warn_tech( 

165 code="translation.collector.load_processed_failed", 

166 message="Error loading processed strings", 

167 error=e, 

168 ) 

169 return processed 

170 

171 def get_supported_languages(self) -> list[str]: 

172 return list(SUPPORTED_LANGUAGES.keys()) 

173 

174 def generate_translation_tasks( 

175 self, language_detected: list[tuple[str, str]] 

176 ) -> dict[str, Any]: 

177 tasks: dict[str, Any] = {} 

178 supported = self.get_supported_languages() 

179 for source_lang, text in language_detected: 

180 tasks.setdefault(source_lang, {}) 

181 for target_lang in (lang for lang in supported if lang != source_lang): 

182 tasks[source_lang].setdefault(target_lang, []).append(text) 

183 try: 

184 with open(self.translation_tasks_file, "w", encoding="utf-8") as f: 

185 json.dump(tasks, f, indent=2, ensure_ascii=False) 

186 get_printer().debug_msg( 

187 f"[TranslationService] {len(tasks)} translation tasks generated" 

188 ) 

189 except Exception as e: 

190 warn_tech( 

191 code="translation.collector.save_tasks_failed", 

192 message="Error saving tasks", 

193 error=e, 

194 ) 

195 return tasks 

196 

197 def get_new_strings(self) -> set[str]: 

198 return self._new_strings.copy() 

199 

200 def collect_and_compare( 

201 self, widget: Any, recursive: bool = True 

202 ) -> dict[str, Any]: 

203 collected = self.collect_strings_from_widget(widget, recursive) 

204 self.save_pending_strings(collected) 

205 language_detected = self.detect_languages_and_save(collected) 

206 tasks = self.generate_translation_tasks(language_detected) 

207 processed = self.load_processed_strings() 

208 self._new_strings = collected - processed 

209 

210 stats: dict[str, Any] = { 

211 "total_collected": len(collected), 

212 "total_processed": len(processed), 

213 "new_strings": len(self._new_strings), 

214 "languages_detected": len({lang for lang, _ in language_detected}), 

215 "translation_tasks": len(tasks), 

216 } 

217 get_printer().debug_msg("[TranslationService] Collection summary:") 

218 for key, val in stats.items(): 

219 get_printer().debug_msg(f" - {key}: {val}") 

220 return stats 

221 

222 def mark_strings_as_processed(self, strings: set[str] | None = None) -> None: 

223 if strings is None: 

224 strings = self._new_strings 

225 if not strings: 

226 warn_user( 

227 code="translation.collector.mark_processed_empty", 

228 user_message="No strings to mark as processed", 

229 ) 

230 return 

231 try: 

232 processed = self.load_processed_strings() 

233 processed.update(strings) 

234 sorted_strings = sorted(processed) 

235 with open(self.processed_file, "w", encoding="utf-8") as f: 

236 f.write(f"# Processed strings - {datetime.now().isoformat()}\n") 

237 f.write(f"# Total: {len(sorted_strings)} strings\n\n") 

238 for s in sorted_strings: 

239 f.write(f"{s}\n") 

240 get_printer().debug_msg( 

241 f"[TranslationService] {len(strings)} strings marked as processed" 

242 ) 

243 except Exception as e: 

244 warn_user( 

245 code="translation.collector.mark_processed_failed", 

246 user_message="Error marking strings", 

247 log_message="Error marking strings", 

248 error=e, 

249 ) 

250 

251 def get_stats(self) -> dict[str, Any]: 

252 return { 

253 "collected_strings": len(self._collected_strings), 

254 "new_strings": len(self._new_strings), 

255 "language_detected": len(self._language_detected_strings), 

256 "pending_file": str(self.pending_file), 

257 "processed_file": str(self.processed_file), 

258 "language_detected_file": str(self.language_detected_file), 

259 "translation_tasks_file": str(self.translation_tasks_file), 

260 } 

261 

262 def clear_cache(self) -> None: 

263 self._collected_strings.clear() 

264 self._new_strings.clear() 

265 self._language_detected_strings.clear() 

266 get_printer().debug_msg("[TranslationService] Collector cache cleared") 

267 

268 

269# /////////////////////////////////////////////////////////////// 

270# FUNCTIONS 

271# /////////////////////////////////////////////////////////////// 

def get_string_collector(user_dir: Path | None = None) -> StringCollector:
    """Return the StringCollector held by the service registry.

    Args:
        user_dir: Root directory handed to ``StringCollector`` when the
            factory below is invoked.

    Returns:
        Whatever ``ServiceRegistry.get`` yields for ``StringCollector``.
    """
    # Imported here rather than at module level, as in the original code.
    from .._registry import ServiceRegistry

    # NOTE(review): presumably the registry calls the factory only when no
    # instance exists yet, so ``user_dir`` would matter on the first call
    # only - confirm against ServiceRegistry.get.
    def _make_collector() -> StringCollector:
        return StringCollector(user_dir)

    return ServiceRegistry.get(StringCollector, _make_collector)