-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathemail_worker.py
More file actions
393 lines (346 loc) · 16.2 KB
/
email_worker.py
File metadata and controls
393 lines (346 loc) · 16.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
import csv
import imaplib
import re
import time
import socket
import logging
from contextlib import contextmanager
from email import message_from_bytes
from email.header import decode_header
import concurrent.futures
from typing import Dict, List, Optional, Tuple
from PySide6.QtCore import QObject, QThread, Signal, Slot
from db import Database
from net_imap import fetch_records
try:
import socks # PySocks
except Exception: # pragma: no cover
socks = None
# Set a reasonable global socket timeout so network ops don't block shutdown forever
socket.setdefaulttimeout(10)
@contextmanager
def _timed(section: str, extra: str = ""):
    """Log, at DEBUG level, how long the wrapped ``with`` body took.

    The elapsed wall-clock time is reported in milliseconds. Because the log
    call sits in a ``finally`` clause, it is emitted even when the body raises.

    Args:
        section: Label identifying the timed section.
        extra: Optional detail appended to the label after a single space.
    """
    began = time.perf_counter()
    try:
        yield
    finally:
        elapsed_ms = (time.perf_counter() - began) * 1000
        if extra:
            suffix = " " + extra
        else:
            suffix = ""
        logging.debug("TIMING %s%s took=%.1f ms", section, suffix, elapsed_ms)
def read_accounts(csv_path: str) -> List[Tuple[str, str, Optional[str]]]:
    """Load account credentials from a CSV file with a header row.

    The header must contain ``username`` and ``password`` columns; a ``host``
    column is optional.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        One ``(username, password, host)`` tuple per data row, in file order.
        ``host`` is the content of the row's ``host`` cell (possibly an empty
        string), or ``None`` when the row has no such cell.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If a row lacks the ``username`` or ``password`` column.
    """
    # "utf-8-sig" drops a leading byte-order mark (common in spreadsheet
    # exports) that would otherwise turn the first header into
    # "\ufeffusername"; it reads BOM-less UTF-8 unchanged. newline="" lets the
    # csv module do its own newline handling, as its documentation requires.
    with open(csv_path, mode="r", encoding="utf-8-sig", newline="") as file:
        return [
            (row["username"], row["password"], row.get("host"))
            for row in csv.DictReader(file)
        ]
def decode_mime_header(raw_value: Optional[str]) -> str:
    """Decode an RFC 2047 encoded header value into plain text.

    Args:
        raw_value: Raw header value; ``None`` or an empty value yields ``""``.

    Returns:
        The concatenation of all decoded header segments. Byte segments are
        decoded with their declared charset (UTF-8 when none is declared),
        ignoring undecodable bytes; if that attempt fails, UTF-8 is used.
    """
    if not raw_value:
        return ""

    def _as_text(chunk, charset) -> str:
        # Segments that are already text are passed through untouched.
        if not isinstance(chunk, bytes):
            return chunk
        try:
            return chunk.decode(charset or "utf-8", errors="ignore")
        except Exception:
            return chunk.decode("utf-8", errors="ignore")

    return "".join(
        _as_text(chunk, charset) for chunk, charset in decode_header(raw_value)
    )
def _decode_part_text(part) -> Optional[str]:
    """Return the decoded text payload of one message part, or ``None``.

    ``None`` is returned when the part has no decodable payload (for example a
    multipart container) or when reading the payload fails.
    """
    try:
        raw = part.get_payload(decode=True)
    except Exception:
        # Best effort: a malformed part must not abort extraction of the rest.
        logging.debug("could not read payload of a message part", exc_info=True)
        return None
    if raw is None:
        return None
    charset = part.get_content_charset() or "utf-8"
    try:
        return raw.decode(charset, errors="ignore")
    except LookupError:
        # Unknown charset label: fall back to UTF-8 instead of discarding the
        # body, the same fallback decode_mime_header applies to headers.
        return raw.decode("utf-8", errors="ignore")


def extract_text_from_message_bytes(raw_bytes: bytes) -> Tuple[str, Optional[str]]:
    """Extract a plain-text body and the Message-ID from a raw message.

    For multipart messages the parts are walked in order, skipping parts whose
    ``Content-Disposition`` mentions ``attachment``. The first ``text/plain``
    part wins and ends the search; until one is found, the first ``text/html``
    part is kept as a fallback with its tags replaced by spaces. Messages that
    are not multipart are handled the same way based on their own content type.

    Args:
        raw_bytes: The complete message as bytes.

    Returns:
        ``(text, message_id)``. ``text`` is ``""`` when no usable body is
        found; ``message_id`` is the ``Message-ID`` header value or ``None``.
    """
    msg = message_from_bytes(raw_bytes)
    message_id = msg.get("Message-ID")
    payload_text: Optional[str] = None

    # A non-multipart message is treated as a one-element list of parts.
    parts = msg.walk() if msg.is_multipart() else [msg]
    for part in parts:
        content_disposition = str(part.get("Content-Disposition", "")).lower()
        if msg.is_multipart() and "attachment" in content_disposition:
            continue
        content_type = part.get_content_type()
        if content_type == "text/plain":
            plain_text = _decode_part_text(part)
            if plain_text is not None:
                payload_text = plain_text
                break
        elif content_type == "text/html" and payload_text is None:
            html_text = _decode_part_text(part)
            if html_text is not None:
                payload_text = re.sub(r"<[^>]+>", " ", html_text)

    return payload_text or "", message_id
class PollerThread(QThread):
    """Thread that fetches mail for a list of accounts and stores it in the DB.

    ``run`` performs one initial fetch and then repeatedly fetches ``UNSEEN``
    mail for every selected account, sleeping ``poll_interval_sec`` seconds
    between cycles, until ``stop`` is called. The network work itself is
    delegated to ``fetch_records`` running in a child process; this class
    extracts codes from the returned records, inserts them through the
    ``Database`` object and reports through its signals.
    """

    new_email = Signal(dict)
    new_emails_batch = Signal(list)
    error = Signal(str)
    heartbeat = Signal()
    progress = Signal(str, int, int, str)  # account, processed, total, phase
    netinfo = Signal(str, float, str)  # account, connect_ms, proxy_desc
    request_manual = Signal(str, int, object, bool)  # mode, limit, target_username, full_scan

    # Longest time, in seconds, to wait for the fetch subprocess to answer.
    _FETCH_TIMEOUT_SEC = 60
    # ``max_per`` value handed to ``fetch_records`` unless a full scan is requested.
    _DEFAULT_MAX_PER = 50

    def __init__(
        self,
        db: Database,
        code_patterns: List[re.Pattern],
        accounts: List[Tuple[str, str, Optional[str]]],
        poll_interval_sec: int,
        initial_fetch_mode: str = "all",
        initial_fetch_limit: int = 200,
        only_account: Optional[str] = None,
        parent: Optional[QObject] = None,
    ) -> None:
        """Store the configuration; no network activity happens here.

        Args:
            db: Database used to look up and insert e-mail records.
            code_patterns: Compiled patterns tried in order by ``_extract_code``.
            accounts: ``(username, password, host)`` tuples to poll.
            poll_interval_sec: Pause between poll cycles; clamped to >= 3.
            initial_fetch_mode: ``mode`` passed to the first fetch.
            initial_fetch_limit: ``limit`` passed to the first fetch.
            only_account: When set, only this username is polled.
            parent: Optional Qt parent object.
        """
        super().__init__(parent)
        self._db = db
        self._code_patterns = code_patterns
        self._accounts = accounts
        self._poll_interval_sec = max(3, int(poll_interval_sec))
        self._running = False
        self._initial_fetch_mode = initial_fetch_mode
        self._initial_fetch_limit = int(initial_fetch_limit)
        self._only_account = only_account
        # Owners may overwrite this after construction; defining it here means
        # _initial_fetch never depends on an attribute that might not exist.
        self._initial_on_empty_only: bool = False
        self.request_manual.connect(self._on_manual_fetch)
        self._last_progress_emit_ts: float = 0.0
        self._proxy_cfg: Optional[Dict[str, object]] = None

    def update_code_patterns(self, patterns: List[re.Pattern]) -> None:
        """Replace the patterns used to extract codes from message bodies."""
        self._code_patterns = patterns

    def update_poll_interval(self, seconds: int) -> None:
        """Set the pause between poll cycles (clamped to at least 3 seconds)."""
        self._poll_interval_sec = max(3, int(seconds))

    def update_accounts(self, accounts: List[Tuple[str, str, Optional[str]]]) -> None:
        """Replace the list of accounts to poll."""
        self._accounts = accounts

    def set_proxy(self, proxy_cfg: Optional[Dict[str, object]]) -> None:
        """Set the proxy configuration forwarded to ``fetch_records``."""
        self._proxy_cfg = proxy_cfg

    def filter_only_account(self, username: Optional[str]) -> None:
        """Restrict polling to ``username``; ``None`` removes the restriction."""
        self._only_account = username

    def stop(self) -> None:
        """Ask the polling loop to finish as soon as it next checks its flags."""
        self._running = False
        self.requestInterruption()

    def _should_stop(self) -> bool:
        """Return True once ``stop`` or a Qt interruption has been requested."""
        return not self._running or self.isInterruptionRequested()

    def run(self) -> None:
        """Thread body: initial fetch, then poll all accounts until stopped."""
        self._running = True
        self._initial_fetch()
        while not self._should_stop():
            for username, password, host in list(self._iter_accounts()):
                if self._should_stop():
                    break
                try:
                    logging.debug("poll: fetch for %s", username)
                    for _ in self._fetch_account_iter(username, password, host, mode='UNSEEN', limit=0, phase='poll'):
                        pass
                except Exception as exc:
                    # One failing account must not stop the others.
                    logging.exception("poll error for %s", username)
                    self.error.emit(f"{username}: {exc}")
            self.heartbeat.emit()
            # Sleep in one-second slices so a stop request is noticed quickly.
            for _ in range(self._poll_interval_sec):
                if self._should_stop():
                    break
                time.sleep(1)

    def _iter_accounts(self):
        """Yield the accounts to poll, honouring the ``only_account`` filter."""
        if self._only_account:
            for u, p, h in self._accounts:
                if u == self._only_account:
                    yield (u, p, h)
        else:
            yield from self._accounts

    def _emit_progress_throttled(self, account: str, processed: int, total: int, phase: str, force: bool = False) -> None:
        """Emit ``progress`` at most every 0.2 s, except for first/last/forced updates."""
        now = time.perf_counter()
        if force or (now - self._last_progress_emit_ts) >= 0.2 or processed == total or processed == 0:
            self.progress.emit(account, processed, total, phase)
            self._last_progress_emit_ts = now

    def _initial_fetch(self) -> None:
        """Run the configured first fetch for every selected account."""
        for username, password, host in list(self._iter_accounts()):
            if self._should_stop():
                break
            try:
                # Conditional first sync: when configured to sync only into an
                # empty DB, skip accounts that already have stored mail.
                try:
                    any_existing = len(self._db.fetch_recent_emails(limit=1, account_username=username)) > 0
                except Exception:
                    any_existing = False
                if any_existing and self._initial_on_empty_only:
                    logging.info("initial: skip %s (db already has data)", username)
                    continue
                logging.info("initial: fetch %s mode=%s", username, self._initial_fetch_mode)
                for _ in self._fetch_account_iter(username, password, host, mode=self._initial_fetch_mode, limit=self._initial_fetch_limit, phase='initial'):
                    pass
            except Exception as exc:
                logging.exception("initial error for %s", username)
                self.error.emit(f"{username}: {exc}")
        self.heartbeat.emit()

    @Slot(str, int, object, bool)
    def _on_manual_fetch(self, mode: str, limit: int, target_username: Optional[str], full_scan: bool) -> None:
        """Handle ``request_manual`` by running a one-shot fetch.

        NOTE(review): the connection is made in ``__init__`` and ``run`` never
        starts an event loop, so this slot presumably executes in the thread
        that owns this object rather than in the worker thread - confirm.
        """
        self.one_shot_fetch(mode=mode, limit=limit, target_username=target_username, full_scan=full_scan)

    def _iter_accounts_for_target(self, target_username: Optional[str]):
        """Yield the account named ``target_username``, or the normal selection."""
        if target_username:
            for u, p, h in self._accounts:
                if u == target_username:
                    yield (u, p, h)
        else:
            yield from self._iter_accounts()

    def one_shot_fetch(self, mode: str = "all", limit: int = 200, target_username: Optional[str] = None, full_scan: bool = False) -> None:
        """Fetch once for the target account (or all selected accounts).

        Does nothing while the thread is not running, because the loop checks
        the same stop flags as ``run``.
        """
        for username, password, host in list(self._iter_accounts_for_target(target_username)):
            if self._should_stop():
                break
            try:
                logging.info("manual: %s mode=%s limit=%s", username, mode, limit)
                for _ in self._fetch_account_iter(username, password, host, mode=mode, limit=limit, phase='manual', full_scan=full_scan):
                    pass
            except Exception as exc:
                logging.exception("manual error for %s", username)
                self.error.emit(f"{username}: {exc}")
        self.heartbeat.emit()

    def _count_ids(self, username: str, password: str, host: Optional[str], mode: str) -> int:
        """Return how many INBOX messages match ``mode`` (``all`` or unseen).

        Connects directly with ``imaplib`` on port 993; the server defaults to
        ``imap.<domain of username>`` when ``host`` is empty. Returns 0 when
        the search does not answer ``OK``.
        """
        imap_server = host or f"imap.{username.split('@')[-1]}"
        with _timed("imap_connect", username):
            mail = imaplib.IMAP4_SSL(imap_server, 993)
        try:
            with _timed("imap_login", username):
                mail.login(username, password)
            with _timed("imap_select", username):
                mail.select('INBOX')
            search_criteria = 'ALL' if mode.lower() == 'all' else 'UNSEEN'
            with _timed("imap_search", f"{username} {search_criteria}"):
                status, data = mail.uid('search', None, search_criteria)
            total = 0
            if status == 'OK' and data and data[0]:
                total = len([eid for eid in data[0].split() if eid])
        finally:
            # Always release the connection, including when login/select/search
            # raised; a failing logout is not worth surfacing to the caller.
            try:
                mail.logout()
            except Exception:
                logging.debug("count_ids: logout failed for %s", username, exc_info=True)
        logging.debug("count_ids: %s %s -> %d", username, search_criteria, total)
        return total

    def _fetch_account_iter(self, username: str, password: str, host: Optional[str], mode: str, limit: int, phase: str, full_scan: bool = False):
        """Fetch records for one account, store them, and yield each record.

        The fetch runs in a single-worker child process so that the IMAP/SSL
        work is isolated from this process. Every record gets a ``code`` key
        from ``_extract_code``; records with a ``message_uid`` are inserted
        into the DB, and the ones reported as inserted are announced together
        through ``new_emails_batch`` once iteration finishes.

        Raises:
            concurrent.futures.TimeoutError: No result within the timeout.
            Exception: Whatever ``fetch_records`` raised in the child process.
        """
        with _timed("proc_fetch", username):
            pool = concurrent.futures.ProcessPoolExecutor(max_workers=1)
            finished = False
            try:
                max_per = 0 if full_scan else self._DEFAULT_MAX_PER
                future = pool.submit(fetch_records, username, password, host, mode, limit, max_per, self._proxy_cfg)
                records, metrics = future.result(timeout=self._FETCH_TIMEOUT_SEC)
                finished = True
            finally:
                # Leaving a ``with`` block would wait for the worker, turning a
                # timeout into an unbounded block; only wait when it is done.
                pool.shutdown(wait=finished)

        total = len(records)
        self._emit_progress_throttled(username, 0, total, phase, force=True)
        try:
            self.netinfo.emit(username, float(metrics.get("connect_ms", 0.0)), str(metrics.get("proxy_desc", "direct")))
        except Exception:
            # Metrics are informational; a malformed value must not stop the fetch.
            logging.debug("netinfo unavailable for %s", username, exc_info=True)

        processed = 0
        batch_new: List[Dict[str, Optional[str]]] = []
        for rec in records:
            if self._should_stop():
                break
            # Extract the code in this process so the child does not have to
            # compile the patterns again.
            rec['code'] = self._extract_code(rec.get('body_full') or "")
            if rec.get('message_uid'):
                with _timed("db_insert", username):
                    inserted = self._db.insert_email(rec)
                if inserted:
                    batch_new.append(rec)
            processed += 1
            self._emit_progress_throttled(username, processed, total, phase)
            yield rec
        if batch_new:
            # Notify listeners once per batch to avoid flooding them with events.
            self.new_emails_batch.emit(batch_new)
        # note: logout handled in child process

    def _poll_account(self, username: str, password: str, host: Optional[str]) -> None:
        """Fetch unseen mail for one account, discarding the yielded records."""
        # ``phase`` is a required argument of _fetch_account_iter.
        for _ in self._fetch_account_iter(username, password, host, mode='UNSEEN', limit=0, phase='poll'):
            pass

    def _extract_code(self, text_content: str) -> Optional[str]:
        """Return the first code found in ``text_content``, or ``None``.

        Patterns are tried in order. For a pattern without capture groups the
        whole match is returned; otherwise the first group that took part in
        the match is returned. A match whose groups are all empty-handed does
        not end the search: the remaining patterns are still tried.
        """
        for pattern in self._code_patterns:
            match = pattern.search(text_content)
            if not match:
                continue
            groups = match.groups()
            if not groups:
                return match.group(0)
            captured = next((g for g in groups if g is not None), None)
            if captured is not None:
                return captured
        return None
class EmailPoller(QObject):
    """Facade that owns a ``PollerThread`` and re-emits its signals.

    Settings changed before ``start`` are used to construct the thread;
    settings changed afterwards are forwarded to the running thread where a
    matching update method exists.
    """

    new_email = Signal(dict)
    new_emails_batch = Signal(list)
    error = Signal(str)
    heartbeat = Signal()
    progress = Signal(str, int, int, str)
    netinfo = Signal(str, float, str)

    # Longest time ``stop`` waits for the thread, and the slice it waits in.
    _STOP_TIMEOUT_MS = 15000
    _STOP_STEP_MS = 250

    def __init__(self, db: Database, code_patterns: List[re.Pattern], poll_interval_sec: int = 15, parent: Optional[QObject] = None) -> None:
        """Store the configuration; the thread is only created by ``start``."""
        super().__init__(parent)
        self._db = db
        self._code_patterns = code_patterns
        self._poll_interval_sec = poll_interval_sec
        self._accounts: List[Tuple[str, str, Optional[str]]] = []
        self._thread: Optional[PollerThread] = None
        self._initial_fetch_mode: str = "all"
        self._initial_fetch_limit: int = 200
        self._initial_on_empty_only: bool = True

    def set_accounts(self, accounts: List[Tuple[str, str, Optional[str]]]) -> None:
        """Replace the account list and register every account in the DB."""
        self._accounts = accounts
        for username, _password, host in accounts:
            self._db.upsert_account(username, host)
        if self._thread is not None:
            self._thread.update_accounts(accounts)

    def set_code_patterns(self, patterns: List[re.Pattern]) -> None:
        """Replace the code-extraction patterns."""
        self._code_patterns = patterns
        if self._thread is not None:
            self._thread.update_code_patterns(patterns)

    def set_poll_interval_sec(self, seconds: int) -> None:
        """Set the pause between poll cycles (clamped to at least 3 seconds)."""
        self._poll_interval_sec = max(3, int(seconds))
        if self._thread is not None:
            self._thread.update_poll_interval(self._poll_interval_sec)

    def set_initial_fetch(self, mode: str, limit: int, on_empty_only: Optional[bool] = None) -> None:
        """Configure the first fetch performed by a thread started later.

        Args:
            mode: Fetch mode for the initial fetch.
            limit: Fetch limit for the initial fetch.
            on_empty_only: When given, whether accounts that already have
                stored mail are skipped; ``None`` keeps the current setting.
        """
        self._initial_fetch_mode = mode
        self._initial_fetch_limit = int(limit)
        if on_empty_only is not None:
            self._initial_on_empty_only = bool(on_empty_only)

    def filter_only_account(self, username: Optional[str]) -> None:
        """Restrict the running thread to one account; no-op when not started."""
        if self._thread is not None:
            self._thread.filter_only_account(username)

    def start(self) -> None:
        """Create and start the polling thread; no-op if one already exists."""
        if self._thread is not None:
            return
        self._thread = PollerThread(
            db=self._db,
            code_patterns=self._code_patterns,
            accounts=self._accounts,
            poll_interval_sec=self._poll_interval_sec,
            initial_fetch_mode=self._initial_fetch_mode,
            initial_fetch_limit=self._initial_fetch_limit,
        )
        # Hand over the "initial sync only when the DB is empty" switch.
        self._thread._initial_on_empty_only = self._initial_on_empty_only  # type: ignore[attr-defined]
        self._thread.new_email.connect(self.new_email.emit)
        # The thread announces stored mail through new_emails_batch, so it has
        # to be forwarded for listeners of this object to be notified at all.
        self._thread.new_emails_batch.connect(self.new_emails_batch.emit)
        self._thread.error.connect(self.error.emit)
        self._thread.heartbeat.connect(self.heartbeat.emit)
        self._thread.progress.connect(self.progress.emit)
        self._thread.netinfo.connect(self.netinfo.emit)
        self._thread.start()
        # Set the priority only after start() to avoid "thread is not running".
        try:
            self._thread.setPriority(QThread.Priority.LowPriority)
        except Exception:
            logging.debug("could not lower poller thread priority", exc_info=True)

    def stop(self) -> None:
        """Request the thread to stop and wait up to 15 seconds for it."""
        if self._thread is not None:
            self._thread.stop()
            total_wait_ms = 0
            while self._thread.isRunning() and total_wait_ms < self._STOP_TIMEOUT_MS:
                self._thread.wait(self._STOP_STEP_MS)
                total_wait_ms += self._STOP_STEP_MS
            if self._thread.isRunning():
                logging.warning("poller thread still running after %d ms; releasing it anyway", total_wait_ms)
            self._thread = None

    def one_shot_fetch(self, mode: str = 'all', limit: int = 200, target_username: Optional[str] = None, full_scan: bool = False) -> None:
        """Ask the thread for an immediate fetch; no-op when not started.

        Args:
            mode: Fetch mode forwarded to the thread.
            limit: Fetch limit forwarded to the thread.
            target_username: Only fetch this account when given.
            full_scan: Forwarded to the thread's manual fetch.
        """
        if self._thread is not None:
            # request_manual is declared with four arguments, so full_scan
            # must always be supplied.
            self._thread.request_manual.emit(mode, int(limit), target_username, bool(full_scan))

    def mark_all_read_for_account(self, username: str) -> None:
        """Mark all stored mail of ``username`` as read; failures are logged."""
        try:
            self._db.mark_all_read(username)
        except Exception:
            logging.exception("mark_all_read failed for %s", username)

    def mark_all_read_for_all_accounts(self) -> None:
        """Mark all stored mail of every account as read; failures are logged."""
        try:
            self._db.mark_all_read(None)
        except Exception:
            logging.exception("mark_all_read failed for all accounts")
__all__ = ["EmailPoller", "read_accounts"]