Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added 'since' option to search for messages since a certain time #527

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/source/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,9 @@ The full set of configuration options are:
- `check_timeout` - int: Number of seconds to wait for an IMAP
IDLE response or the number of seconds until the next
mail check (Default: `30`)
- `since` - str: Search for messages since a certain time (Examples: `5m|3h|2d|1w`).
Acceptable units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"}.
Defaults to `1d` if an incorrect value is provided.
- `imap`
- `host` - str: The IMAP server hostname or IP address
- `port` - int: The IMAP server port (Default: `993`)
Expand Down
66 changes: 59 additions & 7 deletions parsedmarc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from base64 import b64decode
from collections import OrderedDict
from csv import DictWriter
from datetime import datetime
from datetime import datetime, timedelta
from io import BytesIO, StringIO
from typing import Callable

Expand All @@ -28,7 +28,8 @@
from mailsuite.smtp import send_email

from parsedmarc.log import logger
from parsedmarc.mail import MailboxConnection
from parsedmarc.mail import MailboxConnection, IMAPConnection, \
MSGraphConnection, GmailConnection
from parsedmarc.utils import get_base_domain, get_ip_address_info
from parsedmarc.utils import is_outlook_msg, convert_outlook_msg
from parsedmarc.utils import parse_email
Expand Down Expand Up @@ -1371,6 +1372,7 @@
strip_attachment_payloads=False,
results=None,
batch_size=10,
since=None,
create_folders=True):
"""
Fetches and parses DMARC reports from a mailbox
Expand All @@ -1393,6 +1395,8 @@
results (dict): Results from the previous run
batch_size (int): Number of messages to read and process before saving
(use 0 for no limit)
since: Search for messages since certain time
(units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"})
create_folders (bool): Whether to create the destination folders
(not used in watch)

Expand All @@ -1405,6 +1409,9 @@
if connection is None:
raise ValueError("Must supply a connection")

# current_time useful to fetch_messages later in the program
current_time = None

Check warning on line 1413 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1413

Added line #L1413 was not covered by tests

aggregate_reports = []
forensic_reports = []
smtp_tls_reports = []
Expand All @@ -1428,12 +1435,44 @@
connection.create_folder(smtp_tls_reports_folder)
connection.create_folder(invalid_reports_folder)

messages = connection.fetch_messages(reports_folder, batch_size=batch_size)
if since:
_since = 1440 # default one day
if re.match(r'\d{1,2}[mhd]$', since):
Copy link
Contributor

@AnaelMobilia AnaelMobilia Jun 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why restrict to two digits? (e.g. 365 days)
I suggest using \d+, as in the next regex.

s = re.split(r'(\d+)', since)
match s[2]:
case 'm': _since = int(s[1])
case 'h': _since = int(s[1])*60
case 'd': _since = int(s[1])*60*24
case 'w': _since = int(s[1])*60*24*7

Check warning on line 1446 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1438-L1446

Added lines #L1438 - L1446 were not covered by tests
else:
logger.warning("Incorrect format for \'since\' option. \

Check warning on line 1448 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1448

Added line #L1448 was not covered by tests
Provided value:{0}, Expected values:(5m|3h|2d|1w). \
Ignoring option, fetching messages for last 24hrs"
.format(since))

if isinstance(connection, IMAPConnection):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a reference to RFC 3501 to indicate that the restriction (no time, no timezone) comes from the IMAP protocol

logger.debug("Only days and weeks values in \'since\' option are \

Check warning on line 1454 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1453-L1454

Added lines #L1453 - L1454 were not covered by tests
considered for IMAP conections. Examples: 2d or 1w")
since = (datetime.utcnow() - timedelta(minutes=_since)).date()
current_time = datetime.utcnow().date()
elif isinstance(connection, MSGraphConnection):
since = (datetime.utcnow() - timedelta(minutes=_since)) \

Check warning on line 1459 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1456-L1459

Added lines #L1456 - L1459 were not covered by tests
.isoformat() + 'Z'
current_time = datetime.utcnow().isoformat() + 'Z'
elif isinstance(connection, GmailConnection):
since = (datetime.utcnow() - timedelta(minutes=_since)) \

Check warning on line 1463 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1461-L1463

Added lines #L1461 - L1463 were not covered by tests
.strftime('%s')
current_time = datetime.utcnow().strftime('%s')

Check warning on line 1465 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1465

Added line #L1465 was not covered by tests
else:
pass

Check warning on line 1467 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1467

Added line #L1467 was not covered by tests

messages = connection.fetch_messages(reports_folder, batch_size=batch_size,

Check warning on line 1469 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1469

Added line #L1469 was not covered by tests
since=since)
total_messages = len(messages)
logger.debug("Found {0} messages in {1}".format(len(messages),
reports_folder))

if batch_size:
if batch_size and not since:

Check warning on line 1475 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1475

Added line #L1475 was not covered by tests
message_limit = min(total_messages, batch_size)
else:
message_limit = total_messages
Expand All @@ -1445,7 +1484,15 @@
logger.debug("Processing message {0} of {1}: UID {2}".format(
i+1, message_limit, msg_uid
))
msg_content = connection.fetch_message(msg_uid)
if isinstance(mailbox, MSGraphConnection):
if test:
msg_content = connection.fetch_message(msg_uid,

Check warning on line 1489 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1487-L1489

Added lines #L1487 - L1489 were not covered by tests
mark_read=False)
else:
msg_content = connection.fetch_message(msg_uid,

Check warning on line 1492 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1492

Added line #L1492 was not covered by tests
mark_read=True)
else:
msg_content = connection.fetch_message(msg_uid)

Check warning on line 1495 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1495

Added line #L1495 was not covered by tests
try:
sa = strip_attachment_payloads
parsed_email = parse_report_email(
Expand Down Expand Up @@ -1564,7 +1611,11 @@
("forensic_reports", forensic_reports),
("smtp_tls_reports", smtp_tls_reports)])

total_messages = len(connection.fetch_messages(reports_folder))
if current_time:
total_messages = len(connection.fetch_messages(reports_folder,

Check warning on line 1615 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1614-L1615

Added lines #L1614 - L1615 were not covered by tests
since=current_time))
else:
total_messages = len(connection.fetch_messages(reports_folder))

Check warning on line 1618 in parsedmarc/__init__.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/__init__.py#L1618

Added line #L1618 was not covered by tests

if not test and not batch_size and total_messages > 0:
# Process emails that came in during the last run
Expand All @@ -1582,7 +1633,8 @@
always_use_local_files=always_use_local_files,
reverse_dns_map_path=reverse_dns_map_path,
reverse_dns_map_url=reverse_dns_map_url,
offline=offline
offline=offline,
since=current_time,
)

return results
Expand Down
4 changes: 4 additions & 0 deletions parsedmarc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ def process_reports(reports_):
mailbox_test=False,
mailbox_batch_size=10,
mailbox_check_timeout=30,
mailbox_since=None,
imap_host=None,
imap_skip_certificate_verification=False,
imap_ssl=True,
Expand Down Expand Up @@ -585,6 +586,8 @@ def process_reports(reports_):
if "check_timeout" in mailbox_config:
opts.mailbox_check_timeout = mailbox_config.getint(
"check_timeout")
if "since" in mailbox_config:
opts.mailbox_since = mailbox_config["since"]

if "imap" in config.sections():
imap_config = config["imap"]
Expand Down Expand Up @@ -1312,6 +1315,7 @@ def process_reports(reports_):
nameservers=opts.nameservers,
test=opts.mailbox_test,
strip_attachment_payloads=opts.strip_attachment_payloads,
since=opts.mailbox_since,
)

aggregate_reports += reports["aggregate_reports"]
Expand Down
44 changes: 32 additions & 12 deletions parsedmarc/mail/gmail.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,33 @@
else:
raise e

def _fetch_all_message_ids(self, reports_label_id, page_token=None):
results = (
self.service.users()
.messages()
.list(
userId="me",
includeSpamTrash=self.include_spam_trash,
labelIds=[reports_label_id],
pageToken=page_token,
def _fetch_all_message_ids(self, reports_label_id, page_token=None,
since=None):
if since:
results = (

Check warning on line 73 in parsedmarc/mail/gmail.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/mail/gmail.py#L72-L73

Added lines #L72 - L73 were not covered by tests
self.service.users()
.messages()
.list(
userId="me",
includeSpamTrash=self.include_spam_trash,
labelIds=[reports_label_id],
pageToken=page_token,
q=f'after:{since}',
)
.execute()
)
else:
results = (

Check warning on line 86 in parsedmarc/mail/gmail.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/mail/gmail.py#L86

Added line #L86 was not covered by tests
self.service.users()
.messages()
.list(
userId="me",
includeSpamTrash=self.include_spam_trash,
labelIds=[reports_label_id],
pageToken=page_token,
)
.execute()
)
.execute()
)
messages = results.get("messages", [])
for message in messages:
yield message["id"]
Expand All @@ -90,7 +105,12 @@

def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
reports_label_id = self._find_label_id_for_label(reports_folder)
return [id for id in self._fetch_all_message_ids(reports_label_id)]
since = kwargs.get('since')
if since:
return [id for id in self._fetch_all_message_ids(reports_label_id,

Check warning on line 110 in parsedmarc/mail/gmail.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/mail/gmail.py#L108-L110

Added lines #L108 - L110 were not covered by tests
since=since)]
else:
return [id for id in self._fetch_all_message_ids(reports_label_id)]

Check warning on line 113 in parsedmarc/mail/gmail.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/mail/gmail.py#L113

Added line #L113 was not covered by tests

def fetch_message(self, message_id):
msg = self.service.users().messages()\
Expand Down
18 changes: 13 additions & 5 deletions parsedmarc/mail/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,17 +144,22 @@
folder_id = self._find_folder_id_from_folder_path(folder_name)
url = f'/users/{self.mailbox_name}/mailFolders/' \
f'{folder_id}/messages'
since = kwargs.get('since')
if not since:
since = None

Check warning on line 149 in parsedmarc/mail/graph.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/mail/graph.py#L147-L149

Added lines #L147 - L149 were not covered by tests
batch_size = kwargs.get('batch_size')
if not batch_size:
batch_size = 0
emails = self._get_all_messages(url, batch_size)
emails = self._get_all_messages(url, batch_size, since)

Check warning on line 153 in parsedmarc/mail/graph.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/mail/graph.py#L153

Added line #L153 was not covered by tests
return [email['id'] for email in emails]

def _get_all_messages(self, url, batch_size):
def _get_all_messages(self, url, batch_size, since):
messages: list
params = {
'$select': 'id'
}
if since:
params['$filter'] = f'receivedDateTime ge {since}'

Check warning on line 162 in parsedmarc/mail/graph.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/mail/graph.py#L161-L162

Added lines #L161 - L162 were not covered by tests
if batch_size and batch_size > 0:
params['$top'] = batch_size
else:
Expand All @@ -165,8 +170,9 @@
messages = result.json()['value']
# Loop if next page is present and not obtained message limit.
while '@odata.nextLink' in result.json() and (
since is not None or (
batch_size == 0 or
batch_size - len(messages) > 0):
batch_size - len(messages) > 0)):
result = self._client.get(result.json()['@odata.nextLink'])
if result.status_code != 200:
raise RuntimeError(f'Failed to fetch messages {result.text}')
Expand All @@ -181,13 +187,15 @@
raise RuntimeWarning(f"Failed to mark message read"
f"{resp.status_code}: {resp.json()}")

def fetch_message(self, message_id: str):
def fetch_message(self, message_id: str, **kwargs):
url = f'/users/{self.mailbox_name}/messages/{message_id}/$value'
result = self._client.get(url)
if result.status_code != 200:
raise RuntimeWarning(f"Failed to fetch message"
f"{result.status_code}: {result.json()}")
self.mark_message_read(message_id)
mark_read = kwargs.get('mark_read')
if mark_read:
self.mark_message_read(message_id)

Check warning on line 198 in parsedmarc/mail/graph.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/mail/graph.py#L196-L198

Added lines #L196 - L198 were not covered by tests
return result.text

def delete_message(self, message_id: str):
Expand Down
6 changes: 5 additions & 1 deletion parsedmarc/mail/imap.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@

def fetch_messages(self, reports_folder: str, **kwargs):
self._client.select_folder(reports_folder)
return self._client.search()
since = kwargs.get('since')
if since:
return self._client.search([u'SINCE', since])

Check warning on line 36 in parsedmarc/mail/imap.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/mail/imap.py#L34-L36

Added lines #L34 - L36 were not covered by tests
else:
return self._client.search()

Check warning on line 38 in parsedmarc/mail/imap.py

View check run for this annotation

Codecov / codecov/patch

parsedmarc/mail/imap.py#L38

Added line #L38 was not covered by tests

def fetch_message(self, message_id):
return self._client.fetch_message(message_id, parse=False)
Expand Down
Loading