Skip to content

Commit ea44658

Browse files
Merge pull request #204 from obsidianforensics/timestamp-parser-refactor
Refactor the timestamp parser with more structured data types and tim…
2 parents e687b96 + 61f5a43 commit ea44658

File tree

5 files changed

+79
-36
lines changed

5 files changed

+79
-36
lines changed

.github/workflows/unit-tests.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
runs-on: ${{ matrix.os }}
1616
strategy:
1717
matrix:
18-
python-version: ['3.10', '3.11', '3.12']
18+
python-version: ['3.11', '3.12', '3.13']
1919
os: [ubuntu-latest, windows-latest, macos-latest]
2020

2121
steps:

unfurl/parsers/parse_timestamp.py

+72-17
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2020 Google LLC
1+
# Copyright 2024 Ryan Benson
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
1414

1515
import datetime
1616
import re
17+
1718
from unfurl import utils
1819

1920
timestamp_edge = {
@@ -50,7 +51,11 @@ def decode_epoch_seconds(seconds):
5051
2030: 1900000000
5152
5253
"""
53-
return datetime.datetime.utcfromtimestamp(float(seconds)), 'Epoch seconds'
54+
return {
55+
'data_type': 'timestamp.epoch-seconds',
56+
'display_type': 'Epoch seconds',
57+
'timestamp_value': str(datetime.datetime.fromtimestamp(float(seconds), datetime.UTC))
58+
}
5459

5560

5661
def decode_epoch_centiseconds(centiseconds):
@@ -68,9 +73,13 @@ def decode_epoch_centiseconds(centiseconds):
6873
"""
6974
# Trim off the 4 trailing 0s (don't add precision that wasn't in the timestamp)
7075
converted_ts = trim_zero_fractional_seconds(
71-
str(datetime.datetime.utcfromtimestamp(float(centiseconds) / 100)), 4)
72-
return converted_ts, 'Epoch centiseconds'
76+
str(datetime.datetime.fromtimestamp(float(centiseconds) / 100, datetime.UTC)), 4)
7377

78+
return {
79+
'data_type': 'timestamp.epoch-centiseconds',
80+
'display_type': 'Epoch centiseconds',
81+
'timestamp_value': converted_ts
82+
}
7483

7584
def decode_epoch_milliseconds(milliseconds):
7685
"""Decode a numeric timestamp in Epoch milliseconds format to a human-readable timestamp.
@@ -87,7 +96,12 @@ def decode_epoch_milliseconds(milliseconds):
8796
converted_dt = datetime.datetime(1970, 1, 1) + datetime.timedelta(milliseconds=float(milliseconds))
8897
# Trim off the 3 trailing 0s (don't add precision that wasn't in the timestamp)
8998
converted_ts = trim_zero_fractional_seconds(str(converted_dt), 3)
90-
return converted_ts, 'Epoch milliseconds'
99+
100+
return {
101+
'data_type': 'timestamp.epoch-milliseconds',
102+
'display_type': 'Epoch milliseconds',
103+
'timestamp_value': converted_ts
104+
}
91105

92106

93107
def decode_epoch_ten_microseconds(ten_microseconds):
@@ -105,9 +119,13 @@ def decode_epoch_ten_microseconds(ten_microseconds):
105119
"""
106120
# Trim off the trailing 0 (don't add precision that wasn't in the timestamp)
107121
converted_ts = trim_zero_fractional_seconds(
108-
str(datetime.datetime.utcfromtimestamp(float(ten_microseconds) / 100000)), 1)
109-
return converted_ts, 'Epoch ten-microsecond increments'
122+
str(datetime.datetime.fromtimestamp(float(ten_microseconds) / 100000, datetime.UTC)), 1)
110123

124+
return {
125+
'data_type': 'timestamp.epoch-ten-microseconds',
126+
'display_type': 'Epoch ten-microsecond increments',
127+
'timestamp_value': converted_ts
128+
}
111129

112130
def decode_epoch_microseconds(microseconds):
113131
"""Decode a numeric timestamp in Epoch microseconds format to a human-readable timestamp.
@@ -121,8 +139,13 @@ def decode_epoch_microseconds(microseconds):
121139
2030: 1900000000000000
122140
123141
"""
124-
converted_ts = str(datetime.datetime.utcfromtimestamp(float(microseconds) / 1000000))
125-
return converted_ts, 'Epoch microseconds'
142+
converted_ts = datetime.datetime.fromtimestamp(float(microseconds) / 1000000, datetime.UTC)
143+
144+
return {
145+
'data_type': 'timestamp.epoch-microseconds',
146+
'display_type': 'Epoch microseconds',
147+
'timestamp_value': str(converted_ts)
148+
}
126149

127150

128151
def decode_webkit(microseconds):
@@ -136,8 +159,13 @@ def decode_webkit(microseconds):
136159
2025: 13380163200000000
137160
138161
"""
139-
return datetime.datetime.utcfromtimestamp((float(microseconds) / 1000000) - 11644473600), 'Webkit'
162+
converted_ts = datetime.datetime.fromtimestamp((float(microseconds) / 1000000) - 11644473600, datetime.UTC)
140163

164+
return {
165+
'data_type': 'timestamp.webkit',
166+
'display_type': 'Webkit',
167+
'timestamp_value': str(converted_ts)
168+
}
141169

142170
def decode_windows_filetime(intervals):
143171
"""Decode a numeric timestamp in Windows FileTime format to a human-readable timestamp.
@@ -152,8 +180,13 @@ def decode_windows_filetime(intervals):
152180
2065: 146424672000000000
153181
154182
"""
155-
return datetime.datetime.utcfromtimestamp((float(intervals) / 10000000) - 11644473600), 'Windows FileTime'
183+
converted_ts = datetime.datetime.fromtimestamp((float(intervals) / 10000000) - 11644473600, datetime.UTC)
156184

185+
return {
186+
'data_type': 'timestamp.windows-filetime',
187+
'display_type': 'Windows FileTime',
188+
'timestamp_value': str(converted_ts)
189+
}
157190

158191
def decode_datetime_ticks(ticks):
159192
"""Decode a numeric timestamp in .Net/C# DateTime ticks format to a human-readable timestamp.
@@ -175,7 +208,13 @@ def decode_datetime_ticks(ticks):
175208
176209
"""
177210
seconds = (ticks - 621355968000000000) / 10000000
178-
return (datetime.datetime.fromtimestamp(seconds)), 'DateTime ticks'
211+
converted_ts = datetime.datetime.fromtimestamp(seconds)
212+
213+
return {
214+
'data_type': 'timestamp.datetime-ticks',
215+
'display_type': 'DateTime ticks',
216+
'timestamp_value': str(converted_ts)
217+
}
179218

180219

181220
def decode_mac_absolute_time(seconds):
@@ -194,7 +233,13 @@ def decode_mac_absolute_time(seconds):
194233
2035: 1072915200
195234
196235
"""
197-
return datetime.datetime.utcfromtimestamp(float(seconds)+978307200), 'Mac Absolute Time / Cocoa'
236+
converted_ts = datetime.datetime.fromtimestamp(float(seconds) + 978307200, datetime.UTC)
237+
238+
return {
239+
'data_type': 'timestamp.mac-absolute-time',
240+
'display_type': 'Mac Absolute Time / Cocoa',
241+
'timestamp_value': str(converted_ts)
242+
}
198243

199244

200245
def decode_epoch_hex(seconds):
@@ -209,7 +254,12 @@ def decode_epoch_hex(seconds):
209254
210255
"""
211256
timestamp, _ = decode_epoch_seconds(int(seconds, 16))
212-
return timestamp, 'Epoch seconds (hex)'
257+
258+
return {
259+
'data_type': 'timestamp.epoch-seconds-hex',
260+
'display_type': 'Epoch seconds (hex)',
261+
'timestamp_value': str(timestamp)
262+
}
213263

214264

215265
def decode_windows_filetime_hex(intervals):
@@ -227,7 +277,12 @@ def decode_windows_filetime_hex(intervals):
227277
"""
228278
int_right = int(intervals, 16)
229279
timestamp, _ = decode_windows_filetime(int_right)
230-
return timestamp, 'Windows FileTime (hex)'
280+
281+
return {
282+
'data_type': 'timestamp.windows-filetime-hex',
283+
'display_type': 'Windows FileTime (hex)',
284+
'timestamp_value': str(timestamp)
285+
}
231286

232287

233288
def run(unfurl, node):
@@ -333,6 +388,6 @@ def run(unfurl, node):
333388

334389
if new_timestamp != (None, 'unknown'):
335390
unfurl.add_to_queue(
336-
data_type=new_timestamp[1], key=None, value=new_timestamp[0],
337-
hover=f'Converted as {new_timestamp[1]}', parent_id=node.node_id,
391+
data_type=new_timestamp['data_type'], key=None, value=new_timestamp['timestamp_value'],
392+
hover=f'Converted as {new_timestamp["display_type"]}', parent_id=node.node_id,
338393
incoming_edge_config=timestamp_edge)

unfurl/parsers/parse_url.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2019 Google LLC
1+
# Copyright 2024 Ryan Benson
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -43,7 +43,8 @@ def parse_delimited_string(unfurl_instance, node, delimiter, pairs=False) -> Non
4343

4444
def try_url_unquote(unfurl_instance, node) -> bool:
4545
unquoted = urllib.parse.unquote_plus(node.value)
46-
if unquoted != node.value:
46+
# Skip values that end in a UTC-offset suffix such as +00:00 (an already-decoded timestamp string); unquote_plus would turn that '+' into a space and queue a corrupted copy.
47+
if unquoted != node.value and not re.match(r'.*\+\d\d:\d\d$', node.value):
4748
unfurl_instance.add_to_queue(
4849
data_type='string', key=None, value=unquoted,
4950
hover='Unquoted URL (replaced %xx escapes with their single-character equivalent)',

unfurl/tests/unit/test_bluesky.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def test_bluesky_post(self):
2424
self.assertEqual(1732040395098000, test.nodes[12].value)
2525

2626
# embedded timestamp parses correctly
27-
self.assertEqual('2024-11-19 18:19:55.098000', test.nodes[13].value)
27+
self.assertEqual('2024-11-19 18:19:55.098000+00:00', test.nodes[13].value)
2828

2929
def test_bluesky_bare_tid(self):
3030
""" Test parsing a Bluesky/ATProto TID"""
@@ -46,7 +46,7 @@ def test_bluesky_bare_tid(self):
4646
self.assertEqual(1731543333133695, test.nodes[2].value)
4747

4848
# embedded timestamp parses correctly
49-
self.assertEqual('2024-11-14 00:15:33.133695', test.nodes[3].value)
49+
self.assertEqual('2024-11-14 00:15:33.133695+00:00', test.nodes[3].value)
5050

5151
if __name__ == '__main__':
5252
unittest.main()

unfurl/tests/unit/test_jwt.py

+1-14
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from unfurl.core import Unfurl
2-
import datetime
32
import unittest
43

54

@@ -32,10 +31,6 @@ def test_jwt_simple(self):
3231
# confirm that the explanation of the standard "typ" parameter was added
3332
self.assertIn('declare the media type', test.nodes[12].label)
3433

35-
# make sure the queue finished empty
36-
self.assertTrue(test.queue.empty())
37-
self.assertEqual(len(test.edges), 0)
38-
3934
def test_jwt_iat_timestamp(self):
4035
"""Parse a sole JWT with an iat field that is parsed as a timestamp.
4136
@@ -64,11 +59,7 @@ def test_jwt_iat_timestamp(self):
6459
self.assertEqual(1422779638, test.nodes[10].value)
6560

6661
# confirm that the "iat" claim was detected and parsed as a timestamp
67-
self.assertEqual(datetime.datetime(2015, 2, 1, 8, 33, 58), test.nodes[14].value)
68-
69-
# make sure the queue finished empty
70-
self.assertTrue(test.queue.empty())
71-
self.assertEqual(len(test.edges), 0)
62+
self.assertEqual('2015-02-01 08:33:58+00:00', test.nodes[14].value)
7263

7364
def test_jwt_as_url_segment(self):
7465
"""Parse a JWT that is part of the URL.
@@ -101,10 +92,6 @@ def test_jwt_as_url_segment(self):
10192
# confirm that the header was parsed as JSON
10293
self.assertEqual('alg', test.nodes[19].key)
10394

104-
# make sure the queue finished empty
105-
self.assertTrue(test.queue.empty())
106-
self.assertEqual(len(test.edges), 0)
107-
10895

10996
if __name__ == '__main__':
11097
unittest.main()

0 commit comments

Comments
 (0)