Skip to content

Commit 65aff09

Browse files
authored
fix: dismiss until (#5270)
1 parent a920f1b commit 65aff09

File tree

6 files changed

+1470
-5
lines changed

6 files changed

+1470
-5
lines changed

keep/api/bl/dismissal_expiry_bl.py

Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,308 @@
1+
"""
2+
Business logic for handling dismissal expiry.
3+
4+
This module provides functionality to automatically expire alert dismissals
5+
when their dismissUntil timestamp has passed.
6+
"""
7+
8+
import datetime
9+
import logging
10+
from typing import List, Optional
11+
12+
from sqlmodel import Session, select
13+
from keep.api.core.db import get_session_sync
14+
from keep.api.core.db_utils import get_json_extract_field
15+
from keep.api.core.elastic import ElasticClient
16+
from keep.api.core.dependencies import get_pusher_client
17+
from keep.api.models.action_type import ActionType
18+
from keep.api.models.alert import AlertDto
19+
from keep.api.models.db.alert import Alert, AlertAudit, AlertEnrichment
20+
21+
22+
class DismissalExpiryBl:
    """Restore alerts whose time-limited dismissal has run out.

    The enrichment keys this class works with are ``dismissed`` (boolean) and
    ``dismissUntil`` (a timestamp string, or the literal ``"forever"``).
    """

    # Timestamp layout this class has always accepted for ``dismissUntil``.
    _DISMISS_UNTIL_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"

    # Transient "disposable_*" copies removed once a dismissal expires.
    # "disposable_dismissedUntil" is kept from the original list;
    # "disposable_dismissUntil" is added because the enrichment key used
    # everywhere else in this class is "dismissUntil".
    # NOTE(review): confirm which spelling the enrichment writer actually emits.
    _DISPOSABLE_FIELDS = (
        "disposable_dismissed",
        "disposable_dismissedUntil",
        "disposable_dismissUntil",
        "disposable_note",
        "disposable_status",
    )

    @staticmethod
    def _parse_dismiss_until(value) -> datetime.datetime:
        """Convert a ``dismissUntil`` value into a timezone-aware UTC datetime.

        The strict ``%Y-%m-%dT%H:%M:%S.%fZ`` layout is tried first. If it does
        not match, the value is parsed as generic ISO-8601 so that timestamps
        without fractional seconds or with an explicit offset still expire
        instead of being rejected on every run.

        Args:
            value: Raw ``dismissUntil`` value taken from the enrichment dict.

        Returns:
            The parsed moment, normalised to UTC. Values that carry no offset
            are treated as UTC.

        Raises:
            TypeError: If ``value`` is not a string.
            ValueError: If ``value`` cannot be parsed as a timestamp.
        """
        if not isinstance(value, str):
            raise TypeError(
                f"dismissUntil must be a string, got {type(value).__name__}"
            )

        try:
            parsed = datetime.datetime.strptime(
                value, DismissalExpiryBl._DISMISS_UNTIL_FORMAT
            )
        except ValueError:
            candidate = value.strip()
            # Older Python versions of fromisoformat() reject a trailing "Z",
            # so spell the UTC offset out explicitly.
            if candidate.endswith(("Z", "z")):
                candidate = candidate[:-1] + "+00:00"
            parsed = datetime.datetime.fromisoformat(candidate)

        if parsed.tzinfo is None:
            return parsed.replace(tzinfo=datetime.timezone.utc)
        return parsed.astimezone(datetime.timezone.utc)

    @staticmethod
    def get_alerts_with_expired_dismissals(session: Session) -> List[AlertEnrichment]:
        """
        Get all AlertEnrichment records whose dismissUntil timestamp has passed.

        Returns enrichment records where:
        1. dismissed = true
        2. dismissUntil is not null and not "forever"
        3. dismissUntil timestamp is in the past

        Rows whose dismissUntil cannot be parsed are logged and skipped.

        Args:
            session: Database session

        Returns:
            List of AlertEnrichment objects with expired dismissals
        """
        logger = logging.getLogger(__name__)
        now = datetime.datetime.now(datetime.timezone.utc)

        logger.info("Searching for enrichments with expired dismissals")

        # Use the shared helper so the JSON extraction works on every backend.
        dismissed_field = get_json_extract_field(
            session, AlertEnrichment.enrichments, "dismissed"
        )
        dismissed_until_field = get_json_extract_field(
            session, AlertEnrichment.enrichments, "dismissUntil"
        )

        # Different databases store/extract JSON booleans differently:
        # - SQLite: json_extract returns 1/0 for true/false
        # - MySQL: JSON_UNQUOTE(JSON_EXTRACT()) returns "true"/"false" strings
        # - PostgreSQL: ->> operator returns "true"/"false" strings
        if session.bind.dialect.name == "sqlite":
            dismissed_condition = dismissed_field == 1
        else:
            dismissed_condition = dismissed_field == "true"

        candidate_enrichments = session.exec(
            select(AlertEnrichment).where(
                dismissed_condition,
                dismissed_until_field.isnot(None),
                dismissed_until_field != "forever",
            )
        ).all()

        logger.info(f"Found {len(candidate_enrichments)} candidate enrichments with dismissals")

        # The timestamp comparison is done in Python: the value is a string
        # inside a JSON column, so parsing it here is safer than in SQL.
        expired_enrichments = []
        for enrichment in candidate_enrichments:
            dismiss_until_str = (enrichment.enrichments or {}).get("dismissUntil")
            if not dismiss_until_str or dismiss_until_str == "forever":
                continue

            try:
                dismiss_until = DismissalExpiryBl._parse_dismiss_until(dismiss_until_str)
            except (ValueError, TypeError) as e:
                # Log invalid timestamp but don't fail the whole run.
                logger.warning(
                    f"Invalid dismissedUntil timestamp for fingerprint {enrichment.alert_fingerprint}: {dismiss_until_str}",
                    extra={
                        "tenant_id": enrichment.tenant_id,
                        "fingerprint": enrichment.alert_fingerprint,
                        "error": str(e),
                    },
                )
                continue

            if now > dismiss_until:
                logger.info(
                    f"Found expired dismissal for fingerprint {enrichment.alert_fingerprint}",
                    extra={
                        "tenant_id": enrichment.tenant_id,
                        "fingerprint": enrichment.alert_fingerprint,
                        "dismissed_until": dismiss_until_str,
                        "expired_by_seconds": (now - dismiss_until).total_seconds(),
                    },
                )
                expired_enrichments.append(enrichment)

        logger.info(f"Found {len(expired_enrichments)} enrichments with expired dismissals")
        return expired_enrichments

    @staticmethod
    def _reindex_alert(logger, session, enrichment, new_enrichments, log_extra) -> None:
        """Best-effort: push the un-dismissed alert to Elasticsearch.

        Loads the most recent Alert for the enrichment's tenant/fingerprint,
        overlays ``new_enrichments`` on its event and indexes the result.
        Any failure is logged and swallowed.
        """
        try:
            latest_alert = session.exec(
                select(Alert)
                .where(Alert.tenant_id == enrichment.tenant_id)
                .where(Alert.fingerprint == enrichment.alert_fingerprint)
                .order_by(Alert.timestamp.desc())
                .limit(1)
            ).first()

            if not latest_alert:
                logger.warning(
                    f"No alert found for fingerprint {enrichment.alert_fingerprint}, skipping Elasticsearch update",
                    extra=log_extra,
                )
                return

            alert_data = latest_alert.event.copy()
            alert_data.update(new_enrichments)  # Apply updated enrichments
            alert_dto = AlertDto(**alert_data)

            ElasticClient(enrichment.tenant_id).index_alert(alert_dto)
            logger.info(
                f"Updated Elasticsearch index for fingerprint {enrichment.alert_fingerprint}",
                extra=log_extra,
            )
        except Exception as e:
            logger.error(
                f"Failed to update Elasticsearch for fingerprint {enrichment.alert_fingerprint}: {e}",
                extra=log_extra,
            )

    @staticmethod
    def _notify_ui(logger, enrichment, log_extra) -> None:
        """Best-effort: emit an "alert-update" event on the tenant's channel.

        Does nothing when no pusher client is available. Any failure is
        logged and swallowed.
        """
        try:
            pusher_client = get_pusher_client()
            if pusher_client:
                pusher_client.trigger(
                    f"private-{enrichment.tenant_id}",
                    "alert-update",
                    {
                        "fingerprint": enrichment.alert_fingerprint,
                        "action": "dismissal_expired",
                    },
                )
                logger.info(
                    f"Sent UI notification for fingerprint {enrichment.alert_fingerprint}",
                    extra=log_extra,
                )
        except Exception as e:
            logger.error(
                f"Failed to send UI notification for fingerprint {enrichment.alert_fingerprint}: {e}",
                extra=log_extra,
            )

    @staticmethod
    def check_dismissal_expiry(logger: logging.Logger, session: Optional[Session] = None):
        """
        Check for alerts with expired dismissUntil and restore them.

        This function:
        1. Finds AlertEnrichment records with expired dismissUntil timestamps
        2. Updates their enrichments to set dismissed=false and dismissUntil=null
        3. Cleans up disposable fields
        4. Adds audit trail
        5. Updates Elasticsearch indexes (best-effort)
        6. Notifies UI of changes (best-effort)
        7. Commits all database changes in a single transaction

        Args:
            logger: Logger instance for detailed logging
            session: Optional database session (creates new if None). A session
                created here is closed before returning; a session supplied by
                the caller is left open.

        Raises:
            Exception: Any error from the lookup or the commit is logged, the
                session is rolled back, and the error is re-raised.
        """
        logger.info("Starting dismissal expiry check")

        # Remember whether we created the session so that only our own
        # session gets closed in the finally block.
        owns_session = session is None
        if owns_session:
            session = get_session_sync()

        try:
            expired_enrichments = DismissalExpiryBl.get_alerts_with_expired_dismissals(session)

            if not expired_enrichments:
                logger.info("No enrichments with expired dismissals found")
                return

            logger.info(f"Processing {len(expired_enrichments)} expired dismissal enrichments")

            for enrichment in expired_enrichments:
                log_extra = {
                    "tenant_id": enrichment.tenant_id,
                    "fingerprint": enrichment.alert_fingerprint,
                }

                # Store original values for audit. The key is "dismissUntil",
                # the same one the lookup above filters on.
                original_dismissed = enrichment.enrichments.get("dismissed", False)
                original_dismissed_until = enrichment.enrichments.get("dismissUntil")

                logger.info(
                    f"Processing expired dismissal for fingerprint {enrichment.alert_fingerprint}",
                    extra={**log_extra, "dismissed_until": original_dismissed_until},
                )

                # Work on a copy and reassign it so the ORM sees the JSON
                # column as changed.
                new_enrichments = enrichment.enrichments.copy()
                new_enrichments["dismissed"] = False
                new_enrichments["dismissUntil"] = None  # Clear the original field

                # Clean up disposable fields (similar to maintenance windows)
                cleaned_fields = [
                    field
                    for field in DismissalExpiryBl._DISPOSABLE_FIELDS
                    if field in new_enrichments
                ]
                for field in cleaned_fields:
                    new_enrichments.pop(field)

                if cleaned_fields:
                    logger.info(
                        f"Cleaned up disposable fields: {cleaned_fields}",
                        extra=log_extra,
                    )

                enrichment.enrichments = new_enrichments
                session.add(enrichment)

                # Audit trail is best-effort: a failure here must not block
                # the alert from being restored.
                try:
                    audit = AlertAudit(
                        tenant_id=enrichment.tenant_id,
                        fingerprint=enrichment.alert_fingerprint,
                        user_id="system",
                        action=ActionType.DISMISSAL_EXPIRED.value,  # Use .value to get the string
                        description=(
                            f"Dismissal expired at {original_dismissed_until}, "
                            f"enrichment updated from dismissed={original_dismissed} to dismissed=False"
                        ),
                    )
                    session.add(audit)
                    logger.info(
                        "Added audit trail for expired dismissal",
                        extra=log_extra,
                    )
                except Exception as e:
                    logger.error(
                        f"Failed to add audit trail for fingerprint {enrichment.alert_fingerprint}: {e}",
                        extra=log_extra,
                    )

                DismissalExpiryBl._reindex_alert(
                    logger, session, enrichment, new_enrichments, log_extra
                )
                DismissalExpiryBl._notify_ui(logger, enrichment, log_extra)

            # Commit all changes
            session.commit()
            logger.info(
                f"Successfully processed {len(expired_enrichments)} expired dismissal enrichments",
                extra={"processed_count": len(expired_enrichments)},
            )

        except Exception as e:
            logger.error(f"Error during dismissal expiry check: {e}", exc_info=True)
            session.rollback()
            raise
        finally:
            if owns_session:
                session.close()
            logger.info("Dismissal expiry check completed")

keep/api/models/action_type.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ class ActionType(enum.Enum):
3737
UNCOMMENT = "a comment was removed from the alert"
3838
MAINTENANCE = "Alert is in maintenance window"
3939
MAINTENANCE_EXPIRED = "Alert has been removed from maintenance window"
40+
DISMISSAL_EXPIRED = "Alert dismissal expired"
4041
INCIDENT_COMMENT = "A comment was added to the incident"
4142
INCIDENT_ENRICH = "Incident enriched"
4243
INCIDENT_STATUS_CHANGE = "Incident status changed"

keep/api/tasks/process_watcher_task.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from filelock import FileLock, Timeout
55
import redis
66
from keep.api.bl.maintenance_windows_bl import MaintenanceWindowsBl
7+
from keep.api.bl.dismissal_expiry_bl import DismissalExpiryBl
78
from keep.api.consts import REDIS, WATCHER_LAPSED_TIME
89

910
logger = logging.getLogger(__name__)
@@ -21,9 +22,19 @@ async def async_process_watcher(*args):
2122
logger.info("Watcher process started, acquiring lock.")
2223
try:
2324
loop = asyncio.get_running_loop()
25+
26+
# Run maintenance windows recovery
2427
resp = await loop.run_in_executor(ctx.get("pool"), MaintenanceWindowsBl.recover_strategy, logger)
28+
29+
# Run dismissal expiry check
30+
await loop.run_in_executor(
31+
ctx.get("pool"),
32+
DismissalExpiryBl.check_dismissal_expiry,
33+
logger
34+
)
35+
2536
except Exception as e:
26-
logger.error("Error in run_in_executor: %s", e, exc_info=True)
37+
logger.error("Error in watcher process: %s", e, exc_info=True)
2738
raise
2839
finally:
2940
await redis_instance.delete(lock_key)
@@ -36,10 +47,20 @@ async def async_process_watcher(*args):
3647
with FileLock("/tmp/watcher_process.lock", timeout=WATCHER_LAPSED_TIME//2):
3748
logger.info("Watcher process started, acquiring lock.")
3849
loop = asyncio.get_running_loop()
50+
51+
# Run maintenance windows recovery
3952
resp = await loop.run_in_executor(None, MaintenanceWindowsBl.recover_strategy, logger)
40-
logger.info("Sleeping for 60 seconds before next run.")
53+
54+
# Run dismissal expiry check
55+
await loop.run_in_executor(
56+
None,
57+
DismissalExpiryBl.check_dismissal_expiry,
58+
logger
59+
)
60+
61+
logger.info(f"Sleeping for {WATCHER_LAPSED_TIME} seconds before next run.")
4162
complete_time = datetime.datetime.now()
4263
await asyncio.sleep(max(0, WATCHER_LAPSED_TIME - (complete_time - init_time).total_seconds()))
43-
logger.info("Unlock process completed.")
64+
logger.info("Watcher process completed.")
4465
except Timeout:
4566
logger.info("Watcher process is already running, skipping this run.")

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "keep"
3-
version = "0.47.1"
3+
version = "0.47.2"
44
description = "Alerting. for developers, by developers."
55
authors = ["Keep Alerting LTD"]
66
packages = [{include = "keep"}]

0 commit comments

Comments
 (0)