Skip to content

Commit c18c093

Browse files
bszaboBernard Szabo
and
Bernard Szabo
authored
Bszabo/frontend app authoring pipeline testing (#21)
* feat: Run Datadog synthetic tests * feat: drop confusing "waffle switch" in names --------- Co-authored-by: Bernard Szabo <[email protected]>
1 parent 1214d38 commit c18c093

File tree

3 files changed

+316
-0
lines changed

3 files changed

+316
-0
lines changed

scripts/dd_synthetic_tests.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../tubular/scripts/dd_synthetic_tests.py

setup.cfg

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ console_scripts =
3131
create_tag.py = tubular.scripts.create_tag:create_tag
3232
delete-asg.py = tubular.scripts.delete_asg:delete_asg
3333
delete_expired_partner_gdpr_reports.py = tubular.scripts.delete_expired_partner_gdpr_reports:delete_expired_reports
34+
dd_synthetic_tests.py = tubular.scripts.dd_synthetic_tests:run_synthetic_tests
3435
drupal_backup_database.py = tubular.scripts.drupal_backup_database:backup_database
3536
drupal_clear_varnish.py = tubular.scripts.drupal_clear_varnish:clear_varnish_cache
3637
drupal_deploy.py = tubular.scripts.drupal_deploy:deploy

tubular/scripts/dd_synthetic_tests.py

+314
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
#! /usr/bin/env python3
2+
3+
import click
4+
import logging
5+
import os
6+
import requests
7+
import time
8+
import sys
9+
10+
class SyntheticTest:
    '''
    Attributes for a Datadog synthetic test and its test run.

    Instances are created before a run is triggered, so only the static
    attributes (name and public ID) are known at construction time; the run ID
    and the pass/fail outcome are filled in later.
    '''

    def __init__(self, name, public_id):
        '''
        :param name: The test's Datadog name. Runs of whitespace (including the newlines and
            indentation that come with triple-quoted literals) are collapsed to single spaces
            so the name reads cleanly on one line in log output.
        :param public_id: The test's Datadog Test ID
        '''
        # Test names are typically written as indented multi-line string literals; normalise
        # them so that log messages are not broken across several lines.
        self.name = ' '.join(name.split()) if isinstance(name, str) else name
        self.public_id = public_id  # The test's Datadog Test ID
        self.test_run_id = None     # The run ID given by Datadog to this test's invocation
        self.success = None         # None until a result is known; then True (passed) or False (failed)

    def __repr__(self):
        return (
            f'{type(self).__name__}(name={self.name!r}, public_id={self.public_id!r}, '
            f'test_run_id={self.test_run_id!r}, success={self.success!r})'
        )
19+
20+
class DatadogClient:
    ''' Client class to invoke datadog API to run and monitor synthetic tests '''

    DATADOG_SYNTHETIC_TESTS_API_URL = "https://api.datadoghq.com/api/v1/synthetics/tests"
    MAX_ALLOWABLE_TIME_SECS = 600  # 10 minutes, measured from the moment the batch is triggered
    POLL_INTERVAL_SECS = 5         # Pause between successive polls for a single test's result
    REQUEST_TIMEOUT_SECS = 30      # Per-HTTP-request timeout so a stalled connection cannot hang the task

    # Class-level (shared) object describing the designated "enable switch" test.
    DEPLOYMENT_TESTING_ENABLED_SWITCH = SyntheticTest(
        '''
        Deployment testing enable test governing CI/CD synthetic testing
        ''',
        "sad-hqu-h33"
    )

    def __init__(self, api_key, app_key):
        '''
        :param api_key: Value sent in the DD-API-KEY request header
        :param app_key: Value sent in the DD-APPLICATION-KEY request header
        '''
        self.api_key = api_key
        self.app_key = app_key
        self.test_batch_id = None  # A 'batch' is a set of tests intended to be run in parallel
        self.batch_id = None  # Alias of test_batch_id, kept because earlier code stored the ID under this name
        self.trigger_time = None  # The system time at which a batch's execution was requested
        self.tests_by_public_id = {}  # Dictionary mapping Datadog test ID to all info we have for a specific test

    def trigger_synthetic_tests(self, tests_to_report: "list[SyntheticTest]"):
        '''
        Trigger running of a batch of synthetic tests.

        :param tests_to_report: List of tests to run and report on
        :return: None, but saves test info including batch ID and test run IDs in 'self'
        :raises Exception: if the Datadog request fails or its response cannot be interpreted
        '''

        # Note that the list of tests to be run is one longer than the list of tests to be reported on.
        # The extra test is the so-called "deployment testing enable switch test".
        # That test should be modified via the Datadog UI to either always pass or always fail, depending
        # on whether synthetic testing is to be enabled at runtime or not, respectively.
        # While the test's result does affect how the pipeline operates, the result is not treated as reportable.
        tests_to_run = [self.DEPLOYMENT_TESTING_ENABLED_SWITCH, *tests_to_report]
        self._record_requested_test_particulars(tests_to_run)
        self.trigger_time = time.time()  # Key timeouts off of this
        logging.info(f'CI batch triggered at time {self.trigger_time}')

        try:
            response = self._trigger_batch_tests()  # Kicks off asynchronous test execution for a batch of tests
            response_body = response.json()
            self._record_batch_id(response_body)  # a single batch ID has now been assigned. Save for future reference
            self._map_test_run_ids(response_body)  # one test run ID per test has been assigned. Save for reference.

        except Exception as e:
            # Chain the original exception so the root cause's traceback is not lost.
            raise Exception("Datadog error on triggering tests: " + str(e)) from e

    def gate_on_deployment_testing_enable_switch(self):
        '''
        This is a bit hacky, but there's a designated test that's used as a deployment testing enable switch.
        If the test passes, it means that the synthetic testing GoCD pipeline is enabled, and the
        build should only proceed if all reportable tests pass; if the test fails, the build should proceed
        irrespective of any failures among the synthetic tests (which will be allowed to run, nonetheless).
        When this is intended, the GoCD pipeline responsible for running the tests should just return a
        success code without waiting for the reportable tests to complete their runs.

        :return: Nothing, but terminates task with a success code (sys.exit(0)) if the synthetic testing
                 feature is disabled and logs the decision to skip testing on this build
        :raises Exception: if no result for the switch test becomes available before the batch deadline
        '''
        deployment_testing_enabled = self._poll_for_test_result(self.DEPLOYMENT_TESTING_ENABLED_SWITCH)
        if deployment_testing_enabled is False:
            switch_test_name = self.DEPLOYMENT_TESTING_ENABLED_SWITCH.name
            logging.warning(
                f'*** Datadog Synthetic testing disabled via failing test {switch_test_name} ***')
            sys.exit(0)

    def get_and_record_test_results(self):
        '''
        Poll for pass/fail results for all batch tests

        Every test recorded in 'self.tests_by_public_id' is polled in turn, one after the other.

        :return: Nothing, but saves pass/fail results in 'self'
        :raises Exception: if any test's result is not available before the batch deadline
        '''
        for test in self.tests_by_public_id.values():
            test.success = self._poll_for_test_result(test)

    def get_failed_tests(self):
        '''
        Compile a list of all failed tests from the set of all tests that were run

        A test whose 'success' attribute is still None (no result recorded) is also counted as failed.

        :return: A list of failed test objects; Empty list if all tests passed
        '''
        return [test for test in self.tests_by_public_id.values() if not test.success]

    # ***************** Private methods **********************

    def _record_requested_test_particulars(self, test_requests):
        '''
        Save list of requested tests in this dictionary for later reference, indexed by test public ID
        '''
        for test in test_requests:
            self.tests_by_public_id[test.public_id] = test

    def _trigger_batch_tests(self):
        '''
        Ask datadog to run the set of selected synthetic tests
        returns the response from the datadog API call

        Note that using the ci (continuous integration) route leverages
        the parallel execution Datadog feature we pay extra for

        :raises Exception: if Datadog answers with any status other than 200
        '''
        url = f"{self.DATADOG_SYNTHETIC_TESTS_API_URL}/trigger/ci"
        headers = {
            "Content-Type": "application/json",
            "DD-API-KEY": self.api_key,
            "DD-APPLICATION-KEY": self.app_key
        }
        json_request_body = {"tests": [{"public_id": public_id} for public_id in self.tests_by_public_id]}
        # Without an explicit timeout, requests would wait indefinitely on a stalled connection.
        response = requests.post(url, headers=headers, json=json_request_body, timeout=self.REQUEST_TIMEOUT_SECS)
        if response.status_code != 200:
            raise Exception(f"Datadog API error. Status = {response.status_code}")
        return response

    def _record_batch_id(self, response_body):
        '''
        Datadog generates a single batch ID associated with the request for all the requested tests. This is distinct
        from the run ids, which are uniquely assigned to each test run.

        The ID is stored in 'self.test_batch_id' (the attribute initialised by the constructor) and mirrored
        in 'self.batch_id' for any code still reading the older attribute name.
        '''
        self.test_batch_id = response_body['batch_id']
        self.batch_id = self.test_batch_id

    def _map_test_run_ids(self, response_body):
        '''
        Saves the test run ID values assigned by Datadog to this batch request's tests, by setting
        'test_run_id' on the test object registered under each test's (unique) public id

        A test's public ID is assigned by Datadog when the test is created, and is entered as hard-coded
        test configuration data in this module. It is the public ids that are used in the test run results
        to identify which test is being reported on.

        The "deployment testing enabled switch test" is handled explicitly: its run ID is written to the
        class-level switch test object rather than looked up in the dictionary.
        '''
        for result in response_body['results']:
            public_id = result['public_id']
            test_run_id = result['result_id']
            if public_id == self.DEPLOYMENT_TESTING_ENABLED_SWITCH.public_id:
                self.DEPLOYMENT_TESTING_ENABLED_SWITCH.test_run_id = test_run_id
            else:
                self.tests_by_public_id[public_id].test_run_id = test_run_id

    def _poll_for_test_result(self, test):
        """
        Poll every few seconds for test run results for a single, specified test, until available.

        Note that if all tests take 90 seconds or more to run, the call into this method will take 90 or
        more seconds, but subsequent calls may just take a few seconds each, depending on
        test execution time variability.

        The timeout on this operation is relative to when the batch request for test execution was made,
        not relative to the last time we polled on a test result.

        :return: True on test success and False on test failure
        :raises Exception: if no result became available before the batch deadline
        """
        test_result = None
        while test_result is None and (time.time() - self.trigger_time) < self.MAX_ALLOWABLE_TIME_SECS:
            time.sleep(self.POLL_INTERVAL_SECS)
            test_result = self._get_test_result(test)
            logging.info(f'{test_result=}')

        if test_result is None:
            raise Exception("The test run timed out.")

        completion_time = time.time()
        logging.info(f"Test {test.public_id} finished at time {completion_time} with {test_result=}")
        return test_result

    def _get_test_result(self, test):
        """
        Issue a single request to the Datadog API to fetch test results for a single, specified test.

        :return: the value of result.passed from the response body when Datadog answers with status 200;
                 None when the status is anything else, or when this single request timed out, so that
                 the caller keeps polling until its overall deadline
        """
        url = f"{self.DATADOG_SYNTHETIC_TESTS_API_URL}/{test.public_id}/results/{test.test_run_id}"
        headers = {
            "DD-API-KEY": self.api_key,
            "DD-APPLICATION-KEY": self.app_key
        }

        try:
            response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT_SECS)
        except requests.exceptions.Timeout:
            # A slow individual poll is not fatal; the batch-level deadline still bounds the total wait.
            logging.warning(f"Timed out polling Datadog for test {test.public_id}; will retry")
            return None

        # NOTE(review): every non-200 status is treated as "result not available yet", which presumably
        # also hides authentication or configuration errors until the batch deadline -- confirm intended.
        if response.status_code != 200:
            return None

        response_json = response.json()
        return response_json['result']['passed']
208+
209+
"""
210+
Command-line script to run Datadog synthetic tests in the production environment and then slack notify and/or roll back
211+
"""
212+
213+
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
214+
215+
@click.option(
    '--enable-automated-rollbacks',
    is_flag=True,
    default=False,
    help='When set and synthetic tests fail, the most recent deployment to production is automatically rolled back'
)
@click.option(
    '--slack-notification-channel',
    required=False,
    help='When set and synthetic tests fail, an alert Slack message is sent to this channel'
)
def run_synthetic_tests(tests_to_report_on, enable_automated_rollbacks, slack_notification_channel):
    '''
    Trigger a batch of Datadog synthetic tests, wait for the results and exit the process with a
    status code reflecting the outcome.

    Datadog credentials are read from the DATADOG_API_KEY and DATADOG_APP_KEY environment variables.

    NOTE(review): no @click.command() decorator is applied, so the click options above presumably are
    not parsed from the command line, and 'tests_to_report_on' has no command-line equivalent; the
    function is only usable when called directly with all three arguments -- confirm how the
    console-script entry point is meant to invoke it.

    :param tests_to_report_on: List of SyntheticTest objects whose results determine the exit code
    :param enable_automated_rollbacks: Failing tests trigger a rollback in the build pipeline when true.
        Not yet supported: a true value logs an error and exits with code 1.
    :param slack_notification_channel: Newly failing tests deliver a slack message to this channel; none on
        repeat fails. Currently unused by this function.
    :return: exits thread with success or fail code indicating tests' collective success or failure
        (of one or more)
    '''
    if enable_automated_rollbacks:
        logging.error("Automated rollbacks are not yet supported")
        sys.exit(1)

    try:
        api_key = os.getenv("DATADOG_API_KEY")
        app_key = os.getenv("DATADOG_APP_KEY")
        missing_settings = [
            setting_name
            for setting_name, value in (("DATADOG_API_KEY", api_key), ("DATADOG_APP_KEY", app_key))
            if not value
        ]
        if missing_settings:
            # Fail with a clear message rather than with an obscure error from the HTTP layer.
            raise Exception("Missing required environment variable(s): " + ", ".join(missing_settings))

        dd_client = DatadogClient(api_key, app_key)

        dd_client.trigger_synthetic_tests(tests_to_report_on)
        # Exits summarily if test results to be ignored. The SystemExit raised by sys.exit() is not an
        # Exception subclass, so it passes through the handler below untouched.
        dd_client.gate_on_deployment_testing_enable_switch()
        for test in tests_to_report_on:
            logging.info(f"\t Running test {test.public_id}: {test.name}")
        dd_client.get_and_record_test_results()
        failed_tests = dd_client.get_failed_tests()

        for failed_test in failed_tests:
            logging.warning(f'Test failed: {failed_test.public_id} -- {failed_test.name}')

        task_failed_code = 1 if failed_tests else 0

    except Exception as e:
        # Top-level boundary: report any integration problem and fail the task.
        logging.error("GoCD/Datadog integration error: %s", e)
        task_failed_code = 1

    sys.exit(task_failed_code)
259+
260+
if __name__ == "__main__":
    # Settings used when this module is executed directly as a script.
    SLACK_NOTIFICATION_CHANNEL = 'project-edxapp-deployment-future'
    ENABLE_AUTOMATED_ROLLBACKS = False
    TESTS_TO_REPORT_ON = [
        # All tests disabled for now. Will reinstate
        # them after the deployment testing enable switch functionality has been tested on stage.
        #
        # TODO: Two tests are disabled behind two layers of comment symbols. These are broken and should not
        # be reinstated until fixed.
        #
        # SyntheticTest(
        #     '''
        #     [Synthetics] edX Smoke Test - [Verified student] A verified student can
        #     access a graded course problem
        #     ''',
        #     "tck-hrr-ubp"
        # ),
        # SyntheticTest(
        #     '''
        #     [Synthetics] edX Smoke Test - [Verified student] An enrolled verified student can
        #     access a course’s landing page, course content, and course forum
        #     ''',
        #     "zbz-r28-jjx"
        # ),
        # # SyntheticTest(
        # #     '''
        # #     [Synthetics] edX Smoke Test - [Audit student] An enrolled audit student cannot load
        # #     a graded problem, and sees the upsell screen
        # #     ''',
        # #     "75p-sez-5wg"
        # # ),
        # # SyntheticTest(
        # #     '''
        # #     [Synthetics] edX Smoke Test - [Audit student] An enrolled audit student can access
        # #     a course’s landing page, course content, and course forum
        # #     ''',
        # #     "jvx-2jw-agj"
        # # ),
        # SyntheticTest(
        #     '''
        #     edX Smoke Test - [Unenrolled student] An unenrolled student cannot load a
        #     course’s landing page, and sees the “Enroll Now” screen
        #     ''',
        #     "zkx-36f-kui"
        # ),
        # SyntheticTest(
        #     '''
        #     edX Smoke Test - [Anonymous user] An anonymous user is directed to the
        #     Logistration page (authn.edx.org) when trying to access content behind log-in wall
        #     ''',
        #     "6tq-u28-hwa"
        # ),
    ]
    # TODO: Pick up these settings from GoCD invocation
    run_synthetic_tests(TESTS_TO_REPORT_ON, ENABLE_AUTOMATED_ROLLBACKS, SLACK_NOTIFICATION_CHANNEL)

0 commit comments

Comments
 (0)