Skip to content

Commit

Permalink
src/teuthology_api/suite: Modify how we handle Error and Success runs
Browse files Browse the repository at this point in the history
The changes include:

1. Make the suite route return
{"run": run_details, "logs": logs, "job_count": job_count}

2. Improve how we handle exceptions by using Queue from
Python's multiprocessing library.

Signed-off-by: Kamoltat Sirivadhna <[email protected]>
  • Loading branch information
kamoltat committed May 9, 2024
1 parent f0a9d59 commit e0642da
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 36 deletions.
7 changes: 6 additions & 1 deletion src/teuthology_api/routes/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,9 @@ def create_run(
):
args = args.model_dump(by_alias=True)
args["--user"] = get_username(request)
return run(args, logs, access_token)
try:
created_run = run(args, logs, access_token)
log.debug(created_run)
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
return created_run
2 changes: 1 addition & 1 deletion src/teuthology_api/schemas/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class SuiteArgs(BaseArgs):
default="https://github.com/ceph/ceph-ci.git", alias="--suite_repo"
)
teuthology_branch: Union[str, None] = Field(
default="main", alias="--teuthology-branch"
default=None, alias="--teuthology-branch"
)
validate_sha1: Union[str, None] = Field(default="true", alias="--validate-sha1")
wait: Union[bool, None] = Field(default=False, alias="--wait")
Expand Down
37 changes: 28 additions & 9 deletions src/teuthology_api/services/helpers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from multiprocessing import Process
from multiprocessing import Process, Queue
import logging
import os
import uuid
Expand Down Expand Up @@ -26,26 +26,45 @@ def logs_run(func, args):
_id = str(uuid.uuid4())
archive = Path(ARCHIVE_DIR)
log_file = archive / f"{_id}.log"

teuthology_process = Process(target=_execute_with_logs, args=(func, args, log_file))
teuthology_process.start()
teuthology_process.join()

teuth_queue = Queue()
teuth_process = Process(
target=_execute_with_logs,
args=(func, args, log_file, teuth_queue)
)
teuth_process.daemon = True # Set the process as a daemon
teuth_process.start()
teuth_process.join(timeout=180) # Set the timeout value in seconds
if teuth_process.is_alive():
teuth_process.terminate() # Terminate the process if it exceeds the timeout
teuth_process.join()
raise TimeoutError("Process execution timed out")
logs = ""
with open(log_file, encoding="utf-8") as file:
logs = file.readlines()
if os.path.isfile(log_file):
os.remove(log_file)
return logs
log.debug(logs)
if teuth_process.exitcode > 0:
e = teuth_queue.get()
log.error(e)
return "fail", e, 0
else:
job_count = teuth_queue.get()
return "success", logs, job_count


def _execute_with_logs(func, args, log_file):
def _execute_with_logs(func, args, log_file, teuth_queue):
"""
To store logs, set a new FileHandler for teuthology root logger
and then execute the command function.
"""
teuthology.setup_log_file(log_file)
func(args)
try:
job_count = func(args)
teuth_queue.put(job_count)
except Exception as e:
teuth_queue.put(e)
raise


def get_run_details(run_name: str):
Expand Down
48 changes: 23 additions & 25 deletions src/teuthology_api/services/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,31 +20,29 @@ def run(args, send_logs: bool, access_token: str):
detail="You need to be logged in",
headers={"WWW-Authenticate": "Bearer"},
)
try:
args["--timestamp"] = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")

logs = logs_run(teuthology.suite.main, args)

# get run details from paddles
run_name = make_run_name(
{
"machine_type": args["--machine-type"],
"user": args["--user"],
"timestamp": args["--timestamp"],
"suite": args["--suite"],
"ceph_branch": args["--ceph"],
"kernel_branch": args["--kernel"],
"flavor": args["--flavor"],
}
)
run_details = get_run_details(run_name)
if send_logs or args["--dry-run"]:
return {"run": run_details, "logs": logs}
return {"run": run_details}
except Exception as exc:
log.error("teuthology.suite.main failed with the error: %s", repr(exc))
raise HTTPException(status_code=500, detail=repr(exc)) from exc

args["--timestamp"] = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
status, logs, job_count = logs_run(teuthology.suite.main, args)
if status == "fail":
raise logs
if args["--dry-run"] or job_count < 1:
return {"run": {}, "logs": logs, "job_count": job_count}
# get run details from paddles
run_name = make_run_name(
{
"machine_type": args["--machine-type"],
"user": args["--user"],
"timestamp": args["--timestamp"],
"suite": args["--suite"],
"ceph_branch": args["--ceph"],
"kernel_branch": args["--kernel"],
"flavor": args["--flavor"],
}
)
run_details = get_run_details(run_name)
if send_logs:
return {"run": run_details, "logs":logs, "job_count": job_count}
else:
return {"run": run_details, "job_count": job_count}

def make_run_name(run_dic):
"""
Expand Down

0 comments on commit e0642da

Please sign in to comment.