Skip to content

Commit

Permalink
Fixes to docker detached mode (#163)
Browse files (browse the repository at this point in the history)
* Clean up boolean compare in run-docker

* Fix bug in detecting detached containers
  • Loading branch information
arjunsuresh authored Jan 27, 2025
1 parent 3657548 commit 6161058
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations

on:
schedule:
- cron: "50 17 * * *" #to be adjusted
- cron: "38 18 * * *" #to be adjusted

jobs:
run_nvidia:
Expand Down
4 changes: 3 additions & 1 deletion automation/script/docker_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def prepare_docker_inputs(input_params, docker_settings,
keys += [
"skip_run_cmd", "pre_run_cmds", "run_cmd_prefix", "all_gpus", "num_gpus", "device", "gh_token",
"port_maps", "shm_size", "pass_user_id", "pass_user_group", "extra_run_args", "detached", "interactive",
"dt", "it", "use_host_group_id", "use_host_user_id"
"dt", "it", "use_host_group_id", "use_host_user_id", "keep_detached", "reuse_existing"
]
# Collect Dockerfile inputs
docker_inputs = {
Expand Down Expand Up @@ -377,6 +377,8 @@ def get_docker_default(key):
"port_maps": [],
"use_host_user_id": True,
"use_host_group_id": True,
"keep_detached": False,
"reuse_existing": True
}
if key in defaults:
return defaults[key]
Expand Down
2 changes: 1 addition & 1 deletion script/get-gh-actions-runner/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ new_env_keys:

deps:
- tags: detect-os
- tags: download-and-extract,_extract,_url.https://github.com/actions/runner/releases/download/v2.320.0/actions-runner-linux-x64-2.320.0.tar.gz
- tags: download-and-extract,_extract,_url.https://github.com/actions/runner/releases/download/v2.321.0/actions-runner-linux-x64-2.321.0.tar.gz
force_cache: yes
extra_cache_tags: gh-actions-runner-code,gh-actions,code
env:
Expand Down
42 changes: 16 additions & 26 deletions script/run-docker-container/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def preprocess(i):

interactive = env.get('MLC_DOCKER_INTERACTIVE_MODE', '')

if str(interactive).lower() in ['yes', 'true', '1']:
if is_true(interactive):
env['MLC_DOCKER_DETACHED_MODE'] = 'no'

if 'MLC_DOCKER_RUN_SCRIPT_TAGS' not in env:
Expand Down Expand Up @@ -54,7 +54,9 @@ def preprocess(i):
print('')
print('Checking existing Docker container:')
print('')
CMD = f"""{env['MLC_CONTAINER_TOOL']} ps --format=json --filter "ancestor={DOCKER_CONTAINER}" """
# CMD = f"""{env['MLC_CONTAINER_TOOL']} ps --format=json --filter "ancestor={DOCKER_CONTAINER}" """
CMD = f"""{env['MLC_CONTAINER_TOOL']} ps --format """ + \
"'{{ .ID }},'" + f""" --filter "ancestor={DOCKER_CONTAINER}" """
if os_info['platform'] == 'windows':
CMD += " 2> nul"
else:
Expand All @@ -71,27 +73,21 @@ def preprocess(i):
'error': 'Unexpected error occurred with docker run:\n{}'.format(e)
}

if len(out) > 0 and str(env.get('MLC_DOCKER_REUSE_EXISTING_CONTAINER',
'')).lower() in ["1", "true", "yes"]: # container exists
# print(out)
out_split = out.splitlines()
existing_container_id = None
if len(out) > 0:
out_split = out.split(",")
if len(out_split) > 0:
try:
out_json = json.loads(out_split[0])
# print("JSON successfully loaded:", out_json)
except json.JSONDecodeError as e:
print(f"Error: First line of 'out' is not valid JSON: {e}")
return {
'return': 1, 'error': f"Error: First line of 'out' is not valid JSON: {e}"}
else:
out_json = []
existing_container_id = out_split[0].strip()

if isinstance(out_json, list) and len(out_json) > 0:
existing_container_id = out_json[0]['Id']
if existing_container_id and is_true(
env.get('MLC_DOCKER_REUSE_EXISTING_CONTAINER', '')):
print(f"Reusing existing container {existing_container_id}")
env['MLC_DOCKER_CONTAINER_ID'] = existing_container_id

else:
if existing_container_id:
print(
f"""Not using existing container {existing_container_id} as env['MLC_DOCKER_REUSE_EXISTING_CONTAINER'] is not set""")
if env.get('MLC_DOCKER_CONTAINER_ID', '') != '':
del (env['MLC_DOCKER_CONTAINER_ID']) # not valid ID

Expand Down Expand Up @@ -237,13 +233,8 @@ def postprocess(i):
run_opts += port_map_cmd_string

# Currently have problem running Docker in detached mode on Windows:
detached = str(
env.get(
'MLC_DOCKER_DETACHED_MODE',
'')).lower() in [
'yes',
'true',
"1"]
detached = is_true(env.get('MLC_DOCKER_DETACHED_MODE', ''))

# if detached and os_info['platform'] != 'windows':
if detached:
if os_info['platform'] == 'windows':
Expand All @@ -257,8 +248,7 @@ def postprocess(i):
CONTAINER = f"""{env['MLC_CONTAINER_TOOL']} run -dt {run_opts} --rm {docker_image_repo}/{docker_image_name}:{docker_image_tag} bash"""
CMD = f"""ID=`{CONTAINER}` && {env['MLC_CONTAINER_TOOL']} exec $ID bash -c '{run_cmd}'"""

if False and str(env.get('MLC_KEEP_DETACHED_CONTAINER', '')).lower() not in [
'yes', "1", 'true']:
if not is_true(env.get('MLC_KEEP_DETACHED_CONTAINER', '')):
CMD += f""" && {env['MLC_CONTAINER_TOOL']} kill $ID >/dev/null"""

CMD += ' && echo "ID=$ID"'
Expand Down
2 changes: 1 addition & 1 deletion script/run-docker-container/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ cache: false
category: Docker automation

default_env:
MLC_DOCKER_DETACHED_MODE: 'yes'
MLC_DOCKER_REUSE_EXISTING_CONTAINER: 'no'
MLC_DOCKER_PRIVILEGED_MODE: 'no'
MLC_PODMAN_MAP_USER_ID: 'no'
Expand All @@ -31,6 +30,7 @@ input_mapping:
docker_base_image: MLC_DOCKER_IMAGE_BASE
base_image: MLC_DOCKER_IMAGE_BASE
keep_detached: MLC_KEEP_DETACHED_CONTAINER
reuse_existing: MLC_DOCKER_REUSE_EXISTING_CONTAINER
docker_os: MLC_DOCKER_OS
docker_os_version: MLC_DOCKER_OS_VERSION
os: MLC_DOCKER_OS
Expand Down
12 changes: 6 additions & 6 deletions script/run-mlperf-inference-app/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import subprocess
import copy
import mlperf_utils
from utils import *

summary_ext = ['.csv', '.json', '.xlsx']

Expand Down Expand Up @@ -218,8 +219,7 @@ def preprocess(i):

print('=========================================================')

if str(env.get('MLC_MLPERF_USE_DOCKER', '')
).lower() in ["1", "true", "yes"]:
if is_true(env.get('MLC_MLPERF_USE_DOCKER', '')):
action = "docker"
# del(env['OUTPUT_BASE_DIR'])
state = {}
Expand All @@ -232,7 +232,7 @@ def preprocess(i):
if k.startswith("docker_"):
docker_extra_input[k] = inp[k]
inp = {}
if str(docker_dt).lower() in ["yes", "true", "1"]:
if is_true(docker_dt):
# turning it off for the first run and after that we turn it on
if env.get('MLC_DOCKER_REUSE_EXISTING_CONTAINER', '') == '':
env['MLC_DOCKER_REUSE_EXISTING_CONTAINER'] = 'no'
Expand Down Expand Up @@ -292,7 +292,7 @@ def preprocess(i):
env['OUTPUT_BASE_DIR'], f"{env['MLC_MLPERF_RUN_STYLE']}_results")

if action == "docker":
if str(docker_dt).lower() not in ["yes", "true", "1"]:
if not is_true(docker_dt):
print(
f"\nStop Running loadgen scenario: {scenario} and mode: {mode}")
# We run commands interactively inside the docker container
Expand Down Expand Up @@ -320,8 +320,8 @@ def preprocess(i):
if state.get('docker', {}):
del (state['docker'])

if env.get('MLC_DOCKER_CONTAINER_ID', '') != '' and str(env.get(
'MLC_DOCKER_CONTAINER_KEEP_ALIVE', '')).lower() not in ["yes", "1", "true"]:
if env.get('MLC_DOCKER_CONTAINER_ID', '') != '' and not is_true(env.get(
'MLC_DOCKER_CONTAINER_KEEP_ALIVE', '')):
container_id = env['MLC_DOCKER_CONTAINER_ID']
CMD = f"docker kill {container_id}"
docker_out = subprocess.check_output(CMD, shell=True).decode("utf-8")
Expand Down

0 comments on commit 6161058

Please sign in to comment.