Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes to docker detached mode #163

Merged
merged 6 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations

on:
schedule:
- cron: "50 17 * * *" #to be adjusted
- cron: "38 18 * * *" #to be adjusted

jobs:
run_nvidia:
Expand Down
4 changes: 3 additions & 1 deletion automation/script/docker_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def prepare_docker_inputs(input_params, docker_settings,
keys += [
"skip_run_cmd", "pre_run_cmds", "run_cmd_prefix", "all_gpus", "num_gpus", "device", "gh_token",
"port_maps", "shm_size", "pass_user_id", "pass_user_group", "extra_run_args", "detached", "interactive",
"dt", "it", "use_host_group_id", "use_host_user_id"
"dt", "it", "use_host_group_id", "use_host_user_id", "keep_detached", "reuse_existing"
]
# Collect Dockerfile inputs
docker_inputs = {
Expand Down Expand Up @@ -377,6 +377,8 @@ def get_docker_default(key):
"port_maps": [],
"use_host_user_id": True,
"use_host_group_id": True,
"keep_detached": False,
"reuse_existing": True
}
if key in defaults:
return defaults[key]
Expand Down
2 changes: 1 addition & 1 deletion script/get-gh-actions-runner/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ new_env_keys:

deps:
- tags: detect-os
- tags: download-and-extract,_extract,_url.https://github.com/actions/runner/releases/download/v2.320.0/actions-runner-linux-x64-2.320.0.tar.gz
- tags: download-and-extract,_extract,_url.https://github.com/actions/runner/releases/download/v2.321.0/actions-runner-linux-x64-2.321.0.tar.gz
force_cache: yes
extra_cache_tags: gh-actions-runner-code,gh-actions,code
env:
Expand Down
42 changes: 16 additions & 26 deletions script/run-docker-container/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def preprocess(i):

interactive = env.get('MLC_DOCKER_INTERACTIVE_MODE', '')

if str(interactive).lower() in ['yes', 'true', '1']:
if is_true(interactive):
env['MLC_DOCKER_DETACHED_MODE'] = 'no'

if 'MLC_DOCKER_RUN_SCRIPT_TAGS' not in env:
Expand Down Expand Up @@ -54,7 +54,9 @@ def preprocess(i):
print('')
print('Checking existing Docker container:')
print('')
CMD = f"""{env['MLC_CONTAINER_TOOL']} ps --format=json --filter "ancestor={DOCKER_CONTAINER}" """
# CMD = f"""{env['MLC_CONTAINER_TOOL']} ps --format=json --filter "ancestor={DOCKER_CONTAINER}" """
CMD = f"""{env['MLC_CONTAINER_TOOL']} ps --format """ + \
"'{{ .ID }},'" + f""" --filter "ancestor={DOCKER_CONTAINER}" """
if os_info['platform'] == 'windows':
CMD += " 2> nul"
else:
Expand All @@ -71,27 +73,21 @@ def preprocess(i):
'error': 'Unexpected error occurred with docker run:\n{}'.format(e)
}

if len(out) > 0 and str(env.get('MLC_DOCKER_REUSE_EXISTING_CONTAINER',
'')).lower() in ["1", "true", "yes"]: # container exists
# print(out)
out_split = out.splitlines()
existing_container_id = None
if len(out) > 0:
out_split = out.split(",")
if len(out_split) > 0:
try:
out_json = json.loads(out_split[0])
# print("JSON successfully loaded:", out_json)
except json.JSONDecodeError as e:
print(f"Error: First line of 'out' is not valid JSON: {e}")
return {
'return': 1, 'error': f"Error: First line of 'out' is not valid JSON: {e}"}
else:
out_json = []
existing_container_id = out_split[0].strip()

if isinstance(out_json, list) and len(out_json) > 0:
existing_container_id = out_json[0]['Id']
if existing_container_id and is_true(
env.get('MLC_DOCKER_REUSE_EXISTING_CONTAINER', '')):
print(f"Reusing existing container {existing_container_id}")
env['MLC_DOCKER_CONTAINER_ID'] = existing_container_id

else:
if existing_container_id:
print(
f"""Not using existing container {existing_container_id} as env['MLC_DOCKER_REUSE_EXISTING_CONTAINER'] is not set""")
if env.get('MLC_DOCKER_CONTAINER_ID', '') != '':
del (env['MLC_DOCKER_CONTAINER_ID']) # not valid ID

Expand Down Expand Up @@ -237,13 +233,8 @@ def postprocess(i):
run_opts += port_map_cmd_string

# Currently there is a problem running Docker in detached mode on Windows:
detached = str(
env.get(
'MLC_DOCKER_DETACHED_MODE',
'')).lower() in [
'yes',
'true',
"1"]
detached = is_true(env.get('MLC_DOCKER_DETACHED_MODE', ''))

# if detached and os_info['platform'] != 'windows':
if detached:
if os_info['platform'] == 'windows':
Expand All @@ -257,8 +248,7 @@ def postprocess(i):
CONTAINER = f"""{env['MLC_CONTAINER_TOOL']} run -dt {run_opts} --rm {docker_image_repo}/{docker_image_name}:{docker_image_tag} bash"""
CMD = f"""ID=`{CONTAINER}` && {env['MLC_CONTAINER_TOOL']} exec $ID bash -c '{run_cmd}'"""

if False and str(env.get('MLC_KEEP_DETACHED_CONTAINER', '')).lower() not in [
'yes', "1", 'true']:
if not is_true(env.get('MLC_KEEP_DETACHED_CONTAINER', '')):
CMD += f""" && {env['MLC_CONTAINER_TOOL']} kill $ID >/dev/null"""

CMD += ' && echo "ID=$ID"'
Expand Down
2 changes: 1 addition & 1 deletion script/run-docker-container/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ cache: false
category: Docker automation

default_env:
MLC_DOCKER_DETACHED_MODE: 'yes'
MLC_DOCKER_REUSE_EXISTING_CONTAINER: 'no'
MLC_DOCKER_PRIVILEGED_MODE: 'no'
MLC_PODMAN_MAP_USER_ID: 'no'
Expand All @@ -31,6 +30,7 @@ input_mapping:
docker_base_image: MLC_DOCKER_IMAGE_BASE
base_image: MLC_DOCKER_IMAGE_BASE
keep_detached: MLC_KEEP_DETACHED_CONTAINER
reuse_existing: MLC_DOCKER_REUSE_EXISTING_CONTAINER
docker_os: MLC_DOCKER_OS
docker_os_version: MLC_DOCKER_OS_VERSION
os: MLC_DOCKER_OS
Expand Down
12 changes: 6 additions & 6 deletions script/run-mlperf-inference-app/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import subprocess
import copy
import mlperf_utils
from utils import *

summary_ext = ['.csv', '.json', '.xlsx']

Expand Down Expand Up @@ -218,8 +219,7 @@ def preprocess(i):

print('=========================================================')

if str(env.get('MLC_MLPERF_USE_DOCKER', '')
).lower() in ["1", "true", "yes"]:
if is_true(env.get('MLC_MLPERF_USE_DOCKER', '')):
action = "docker"
# del(env['OUTPUT_BASE_DIR'])
state = {}
Expand All @@ -232,7 +232,7 @@ def preprocess(i):
if k.startswith("docker_"):
docker_extra_input[k] = inp[k]
inp = {}
if str(docker_dt).lower() in ["yes", "true", "1"]:
if is_true(docker_dt):
# turning it off for the first run and after that we turn it on
if env.get('MLC_DOCKER_REUSE_EXISTING_CONTAINER', '') == '':
env['MLC_DOCKER_REUSE_EXISTING_CONTAINER'] = 'no'
Expand Down Expand Up @@ -292,7 +292,7 @@ def preprocess(i):
env['OUTPUT_BASE_DIR'], f"{env['MLC_MLPERF_RUN_STYLE']}_results")

if action == "docker":
if str(docker_dt).lower() not in ["yes", "true", "1"]:
if not is_true(docker_dt):
print(
f"\nStop Running loadgen scenario: {scenario} and mode: {mode}")
# We run commands interactively inside the docker container
Expand Down Expand Up @@ -320,8 +320,8 @@ def preprocess(i):
if state.get('docker', {}):
del (state['docker'])

if env.get('MLC_DOCKER_CONTAINER_ID', '') != '' and str(env.get(
'MLC_DOCKER_CONTAINER_KEEP_ALIVE', '')).lower() not in ["yes", "1", "true"]:
if env.get('MLC_DOCKER_CONTAINER_ID', '') != '' and not is_true(env.get(
'MLC_DOCKER_CONTAINER_KEEP_ALIVE', '')):
container_id = env['MLC_DOCKER_CONTAINER_ID']
CMD = f"docker kill {container_id}"
docker_out = subprocess.check_output(CMD, shell=True).decode("utf-8")
Expand Down
Loading