From 83a316b5dfc832ca9b6d175cb94f44bf24e8ec91 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Sat, 25 Jan 2025 12:37:11 +0530 Subject: [PATCH 01/37] support find cache --- mlc/main.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/mlc/main.py b/mlc/main.py index 594804b..5bf61a3 100644 --- a/mlc/main.py +++ b/mlc/main.py @@ -1077,6 +1077,7 @@ def call_script_module_function(self, function_name, run_args): return result else: logger.info("ScriptAutomation class not found in the script.") + return {'return': 1, 'error': 'ScriptAutomation class not found in the script.'} def docker(self, run_args): return self.call_script_module_function("docker", run_args) @@ -1111,10 +1112,23 @@ def find(self, run_args): #logger.info(f"Running script with identifier: {args.details}") # The REPOS folder is set by the user, for example via an environment variable. #logger.info(f"In cache action {repos_folder}") - run_args['target_name'] = "cache" #print(f"run_args = {run_args}") - return self.search(run_args) + res = self.search(run_args) + if res['return'] > 0: + return res + else: + if not res['list']: + logger.warning("No cache entry found for the specified tags!") + return {'return': 0, 'list': []} + else: + logger.info("Listing all cache entries found for the specified tags.") + print("Cache entries:") + print("-------------") + for cache_entry in res['list']: + print(f"- {cache_entry.path}\n") + print("-------------") + return {"return": 0, 'list': res['list']} def list(self, args): logger.info("Listing all caches.") From e4a05f57770a36df81b3b0e615fce4c72fcb9bd6 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Sat, 25 Jan 2025 12:37:32 +0530 Subject: [PATCH 02/37] initial commit for testing mlc core actions --- .github/workflows/test-mlc-core-actions.yaml | 57 ++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 .github/workflows/test-mlc-core-actions.yaml diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml new file mode 100644 index 0000000..b84d422 --- /dev/null +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -0,0 +1,57 @@ +name: MLC core actions test + +on: + pull_request: + branches: [ "main", "dev" ] + paths: + - '.github/workflows/test-mlc-core-actions.yml' + - '**' + - '!**.md' + +jobs: + test_mlc_core_actions: + + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.12", "3.8"] + os: ["ubuntu-latest", "windows-latest", "macos-latest"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + + - name: Configure git longpaths (Windows) + if: matrix.os == 'windows-latest' + run: | + git config --system core.longpaths true + + - name: Test pull repo - Pull MLOps repository + run: | + pip install mlcflow + mlc pull repo anandhu-eng@mlperf-automations + + - name: Test pull repo - Test conflicting repo scenario + run: | + mlc pull repo mlcommons@mlperf-automations + + - name: Test list repo - List the existing repositories + run: | + mlc list repo + + - name: Test find cache - Cache not present + run: | + mlc find cache --tags=detect,os + + - name: Test run script - Output being used for testing mlc cache + run: | + mlc run script --tags=get,imagenet-aux + mlc run script --tags=get,imagenet-aux,_from.dropbox + + - name: Test find cache - More than one cache present + run: | + mlc find cache --tags=get,imagenet-aux \ No newline at end of file From a08f555f20a8ecf207f6fc38dbea0a5138dc7e42 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Sat, 25 Jan 2025 12:41:42 +0530 Subject: [PATCH 03/37] fix branch --- .github/workflows/test-mlc-core-actions.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index b84d422..d8a11dc 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -33,11 +33,11 @@ jobs: - name: Test pull repo - Pull MLOps repository run: | pip install mlcflow - mlc pull repo anandhu-eng@mlperf-automations + mlc pull repo anandhu-eng@mlperf-automations --checkout=dev - name: Test pull repo - Test conflicting repo scenario run: | - mlc pull repo mlcommons@mlperf-automations + mlc pull repo mlcommons@mlperf-automations --checkout=dev - name: Test list repo - List the existing repositories run: | From 0982081a208b3ef630944aba59a1653953b42396 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Sat, 25 Jan 2025 12:53:57 +0530 Subject: [PATCH 04/37] install mlcflow from source of pr --- .github/workflows/test-mlc-core-actions.yaml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index d8a11dc..9ac472e 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -30,9 +30,14 @@ jobs: run: | git config --system core.longpaths true - - name: Test pull repo - Pull MLOps repository + - name: Install mlcflow from the pull request's source repository and branch + run: | + git clone ${{ github.event.pull_request.head.repo.html_url }} --branch=${{ github.event.pull_request.head.ref }} + cd mlcflow + pip install . + + - name: Test pull repo - Pull a forked MLOps repository run: | - pip install mlcflow mlc pull repo anandhu-eng@mlperf-automations --checkout=dev - name: Test pull repo - Test conflicting repo scenario @@ -43,6 +48,10 @@ jobs: run: | mlc list repo + - name: Test rm repo - Remove the forked mlperf-automation repo + run: | + mlc rm repo anandhu-eng@mlperf-automations + - name: Test find cache - Cache not present run: | mlc find cache --tags=detect,os From 1074131813d0bd8b73dbc274951390fa7d26f1c7 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Sat, 25 Jan 2025 13:11:17 +0530 Subject: [PATCH 05/37] fix repo key error --- mlc/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mlc/main.py b/mlc/main.py index 5bf61a3..482dfc9 100644 --- a/mlc/main.py +++ b/mlc/main.py @@ -1266,6 +1266,10 @@ def main(): if hasattr(args, 'repo') and args.repo: run_args['repo'] = args.repo + if args.command in ['rm']: + if args.target == "repo": + run_args['repo'] = args.details + if args.command in ["cp", "mv"]: run_args['target'] = args.target if hasattr(args, 'details') and args.details: From dff795392c1dbf9d9ef88c241cc061fc6ab0f9ab Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Sat, 25 Jan 2025 17:18:46 +0530 Subject: [PATCH 06/37] use action to clone source repo --- .github/workflows/test-mlc-core-actions.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index 9ac472e..d06c86f 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -32,9 +32,9 @@ jobs: - name: Install mlcflow from the pull request's source repository and branch run: | - git clone ${{ github.event.pull_request.head.repo.html_url }} --branch=${{ github.event.pull_request.head.ref }} - cd mlcflow - pip install . + python -m pip install --upgrade pip + python -m pip install --ignore-installed --verbose pip setuptools + python -m pip install . - name: Test pull repo - Pull a forked MLOps repository run: | From 85ca2a313c937e4a9433b007b43db9ca12003715 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Sun, 26 Jan 2025 03:36:25 +0530 Subject: [PATCH 07/37] added more extensive checking --- .github/workflows/test-mlc-core-actions.yaml | 73 +++++++++++++++++--- 1 file changed, 64 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index d06c86f..cfca4bd 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -36,31 +36,86 @@ jobs: python -m pip install --ignore-installed --verbose pip setuptools python -m pip install . - - name: Test pull repo - Pull a forked MLOps repository + - name: Define helper functions + shell: bash + run: | + # Function to validate a repository after pulling + validate_repo() { + local repo_path="$1" + local repo_json_path="$2" + local expected_branch="$3" + + if [ ! -d "$repo_path" ]; then + echo "Repository folder $repo_path not found. Exiting with failure." + exit 1 + fi + if [ ! -f "$repo_json_path" ]; then + echo "File $repo_json_path does not exist. Exiting with failure." + exit 1 + fi + if ! grep -q "$repo_path" "$repo_json_path"; then + echo "Path $repo_path not found in $repo_json_path. Exiting with failure." + exit 1 + fi + CURRENT_BRANCH=$(git -C "$repo_path" rev-parse --abbrev-ref HEAD) + if [ "$CURRENT_BRANCH" != "$expected_branch" ]; then + echo "Expected branch '$expected_branch', but found '$CURRENT_BRANCH'. Exiting with failure." + exit 1 + fi + } + + - name: Test 1 - pull repo - Pull a forked MLOps repository + env: + GH_MLC_REPO_PATH_FORK: "$HOME/MLC/repos/anandhu-eng@mlperf-automations" + GH_MLC_REPO_JSON_PATH: "$HOME/MLC/repos/repos.json" run: | mlc pull repo anandhu-eng@mlperf-automations --checkout=dev + validate_repo "$GH_MLC_REPO_PATH_FORK" "$GH_MLC_REPO_JSON_PATH" "dev" + - - name: Test pull repo - Test conflicting repo scenario + - name: Test 2 - pull repo - Test conflicting repo scenario + env: + GH_MLC_REPO_PATH: "$HOME/MLC/repos/anandhu-eng@mlperf-automations" + GH_MLC_REPO_JSON_PATH: "$HOME/MLC/repos/repos.json" run: | mlc pull repo mlcommons@mlperf-automations --checkout=dev + validate_repo "$GH_MLC_REPO_PATH" "$GH_MLC_REPO_JSON_PATH" "dev" + if grep -q "$GH_MLC_REPO_PATH_FORK" "$GH_MLC_REPO_JSON_PATH"; then + echo "Path $GH_MLC_REPO_PATH_FORK also found in $GH_MLC_REPO_JSON_PATH. This should have been replaced. Exiting with failure." + exit 1 + fi - - name: Test list repo - List the existing repositories + - name: Test 3 - list repo - List the existing repositories run: | mlc list repo - - name: Test rm repo - Remove the forked mlperf-automation repo + - name: Test 4 - rm repo - Remove the forked mlperf-automation repo + env: + GH_MLC_REPO_PATH_FORK: "$HOME/MLC/repos/anandhu-eng@mlperf-automations" + GH_MLC_REPO_JSON_PATH: "$HOME/MLC/repos/repos.json" run: | mlc rm repo anandhu-eng@mlperf-automations + if [ -d "$GH_MLC_REPO_PATH_FORK" ]; then + echo "Repository folder $GH_MLC_REPO_PATH found. It should ideally be deleted. Exiting with failure." + exit 1 + fi - - name: Test find cache - Cache not present + - name: Test 5 - find cache - Cache not present run: | - mlc find cache --tags=detect,os + mlc find cache --tags=detect,os 2>&1 | tee test5.log + if ! grep -q "No cache entry found for the specified tags!" test5.log; then + exit 1 + fi - - name: Test run script - Output being used for testing mlc cache + - name: Test 6 - run script - Output being used for testing mlc cache run: | mlc run script --tags=get,imagenet-aux mlc run script --tags=get,imagenet-aux,_from.dropbox - - name: Test find cache - More than one cache present + - name: Test 7 - find cache - More than one cache present run: | - mlc find cache --tags=get,imagenet-aux \ No newline at end of file + mlc find cache --tags=get,imagenet-aux 2>&1 | tee test7.log + mlc find cache --tags=detect,os 2>&1 | tee test5.log + if grep -q "No cache entry found for the specified tags!" test5.log; then + exit 1 + fi \ No newline at end of file From 45a4314e46ffb8ecca5a5fc58c59bb9e01ff27fa Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Sun, 26 Jan 2025 03:37:36 +0530 Subject: [PATCH 08/37] silence the prompts - quiet --- .github/workflows/test-mlc-core-actions.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index cfca4bd..09110df 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -109,8 +109,8 @@ jobs: - name: Test 6 - run script - Output being used for testing mlc cache run: | - mlc run script --tags=get,imagenet-aux - mlc run script --tags=get,imagenet-aux,_from.dropbox + mlc run script --tags=get,imagenet-aux --quiet + mlc run script --tags=get,imagenet-aux,_from.dropbox --quiet - name: Test 7 - find cache - More than one cache present run: | From b784d5fe336ad73eb899a37bd63b3ff17212b5e6 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Sun, 26 Jan 2025 03:41:03 +0530 Subject: [PATCH 09/37] expanded validate function --- .github/workflows/test-mlc-core-actions.yaml | 66 +++++++++++--------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index 09110df..b97ae6c 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -36,41 +36,29 @@ jobs: python -m pip install --ignore-installed --verbose pip setuptools python -m pip install . - - name: Define helper functions - shell: bash - run: | - # Function to validate a repository after pulling - validate_repo() { - local repo_path="$1" - local repo_json_path="$2" - local expected_branch="$3" - - if [ ! -d "$repo_path" ]; then - echo "Repository folder $repo_path not found. Exiting with failure." - exit 1 - fi - if [ ! -f "$repo_json_path" ]; then - echo "File $repo_json_path does not exist. Exiting with failure." - exit 1 - fi - if ! grep -q "$repo_path" "$repo_json_path"; then - echo "Path $repo_path not found in $repo_json_path. Exiting with failure." - exit 1 - fi - CURRENT_BRANCH=$(git -C "$repo_path" rev-parse --abbrev-ref HEAD) - if [ "$CURRENT_BRANCH" != "$expected_branch" ]; then - echo "Expected branch '$expected_branch', but found '$CURRENT_BRANCH'. Exiting with failure." - exit 1 - fi - } - - name: Test 1 - pull repo - Pull a forked MLOps repository env: GH_MLC_REPO_PATH_FORK: "$HOME/MLC/repos/anandhu-eng@mlperf-automations" GH_MLC_REPO_JSON_PATH: "$HOME/MLC/repos/repos.json" run: | mlc pull repo anandhu-eng@mlperf-automations --checkout=dev - validate_repo "$GH_MLC_REPO_PATH_FORK" "$GH_MLC_REPO_JSON_PATH" "dev" + if [ ! -d "$GH_MLC_REPO_PATH_FORK" ]; then + echo "Repository folder $GH_MLC_REPO_PATH_FORK not found. Exiting with failure." + exit 1 + fi + if [ ! -f "$GH_MLC_REPO_JSON_PATH" ]; then + echo "File $GH_MLC_REPO_JSON_PATH does not exist. Exiting with failure." + exit 1 + fi + if ! grep -q "$GH_MLC_REPO_PATH_FORK" "$GH_MLC_REPO_JSON_PATH"; then + echo "Path $GH_MLC_REPO_PATH_FORK not found in $GH_MLC_REPO_JSON_PATH. Exiting with failure." + exit 1 + fi + CURRENT_BRANCH=$(git -C "$GH_MLC_REPO_PATH_FORK" rev-parse --abbrev-ref HEAD) + if [ "$CURRENT_BRANCH" != "dev" ]; then + echo "Expected branch 'dev', but found '$CURRENT_BRANCH'. Exiting with failure." + exit 1 + fi - name: Test 2 - pull repo - Test conflicting repo scenario @@ -79,11 +67,27 @@ jobs: GH_MLC_REPO_JSON_PATH: "$HOME/MLC/repos/repos.json" run: | mlc pull repo mlcommons@mlperf-automations --checkout=dev - validate_repo "$GH_MLC_REPO_PATH" "$GH_MLC_REPO_JSON_PATH" "dev" - if grep -q "$GH_MLC_REPO_PATH_FORK" "$GH_MLC_REPO_JSON_PATH"; then + if [ ! -d "$GH_MLC_REPO_PATH" ]; then + echo "Repository folder $GH_MLC_REPO_PATH not found. Exiting with failure." + exit 1 + fi + if [ ! -f "$GH_MLC_REPO_JSON_PATH" ]; then + echo "File $GH_MLC_REPO_JSON_PATH does not exist. Exiting with failure." + exit 1 + fi + if ! grep -q "$GH_MLC_REPO_PATH" "$GH_MLC_REPO_JSON_PATH"; then + echo "Path $GH_MLC_REPO_PATH not found in $GH_MLC_REPO_JSON_PATH. Exiting with failure." + exit 1 + fi + if ! grep -q "$GH_MLC_REPO_PATH_FORK" "$GH_MLC_REPO_JSON_PATH"; then echo "Path $GH_MLC_REPO_PATH_FORK also found in $GH_MLC_REPO_JSON_PATH. This should have been replaced. Exiting with failure." exit 1 fi + CURRENT_BRANCH=$(git -C "$GH_MLC_REPO_PATH" rev-parse --abbrev-ref HEAD) + if [ "$CURRENT_BRANCH" != "dev" ]; then + echo "Expected branch 'dev', but found '$CURRENT_BRANCH'. Exiting with failure." + exit 1 + fi - name: Test 3 - list repo - List the existing repositories run: | From 0cc856a32df03efa5a60b17eaf254b0ed756124d Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Sun, 26 Jan 2025 03:44:49 +0530 Subject: [PATCH 10/37] test commit --- .github/workflows/test-mlc-core-actions.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index b97ae6c..fc6aae9 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -38,11 +38,11 @@ jobs: - name: Test 1 - pull repo - Pull a forked MLOps repository env: - GH_MLC_REPO_PATH_FORK: "$HOME/MLC/repos/anandhu-eng@mlperf-automations" + GH_MLC_REPO_NAME_FORK: "anandhu-eng@mlperf-automations" GH_MLC_REPO_JSON_PATH: "$HOME/MLC/repos/repos.json" run: | mlc pull repo anandhu-eng@mlperf-automations --checkout=dev - if [ ! -d "$GH_MLC_REPO_PATH_FORK" ]; then + if [ ! -d "$HOME/MLC/repos/$GH_MLC_REPO_PATH_FORK" ]; then echo "Repository folder $GH_MLC_REPO_PATH_FORK not found. Exiting with failure." exit 1 fi From 25c2ca25972c502b27585258e4b2d239d48d6a11 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Sun, 26 Jan 2025 03:55:16 +0530 Subject: [PATCH 11/37] fix repo path --- .github/workflows/test-mlc-core-actions.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index fc6aae9..e707f61 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -38,11 +38,11 @@ jobs: - name: Test 1 - pull repo - Pull a forked MLOps repository env: - GH_MLC_REPO_NAME_FORK: "anandhu-eng@mlperf-automations" - GH_MLC_REPO_JSON_PATH: "$HOME/MLC/repos/repos.json" + GH_MLC_REPO_NAME_FORK: "${HOME}/MLC/repos/anandhu-eng@mlperf-automations" + GH_MLC_REPO_JSON_PATH: "${HOME}/MLC/repos/repos.json" run: | mlc pull repo anandhu-eng@mlperf-automations --checkout=dev - if [ ! -d "$HOME/MLC/repos/$GH_MLC_REPO_PATH_FORK" ]; then + if [ ! -d "$GH_MLC_REPO_PATH_FORK" ]; then echo "Repository folder $GH_MLC_REPO_PATH_FORK not found. Exiting with failure." exit 1 fi @@ -63,8 +63,8 @@ jobs: - name: Test 2 - pull repo - Test conflicting repo scenario env: - GH_MLC_REPO_PATH: "$HOME/MLC/repos/anandhu-eng@mlperf-automations" - GH_MLC_REPO_JSON_PATH: "$HOME/MLC/repos/repos.json" + GH_MLC_REPO_PATH: "${HOME}/MLC/repos/mlcommons@mlperf-automations" + GH_MLC_REPO_JSON_PATH: "${HOME}/MLC/repos/repos.json" run: | mlc pull repo mlcommons@mlperf-automations --checkout=dev if [ ! -d "$GH_MLC_REPO_PATH" ]; then @@ -95,8 +95,8 @@ jobs: - name: Test 4 - rm repo - Remove the forked mlperf-automation repo env: - GH_MLC_REPO_PATH_FORK: "$HOME/MLC/repos/anandhu-eng@mlperf-automations" - GH_MLC_REPO_JSON_PATH: "$HOME/MLC/repos/repos.json" + GH_MLC_REPO_PATH_FORK: "${HOME}/MLC/repos/anandhu-eng@mlperf-automations" + GH_MLC_REPO_JSON_PATH: "${HOME}/MLC/repos/repos.json" run: | mlc rm repo anandhu-eng@mlperf-automations if [ -d "$GH_MLC_REPO_PATH_FORK" ]; then @@ -122,4 +122,4 @@ jobs: mlc find cache --tags=detect,os 2>&1 | tee test5.log if grep -q "No cache entry found for the specified tags!" test5.log; then exit 1 - fi \ No newline at end of file + fi From 9d57d0e5a8eff0fa2e5ab8598a8367dc1f4c9b63 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Sun, 26 Jan 2025 03:56:26 +0530 Subject: [PATCH 12/37] test commit --- .github/workflows/test-mlc-core-actions.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index e707f61..681728d 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -38,7 +38,7 @@ jobs: - name: Test 1 - pull repo - Pull a forked MLOps repository env: - GH_MLC_REPO_NAME_FORK: "${HOME}/MLC/repos/anandhu-eng@mlperf-automations" + GH_MLC_REPO_PATH_FORK: "${HOME}/MLC/repos/anandhu-eng@mlperf-automations" GH_MLC_REPO_JSON_PATH: "${HOME}/MLC/repos/repos.json" run: | mlc pull repo anandhu-eng@mlperf-automations --checkout=dev From 19d95b089ff870172b619f51799c73502a0f2238 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Sun, 26 Jan 2025 04:00:46 +0530 Subject: [PATCH 13/37] test commit --- .github/workflows/test-mlc-core-actions.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index 681728d..cdb4260 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -17,6 +17,9 @@ jobs: matrix: python-version: ["3.12", "3.8"] os: ["ubuntu-latest", "windows-latest", "macos-latest"] + exclude: + - os: windows-latest + - os: macos-latest steps: - uses: actions/checkout@v4 @@ -42,7 +45,7 @@ jobs: GH_MLC_REPO_JSON_PATH: "${HOME}/MLC/repos/repos.json" run: | mlc pull repo anandhu-eng@mlperf-automations --checkout=dev - if [ ! -d "$GH_MLC_REPO_PATH_FORK" ]; then + if [ ! -d "${GH_MLC_REPO_PATH_FORK}" ]; then echo "Repository folder $GH_MLC_REPO_PATH_FORK not found. Exiting with failure." exit 1 fi From 7ef00730a599e64a0f4cb9c5c9ea92c10dd06bfe Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Sun, 26 Jan 2025 04:18:32 +0530 Subject: [PATCH 14/37] test commit --- .github/workflows/test-mlc-core-actions.yaml | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index cdb4260..9278c04 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -40,10 +40,9 @@ jobs: python -m pip install . - name: Test 1 - pull repo - Pull a forked MLOps repository - env: - GH_MLC_REPO_PATH_FORK: "${HOME}/MLC/repos/anandhu-eng@mlperf-automations" - GH_MLC_REPO_JSON_PATH: "${HOME}/MLC/repos/repos.json" run: | + GH_MLC_REPO_PATH_FORK="${HOME}/MLC/repos/anandhu-eng@mlperf-automations" + GH_MLC_REPO_JSON_PATH="${HOME}/MLC/repos/repos.json" mlc pull repo anandhu-eng@mlperf-automations --checkout=dev if [ ! -d "${GH_MLC_REPO_PATH_FORK}" ]; then echo "Repository folder $GH_MLC_REPO_PATH_FORK not found. Exiting with failure." @@ -65,10 +64,9 @@ jobs: - name: Test 2 - pull repo - Test conflicting repo scenario - env: - GH_MLC_REPO_PATH: "${HOME}/MLC/repos/mlcommons@mlperf-automations" - GH_MLC_REPO_JSON_PATH: "${HOME}/MLC/repos/repos.json" run: | + GH_MLC_REPO_PATH="${HOME}/MLC/repos/mlcommons@mlperf-automations" + GH_MLC_REPO_JSON_PATH="${HOME}/MLC/repos/repos.json" mlc pull repo mlcommons@mlperf-automations --checkout=dev if [ ! -d "$GH_MLC_REPO_PATH" ]; then echo "Repository folder $GH_MLC_REPO_PATH not found. Exiting with failure." @@ -97,10 +95,8 @@ jobs: mlc list repo - name: Test 4 - rm repo - Remove the forked mlperf-automation repo - env: - GH_MLC_REPO_PATH_FORK: "${HOME}/MLC/repos/anandhu-eng@mlperf-automations" - GH_MLC_REPO_JSON_PATH: "${HOME}/MLC/repos/repos.json" run: | + GH_MLC_REPO_PATH_FORK="${HOME}/MLC/repos/anandhu-eng@mlperf-automations" mlc rm repo anandhu-eng@mlperf-automations if [ -d "$GH_MLC_REPO_PATH_FORK" ]; then echo "Repository folder $GH_MLC_REPO_PATH found. It should ideally be deleted. Exiting with failure." From e0e3b35e2d0fe7eac4d5ea1df85ec959fdffc289 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 28 Jan 2025 05:13:43 +0530 Subject: [PATCH 15/37] Cleanup logging, rm action --- mlc/main.py | 52 +++++++++++++++++++------------------------------- pyproject.toml | 2 +- 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/mlc/main.py b/mlc/main.py index a506209..89ba688 100644 --- a/mlc/main.py +++ b/mlc/main.py @@ -13,6 +13,7 @@ from pathlib import Path from colorama import Fore, Style, init import shutil + # Initialize colorama for Windows support init(autoreset=True) class ColoredFormatter(logging.Formatter): @@ -29,31 +30,13 @@ def format(self, record): record.levelname = f"{self.COLORS[record.levelname]}{record.levelname}{Style.RESET_ALL}" return super().format(record) - -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') - logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - -# Create console handler with the custom formatter -console_handler = logging.StreamHandler() -console_handler.setFormatter(ColoredFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) - -# Remove any existing handlers and add our custom handler -if logger.hasHandlers(): - logger.handlers.clear() - -logger.addHandler(console_handler) - -# # Set up logging configuration -# logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') -# logger = logging.getLogger(__name__) - # Set up logging configuration def setup_logging(log_path = 'mlc',log_file = 'mlc-log.txt'): - logFormatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + logFormatter = ColoredFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + logger.setLevel(logging.INFO) # File hander for logging in file in the specified path file_handler = logging.FileHandler("{0}/{1}".format(log_path, log_file)) @@ -65,10 +48,6 @@ def setup_logging(log_path = 'mlc',log_file = 'mlc-log.txt'): consoleHandler.setFormatter(logFormatter) logger.addHandler(consoleHandler) -# Testing the log -# setup_logging(log_path='.',log_file='mlc-log2.txt') -# logger = logging.getLogger(__name__) -# logger.info('This is an info message') # Base class for CLI actions class Action: @@ -262,7 +241,8 @@ def unregister_repo(self, repo_path): def __init__(self): - self.logger = logging.getLogger() + setup_logging(log_path='.',log_file='mlc-log.txt') + #self.logger = logging.getLogger() temp_repo = os.environ.get('MLC_REPOS','').strip() if temp_repo == '': self.repos_path = os.path.expanduser('~/MLC/repos') @@ -424,18 +404,26 @@ def rm(self, i): if len(res['list']) == 0: return {'return': 1, 'error': f'No {target_name} found for {inp}'} elif len(res['list']) > 1: - return {'return': 1, 'error': f'More than 1 {action_target} found for {inp}: {res["list"]}'} - else: - result = res['list'][0] + print(f"More than 1 {target_name} found for {inp}:") + if not i.get('all'): + for idx, item in enumerate(res["list"]): + print(f"{idx}. Path: {item.path}, Meta: {item.meta}") + + user_choice = input("Would you like to proceed with all items? (yes/no): ").strip().lower() + if user_choice not in ['yes', 'y']: + return {'return': 1, 'error': "Operation aborted by user."} + results = res['list'] + + for result in results: item_path = result.path item_meta = result.meta - if os.path.exists(item_path): - shutil.rmtree(item_path) - logger.info(f"{target_name} item: {item_path} has been successfully removed") + if os.path.exists(item_path): + shutil.rmtree(item_path) + logger.info(f"{target_name} item: {item_path} has been successfully removed") - self.index.rm(item_meta, target_name, item_path) + self.index.rm(item_meta, target_name, item_path) return { "return": 0, diff --git a/pyproject.toml b/pyproject.toml index 770c808..ab8ce58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mlcflow" -version = "0.1.16" +version = "0.1.17" description = "An automation interface for ML applications" authors = [ { name = "MLCommons", email = "systems@mlcommons.org" } From e5ed56954303e3d9b0d3e4318d265e4b55c83a29 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 28 Jan 2025 05:18:43 +0530 Subject: [PATCH 16/37] Made --tags= optional for mlcr --- mlc/main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mlc/main.py b/mlc/main.py index 89ba688..88f3b61 100644 --- a/mlc/main.py +++ b/mlc/main.py @@ -1314,7 +1314,7 @@ def main(): pull_parser.add_argument('extra', nargs=argparse.REMAINDER, help='Extra options (e.g., -v)') # Script and Cache-specific subcommands - for action in ['run', 'test', 'show', 'update', 'list', 'find', 'search', 'rm', 'cp', 'mv']: + for action in ['run', 'test', 'show', 'list', 'find', 'search', 'rm', 'cp', 'mv']: action_parser = subparsers.add_parser(action, help=f'{action} a target.') action_parser.add_argument('target', choices=['repo', 'script', 'cache'], help='Target type (repo, script, cache).') # the argument given after target and before any extra options like --tags will be stored in "details" @@ -1347,6 +1347,9 @@ def main(): if hasattr(args, 'repo') and args.repo: run_args['repo'] = args.repo + if hasattr(args, 'details') and args.details and "," in args.details and not run_args.get("tags") and args.target in ["script", "cache"]: + run_args['tags'] = args.details + if args.command in ["cp", "mv"]: run_args['target'] = args.target if hasattr(args, 'details') and args.details: @@ -1368,4 +1371,3 @@ def main(): if __name__ == '__main__': main() -#__version__ = "0.0.1" From 9c8a42a16c3da2b435407e4142d522faa48f0d62 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 28 Jan 2025 05:41:02 +0530 Subject: [PATCH 17/37] Use global logger --- mlc/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlc/main.py b/mlc/main.py index 88f3b61..ff6697a 100644 --- a/mlc/main.py +++ b/mlc/main.py @@ -242,7 +242,7 @@ def unregister_repo(self, repo_path): def __init__(self): setup_logging(log_path='.',log_file='mlc-log.txt') - #self.logger = logging.getLogger() + self.logger = logger temp_repo = os.environ.get('MLC_REPOS','').strip() if temp_repo == '': self.repos_path = os.path.expanduser('~/MLC/repos') From fe859c53af10324d8ef7d08e1f810561fba2f70a Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 29 Jan 2025 02:45:10 +0530 Subject: [PATCH 18/37] Fixed logger --- mlc/main.py | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mlc/main.py b/mlc/main.py index ff6697a..493c619 100644 --- a/mlc/main.py +++ b/mlc/main.py @@ -35,12 +35,12 @@ def format(self, record): # Set up logging configuration def setup_logging(log_path = 'mlc',log_file = 'mlc-log.txt'): - logFormatter = ColoredFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + logFormatter = ColoredFormatter('[%(asctime)s %(filename)s:%(lineno)d %(levelname)s] - %(message)s') logger.setLevel(logging.INFO) # File hander for logging in file in the specified path file_handler = logging.FileHandler("{0}/{1}".format(log_path, log_file)) - file_handler.setFormatter(logFormatter) + file_handler.setFormatter(logging.Formatter('[%(asctime)s %(filename)s:%(lineno)d %(levelname)s] - %(message)s')) logger.addHandler(file_handler) # Console handler for logging on console @@ -617,7 +617,7 @@ def cp(self, run_args): if res['return'] > 0: return res - logging.info(f"{action_target} {src_item_path} copied to {target_item_path}") + logger.info(f"{action_target} {src_item_path} copied to {target_item_path}") return {'return': 0} @@ -625,7 +625,7 @@ def copy_item(self, source_path, destination_path): try: # Copy the source folder to the destination shutil.copytree(source_path, destination_path) - logging.info(f"Folder successfully copied from {source_path} to {destination_path}") + logger.info(f"Folder successfully copied from {source_path} to {destination_path}") except FileExistsError: return {'return': 1, 'error': f"Destination folder {destination_path} already exists."} except FileNotFoundError: diff --git a/pyproject.toml b/pyproject.toml index ab8ce58..314d421 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mlcflow" -version = "0.1.17" +version = "0.1.19" description = "An automation interface for ML applications" authors = [ { name = "MLCommons", email = "systems@mlcommons.org" } From 31210d2a4a3a7a8133bb0a2273c42338515fb133 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 31 Jan 2025 02:00:02 +0530 Subject: [PATCH 19/37] Support CacheAction inside ScriptAutomation --- mlc/main.py | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/mlc/main.py b/mlc/main.py index 493c619..562c3d9 100644 --- a/mlc/main.py +++ b/mlc/main.py @@ -59,6 +59,7 @@ class Action: current_repo_path = None #mlc = None repos = [] #list of Repo objects + def execute(self, args): raise NotImplementedError("Subclasses must implement the execute method") @@ -677,6 +678,7 @@ def search(self, i): return {'return': 0, 'list': result} + class Index: def __init__(self, repos_path, repos): """ @@ -913,8 +915,6 @@ def _load_meta(self): logger.info(f"No meta file found in {self.path}") def search(self, i): - #logger.info(i) - #logger.info(self) indices = self.action_object.index.indices target_index = indices.get(self.automation_type) result = [] @@ -928,9 +928,6 @@ def search(self, i): if set(p_tags).issubset(set(c_tags)) and set(n_tags).isdisjoint(set(c_tags)): it = Item(res['path'], res['repo']) result.append(it) - #logger.info(f"p_tags={p_tags}") - #logger.info(f"n_tags={n_tags}") - #for key in indices: #logger.info(result) return {'return': 0, 'list': result} #indices @@ -1109,16 +1106,23 @@ def rm(self, run_args): class ScriptAction(Action): + parent = None + def __init__(self, parent=None): + if parent is None: + parent = default_parent + self.parent = parent + self.__dict__.update(vars(parent)) + def search(self, i): if not i.get('target_name'): i['target_name'] = "script" - return super().search(i) + return self.parent.search(i) def rm(self, i): if not i.get('target_name'): i['target_name'] = "script" logger.debug(f"Removing script with input: {i}") - return super().rm(i) + return self.parent.rm(i) def dynamic_import_module(self, script_path): # Validate the script_path @@ -1170,7 +1174,7 @@ def call_script_module_function(self, function_name, run_args): if result['return'] > 0: error = result.get('error', "") - raise ScriptExecutionError(f"Script docker execution failed. Error : {error}") + raise ScriptExecutionError(f"Script {function_name} execution failed. Error : {error}") return result else: logger.info("ScriptAutomation class not found in the script.") @@ -1194,16 +1198,23 @@ class ScriptExecutionError(Exception): pass class CacheAction(Action): + + def __init__(self, parent=None): + if parent is None: + parent = default_parent + #super().__init__(parent) + self.parent = parent + self.__dict__.update(vars(parent)) def search(self, i): i['target_name'] = "cache" - logger.debug(f"Searching for cache with input: {i}") - return super().search(i) + #logger.debug(f"Searching for cache with input: {i}") + return self.parent.search(i) def rm(self, i): i['target_name'] = "cache" - logger.debug(f"Removing cache with input: {i}") - return super().rm(i) + #logger.debug(f"Removing cache with input: {i}") + return self.parent.rm(i) def show(self, run_args): self.action_type = "cache" @@ -1297,6 +1308,11 @@ def mlcr(): # Call the main function main() +default_parent = None + +if default_parent is None: + default_parent = Action() + # Main CLI function def main(): parser = argparse.ArgumentParser(prog='mlc', description='A CLI tool for managing repos, scripts, and caches.') From 55a4f15d499838ac19e53a760f980ee9fd56763d Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 30 Jan 2025 20:41:56 +0000 Subject: [PATCH 20/37] Update README.md --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 39449fe..e59da72 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,10 @@ **MLCFlow: Simplifying MLPerf Automations** +[![License](https://img.shields.io/badge/License-Apache%202.0-green)](LICENSE.md) +[![Downloads](https://static.pepy.tech/badge/mlcflow)](https://pepy.tech/project/mlcflow) +[![MLC script automation features test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml/badge.svg?cache-bust=1)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml) + + MLCFlow is a versatile CLI and Python interface developed by MLCommons in collaboration with a dedicated team of volunteers (see [Contributors](CONTRIBUTORS.md)). It serves as a streamlined replacement for the [CMind](https://github.com/mlcommons/ck/tree/master/cm) tool, designed to drive the automation workflows of MLPerf benchmarks more efficiently. The concept behind CMind originated from **Grigori Fursin**, while the **MLPerf Automations** project was created by **Grigori Fursin** and **Arjun Suresh**, whose collective contributions laid the foundation for modernizing MLPerf benchmarking tools. From 9fbac9251e7b7f82e1dbe81e2d0d3d1ce937bd6e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 30 Jan 2025 22:51:01 +0000 Subject: [PATCH 21/37] Update test-mlc-core-actions.yaml --- .github/workflows/test-mlc-core-actions.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index 9278c04..967b8d5 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -117,8 +117,8 @@ jobs: - name: Test 7 - find cache - More than one cache present run: | - mlc find cache --tags=get,imagenet-aux 2>&1 | tee test7.log - mlc find cache --tags=detect,os 2>&1 | tee test5.log + mlc search cache --tags=get,imagenet-aux 2>&1 | tee test7.log + mlc search cache --tags=detect,os 2>&1 | tee test5.log if grep -q "No cache entry found for the specified tags!" test5.log; then exit 1 fi From af3422d4a251b11b69613d6c84c9a19dc4b57d24 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 30 Jan 2025 22:53:02 +0000 Subject: [PATCH 22/37] Update test-mlc-core-actions.yaml From 60c5f997c8642af6f77e40212dda48eba157aabd Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 30 Jan 2025 22:54:38 +0000 Subject: [PATCH 23/37] Update main.py --- mlc/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlc/main.py b/mlc/main.py index e1b7f57..8ef7340 100644 --- a/mlc/main.py +++ b/mlc/main.py @@ -1211,6 +1211,7 @@ def search(self, i): i['target_name'] = "cache" #logger.debug(f"Searching for cache with input: {i}") return self.parent.search(i) + find = search def rm(self, i): i['target_name'] = "cache" From e79bee502a5859915570b194a44358fce5ea2a80 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 31 Jan 2025 04:36:53 +0530 Subject: [PATCH 24/37] Added process_console_output function --- mlc/main.py | 12 ++++++++++++ pyproject.toml | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/mlc/main.py b/mlc/main.py index 562c3d9..17911b2 100644 --- a/mlc/main.py +++ b/mlc/main.py @@ -1210,6 +1210,7 @@ def search(self, i): i['target_name'] = "cache" #logger.debug(f"Searching for cache with input: {i}") return self.parent.search(i) + find = search def rm(self, i): i['target_name'] = "cache" @@ -1313,6 +1314,16 @@ def mlcr(): if default_parent is None: default_parent = Action() +def process_console_output(res, target, action, run_args): + if action == "find": + if len(res['list']) == 0: + logger.warn(f"""No {target} entry found for the specified tags: {run_args['tags']}!""") + else: + for item in res['list']: + logger.info(f"""Item path: {item.path}""") + + + # Main CLI function def main(): parser = argparse.ArgumentParser(prog='mlc', description='A CLI tool for managing repos, scripts, and caches.') @@ -1381,6 +1392,7 @@ def main(): res = method(run_args) if res['return'] > 0: logger.error(res.get('error', f"Error in {action}")) + process_console_output(res, args.target, args.command, run_args) else: logger.info(f"Error: '{args.command}' is not supported for {args.target}.") diff --git a/pyproject.toml b/pyproject.toml index 314d421..0582c4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mlcflow" -version = "0.1.19" +version = "0.1.20" description = "An automation interface for ML applications" authors = [ { name = "MLCommons", email = "systems@mlcommons.org" } From bc80c4ac6207c3928a97f1a3822becf8c8800dc8 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 30 Jan 2025 23:07:27 +0000 Subject: [PATCH 25/37] Update test-mlc-core-actions.yaml --- .github/workflows/test-mlc-core-actions.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index 967b8d5..5791d6c 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -106,7 +106,7 @@ jobs: - name: Test 5 - find cache - Cache not present run: | mlc find cache --tags=detect,os 2>&1 | tee test5.log - if ! grep -q "No cache entry found for the specified tags!" test5.log; then + if ! grep -q "No cache entry found for the specified tags:" test5.log; then exit 1 fi @@ -119,6 +119,6 @@ jobs: run: | mlc search cache --tags=get,imagenet-aux 2>&1 | tee test7.log mlc search cache --tags=detect,os 2>&1 | tee test5.log - if grep -q "No cache entry found for the specified tags!" test5.log; then + if grep -q "No cache entry found for the specified tags:" test5.log; then exit 1 fi From ab5818413460b6c03f18a792a4e6b71061eca9b9 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 30 Jan 2025 23:10:44 +0000 Subject: [PATCH 26/37] Update CONTRIBUTORS.md --- CONTRIBUTORS.md | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 9563fea..d048893 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -28,14 +28,10 @@ Once your contribution exceeds 50 lines of code (in total), we will: ## Current Contributors -- **[Arjun Suresh]** - *Initial Development* -- **[Anandhu Sooraj]** - *Initial Development* -- **[Shaik Masthan]** - *Initial Development* -- **[Sahil Avaran]** - *Initial Development*, added logging -- **[Ayushi Chaudhari]** - *Initial Development* -- **[Contributor 1]** - Added core features for CLI workflow. -- **[Contributor 2]** - Refactored caching system and enhanced performance. -- **[Contributor 3]** - Improved documentation and code structure. +- **[Arjun Suresh]** - *Initial Development Discussions, {Script,Cache} Action implementations* +- **[Anandhu Sooraj]** - *Initial Development Discussions, RepoAction implmentation, Github Tests* +- **[Shaik Masthan]** - *Initial Development Discussions* +- **[Sahil Avaran]** - *Initial Development Discussions*, added logging - **[Your Name Here]** - This could be you! 🎉 --- From 84cbf996df534dc15f41840ed17be0d1d2e91cce Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 31 Jan 2025 04:54:13 +0530 Subject: [PATCH 27/37] Cleanup --- mlc/main.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mlc/main.py b/mlc/main.py index 17911b2..5141bbc 100644 --- a/mlc/main.py +++ b/mlc/main.py @@ -84,9 +84,7 @@ def access(self, options): action_target_split = action_target.split(",") action_target = action_target_split[0] - #print(f"action_target = {action_target}") action = actions.get(action_target) - #logger.info(f"action = {action}") if action: if hasattr(action, action_name): @@ -359,7 +357,7 @@ def add(self, i): def rm(self, i): """ - Removes an item to the repository. + Removes an item from the repository. Args: i (dict): Input dictionary with the following keys: @@ -405,10 +403,10 @@ def rm(self, i): if len(res['list']) == 0: return {'return': 1, 'error': f'No {target_name} found for {inp}'} elif len(res['list']) > 1: - print(f"More than 1 {target_name} found for {inp}:") + logger.info(f"More than 1 {target_name} found for {inp}:") if not i.get('all'): for idx, item in enumerate(res["list"]): - print(f"{idx}. Path: {item.path}, Meta: {item.meta}") + logger.info(f"{idx}. Path: {item.path}, Meta: {item.meta}") user_choice = input("Would you like to proceed with all items? (yes/no): ").strip().lower() if user_choice not in ['yes', 'y']: From c44e5ae521a8e2c427e5e0d1e383dbff1c5d2c8a Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Fri, 31 Jan 2025 09:40:11 +0530 Subject: [PATCH 28/37] List all target and corresponding actions --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e59da72..5dfc032 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,16 @@ mlc [options] #### 3. **Cache** - Handle cached data, including cleanup or inspection. -Each target has its own set of specific actions to tailor automation workflows. +Each target has its own set of specific actions to tailor automation workflows as specified below. + + + +| Target | Action | +|--------|-----------------| +| script | run, search, rm, mv, cp, add, list, test, docker, show | +| cache | search, rm, list, show | +| repo | pull, search, rm, list | + ## CM compatibility layer From 243efee61257f51671063e16c10f1aeeb6648b0c Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Fri, 31 Jan 2025 10:02:54 +0530 Subject: [PATCH 29/37] add find action --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5dfc032..8896ea0 100644 --- a/README.md +++ b/README.md @@ -54,8 +54,8 @@ Each target has its own set of specific actions to tailor automation workflows a | Target | Action | |--------|-----------------| | script | run, search, rm, mv, cp, add, list, test, docker, show | -| cache | search, rm, list, show | -| repo | pull, search, rm, list | +| cache | search, rm, list, show, find | +| repo | pull, search, rm, list, find | ## CM compatibility layer From 5bd464705430d760a4b80bdc178039765de4c195 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 1 Feb 2025 05:14:18 +0530 Subject: [PATCH 30/37] Support show action for cache, printd with begin spaces --- mlc/main.py | 50 +++++++++++++++++++++++++++++++++++--------------- mlc/utils.py | 28 ++++++++++++++++++---------- 2 files changed, 53 insertions(+), 25 deletions(-) diff --git a/mlc/main.py b/mlc/main.py index ffb280e..8bdaccb 100644 --- a/mlc/main.py +++ b/mlc/main.py @@ -689,7 +689,7 @@ def __init__(self, repos_path, repos): self.repos = repos #logger.info(repos) - logger.info(f"Repos path for Index: {self.repos_path}") + logger.debug(f"Repos path for Index: {self.repos_path}") self.index_files = { "script": os.path.join(repos_path, "index_script.json"), "cache": os.path.join(repos_path, "index_cache.json"), @@ -832,7 +832,7 @@ def _save_indices(self): try: with open(output_file, "w") as f: json.dump(index_data, f, indent=4, cls=CustomJSONEncoder) - logger.info(f"Shared index for {folder_type} saved to {output_file}.") + logger.debug(f"Shared index for {folder_type} saved to {output_file}.") except Exception as e: logger.error(f"Error saving shared index for {folder_type}: {e}") @@ -1218,17 +1218,49 @@ def rm(self, i): def show(self, run_args): self.action_type = "cache" - logger.info(f"Showing cache with identifier: {args.details}") + res = self.search(run_args) + logger.info(f"Showing cache with tags: {run_args.get('tags')}") + cached_meta_keys_to_show = ["uid", "tags", "dependent_cached_path", "associated_script_item"] + cached_state_keys_to_show = ["new_env", "new_state", "version"] + for item in res['list']: + print(f"""Location: {item.path}: +Cache Meta:""") + for key in cached_meta_keys_to_show: + if key in item.meta: + print(f""" {key}: {item.meta[key]}""") + print("""Cached State:""") + cached_state_meta_file = os.path.join(item.path, "mlc-cached-state.json") + try: + # Load and parse the JSON file containing the cached state + with open(cached_state_meta_file, 'r') as file: + meta = json.load(file) + for key in cached_state_keys_to_show: + if key in meta: + print(f""" {key}:""", end="") + if meta[key] and isinstance(meta[key], dict): + print("") + utils.printd(meta[key], yaml=False, sort_keys=True, begin_spaces=8) + else: + print(f""" {meta[key]}""") + except json.JSONDecodeError as e: + logger.error(f"Error decoding JSON: {e}") + print("......................................................") + print("") + + return {'return': 0} def list(self, args): logger.info("Listing all caches.") + return {'return': 0} class ExperimentAction(Action): def show(self, args): logger.info(f"Showing experiment with identifier: {args.details}") + return {'return': 0} def list(self, args): logger.info("Listing all experiments.") + return {'return': 0} class CfgAction(Action): @@ -1261,18 +1293,6 @@ def load(self, args): return {'return': 0, 'config': self.cfg} - def unload(self, args): - """ - Unload the configuration. - - Args: - args (dict): Optional, could be used to specify a particular configuration to unload. - """ - if hasattr(self, 'config'): - logger.info(f"Unloading configuration.") - del self.config # Remove the loaded config from memory - else: - logger.error("Error: No configuration is currently loaded.") actions = { 'repo': RepoAction, diff --git a/mlc/utils.py b/mlc/utils.py index fe43deb..56c095f 100644 --- a/mlc/utils.py +++ b/mlc/utils.py @@ -178,16 +178,16 @@ def run_system_cmd(i): return {'return': 1, 'error': f"Unexpected error occurred: {str(e)}"} -def print_env(env, yaml=True, sort_keys=True): - printd(env, yaml=yaml, sort_keys=sort_keys) +def print_env(env, yaml=True, sort_keys=True, begin_spaces=None): + printd(env, yaml=yaml, sort_keys=sort_keys, begin_spaces=begin_spaces) -def printd(mydict, yaml=True, sort_keys=True): +def printd(mydict, yaml=True, sort_keys=True, begin_spaces = None): if yaml: - print_formatted_yaml(mydict, sort_keys=True) + print_formatted_yaml(mydict, sort_keys=sort_keys, begin_spaces=begin_spaces) else: - print_formatted_json(mydict) + print_formatted_json(mydict, sort_keys = sort_keys, begin_spaces = begin_spaces) -def print_formatted_yaml(data, sort_keys=True): +def print_formatted_yaml(data, sort_keys=True, begin_spaces = None): """ Converts a Python dictionary (or other serializable object) to a YAML-formatted string and prints it in a human-readable format. @@ -206,11 +206,15 @@ def print_formatted_yaml(data, sort_keys=True): sort_keys=False, allow_unicode=True ) - print(yaml_string) + if not begin_spaces: + print(yaml_string) + else: + indented_yaml_str = "\n".join(" " * begin_spaces + line for line in yaml_string.splitlines()) + print(indented_yaml_str) except yaml.YAMLError as e: print(f"Error formatting YAML: {e}") -def print_formatted_json(data): +def print_formatted_json(data, sort_keys = True, begin_spaces = None): """ Prints a dictionary as a formatted JSON string. @@ -221,8 +225,12 @@ def print_formatted_json(data): None """ try: - formatted_json = json.dumps(data, indent=4, sort_keys=True) - print(formatted_json) + formatted_json = json.dumps(data, indent=4, sort_keys=sort_keys) + if not begin_spaces: + print(formatted_json) + else: + indented_json_str = "\n".join(" " * begin_spaces + line for line in formatted_json.splitlines()) + print(indented_json_str) except TypeError as e: print(f"Error formatting JSON: {e}") From 7afa9fb7a91d2843e9cb1065c2265e422e93ea3e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 2 Feb 2025 01:30:47 +0000 Subject: [PATCH 31/37] Update architectural diagram --- README.md | 121 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 91 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 8896ea0..32bcb93 100644 --- a/README.md +++ b/README.md @@ -64,30 +64,82 @@ MLC has a compatibility layer where by it supports MLCommons CM automations - Sc ## Architectural Diagram +```mermaid +I cannot generate an architectural diagram directly, but I can help you understand the key components and structure of the code, which you can use to create a mermaid diagram manually. + +Here are the key elements and relationships in the code: + +1. **Classes:** + - `Action` + - `RepoAction` (extends `Action`) + - `ScriptAction` (extends `Action`) + - `CacheAction` (extends `Action`) + - `ExperimentAction` (extends `Action`) + - `CfgAction` (extends `Action`) + - `Index` + - `Item` + - `Repo` + - `Automation` + +2. **Functions:** + - `setup_logging` + - `get_action` + - `access` + - `main` + - `mlcr` + - `process_console_output` + +3. **Relationships:** + - `Action` is the base class for specific action classes (`RepoAction`, `ScriptAction`, `CacheAction`, `ExperimentAction`, `CfgAction`). + - `Index` is initialized with a list of repositories and builds indices. + - `Item` and `Repo` are used to represent individual items and repositories. + - `Automation` is used to manage automation tasks and loads metadata. + +Here's a basic representation you can use in a mermaid diagram: + ```mermaid classDiagram class Action { - -repos_path : str - -cfg : dict - -repos : list +execute(args) +access(options) - +asearch(i) +find_target_folder(target) +load_repos_and_meta() +load_repos() + +conflicting_repo(repo_meta) + +register_repo(repo_meta) + +unregister_repo(repo_path) + +add(i) + +rm(i) + +save_new_meta(i, item_id, item_name, target_name, item_path, repo) + +update(i) + +is_uid(name) + +cp(run_args) + +copy_item(source_path, destination_path) + +search(i) } class RepoAction { + +find(run_args) +github_url_to_user_repo_format(url) - +pull(args) - +list(args) + +pull_repo(repo_url, branch, checkout) + +pull(run_args) + +list(run_args) + +rm(run_args) } class ScriptAction { - +run(args) + +search(i) + +rm(i) + +dynamic_import_module(script_path) + +call_script_module_function(function_name, run_args) + +docker(run_args) + +run(run_args) + +test(run_args) +list(args) } class CacheAction { - +show(args) + +search(i) + +find(i) + +rm(i) + +show(run_args) +list(args) } class ExperimentAction { @@ -96,18 +148,32 @@ classDiagram } class CfgAction { +load(args) - +unload(args) + } + class Index { + +add(meta, folder_type, path, repo) + +get_index(folder_type, uid) + +update(meta, folder_type, path, repo) + +rm(meta, folder_type, path) + +build_index() + } + class Item { + +meta + +path + +repo + +_load_meta() } class Repo { - -path : str - -meta : dict + +path + +meta + +_load_meta() } class Automation { - -cmind : Action - +execute(args) - } - class Index { - +find() + +action_object + +automation_type + +meta + +path + +_load_meta() + +search(i) } Action <|-- RepoAction @@ -115,19 +181,14 @@ classDiagram Action <|-- CacheAction Action <|-- ExperimentAction Action <|-- CfgAction - Repo "1" *-- Action - Automation "1" *-- Action - - class get_action { - +actions : dict - +get_action(target) - } - - main --> get_action - get_action --> RepoAction - get_action --> ScriptAction - get_action --> CacheAction - get_action --> ExperimentAction - get_action --> CfgAction + RepoAction o-- Repo + ScriptAction o-- Automation + CacheAction o-- Index + ExperimentAction o-- Index + CfgAction o-- Index + Index o-- Repo + Index o-- Item + Item o-- Repo + Automation o-- Action ``` From 06890c90f973ebb2d1aaf945cc04a8bb3510436b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 2 Feb 2025 01:31:23 +0000 Subject: [PATCH 32/37] Update README.md --- README.md | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/README.md b/README.md index 32bcb93..49aedbe 100644 --- a/README.md +++ b/README.md @@ -64,39 +64,6 @@ MLC has a compatibility layer where by it supports MLCommons CM automations - Sc ## Architectural Diagram -```mermaid -I cannot generate an architectural diagram directly, but I can help you understand the key components and structure of the code, which you can use to create a mermaid diagram manually. - -Here are the key elements and relationships in the code: - -1. **Classes:** - - `Action` - - `RepoAction` (extends `Action`) - - `ScriptAction` (extends `Action`) - - `CacheAction` (extends `Action`) - - `ExperimentAction` (extends `Action`) - - `CfgAction` (extends `Action`) - - `Index` - - `Item` - - `Repo` - - `Automation` - -2. **Functions:** - - `setup_logging` - - `get_action` - - `access` - - `main` - - `mlcr` - - `process_console_output` - -3. **Relationships:** - - `Action` is the base class for specific action classes (`RepoAction`, `ScriptAction`, `CacheAction`, `ExperimentAction`, `CfgAction`). - - `Index` is initialized with a list of repositories and builds indices. - - `Item` and `Repo` are used to represent individual items and repositories. - - `Automation` is used to manage automation tasks and loads metadata. - -Here's a basic representation you can use in a mermaid diagram: - ```mermaid classDiagram class Action { From c5e49137dbfae8b6371edc933e142895747f8b0f Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 2 Feb 2025 01:32:47 +0000 Subject: [PATCH 33/37] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 49aedbe..29a84e9 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,6 @@ MLC has a compatibility layer where by it supports MLCommons CM automations - Sc ```mermaid classDiagram class Action { - +execute(args) +access(options) +find_target_folder(target) +load_repos_and_meta() From 8dd3706149619f0dfd16a8759376d25ed8bd4bb0 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 2 Feb 2025 01:41:38 +0000 Subject: [PATCH 34/37] Update README.md --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 29a84e9..105d15f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,11 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-green)](LICENSE.md) [![Downloads](https://static.pepy.tech/badge/mlcflow)](https://pepy.tech/project/mlcflow) + +[![MLC core actions test](https://github.com/mlcommons/mlcflow/actions/workflows/test-mlc-core-actions.yaml/badge.svg)](https://github.com/mlcommons/mlcflow/actions/workflows/test-mlc-core-actions.yaml) [![MLC script automation features test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml/badge.svg?cache-bust=1)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml) +[![MLPerf inference resnet50](https://github.com/mlcommons/mlcflow/actions/workflows/mlperf-inference-resnet50.yml/badge.svg)](https://github.com/mlcommons/mlcflow/actions/workflows/mlperf-inference-resnet50.yml) +[![MLPerf inference bert (deepsparse, tf, onnxruntime, pytorch)](https://github.com/mlcommons/mlcflow/actions/workflows/mlperf-inference-bert.yml/badge.svg)](https://github.com/mlcommons/mlcflow/actions/workflows/mlperf-inference-bert.yml) MLCFlow is a versatile CLI and Python interface developed by MLCommons in collaboration with a dedicated team of volunteers (see [Contributors](CONTRIBUTORS.md)). It serves as a streamlined replacement for the [CMind](https://github.com/mlcommons/ck/tree/master/cm) tool, designed to drive the automation workflows of MLPerf benchmarks more efficiently. @@ -60,7 +64,7 @@ Each target has its own set of specific actions to tailor automation workflows a ## CM compatibility layer -MLC has a compatibility layer where by it supports MLCommons CM automations - Script, Cache and Experiment. +MLC started with a compatibility layer where by it supported MLCommons CM automations - Script, Cache and Experiment. Now, MLCFLow has just the Script Automation which is an extension of the Script Automation from CM but with a cleaner integration of Cache Automation and Docker and Test extensions. The old CM scripts are now updated with the latest MLCFlow scripts in the [MLPerf Automations](https://github.com/mlcommons/mlperf-automations/tree/main/script) repository. ## Architectural Diagram From 8940a7082b00cf2903d315500aa45010539b3266 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 2 Feb 2025 01:57:10 +0000 Subject: [PATCH 35/37] Update README.md --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 105d15f..635fe52 100644 --- a/README.md +++ b/README.md @@ -16,12 +16,10 @@ The concept behind CMind originated from **Grigori Fursin**, while the **MLPerf ### Key Features Building upon the core idea of CMind—wrapping native scripts with Python wrappers and YAML metadata—MLCFlow focuses exclusively on key automation components: **Scripts**, along with its complementary modules: **Cache**, **Docker**, and **Experiments**. This targeted design simplifies both implementation and interface, enabling a more user-friendly experience. -### Status -MLCFlow is currently a **work in progress** and not yet ready for production use. If you are interested in contributing to its initial development, please email [arjun@mlcommons.org](mailto:arjun@mlcommons.org) to join the daily development meetings and see [Issues](https://github.com/mlcommons/mlcflow/issues) for seeing the development progress. - -### Getting Started -For early contributors, please use the `mlc` branch of the [MLPerf Automations](https://github.com/mlcommons/mlperf-automations) repository while working with MLCFlow. +--- +### Status +MLCFlow is now fully equipped for workflow development, with complete support for all previously used CM scripts in MLPerf inference automation. If you're interested in discussions, join the MLCommons Benchmark Infra [Discord channel](https://discord.gg/T9rHVwQFNX), and check out the latest progress in [Issues](https://github.com/mlcommons/mlcflow/issues). --- From 8631a42de7f890594a93e8ea5f66d5661821be40 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 2 Feb 2025 17:42:37 +0000 Subject: [PATCH 36/37] Update test-mlc-core-actions.yaml --- .github/workflows/test-mlc-core-actions.yaml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-mlc-core-actions.yaml b/.github/workflows/test-mlc-core-actions.yaml index 5791d6c..71761f3 100644 --- a/.github/workflows/test-mlc-core-actions.yaml +++ b/.github/workflows/test-mlc-core-actions.yaml @@ -118,7 +118,19 @@ jobs: - name: Test 7 - find cache - More than one cache present run: | mlc search cache --tags=get,imagenet-aux 2>&1 | tee test7.log - mlc search cache --tags=detect,os 2>&1 | tee test5.log - if grep -q "No cache entry found for the specified tags:" test5.log; then + if grep -q "No cache entry found for the specified tags:" test7.log; then exit 1 fi + + - name: Test 8 - show cache - More than one cache present + run: | + mlc show cache --tags=get,imagenet-aux 2>&1 | tee test7.log + + - name: Test 9 - rm cache - More than one cache present + run: | + mlc rm cache --tags=get,imagenet-aux 2>&1 --all | tee test7.log + + - name: Test 10 - cp script - Copy mlc script + run: | + mlc cp script detect-os my-os-detect + From 2b942964cfb7d1fbcb785cbd324429b23de2cdf3 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 3 Feb 2025 18:07:41 +0000 Subject: [PATCH 37/37] Update main.py | Fix logpath --- mlc/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlc/main.py b/mlc/main.py index 8bdaccb..f81be4f 100644 --- a/mlc/main.py +++ b/mlc/main.py @@ -33,7 +33,7 @@ def format(self, record): logger = logging.getLogger(__name__) # Set up logging configuration -def setup_logging(log_path = 'mlc',log_file = 'mlc-log.txt'): +def setup_logging(log_path = os.getcwd(),log_file = 'mlc-log.txt'): logFormatter = ColoredFormatter('[%(asctime)s %(filename)s:%(lineno)d %(levelname)s] - %(message)s') logger.setLevel(logging.INFO)