Skip to content

Commit

Permalink
fix(utils): handle deleted files when calculating access times (reana…
Browse files Browse the repository at this point in the history
  • Loading branch information
mdonadoni committed Jan 15, 2024
1 parent 709a685 commit 568874f
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 18 deletions.
23 changes: 16 additions & 7 deletions reana_commons/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import os
import platform
import shutil
import stat
import subprocess
import sys
import time
Expand All @@ -25,6 +26,7 @@
import click
import requests

from reana_commons import workspace
from reana_commons.config import (
REANA_COMPONENT_NAMING_SCHEME,
REANA_COMPONENT_PREFIX,
Expand Down Expand Up @@ -113,13 +115,20 @@ def calculate_job_input_hash(job_spec, workflow_json):
def calculate_file_access_time(workflow_workspace):
    """Calculate access times of files in workspace.

    Symlinks are skipped, and so are entries that cannot be stat-ed
    because they no longer exist (for example, files deleted or moved
    while the workspace is being traversed).

    :param workflow_workspace: Path to the root of the workflow workspace.
    :return: Dictionary mapping each file path (joined onto
        ``workflow_workspace``) to its ``st_atime`` access time.
    """
    access_times = {}
    for file_path in workspace.walk(workflow_workspace, include_dirs=False):
        try:
            # lstat does not follow symlinks, so links can be detected below
            file_stat = workspace.lstat(workflow_workspace, file_path)
        except FileNotFoundError:
            # The entry disappeared between being listed and being stat-ed;
            # report it and keep going rather than aborting the whole scan.
            # ``logging.warning`` replaces the deprecated ``logging.warn``.
            logging.warning(
                f"Could not get stats of '{file_path}' in '{workflow_workspace}' "
                "while calculating access times. "
                "Maybe file was deleted or moved?"
            )
            continue
        # skip symlinks
        if stat.S_ISLNK(file_stat.st_mode):
            continue
        full_path = os.path.join(workflow_workspace, file_path)
        access_times[full_path] = file_stat.st_atime
    return access_times


Expand Down
14 changes: 11 additions & 3 deletions reana_commons/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,18 @@ def walk(
dir_fd=root_fd, topdown=topdown
):
for dirname in dirnames:
if include_dirs or stat.S_ISLNK(
os.lstat(dirname, dir_fd=dirfd).st_mode
):
if include_dirs:
yield str(path.joinpath(dirpath, dirname))
else:
# dirname could be a symlink, if so we return it even if
# include_dirs is False, given that we treat symlinks as files
try:
st = os.lstat(dirname, dir_fd=dirfd)
except FileNotFoundError:
# we skip this path, as it does not exist anymore
continue
if stat.S_ISLNK(st.st_mode):
yield str(path.joinpath(dirpath, dirname))
for filename in filenames:
yield str(path.joinpath(dirpath, filename))
finally:
Expand Down
26 changes: 18 additions & 8 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
"""REANA-Commons utilities testing."""

import os
import pkg_resources
import shutil
import time
from hashlib import md5
from pathlib import Path

import pkg_resources
import pytest
from pytest_reana.fixtures import sample_workflow_workspace

Expand Down Expand Up @@ -100,13 +100,23 @@ def test_calculate_job_input_hash():
) == calculate_job_input_hash(job_spec_2, workflow_json)


def test_calculate_file_access_time(tmp_path):
    """Test calculate_file_access_time."""
    # Allow one second of slack on either side of the expected time window.
    earliest_allowed = time.time() - 1

    regular_file = tmp_path / "a.txt"
    regular_file.write_text("content of a")

    nested_dir = tmp_path / "subdir"
    nested_dir.mkdir()
    nested_file = nested_dir / "b.txt"
    nested_file.write_text("content of b")

    # A symlink and an empty directory: neither is expected in the result.
    (tmp_path / "c.txt").symlink_to("a.txt")
    (tmp_path / "another_subdir").mkdir()

    latest_allowed = time.time() + 1

    access_times = calculate_file_access_time(str(tmp_path))

    assert len(access_times) == 2
    assert str(regular_file) in access_times
    assert str(nested_file) in access_times
    for access_time in access_times.values():
        assert earliest_allowed <= access_time <= latest_allowed


def test_format_cmd():
Expand Down

0 comments on commit 568874f

Please sign in to comment.