Skip to content

Commit 936c976

Browse files
committed
Merge branch 'develop' into chore/update-linting-tools
2 parents 292eb86 + 504d784 commit 936c976

5 files changed

Lines changed: 78 additions & 18 deletions

File tree

docs/guide/caching.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -236,10 +236,10 @@ Maximum-cache-size
236236

237237
Maximum-cache-disk-usage
238238
The ``maximum-cache-disk-usage`` setting ensures that earthkit-data
239-
leaves does not fill your disk.
240-
Its values sets the maximum disk usage as % of the filesystem containing the cache
241-
directory. When the disk space goes below this limit, earthkit-data triggers
242-
its cache cleaning mechanism before downloading additional data.
239+
does not fill your disk. It specifies the maximum disk usage (as a percentage) on the filesystem
240+
containing the cache directory. When the total disk usage (of the whole filesystem, not of the cache alone) goes above
241+
this limit, earthkit-data triggers its cache cleaning mechanism to free up space before
242+
downloading additional data.
243243
The value of maximum-cache-disk-usage is relative (such as "90%" or "100%").
244244
To disable it use None.
245245

docs/release_notes/version_0.19_updates.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22
Version 0.19 Updates
33
/////////////////////////
44

5+
Version 0.19.2
6+
===============
7+
8+
Fixes
9+
++++++++
10+
11+
- Fixed a bug in the cache where files were not deleted when the cache size exceeded the limit and cache-policy was set to "user" or "temporary" (:pr:`937`).
512

613

714
Version 0.19.1

src/earthkit/data/core/caching.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,9 @@ def _latest_date(self):
195195
So we do not purge files being downloaded.
196196
"""
197197
with self.connection as db:
198+
# TODO: this is not perfect, since it is not guaranteed that an empty file is actually being
199+
# downloaded, so an old empty file will block _decache from removing files when the cache
200+
# limit is exceeded. Must be improved by at least adding a time margin.
198201
latest = db.execute("SELECT MIN(creation_date) FROM cache WHERE size IS NULL").fetchone()[0]
199202
if latest is None:
200203
latest = db.execute("SELECT MAX(creation_date) FROM cache WHERE size IS NOT NULL").fetchone()[0]
@@ -328,7 +331,7 @@ def _housekeeping(self, clean=False):
328331
pass
329332

330333
if parent is None:
331-
LOG.warning(f"earthkit-data cache: orphan found: {full}")
334+
LOG.debug(f"earthkit-data cache: orphan found: {full}")
332335
else:
333336
LOG.debug(f"earthkit-data cache: orphan found: {full} with parent {parent}")
334337

@@ -349,8 +352,7 @@ def _delete_file(self, path):
349352
else:
350353
os.unlink(path)
351354
except Exception as e:
352-
print(e)
353-
LOG.exception("Deleting %s", path)
355+
LOG.exception(f"Deleting {path}", e)
354356

355357
def _entry_to_dict(self, entry):
356358
n = dict(entry)
@@ -377,6 +379,8 @@ def _delete_entry(self, entry):
377379
pass
378380

379381
if entry["size"] is None:
382+
if not isinstance(entry, dict):
383+
entry = self._entry_to_dict(entry)
380384
entry["size"] = 0
381385

382386
path, size, owner, args = (
@@ -386,10 +390,12 @@ def _delete_entry(self, entry):
386390
entry["args"],
387391
)
388392

389-
LOG.warning(
390-
"Deleting entry %s",
391-
json.dumps(self._entry_to_dict(entry), indent=4, default=default_serialiser),
392-
)
393+
if logging.DEBUG >= LOG.getEffectiveLevel():
394+
LOG.debug(
395+
"Deleting entry %s",
396+
json.dumps(self._entry_to_dict(entry), indent=4, default=default_serialiser),
397+
)
398+
393399
total = 0
394400

395401
# First, delete child files, e.g. unzipped data
@@ -403,8 +409,9 @@ def _delete_entry(self, entry):
403409
db.execute("DELETE FROM cache WHERE path=?", (path,))
404410
return total
405411

406-
LOG.warning(f"earthkit-data cache: deleting {path} ({humanize.bytes(size)})")
407-
LOG.warning(f"earthkit-data cache: {owner} {args}")
412+
LOG.debug(f"earthkit-data cache: deleting {path} ({humanize.bytes(size)})")
413+
LOG.debug(f"earthkit-data cache: {owner} {args}")
414+
408415
self._delete_file(path)
409416

410417
with self.connection as db:
@@ -512,6 +519,12 @@ def _check_cache_size(self):
512519
size = self._cache_size()
513520
if size > maximum:
514521
self._housekeeping()
522+
LOG.warning(
523+
(
524+
f"earthkit-data cache: cache size({humanize.bytes(size)}) exceeds limit"
525+
f" ({humanize.bytes(maximum)}) set by 'maximum-cache-size' config option"
526+
)
527+
)
515528
self._decache(size - maximum)
516529

517530
# Check relative limit
@@ -520,9 +533,14 @@ def _check_cache_size(self):
520533
size = self._cache_size()
521534
df = disk_usage(self._policy.directory())
522535
if df.percent > usage:
523-
LOG.debug("Cache disk usage %s, limit %s", df.percent, usage)
524536
self._housekeeping()
525537
delta = (df.percent - usage) * df.total * 0.01
538+
LOG.warning(
539+
(
540+
f"earthkit-data cache: filesystem usage ({df.percent}%) exceeds limit ({usage}%)"
541+
f" set by 'maximum-cache-disk-usage' config option"
542+
)
543+
)
526544
self._decache(delta)
527545

528546
def _repr_html_(self):

src/earthkit/data/core/config.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -179,14 +179,19 @@ def validate(self, name, value):
179179
"maximum-cache-size": _(
180180
None,
181181
"""Maximum disk space used by the earthkit-data cache (e.g.: 100G or 2T).
182-
Can be set to None.""",
182+
When exceeded, earthkit-data evicts older cached entries until the usage
183+
is below the specified limit. Can be set to None.
184+
Ignored when ``cache-policy`` is ``off``.
185+
See :ref:`caching` for more information.""",
183186
getter="_as_bytes",
184187
none_ok=True,
185188
),
186189
"maximum-cache-disk-usage": _(
187190
"95%",
188-
"""Disk usage threshold after which earthkit-data expires older cached
189-
entries (% of the full disk capacity). Can be set to None.
191+
"""Specify maximum disk usage as a percentage of the full disk capacity on the filesystem the
192+
cache is located (e.g.: 90%). When the total disk usage exceeds this limit (it's not limited to the
193+
cache usage alone), earthkit-data evicts older cached entries until the usage is below the
194+
specified limit. Can be set to None. Ignored when ``cache-policy`` is ``off``.
190195
See :ref:`caching` for more information.""",
191196
getter="_as_percent",
192197
none_ok=True,
@@ -415,7 +420,12 @@ def _set(self, name: str, *args, **kwargs):
415420
assert len(args) == 1
416421
assert len(kwargs) == 0
417422
value = args[0]
418-
value = klass(value)
423+
if value is not None:
424+
if klass is bool:
425+
try:
426+
value = klass(value)
427+
except Exception as e:
428+
raise ValueError(f"Invalid value for config option '{name}': {value}") from e
419429

420430
if klass is list:
421431
assert len(args) > 0

tests/core/test_config.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,31 @@ def test_config_invalid():
6464
config.set("url-download-timeout", "A")
6565

6666

67+
@pytest.mark.parametrize(
68+
"param,set_value,stored_value,raise_error",
69+
[
70+
("check-out-of-date-urls", True, True, None),
71+
("check-out-of-date-urls", 1, True, None),
72+
("check-out-of-date-urls", 1.0, True, None),
73+
("check-out-of-date-urls", False, False, None),
74+
("check-out-of-date-urls", 0, False, None),
75+
("check-out-of-date-urls", 0.0, False, None),
76+
("check-out-of-date-urls", "true", True, None),
77+
("check-out-of-date-urls", "false", True, None),
78+
("check-out-of-date-urls", "1", True, None),
79+
("check-out-of-date-urls", "0", True, None),
80+
],
81+
)
82+
def test_config_set_bool(param, set_value, stored_value, raise_error):
83+
with config.temporary():
84+
if raise_error is None:
85+
config.set(param, set_value)
86+
assert config.get(param) == stored_value
87+
else:
88+
with pytest.raises(raise_error):
89+
config.set(param, set_value)
90+
91+
6792
@pytest.mark.parametrize(
6893
"param,set_value,stored_value,raise_error",
6994
[

0 commit comments

Comments
 (0)