@@ -195,6 +195,9 @@ def _latest_date(self):
195195 So we do not purge files being downloaded.
196196 """
197197 with self .connection as db :
198+ # TODO: this is not perfect, since it is not guaranteed that an empty file is actually being
199+ # downloaded, so an old empty file will block _decache from removing files when the cache
200+ # limit is exceeded. Must be improved, at least by adding a time margin.
198201 latest = db .execute ("SELECT MIN(creation_date) FROM cache WHERE size IS NULL" ).fetchone ()[0 ]
199202 if latest is None :
200203 latest = db .execute ("SELECT MAX(creation_date) FROM cache WHERE size IS NOT NULL" ).fetchone ()[0 ]
@@ -328,7 +331,7 @@ def _housekeeping(self, clean=False):
328331 pass
329332
330333 if parent is None :
331- LOG .warning (f"earthkit-data cache: orphan found: { full } " )
334+ LOG .debug (f"earthkit-data cache: orphan found: { full } " )
332335 else :
333336 LOG .debug (f"earthkit-data cache: orphan found: { full } with parent { parent } " )
334337
@@ -349,8 +352,7 @@ def _delete_file(self, path):
349352 else :
350353 os .unlink (path )
351354 except Exception as e :
352- print (e )
353- LOG .exception ("Deleting %s" , path )
355+ LOG .exception (f"Deleting { path } " , e )
354356
355357 def _entry_to_dict (self , entry ):
356358 n = dict (entry )
@@ -377,6 +379,8 @@ def _delete_entry(self, entry):
377379 pass
378380
379381 if entry ["size" ] is None :
382+ if not isinstance (entry , dict ):
383+ entry = self ._entry_to_dict (entry )
380384 entry ["size" ] = 0
381385
382386 path , size , owner , args = (
@@ -386,10 +390,12 @@ def _delete_entry(self, entry):
386390 entry ["args" ],
387391 )
388392
389- LOG .warning (
390- "Deleting entry %s" ,
391- json .dumps (self ._entry_to_dict (entry ), indent = 4 , default = default_serialiser ),
392- )
393+ if logging .DEBUG >= LOG .getEffectiveLevel ():
394+ LOG .debug (
395+ "Deleting entry %s" ,
396+ json .dumps (self ._entry_to_dict (entry ), indent = 4 , default = default_serialiser ),
397+ )
398+
393399 total = 0
394400
395401 # First, delete child files, e.g. unzipped data
@@ -403,8 +409,9 @@ def _delete_entry(self, entry):
403409 db .execute ("DELETE FROM cache WHERE path=?" , (path ,))
404410 return total
405411
406- LOG .warning (f"earthkit-data cache: deleting { path } ({ humanize .bytes (size )} )" )
407- LOG .warning (f"earthkit-data cache: { owner } { args } " )
412+ LOG .debug (f"earthkit-data cache: deleting { path } ({ humanize .bytes (size )} )" )
413+ LOG .debug (f"earthkit-data cache: { owner } { args } " )
414+
408415 self ._delete_file (path )
409416
410417 with self .connection as db :
@@ -512,6 +519,12 @@ def _check_cache_size(self):
512519 size = self ._cache_size ()
513520 if size > maximum :
514521 self ._housekeeping ()
522+ LOG .warning (
523+ (
524+ f"earthkit-data cache: cache size({ humanize .bytes (size )} ) exceeds limit"
525+ f" ({ humanize .bytes (maximum )} ) set by 'maximum-cache-size' config option"
526+ )
527+ )
515528 self ._decache (size - maximum )
516529
517530 # Check relative limit
@@ -520,9 +533,14 @@ def _check_cache_size(self):
520533 size = self ._cache_size ()
521534 df = disk_usage (self ._policy .directory ())
522535 if df .percent > usage :
523- LOG .debug ("Cache disk usage %s, limit %s" , df .percent , usage )
524536 self ._housekeeping ()
525537 delta = (df .percent - usage ) * df .total * 0.01
538+ LOG .warning (
539+ (
540+ f"earthkit-data cache: filesystem usage ({ df .percent } %) exceeds limit ({ usage } %)"
541+ f" set by 'maximum-cache-disk-usage' config option"
542+ )
543+ )
526544 self ._decache (delta )
527545
528546 def _repr_html_ (self ):
0 commit comments