Skip to content

Commit a04432d

Browse files
authored
fix(mfs): limit cache growth by default (#1037)
* fix: limit MFS directory cache size to prevent unbounded growth adds auto-flush mechanism when directory cache exceeds 256 entries (matching HAMT shard size). this prevents unbounded memory growth when using --flush=false with many MFS operations. the cache limit is currently hardcoded but marked as TODO for future configurability. related to ipfs/kubo#10842 * feat: make MFS cache size configurable adds SetMaxCacheSize methods to Root and Directory to allow runtime configuration of the cache limit. subdirectories inherit the parent's cache size setting. this allows users to tune the cache size based on their needs: - smaller for memory-constrained environments - larger for heavy MFS usage - 0 to disable limiting (old behavior) related to ipfs/kubo#10842 * docs: update changelog for mfs cache limit
1 parent bd89313 commit a04432d

File tree

3 files changed

+74
-1
lines changed

3 files changed

+74
-1
lines changed

CHANGELOG.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ The following emojis are used to highlight certain changes:
5555
- `DagModifier` now correctly preserves raw node codec when modifying data under the chunker threshold, instead of incorrectly forcing everything to dag-pb
5656
- `DagModifier` prevents creation of identity CIDs exceeding `verifcid.DefaultMaxIdentityDigestSize` limit when modifying data, automatically switching to proper cryptographic hash while preserving small identity CIDs
5757
- `DagModifier` now supports appending data to a `RawNode` by automatically converting it into a UnixFS file structure where the original `RawNode` becomes the first leaf block, fixing previously impossible append operations that would fail with "expected protobuf dag node" errors
58-
- `mfs`: Files with identity CIDs now properly inherit full CID prefix from parent directories (version, codec, hash type, length), not just hash type
58+
- `mfs`:
59+
- Files with identity CIDs now properly inherit full CID prefix from parent directories (version, codec, hash type, length), not just hash type ([#1018](https://github.com/ipfs/boxo/pull/1018))
60+
- Fixed unbounded memory growth when using deferred flushing and user forgets to flush manually. Added `SetMaxCacheSize()` to limit directory cache growth. Default 256 entries, set to 0 to disable. ([#1035](https://github.com/ipfs/boxo/pull/1035))
5961

6062
### Security
6163

mfs/dir.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,16 @@ var (
2323
ErrDirExists = errors.New("directory already has entry by that name")
2424
)
2525

26+
const (
27+
// DefaultMaxCacheSize is the default maximum number of entries
28+
// that can be cached in a directory before auto-flush is triggered.
29+
// This prevents unbounded memory growth when using --flush=false.
30+
// The value matches HAMT shard size (256).
31+
// TODO: make this configurable
32+
// See https://github.com/ipfs/kubo/issues/10842
33+
DefaultMaxCacheSize = 256
34+
)
35+
2636
// TODO: There's too much functionality associated with this structure,
2737
// let's organize it (and if possible extract part of it elsewhere)
2838
// and document the main features of `Directory` here.
@@ -42,6 +52,10 @@ type Directory struct {
4252
// reading and editing directories.
4353
unixfsDir uio.Directory
4454

55+
// Maximum number of entries to cache before triggering auto-flush.
56+
// Set to 0 to disable cache size limiting.
57+
maxCacheSize int
58+
4559
prov provider.MultihashProvider
4660
}
4761

@@ -82,6 +96,7 @@ func NewDirectory(ctx context.Context, name string, node ipld.Node, parent paren
8296
unixfsDir: db,
8397
prov: prov,
8498
entriesCache: make(map[string]FSNode),
99+
maxCacheSize: DefaultMaxCacheSize,
85100
}, nil
86101
}
87102

@@ -121,6 +136,7 @@ func NewEmptyDirectory(ctx context.Context, name string, parent parent, dserv ip
121136
unixfsDir: db,
122137
prov: prov,
123138
entriesCache: make(map[string]FSNode),
139+
maxCacheSize: DefaultMaxCacheSize,
124140
}, nil
125141
}
126142

@@ -216,15 +232,25 @@ func (d *Directory) cacheNode(name string, nd ipld.Node) (FSNode, error) {
216232
// inherited from the parent.
217233
ndir.unixfsDir.SetMaxLinks(d.unixfsDir.GetMaxLinks())
218234
ndir.unixfsDir.SetMaxHAMTFanout(d.unixfsDir.GetMaxHAMTFanout())
235+
// Inherit cache size limit from parent
236+
ndir.maxCacheSize = d.maxCacheSize
219237

220238
d.entriesCache[name] = ndir
239+
// Check cache size after adding entry
240+
if err := d.checkCacheSize(); err != nil {
241+
return nil, err
242+
}
221243
return ndir, nil
222244
case ft.TFile, ft.TRaw, ft.TSymlink:
223245
nfi, err := NewFile(name, nd, d, d.dagService, d.prov)
224246
if err != nil {
225247
return nil, err
226248
}
227249
d.entriesCache[name] = nfi
250+
// Check cache size after adding entry
251+
if err := d.checkCacheSize(); err != nil {
252+
return nil, err
253+
}
228254
return nfi, nil
229255
case ft.TMetadata:
230256
return nil, ErrNotYetImplemented
@@ -237,6 +263,10 @@ func (d *Directory) cacheNode(name string, nd ipld.Node) (FSNode, error) {
237263
return nil, err
238264
}
239265
d.entriesCache[name] = nfi
266+
// Check cache size after adding entry
267+
if err := d.checkCacheSize(); err != nil {
268+
return nil, err
269+
}
240270
return nfi, nil
241271
default:
242272
return nil, errors.New("unrecognized node type in cache node")
@@ -374,6 +404,9 @@ func (d *Directory) MkdirWithOpts(name string, opts MkdirOpts) (*Directory, erro
374404
return nil, err
375405
}
376406

407+
// Inherit cache size limit from parent
408+
dirobj.maxCacheSize = d.maxCacheSize
409+
377410
ndir, err := dirobj.GetNode()
378411
if err != nil {
379412
return nil, err
@@ -385,9 +418,40 @@ func (d *Directory) MkdirWithOpts(name string, opts MkdirOpts) (*Directory, erro
385418
}
386419

387420
d.entriesCache[name] = dirobj
421+
422+
// Check cache size after adding new directory
423+
if err := d.checkCacheSize(); err != nil {
424+
return nil, err
425+
}
426+
388427
return dirobj, nil
389428
}
390429

430+
// checkCacheSize checks if the cache has exceeded the maximum size
431+
// and triggers an auto-flush if needed to prevent unbounded growth.
432+
// Must be called with d.lock held.
433+
func (d *Directory) checkCacheSize() error {
434+
// Skip check if cache limiting is disabled (maxCacheSize == 0)
435+
if d.maxCacheSize > 0 && len(d.entriesCache) >= d.maxCacheSize {
436+
// Auto-flush to prevent unbounded cache growth
437+
log.Debugf("mfs: auto-flushing directory cache (size: %d >= limit: %d)", len(d.entriesCache), d.maxCacheSize)
438+
err := d.cacheSync(true)
439+
if err != nil {
440+
return err
441+
}
442+
}
443+
return nil
444+
}
445+
446+
// SetMaxCacheSize sets the maximum number of entries to cache before
447+
// triggering an auto-flush. Set to 0 to disable cache size limiting.
448+
// This method is thread-safe.
449+
func (d *Directory) SetMaxCacheSize(size int) {
450+
d.lock.Lock()
451+
defer d.lock.Unlock()
452+
d.maxCacheSize = size
453+
}
454+
391455
func (d *Directory) Unlink(name string) error {
392456
d.lock.Lock()
393457
defer d.lock.Unlock()

mfs/root.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,13 @@ func (kr *Root) GetDirectory() *Directory {
170170
return kr.dir
171171
}
172172

173+
// SetMaxCacheSize sets the maximum number of entries to cache in each
174+
// directory before triggering an auto-flush. Set to 0 to disable cache
175+
// size limiting. This setting is propagated to all directories in the tree.
176+
func (kr *Root) SetMaxCacheSize(size int) {
177+
kr.dir.SetMaxCacheSize(size)
178+
}
179+
173180
// Flush signals that an update has occurred since the last publish,
174181
// and updates the Root republisher.
175182
// TODO: We are definitely abusing the "flush" terminology here.

0 commit comments

Comments
 (0)