Skip to content

Commit

Permalink
Implement fallocate(), except really perverted FALLOC_FL_COLLAPSE_RANGE and FALLOC_FL_INSERT_RANGE
Browse files Browse the repository at this point in the history
  • Loading branch information
vitalif committed Jan 23, 2023
1 parent aa5d8cf commit 19e9b07
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 30 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Also check out our CSI S3 driver (GeeseFS-based): https://github.com/yandex-clou
| Read after write | + | + | - | + | + |
| Partial writes | + | + | - | + | + |
| Truncate | + | - | - | + | + |
| fallocate | + | - | - | - | - |
| chmod/chown | Y | - | - | + | - |
| fsync | + | - | - | + | + |
| Symlinks | Y | - | - | + | + |
Expand Down
94 changes: 64 additions & 30 deletions internal/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,20 +205,54 @@ func insertBuffer(buffers []*FileBuffer, pos int, add ...*FileBuffer) []*FileBuf
}

// addBuffer places data into the inode's buffer list at byte range
// [offset, offset+len(data)).
//
// Existing buffers intersecting the range are first dropped or trimmed via
// removeRange. When a clean buffer is inserted (state < BUF_DIRTY), removeRange
// leaves dirty buffers in place, so only the parts of data that do not overlap
// a surviving buffer are inserted.
//
// state is the buffer state assigned to the inserted pieces; copyData is
// passed through to inode.insertBuffer unchanged.
//
// The return value is the sum of what removeRange and every insertBuffer call
// report (presumably the net change in allocated buffer memory - confirm
// against insertBuffer/removeRange).
func (inode *Inode) addBuffer(offset uint64, data []byte, state int16, copyData bool) int64 {
	dataLen := uint64(len(data))
	endOffset := offset + dataLen

	// Remove intersecting parts as they're being overwritten.
	// If we're inserting a clean buffer, removeRange doesn't remove dirty ones.
	allocated := inode.removeRange(offset, dataLen, state)

	// Insert non-overlapping parts of the buffer.
	// All inserted pieces share one BufferPointer over the same backing slice.
	curOffset := offset
	dataPtr := &BufferPointer{
		mem:  data,
		refs: 0,
	}
	// Locate the position only after removeRange, because it may have changed
	// the buffer list.
	pos := locateBuffer(inode.buffers, offset)
	for ; pos < len(inode.buffers) && curOffset < endOffset; pos++ {
		b := inode.buffers[pos]
		if b.offset+b.length <= offset {
			// Buffer ends before our range starts
			continue
		}
		if b.offset > curOffset {
			// Insert curOffset->min(b.offset,endOffset)
			nextEnd := b.offset
			if nextEnd > endOffset {
				nextEnd = endOffset
			}
			allocated += inode.insertBuffer(pos, curOffset, data[curOffset-offset:nextEnd-offset], state, copyData, dataPtr)
		}
		// Skip over the part covered by the surviving buffer
		curOffset = b.offset + b.length
	}
	if curOffset < endOffset {
		// Insert curOffset->endOffset
		allocated += inode.insertBuffer(pos, curOffset, data[curOffset-offset:], state, copyData, dataPtr)
	}

	return allocated
}

// Remove buffers in range (offset..size)
func (inode *Inode) removeRange(offset, size uint64, state int16) (allocated int64) {
start := locateBuffer(inode.buffers, offset)
endOffset := offset+size
for pos := start; pos < len(inode.buffers); pos++ {
b := inode.buffers[pos]
if b.offset >= endOffset {
break
}
bufEnd := b.offset+b.length
// If we're inserting a clean buffer, don't remove dirty ones
if (state >= BUF_DIRTY || b.state < BUF_DIRTY) && bufEnd > offset && endOffset > b.offset {
if offset <= b.offset {
if endOffset >= bufEnd {
Expand Down Expand Up @@ -284,35 +318,35 @@ func (inode *Inode) addBuffer(offset uint64, data []byte, state int16, copyData
}
}
}
return
}

// Insert non-overlapping parts of the buffer
curOffset := offset
dataPtr := &BufferPointer{
mem: data,
refs: 0,
}
pos := start
for ; pos < len(inode.buffers) && curOffset < endOffset; pos++ {
b := inode.buffers[pos]
if b.offset + b.length <= offset {
continue
}
if b.offset > curOffset {
// insert curOffset->min(b.offset,endOffset)
nextEnd := b.offset
if nextEnd > endOffset {
nextEnd = endOffset
}
allocated += inode.insertBuffer(pos, curOffset, data[curOffset-offset : nextEnd-offset], state, copyData, dataPtr)
}
curOffset = b.offset + b.length
}
if curOffset < endOffset {
// Insert curOffset->endOffset
allocated += inode.insertBuffer(pos, curOffset, data[curOffset-offset : ], state, copyData, dataPtr)
// zeroRange replaces the byte range [offset, offset+size) of the inode's
// buffer list with a single dirty "zero" buffer that carries no data.
//
// It returns whether the buffer list was changed, plus the value reported by
// removeRange for the buffers it dropped (presumably the change in allocated
// buffer memory - confirm against removeRange).
//
// Nothing is changed when size is 0, or when the range is already covered by
// exactly one zero buffer with the same offset and length.
func (inode *Inode) zeroRange(offset, size uint64) (bool, int64) {
	// An empty range has nothing to zero; never insert a zero-length buffer
	if size == 0 {
		return false, 0
	}

	// Check if it's already zeroed
	pos := locateBuffer(inode.buffers, offset)
	if pos < len(inode.buffers) && inode.buffers[pos].zero &&
		inode.buffers[pos].offset == offset && inode.buffers[pos].length == size {
		return false, 0
	}

	// Remove intersecting parts as they're being overwritten.
	// BUF_DIRTY is passed as the state so dirty buffers are removed too.
	allocated := inode.removeRange(offset, size, BUF_DIRTY)

	// Insert a zero buffer; re-locate the position because removeRange may
	// have changed the buffer list.
	pos = locateBuffer(inode.buffers, offset)
	inode.buffers = insertBuffer(inode.buffers, pos, &FileBuffer{
		offset:  offset,
		dirtyID: atomic.AddUint64(&inode.fs.bufferPool.curDirtyID, 1),
		state:   BUF_DIRTY,
		onDisk:  false,
		zero:    true,
		recency: 0,
		length:  size,
		data:    nil,
		ptr:     nil,
	})

	return true, allocated
}

func (inode *Inode) ResizeUnlocked(newSize uint64, zeroFill bool, finalizeFlushed bool) {
Expand Down
82 changes: 82 additions & 0 deletions internal/goofys.go
Original file line number Diff line number Diff line change
Expand Up @@ -1805,3 +1805,85 @@ func (fs *Goofys) SyncFS(parent *Inode) (err error) {
}
return
}

// Mode bits tested against FallocateOp.Mode in Fallocate.
// The numeric values match the FALLOC_FL_* flags of Linux <linux/falloc.h>.
const (
	// FALLOC_FL_KEEP_SIZE: do not extend the file; Fallocate clamps the
	// range to the current size instead of resizing.
	FALLOC_FL_KEEP_SIZE uint32 = 1 << 0
	// FALLOC_FL_PUNCH_HOLE: Fallocate zero-fills the range.
	FALLOC_FL_PUNCH_HOLE uint32 = 1 << 1
	// FALLOC_FL_COLLAPSE_RANGE: rejected by Fallocate with ENOTSUP.
	FALLOC_FL_COLLAPSE_RANGE uint32 = 1 << 3
	// FALLOC_FL_ZERO_RANGE: Fallocate zero-fills the range.
	FALLOC_FL_ZERO_RANGE uint32 = 1 << 4
	// FALLOC_FL_INSERT_RANGE: rejected by Fallocate with ENOTSUP.
	FALLOC_FL_INSERT_RANGE uint32 = 1 << 5
)

// Fallocate handles a FUSE fallocate request for the inode op.Inode.
//
// Behavior by mode bits in op.Mode:
//   - FALLOC_FL_COLLAPSE_RANGE / FALLOC_FL_INSERT_RANGE: not supported.
//   - Range ends past the current size without FALLOC_FL_KEEP_SIZE: the file
//     is extended to op.Offset+op.Length with zero fill.
//   - Range ends past the current size with FALLOC_FL_KEEP_SIZE: the range is
//     clamped to the current size (op.Offset and op.Length are adjusted in
//     place).
//   - FALLOC_FL_PUNCH_HOLE / FALLOC_FL_ZERO_RANGE: the (possibly clamped)
//     range is replaced with a zero buffer.
//
// If anything changed and the inode was in ST_CACHED state, it is switched to
// ST_MODIFIED and the flusher is woken up.
//
// Errors:
//   - syscall.ESTALE if the inode is stale (refreshed == -1);
//   - syscall.ENOTSUP for collapse/insert range modes;
//   - syscall.EFBIG if op.Offset+op.Length overflows, or if extending the
//     file would exceed fs.getMaxFileSize().
//
// A zero op.Length is a no-op and returns nil.
func (fs *Goofys) Fallocate(
	ctx context.Context,
	op *fuseops.FallocateOp) (err error) {

	atomic.AddInt64(&fs.stats.metadataWrites, 1)

	fs.mu.RLock()
	inode := fs.getInodeOrDie(op.Inode)
	fs.mu.RUnlock()

	if atomic.LoadInt32(&inode.refreshed) == -1 {
		// Stale inode
		return syscall.ESTALE
	}

	if op.Length == 0 {
		return nil
	}

	inode.mu.Lock()
	defer inode.mu.Unlock()

	if (op.Mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)) != 0 {
		// Insert range/collapse range operations are not supported.
		// It's possible to support them, but it would require buffer remapping:
		// if you open a file, insert/collapse a range and then read past the
		// affected offset, you should get data from the old offset! It would
		// probably also be wise to use UploadPartCopy with the corresponding
		// ranges to do the copying on the server side in this case. Some day we
		// might even be able to preserve multipart part IDs when cutting a
		// non-finalized upload across part boundaries, but for now we can't -
		// part offsets are always fixed.
		return syscall.ENOTSUP
	}

	endOffset := op.Offset + op.Length
	if endOffset < op.Offset {
		// uint64 wrap-around: the requested range can't fit in any file.
		// Without this check a wrapped end offset would bypass the size limit.
		return syscall.EFBIG
	}

	modified := false

	if endOffset > inode.Attributes.Size {
		if (op.Mode & FALLOC_FL_KEEP_SIZE) == 0 {
			// Resize
			if endOffset > fs.getMaxFileSize() {
				// File size too large
				log.Warnf(
					"Maximum file size exceeded when trying to extend %v to %v bytes using fallocate",
					inode.FullName(), endOffset,
				)
				return syscall.EFBIG
			}
			inode.ResizeUnlocked(endOffset, true, true)
			modified = true
		} else {
			// Size must be kept: clamp the range to the current file size
			if op.Offset > inode.Attributes.Size {
				op.Offset = inode.Attributes.Size
			}
			op.Length = inode.Attributes.Size - op.Offset
		}
	}

	// op.Length may have been clamped to 0 above (range entirely at or past
	// EOF with KEEP_SIZE). Skip zeroing then, so that no empty dirty buffer is
	// inserted and the inode is not needlessly marked as modified.
	if op.Length > 0 && (op.Mode&(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_ZERO_RANGE)) != 0 {
		// Zero fill
		mod, _ := inode.zeroRange(op.Offset, op.Length)
		modified = modified || mod
	}

	if modified && inode.CacheState == ST_CACHED {
		inode.SetCacheState(ST_MODIFIED)
		inode.fs.WakeupFlusher()
	}

	return nil
}

0 comments on commit 19e9b07

Please sign in to comment.