Skip to content

Commit

Permalink
Optional uid, gid, UNIX permissions, special file and mtime support
Browse files Browse the repository at this point in the history
  • Loading branch information
vitalif committed Nov 3, 2022
1 parent 57f72c8 commit bbf24cd
Show file tree
Hide file tree
Showing 7 changed files with 343 additions and 72 deletions.
40 changes: 23 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,35 +21,41 @@ Also check out our CSI S3 driver (GeeseFS-based): https://github.com/yandex-clou
| Read after write | + | + | - | + | + |
| Partial writes | + | + | - | + | + |
| Truncate | + | - | - | + | + |
| chmod/chown | - | - | - | + | - |
| chmod/chown | Y | - | - | + | - |
| fsync | + | - | - | + | + |
| Symlinks | + | - | - | + | + |
| Symlinks | Y | - | - | + | + |
| Socket files | Y | - | - | + | - |
| Device files | Y | - | - | - | - |
| Custom mtime | Y | + | - | + | + |
| xattr | + | - | + | + | - |
| Directory renames | + | + | * | + | + |
| readdir & changes | + | + | - | + | + |

\* Directory renames are allowed in Goofys for directories with no more than 1000 entries and the limit is hardcoded
**Y** Only works correctly with Yandex S3.

**\*** Directory renames are allowed in Goofys for directories with no more than 1000 entries and the limit is hardcoded.

List of non-POSIX behaviors/limitations for GeeseFS:
* symbolic links are only restored correctly when using Yandex S3 because standard S3
doesn't return user metadata in listings and detecting symlinks in standard S3 would
require an additional HEAD request for every file in listing which would make listings
too slow
* does not store file mode/owner/group, use `--(dir|file)-mode` or `--(uid|gid)` options
* does not support hard links
* does not support special files (block/character devices, named pipes, UNIX sockets)
* does not support locking
* `ctime` and `atime` are always the same as `mtime`
* file modification time can't be set by user (for example with `cp --preserve` or utimes(2))
* File mode/owner/group, symbolic links, custom mtimes and special files (block/character devices,
named pipes, UNIX sockets) are supported, but they are restored correctly only when
using Yandex S3 because standard S3 doesn't return user metadata in listings and
reading all this metadata in standard S3 would require an additional HEAD request
for every file in listing which would make listings too slow.
* Special file support is enabled by default for Yandex S3 (disable with `--no-specials`) and disabled for others.
* File mode/owner/group are disabled by default even for Yandex S3 (enable with `--enable-perms`).
When disabled, global permissions can be set with `--(dir|file)-mode` and `--(uid|gid)` options.
* Custom modification times are also disabled by default even for Yandex S3 (enable with `--enable-mtime`).
When disabled:
- `ctime`, `atime` and `mtime` are always the same
- file modification time can't be set by user (for example with `cp --preserve` or utimes(2))
* Does not support hard links
* Does not support locking

In addition to the items above:
* default file size limit is 1.03 TB, achieved by splitting the file into 1000x 5MB parts,
* Default file size limit is 1.03 TB, achieved by splitting the file into 1000x 5MB parts,
1000x 25 MB parts and 8000x 125 MB parts. You can change part sizes, but AWS's own limit
is anyway 5 TB.

Owner & group, modification times and special files are in fact supportable with Yandex S3
because it has listings with metadata. Feel free to post issues if you want it. :-)

# Stability

GeeseFS is stable enough to pass most of `xfstests` which are applicable,
Expand Down
8 changes: 8 additions & 0 deletions api/common/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ type FlagStorage struct {
SinglePartMB uint64
MaxMergeCopyMB uint64
IgnoreFsync bool
EnablePerms bool
EnableSpecials bool
EnableMtime bool
UidAttr string
GidAttr string
FileModeAttr string
RdevAttr string
MtimeAttr string
SymlinkAttr string
CachePopularThreshold int64
CacheMaxHits int64
Expand Down
33 changes: 26 additions & 7 deletions internal/dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ func (inode *Inode) sealDir() {
inode.dir.listDone = true
inode.dir.lastFromCloud = nil
inode.dir.DirTime = time.Now()
inode.Attributes.Mtime = inode.findChildMaxTime()
inode.Attributes.Mtime, inode.Attributes.Ctime = inode.findChildMaxTime()
}

// Sorting order of entries in directories is slightly inconsistent between goofys
Expand Down Expand Up @@ -1014,7 +1014,11 @@ func (parent *Inode) Create(name string) (inode *Inode, fh *FileHandle) {
defer inode.mu.Unlock()
inode.Attributes = InodeAttributes{
Size: 0,
Ctime: now,
Mtime: now,
Uid: fs.flags.Uid,
Gid: fs.flags.Gid,
Mode: fs.flags.FileMode,
}
// one ref is for lookup
inode.Ref()
Expand Down Expand Up @@ -1085,6 +1089,10 @@ func (parent *Inode) doMkDir(name string) (inode *Inode) {
oldInode.refcnt = 0
oldInode.Ref()
oldInode.SetCacheState(ST_MODIFIED)
oldInode.Attributes.Ctime = time.Now()
if parent.Attributes.Ctime.Before(oldInode.Attributes.Ctime) {
parent.Attributes.Ctime = oldInode.Attributes.Ctime
}
oldInode.Attributes.Mtime = time.Now()
if parent.Attributes.Mtime.Before(oldInode.Attributes.Mtime) {
parent.Attributes.Mtime = oldInode.Attributes.Mtime
Expand All @@ -1102,7 +1110,10 @@ func (parent *Inode) doMkDir(name string) (inode *Inode) {
inode.ToDir()
inode.touch()
// Record dir as actual
inode.dir.DirTime = inode.Attributes.Mtime
inode.dir.DirTime = inode.Attributes.Ctime
if parent.Attributes.Ctime.Before(inode.Attributes.Ctime) {
parent.Attributes.Ctime = inode.Attributes.Ctime
}
if parent.Attributes.Mtime.Before(inode.Attributes.Mtime) {
parent.Attributes.Mtime = inode.Attributes.Mtime
}
Expand Down Expand Up @@ -1139,6 +1150,10 @@ func (parent *Inode) CreateSymlink(
inode.Attributes = InodeAttributes{
Size: 0,
Mtime: now,
Ctime: now,
Uid: fs.flags.Uid,
Gid: fs.flags.Gid,
Mode: fs.flags.FileMode,
}
// one ref is for lookup
inode.Ref()
Expand Down Expand Up @@ -1587,20 +1602,24 @@ func (parent *Inode) insertSubTree(path string, obj *BlobItemOutput, dirs map[*I
}
}

func (parent *Inode) findChildMaxTime() time.Time {
maxTime := parent.Attributes.Mtime
func (parent *Inode) findChildMaxTime() (maxMtime, maxCtime time.Time) {
maxCtime = parent.Attributes.Ctime
maxMtime = parent.Attributes.Mtime

for i, c := range parent.dir.Children {
if i < 2 {
// skip . and ..
continue
}
if c.Attributes.Mtime.After(maxTime) {
maxTime = c.Attributes.Mtime
if c.Attributes.Ctime.After(maxCtime) {
maxCtime = c.Attributes.Ctime
}
if c.Attributes.Mtime.After(maxMtime) {
maxMtime = c.Attributes.Mtime
}
}

return maxTime
return
}

func (parent *Inode) LookUp(name string) (*Inode, error) {
Expand Down
3 changes: 2 additions & 1 deletion internal/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ func (fh *FileHandle) WriteFile(offset int64, data []byte, copyData bool) (err e
// FIXME: Don't activate the flusher immediately for small writes
fh.inode.fs.WakeupFlusher()
fh.inode.Attributes.Mtime = time.Now()
fh.inode.Attributes.Ctime = fh.inode.Attributes.Mtime

fh.inode.mu.Unlock()

Expand Down Expand Up @@ -2005,7 +2006,7 @@ func (inode *Inode) updateFromFlush(size uint64, etag *string, lastModified *tim
inode.s3Metadata["storage-class"] = []byte(*storageClass)
}
if lastModified != nil {
inode.Attributes.Mtime = *lastModified
inode.Attributes.Ctime = *lastModified
}
inode.knownSize = size
inode.knownETag = *etag
Expand Down
102 changes: 86 additions & 16 deletions internal/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,13 @@ func NewApp() (app *cli.App) {

cli.StringFlag{
Name: "multipart-age",
Usage: "Multipart uploads older than this value will be deleted on start (default: 48 hours)",
Usage: "Multipart uploads older than this value will be deleted on start",
Value: "48h",
},

cli.IntFlag{
Name: "multipart-copy-threshold",
Usage: "Threshold for switching from single-part to multipart object copy in MB. Maximum for AWS S3 is 5 GB (default: 128 MB)",
Usage: "Threshold for switching from single-part to multipart object copy in MB. Maximum for AWS S3 is 5 GB",
Value: 128,
},

Expand Down Expand Up @@ -280,27 +280,27 @@ func NewApp() (app *cli.App) {
cli.IntFlag{
Name: "max-flushers",
Value: 16,
Usage: "How much parallel requests should be used for flushing changes to server (default: 16)",
Usage: "How much parallel requests should be used for flushing changes to server",
},

cli.IntFlag{
Name: "max-parallel-parts",
Value: 8,
Usage: "How much parallel requests out of the total number can be used for large part uploads."+
" Large parts take more bandwidth so they usually require less parallelism (default: 8)",
" Large parts take more bandwidth so they usually require less parallelism",
},

cli.IntFlag{
Name: "max-parallel-copy",
Value: 16,
Usage: "How much parallel unmodified part copy requests should be used."+
" This limit is separate from max-flushers (default: 16)",
" This limit is separate from max-flushers",
},

cli.IntFlag{
Name: "read-ahead",
Value: 5*1024,
Usage: "How much data in KB should be pre-loaded with every read by default (default: 5 MB)",
Usage: "How much data in KB should be pre-loaded with every read by default",
},

cli.IntFlag{
Expand All @@ -312,45 +312,45 @@ func NewApp() (app *cli.App) {
cli.IntFlag{
Name: "small-read-cutoff",
Value: 128,
Usage: "Maximum average size of last reads in KB to trigger \"small\" readahead (default: 128 KB)",
Usage: "Maximum average size of last reads in KB to trigger \"small\" readahead",
},

cli.IntFlag{
Name: "read-ahead-small",
Value: 128,
Usage: "Smaller readahead size in KB to be used when small random reads are detected (default: 128 KB)",
Usage: "Smaller readahead size in KB to be used when small random reads are detected",
},

cli.IntFlag{
Name: "large-read-cutoff",
Value: 20*1024,
Usage: "Amount of linear read in KB after which the \"large\" readahead should be triggered (default: 20 MB)",
Usage: "Amount of linear read in KB after which the \"large\" readahead should be triggered",
},

cli.IntFlag{
Name: "read-ahead-large",
Value: 100*1024,
Usage: "Larger readahead size in KB to be used when long linear reads are detected (default: 100 MB)",
Usage: "Larger readahead size in KB to be used when long linear reads are detected",
},

cli.IntFlag{
Name: "read-ahead-parallel",
Value: 20*1024,
Usage: "Larger readahead will be triggered in parallel chunks of this size in KB (default: 20 MB)",
Usage: "Larger readahead will be triggered in parallel chunks of this size in KB",
},

cli.IntFlag{
Name: "read-merge",
Value: 512,
Usage: "Two HTTP requests required to satisfy a read will be merged into one" +
" if they're at most this number of KB away (default: 512)",
" if they're at most this number of KB away",
},

cli.IntFlag{
Name: "single-part",
Value: 5,
Usage: "Maximum size of an object in MB to upload it as a single part." +
" Can't be less than 5 MB (default: 5 MB)",
" Can't be less than 5 MB",
},

cli.StringFlag{
Expand All @@ -366,18 +366,72 @@ func NewApp() (app *cli.App) {
Value: 0,
Usage: "If non-zero, allow to compose larger parts up to this number of megabytes" +
" in size from existing unchanged parts when doing server-side part copy."+
" Must be left at 0 for Yandex S3 (default: 0)",
" Must be left at 0 for Yandex S3",
},

cli.BoolFlag{
Name: "ignore-fsync",
Usage: "Do not wait until changes are persisted to the server on fsync() call (default: off)",
},

cli.BoolFlag{
Name: "enable-perms",
Usage: "Enable permissions, user and group ID." +
" Only works correctly if your S3 returns UserMetadata in listings (default: off)",
},

cli.BoolFlag{
Name: "enable-specials",
Usage: "Enable special file support (sockets, devices, named pipes)." +
" Only works correctly if your S3 returns UserMetadata in listings (default: on for Yandex, off for others)",
},

cli.BoolFlag{
Name: "no-specials",
Usage: "Disable special file support (sockets, devices, named pipes).",
},

cli.BoolFlag{
Name: "enable-mtime",
Usage: "Enable modification time preservation." +
" Only works correctly if your S3 returns UserMetadata in listings (default: off)",
},

cli.StringFlag{
Name: "uid-attr",
Value: "uid",
Usage: "User ID metadata attribute name",
},

cli.StringFlag{
Name: "gid-attr",
Value: "gid",
Usage: "Group ID metadata attribute name",
},

cli.StringFlag{
Name: "mode-attr",
Value: "mode",
Usage: "File mode (permissions & special file flags) metadata attribute name",
},

cli.StringFlag{
Name: "rdev-attr",
Value: "rdev",
Usage: "Block/character device number metadata attribute name",
},

cli.StringFlag{
Name: "mtime-attr",
Value: "mtime",
Usage: "File modification time (UNIX time) metadata attribute name",
},

cli.StringFlag{
Name: "symlink-attr",
Value: "--symlink-target",
Usage: "Symbolic link target metadata attribute (default: --symlink-target)",
Usage: "Symbolic link target metadata attribute name." +
" Only works correctly if your S3 returns UserMetadata in listings",
},

cli.DurationFlag{
Expand Down Expand Up @@ -617,6 +671,14 @@ func PopulateFlags(c *cli.Context) (ret *FlagStorage) {
SinglePartMB: uint64(singlePart),
MaxMergeCopyMB: uint64(c.Int("max-merge-copy")),
IgnoreFsync: c.Bool("ignore-fsync"),
EnablePerms: c.Bool("enable-perms"),
EnableSpecials: c.Bool("enable-specials"),
EnableMtime: c.Bool("enable-mtime"),
UidAttr: c.String("uid-attr"),
GidAttr: c.String("gid-attr"),
FileModeAttr: c.String("mode-attr"),
RdevAttr: c.String("rdev-attr"),
MtimeAttr: c.String("mtime-attr"),
SymlinkAttr: c.String("symlink-attr"),
CachePopularThreshold: int64(c.Int("cache-popular-threshold")),
CacheMaxHits: int64(c.Int("cache-max-hits")),
Expand Down Expand Up @@ -662,8 +724,12 @@ func PopulateFlags(c *cli.Context) (ret *FlagStorage) {
config.IAMHeader = c.String("iam-header")
config.MultipartAge = c.Duration("multipart-age")
listType := c.String("list-type")
isYandex := strings.Index(flags.Endpoint, "yandex") != -1
if isYandex && !c.IsSet("no-specials") {
flags.EnableSpecials = true
}
if listType == "" {
if idx := strings.Index(flags.Endpoint, "yandex"); idx != -1 {
if isYandex {
listType = "ext-v1"
} else {
listType = "1"
Expand All @@ -680,6 +746,10 @@ func PopulateFlags(c *cli.Context) (ret *FlagStorage) {
}
}

if c.IsSet("no-specials") {
flags.EnableSpecials = false
}

// Handle the repeated "-o" flag.
for _, o := range c.StringSlice("o") {
parseOptions(flags.MountOptions, o)
Expand Down
Loading

0 comments on commit bbf24cd

Please sign in to comment.