Skip to content

Commit 1552fda

Browse files
nixprime authored and gvisor-bot committed
Add //pkg/sentry/stateio and use it for async page loading.
PiperOrigin-RevId: 803052657
1 parent 2925932 commit 1552fda

File tree

19 files changed

+1292
-128
lines changed

19 files changed

+1292
-128
lines changed

pkg/abi/linux/fs.go

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,12 @@
1414

1515
package linux
1616

17+
import (
18+
"math"
19+
20+
"gvisor.dev/gvisor/pkg/hostarch"
21+
)
22+
1723
// Filesystem types used in statfs(2).
1824
//
1925
// See linux/magic.h.
@@ -127,3 +133,8 @@ const (
127133
WHITEOUT_MODE = 0
128134
WHITEOUT_DEV = 0
129135
)
136+
137+
// MAX_RW_COUNT is the maximum size in bytes of a single read or write.
138+
// Reads and writes that exceed this size may be truncated.
139+
// (Linux: include/linux/fs.h:MAX_RW_COUNT)
140+
var MAX_RW_COUNT = int(hostarch.PageRoundDown(uint32(math.MaxInt32)))

pkg/sentry/kernel/BUILD

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -390,6 +390,7 @@ go_library(
390390
"//pkg/sentry/seccheck/points:points_go_proto",
391391
"//pkg/sentry/socket/netlink/port",
392392
"//pkg/sentry/socket/unix/transport",
393+
"//pkg/sentry/state/stateio",
393394
"//pkg/sentry/time",
394395
"//pkg/sentry/unimpl",
395396
"//pkg/sentry/unimpl:unimplemented_syscall_go_proto",

pkg/sentry/kernel/kernel_restore.go

Lines changed: 12 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -15,15 +15,14 @@
1515
package kernel
1616

1717
import (
18-
"bufio"
1918
"fmt"
2019
"io"
2120

2221
"gvisor.dev/gvisor/pkg/cleanup"
2322
"gvisor.dev/gvisor/pkg/context"
24-
"gvisor.dev/gvisor/pkg/fd"
2523
"gvisor.dev/gvisor/pkg/log"
2624
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
25+
"gvisor.dev/gvisor/pkg/sentry/state/stateio"
2726
"gvisor.dev/gvisor/pkg/state"
2827
"gvisor.dev/gvisor/pkg/sync"
2928
"gvisor.dev/gvisor/pkg/timing"
@@ -272,7 +271,7 @@ type AsyncMFLoader struct {
272271
// If timeline is provided, it will be used to track async page loading.
273272
// It takes ownership of the timeline, and will end it when done loading all
274273
// pages.
275-
func NewAsyncMFLoader(pagesMetadata, pagesFile *fd.FD, mainMF *pgalloc.MemoryFile, timeline *timing.Timeline) *AsyncMFLoader {
274+
func NewAsyncMFLoader(pagesMetadata io.ReadCloser, pagesFile stateio.AsyncReader, mainMF *pgalloc.MemoryFile, timeline *timing.Timeline) *AsyncMFLoader {
276275
mfl := &AsyncMFLoader{
277276
privateMFsChan: make(chan map[string]*pgalloc.MemoryFile, 1),
278277
}
@@ -283,27 +282,25 @@ func NewAsyncMFLoader(pagesMetadata, pagesFile *fd.FD, mainMF *pgalloc.MemoryFil
283282
return mfl
284283
}
285284

286-
func (mfl *AsyncMFLoader) backgroundGoroutine(pagesMetadataFD, pagesFileFD *fd.FD, mainMF *pgalloc.MemoryFile, timeline *timing.Timeline) {
285+
func (mfl *AsyncMFLoader) backgroundGoroutine(pagesMetadata io.ReadCloser, pagesFile stateio.AsyncReader, mainMF *pgalloc.MemoryFile, timeline *timing.Timeline) {
287286
defer timeline.End()
288-
defer pagesMetadataFD.Close()
289-
defer pagesFileFD.Close()
287+
defer pagesMetadata.Close()
290288
cu := cleanup.Make(func() {
291289
mfl.metadataWg.Done()
292290
mfl.loadWg.Done()
293291
})
294292
defer cu.Clean()
295293

296-
// //pkg/state/wire reads one byte at a time; buffer these reads to
297-
// avoid making one syscall per read. For the "main" state file, this
298-
// buffering is handled by statefile.NewReader() => compressio.Reader
299-
// or compressio.NewSimpleReader().
300-
pagesMetadata := bufio.NewReader(pagesMetadataFD)
301-
302294
mfl.loadWg.Add(1)
303-
apfl := pgalloc.StartAsyncPagesFileLoad(int32(pagesFileFD.FD()), func(err error) {
295+
apfl, err := pgalloc.StartAsyncPagesFileLoad(pagesFile, func(err error) {
304296
defer mfl.loadWg.Done()
305297
mfl.loadErr = err
306-
}, timeline)
298+
}, timeline) // transfers ownership of pagesFile
299+
if err != nil {
300+
mfl.loadWg.Done()
301+
log.Warningf("Failed to start async page loading: %v", err)
302+
return
303+
}
307304
cu.Add(apfl.MemoryFilesDone)
308305

309306
opts := pgalloc.LoadOpts{
@@ -314,7 +311,7 @@ func (mfl *AsyncMFLoader) backgroundGoroutine(pagesMetadataFD, pagesFileFD *fd.F
314311
timeline.Reached("loading mainMF")
315312
log.Infof("Loading metadata for main MemoryFile: %p", mainMF)
316313
ctx := context.Background()
317-
err := mainMF.LoadFrom(ctx, pagesMetadata, &opts)
314+
err = mainMF.LoadFrom(ctx, pagesMetadata, &opts)
318315
mfl.metadataErr = err
319316
mfl.mainMetadataErr = err
320317
mfl.mainMFStartWg.Done()

pkg/sentry/kernel/task_usermem.go

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -28,11 +28,6 @@ import (
2828

2929
const iovecLength = 16
3030

31-
// MAX_RW_COUNT is the maximum size in bytes of a single read or write.
32-
// Reads and writes that exceed this size may be silently truncated.
33-
// (Linux: include/linux/fs.h:MAX_RW_COUNT)
34-
var MAX_RW_COUNT = int(hostarch.Addr(math.MaxInt32).RoundDown())
35-
3631
// Activate ensures that the task has an active address space.
3732
func (t *Task) Activate() {
3833
if mm := t.MemoryManager(); mm != nil {
@@ -190,7 +185,7 @@ func copyInIovec(ctx marshal.CopyContext, t *Task, addr hostarch.Addr) (hostarch
190185
if err != nil {
191186
return hostarch.AddrRangeSeq{}, err
192187
}
193-
return hostarch.AddrRangeSeqOf(ar).TakeFirst(MAX_RW_COUNT), nil
188+
return hostarch.AddrRangeSeqOf(ar).TakeFirst(linux.MAX_RW_COUNT), nil
194189
}
195190

196191
// copyInIovecs copies an array of numIovecs struct iovecs from the memory
@@ -243,7 +238,7 @@ func copyInIovecs(ctx marshal.CopyContext, t *Task, addr hostarch.Addr, numIovec
243238
var total uint64
244239
for i := range dst {
245240
dstlen := uint64(dst[i].Length())
246-
if rem := uint64(MAX_RW_COUNT) - total; rem < dstlen {
241+
if rem := uint64(linux.MAX_RW_COUNT) - total; rem < dstlen {
247242
dst[i].End -= hostarch.Addr(dstlen - rem)
248243
dstlen = rem
249244
}
@@ -288,8 +283,8 @@ func makeIovec(ctx marshal.CopyContext, t *Task, addr hostarch.Addr, b []byte) (
288283
// access_ok() in fs/read_write.c:vfs_read/vfs_write, and overflowing address
289284
// ranges are truncated to MAX_RW_COUNT by fs/read_write.c:rw_verify_area().)
290285
func (t *Task) SingleIOSequence(addr hostarch.Addr, length int, opts usermem.IOOpts) (usermem.IOSequence, error) {
291-
if length > MAX_RW_COUNT {
292-
length = MAX_RW_COUNT
286+
if length > linux.MAX_RW_COUNT {
287+
length = linux.MAX_RW_COUNT
293288
}
294289
ar, ok := t.MemoryManager().CheckIORange(addr, int64(length))
295290
if !ok {

pkg/sentry/pgalloc/BUILD

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -166,7 +166,6 @@ go_library(
166166
visibility = ["//pkg/sentry:internal"],
167167
deps = [
168168
"//pkg/abi/linux",
169-
"//pkg/aio",
170169
"//pkg/atomicbitops",
171170
"//pkg/bitmap",
172171
"//pkg/context",
@@ -180,6 +179,7 @@ go_library(
180179
"//pkg/sentry/arch",
181180
"//pkg/sentry/hostmm",
182181
"//pkg/sentry/memmap",
182+
"//pkg/sentry/state/stateio",
183183
"//pkg/sentry/usage",
184184
"//pkg/state",
185185
"//pkg/state/wire",

0 commit comments

Comments
 (0)