From d8aab1c4660928c9ffbe345760e265aa42c4d629 Mon Sep 17 00:00:00 2001
From: Fedor Uporov
Date: Thu, 10 Apr 2025 16:44:37 +0300
Subject: [PATCH] vdev/geom: Add vdev chr/blk device access support

Add the ability for ZFS to open char/block devices that do not have a
GEOM provider. The functionality is implemented under vdev_file, which
is now able to open char and block devices from devfs in addition to
regular files. The decision whether a device has a GEOM provider is
made by the platform-dependent zfs_dev_is_whole_disk() function.

Two new *strategy* functions are added to the platform-dependent
vdev_file API to call the devfs device strategy routine directly. The
zfs_file_attr_t structure is also extended to report the logical and
physical block sizes of devfs devices.

From the user's point of view, devfs devices follow the same rules as
regular files during pool creation and import: a zpool can be created
on top of devfs devices and later imported using the '-s' option, or
the '-d' option with the /dev directory as an argument. A plain
'zpool import' without arguments will not find pools backed by devfs
devices.

For devices that expose both a devfs node and a GEOM provider (the
NVMe case: ndaX/nvdX + nvmeXnsY), a pool can be created on the GEOM
provider and later imported through the devfs node using the vdev_file
import rules, and vice versa.

Signed-off-by: Fedor Uporov
---
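The new zfs_file_attr_t block-size fields feed the vdev ashift values
computed in vdev_file_open() below as highbit64(size) - 1. A minimal
stand-alone sketch of that derivation (example_ashift() is a
hypothetical helper used only for illustration, not part of this
patch):

	#include <assert.h>
	#include <stdint.h>

	/* Hypothetical helper: ashift (log2) of a power-of-two block size. */
	static uint64_t
	example_ashift(uint64_t block_size)
	{
		uint64_t ashift = 0;

		/* Device sector sizes are powers of two. */
		assert(block_size != 0 &&
		    (block_size & (block_size - 1)) == 0);
		while ((1ULL << ashift) < block_size)
			ashift++;
		return (ashift);	/* 512 -> 9, 4096 -> 12 */
	}

For example, a devfs device reporting a 4096-byte logical block size
yields *logical_ashift = 12 instead of the vdev_file_logical_ashift
default.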
 cmd/zpool/zpool_vdev.c                  |  25 +-
 include/sys/zfs_file.h                  |   9 +-
 lib/libzpool/kernel.c                   |  32 ++
 .../os/freebsd/zutil_device_path_os.c   |  17 +-
 module/os/freebsd/zfs/zfs_file_os.c     | 324 +++++++++++++++++-
 module/os/linux/zfs/zfs_file_os.c       |  32 ++
 module/zfs/vdev_file.c                  | 107 ++++--
 7 files changed, 508 insertions(+), 38 deletions(-)

diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c
index 07868a30d7e7..1dfdcc6c2251 100644
--- a/cmd/zpool/zpool_vdev.c
+++ b/cmd/zpool/zpool_vdev.c
@@ -353,15 +353,32 @@ make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary)
 		/*
 		 * Determine whether this is a device or a file.
 		 */
+#if defined(__FreeBSD__)
+		if (wholedisk) {
+			/* Devices that have a GEOM provider only */
+			type = VDEV_TYPE_DISK;
+		} else if (S_ISREG(statbuf.st_mode) ||
+		    S_ISCHR(statbuf.st_mode) || S_ISBLK(statbuf.st_mode)) {
+			/*
+			 * Regular files and devfs files that have no GEOM
+			 * provider. Whether a device has a GEOM provider
+			 * is decided by zfs_dev_is_whole_disk().
+			 */
+			type = VDEV_TYPE_FILE;
+		}
+#else
 		if (wholedisk || S_ISBLK(statbuf.st_mode)) {
 			type = VDEV_TYPE_DISK;
 		} else if (S_ISREG(statbuf.st_mode)) {
 			type = VDEV_TYPE_FILE;
-		} else {
-			fprintf(stderr, gettext("cannot use '%s': must "
-			    "be a block device or regular file\n"), path);
-			return (NULL);
 		}
+#endif
+	}
+
+	if (type == NULL) {
+		fprintf(stderr, gettext("cannot use '%s': must "
+		    "be a block device or regular file\n"), path);
+		return (NULL);
 	}
 
 	/*
diff --git a/include/sys/zfs_file.h b/include/sys/zfs_file.h
index a1f344c2bb79..cb7e802a7116 100644
--- a/include/sys/zfs_file.h
+++ b/include/sys/zfs_file.h
@@ -37,8 +37,10 @@ typedef struct file zfs_file_t;
 #endif
 
 typedef struct zfs_file_attr {
-	uint64_t zfa_size;	/* file size */
-	mode_t zfa_mode;	/* file type */
+	uint64_t zfa_size;			/* file size */
+	mode_t zfa_mode;			/* file type */
+	uint32_t zfa_logical_block_size;	/* file logical bs */
+	uint32_t zfa_physical_block_size;	/* file physical bs */
 } zfs_file_attr_t;
 
 int zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fp);
@@ -51,6 +53,9 @@ int zfs_file_read(zfs_file_t *fp, void *buf, size_t len, ssize_t *resid);
 int zfs_file_pread(zfs_file_t *fp, void *buf, size_t len, loff_t off,
     ssize_t *resid);
 
+void zfs_file_io_strategy(zfs_file_t *fp, void *arg);
+void zfs_file_io_strategy_done(zfs_file_t *fp, void *arg);
+
 int zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence);
 int zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr);
 int zfs_file_fsync(zfs_file_t *fp, int flags);
diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c
index 653380149a9e..0f29f70e93ed 100644
--- a/lib/libzpool/kernel.c
+++ b/lib/libzpool/kernel.c
@@ -1324,6 +1324,36 @@ zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
 	return (0);
 }
 
+/*
+ * The file I/O strategy routine.
+ *
+ * fp - pointer to the file (regular, blk or chr)
+ * arg - data to transfer from/to the file, typically a ZFS zio
+ *
+ * Used only on the FreeBSD kernel side for now.
+ */
+void
+zfs_file_io_strategy(zfs_file_t *fp, void *arg)
+{
+	(void) fp;
+	(void) arg;
+}
+
+/*
+ * The file I/O strategy completion routine.
+ *
+ * fp - pointer to the file (regular, blk or chr)
+ * arg - data to transfer from/to the file, typically a ZFS zio
+ *
+ * Used only on the FreeBSD kernel side for now.
+ */
+void
+zfs_file_io_strategy_done(zfs_file_t *fp, void *arg)
+{
+	(void) fp;
+	(void) arg;
+}
+
 /*
  * Get file attributes
  *
@@ -1344,6 +1374,8 @@ zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
 
 	zfattr->zfa_size = st.st_size;
 	zfattr->zfa_mode = st.st_mode;
+	zfattr->zfa_logical_block_size = 0;
+	zfattr->zfa_physical_block_size = 0;
 
 	return (0);
 }
diff --git a/lib/libzutil/os/freebsd/zutil_device_path_os.c b/lib/libzutil/os/freebsd/zutil_device_path_os.c
index 6e348e8b6ec1..47f3a3b4c8c5 100644
--- a/lib/libzutil/os/freebsd/zutil_device_path_os.c
+++ b/lib/libzutil/os/freebsd/zutil_device_path_os.c
@@ -79,14 +79,23 @@ zfs_get_underlying_path(const char *dev_name)
 boolean_t
 zfs_dev_is_whole_disk(const char *dev_name)
 {
+	boolean_t wholedisk = B_FALSE;
+	char *name;
 	int fd;
 
 	fd = g_open(dev_name, 0);
-	if (fd >= 0) {
-		g_close(fd);
-		return (B_TRUE);
+	if (fd < 0) {
+		return (B_FALSE);
 	}
-	return (B_FALSE);
+
+	name = g_providername(fd);
+	if (name != NULL) {
+		wholedisk = B_TRUE;
+		free(name);	/* g_providername() returns allocated memory */
+	}
+
+	g_close(fd);
+
+	return (wholedisk);
 }
 
 /*
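To make the detection rule concrete (the device names below are
illustrative examples, not an exhaustive list):

	/*
	 * /dev/nda0, /dev/nvd0  - devfs nodes backed by GEOM providers:
	 *                         g_providername() returns a name, so the
	 *                         device is treated as a whole disk.
	 * /dev/nvme0ns1         - raw NVMe namespace with no GEOM provider:
	 *                         g_providername() returns NULL, so the
	 *                         node is opened through the vdev_file path.
	 */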
diff --git a/module/os/freebsd/zfs/zfs_file_os.c b/module/os/freebsd/zfs/zfs_file_os.c
index 21e5f7938f9f..b127317574d6 100644
--- a/module/os/freebsd/zfs/zfs_file_os.c
+++ b/module/os/freebsd/zfs/zfs_file_os.c
@@ -39,14 +39,30 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/kmem.h>
 #include <sys/malloc.h>
 #include <sys/stat.h>
+#include <sys/disk.h>
 #include <sys/sysctl.h>
+#include <geom/geom.h>
+#include <vm/vm.h>
+#include <vm/vm_page.h>
 #include <sys/zfs_context.h>
 
+SYSCTL_DECL(_vfs_zfs_vdev);
+
+/* Don't send BIO_FLUSH. */
+static int zfs_file_bio_flush_disable;
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, file_bio_flush_disable, CTLFLAG_RWTUN,
+	&zfs_file_bio_flush_disable, 0, "Disable vdev_file BIO_FLUSH");
+
+/* Don't send BIO_DELETE. */
+static int zfs_file_bio_delete_disable;
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, file_bio_delete_disable, CTLFLAG_RWTUN,
+	&zfs_file_bio_delete_disable, 0, "Disable vdev_file BIO_DELETE");
+
 int
 zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
 {
@@ -88,12 +104,12 @@ zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
 		finit_vnode(fp, flags, NULL, &vnops);
 	}
 	VOP_UNLOCK(vp);
-	if (vp->v_type != VREG) {
+	if (vp->v_type != VREG && !IS_DEVVP(vp)) {
 		zfs_file_close(fp);
 		return (SET_ERROR(EACCES));
 	}
 
-	if (flags & O_TRUNC) {
+	if (!IS_DEVVP(vp) && (flags & O_TRUNC)) {
 		error = fo_truncate(fp, 0, td->td_ucred, td);
 		if (error != 0) {
 			zfs_file_close(fp);
@@ -235,11 +251,273 @@ zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
 	return (SET_ERROR(rc));
 }
 
+/*
+ * Callback to translate the ABD segment into an array of physical pages.
+ */
+static int
+zfs_file_fill_unmap_cb(void *buf, size_t len, void *priv)
+{
+	struct bio *bp = priv;
+	vm_offset_t addr = (vm_offset_t)buf;
+	vm_offset_t end = addr + len;
+
+	if (bp->bio_ma_n == 0) {
+		bp->bio_ma_offset = addr & PAGE_MASK;
+		addr &= ~PAGE_MASK;
+	} else {
+		ASSERT0(P2PHASE(addr, PAGE_SIZE));
+	}
+	do {
+		bp->bio_ma[bp->bio_ma_n++] =
+		    PHYS_TO_VM_PAGE(pmap_kextract(addr));
+		addr += PAGE_SIZE;
+	} while (addr < end);
+	return (0);
+}
+
+static void
+zfs_file_io_intr(struct bio *bp)
+{
+	vdev_t *vd;
+	zio_t *zio;
+
+	zio = bp->bio_caller1;
+	vd = zio->io_vd;
+	zio->io_error = bp->bio_error;
+	if (zio->io_error == 0 && bp->bio_resid != 0)
+		zio->io_error = SET_ERROR(EIO);
+
+	switch (zio->io_error) {
+	case ENXIO:
+		if (!vd->vdev_remove_wanted) {
+			if (bp->bio_to != NULL && bp->bio_to->error != 0) {
+				vd->vdev_remove_wanted = B_TRUE;
+				spa_async_request(zio->io_spa,
+				    SPA_ASYNC_REMOVE);
+			} else if (!vd->vdev_delayed_close) {
+				vd->vdev_delayed_close = B_TRUE;
+			}
+		}
+		break;
+	}
+
+	/*
+	 * We have to split bio freeing into two parts, because the ABD code
+	 * cannot be called in this context and vdev_op_io_done is not called
+	 * for ZIO_TYPE_FLUSH zios.
+	 */
+	if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) {
+		g_destroy_bio(bp);
+		zio->io_bio = NULL;
+	}
+	zio_delay_interrupt(zio);
+}
+
+struct zfs_file_check_unmapped_cb_state {
+	int	pages;
+	uint_t	end;
+};
+
+/*
+ * Callback to check the ABD segment size/alignment and count the pages.
+ */
+static int
+zfs_file_check_unmapped_cb(void *buf, size_t len, void *priv)
+{
+	struct zfs_file_check_unmapped_cb_state *s = priv;
+	vm_offset_t off = (vm_offset_t)buf & PAGE_MASK;
+
+	if (s->pages != 0 && off != 0)
+		return (1);
+	if (s->end != 0)
+		return (1);
+	s->end = (off + len) & PAGE_MASK;
+	s->pages += (off + len + PAGE_MASK) >> PAGE_SHIFT;
+	return (0);
+}
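For illustration, the page-tiling rule enforced by the callback above
can be reproduced in user space: the first chunk may start mid-page,
only the last chunk may end mid-page, and every other chunk must be
page-aligned. A minimal sketch assuming 4 KB pages; the ex_* names are
hypothetical and only mirror the logic of zfs_file_check_unmapped_cb():

	#include <stddef.h>
	#include <stdint.h>

	#define	EX_PAGE_SHIFT	12
	#define	EX_PAGE_SIZE	(1u << EX_PAGE_SHIFT)
	#define	EX_PAGE_MASK	(EX_PAGE_SIZE - 1)

	struct ex_chunk {
		uintptr_t addr;
		size_t len;
	};

	/*
	 * Returns the page count if the chunks tile into one virtually
	 * contiguous run, or 0 if unmapped I/O is not possible.
	 */
	static int
	ex_count_unmapped_pages(const struct ex_chunk *c, int n)
	{
		unsigned int end = 0;
		int pages = 0;

		for (int i = 0; i < n; i++) {
			unsigned int off = c[i].addr & EX_PAGE_MASK;

			if (pages != 0 && off != 0)
				return (0); /* later chunk not page-aligned */
			if (end != 0)
				return (0); /* previous chunk ended mid-page */
			end = (off + c[i].len) & EX_PAGE_MASK;
			pages += (off + c[i].len + EX_PAGE_MASK) >>
			    EX_PAGE_SHIFT;
		}
		return (pages);
	}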
+
+/*
+ * Check whether we can use unmapped I/O for this ZIO on this device to
+ * avoid data copying between scattered and/or gang ABD buffers and
+ * linear ones.
+ */
+static int
+zfs_file_check_unmapped(zio_t *zio)
+{
+	struct zfs_file_check_unmapped_cb_state s;
+
+	/* If unmapped I/O is administratively disabled, respect that. */
+	if (!unmapped_buf_allowed)
+		return (0);
+
+	/* If the buffer is already linear, then nothing to do here. */
+	if (abd_is_linear(zio->io_abd))
+		return (0);
+
+	/* Check the buffer chunk sizes/alignments and count the pages. */
+	s.pages = s.end = 0;
+	if (abd_iterate_func(zio->io_abd, 0, zio->io_size,
+	    zfs_file_check_unmapped_cb, &s))
+		return (0);
+	return (s.pages);
+}
+
+void
+zfs_file_io_strategy(zfs_file_t *fp, void *arg)
+{
+	vdev_t *vd;
+	zio_t *zio = arg;
+	struct vdev_data *vdp;	/* opaque; really a vdev_file_data_t */
+	struct bio *bp;
+	struct cdevsw *csw;
+	struct cdev *dev;
+	int ref;
+
+	vd = zio->io_vd;
+
+	if (zio->io_type == ZIO_TYPE_FLUSH) {
+		/* XXPOLICY */
+		if (!vdev_readable(vd)) {
+			zio->io_error = SET_ERROR(ENXIO);
+			zio_interrupt(zio);
+			return;
+		}
+
+		if (zfs_nocacheflush || zfs_file_bio_flush_disable) {
+			zio_execute(zio);
+			return;
+		}
+
+		if (vd->vdev_nowritecache) {
+			zio->io_error = SET_ERROR(ENOTSUP);
+			zio_execute(zio);
+			return;
+		}
+	} else if (zio->io_type == ZIO_TYPE_TRIM) {
+		if (zfs_file_bio_delete_disable) {
+			zio_execute(zio);
+			return;
+		}
+	}
+
+	ASSERT(zio->io_type == ZIO_TYPE_READ ||
+	    zio->io_type == ZIO_TYPE_WRITE ||
+	    zio->io_type == ZIO_TYPE_TRIM ||
+	    zio->io_type == ZIO_TYPE_FLUSH);
+
+	vdp = vd->vdev_tsd;
+	if (vdp == NULL) {
+		zio->io_error = SET_ERROR(ENXIO);
+		zio_interrupt(zio);
+		return;
+	}
+
+	bp = g_alloc_bio();
+	bp->bio_caller1 = zio;
+	switch (zio->io_type) {
+	case ZIO_TYPE_READ:
+	case ZIO_TYPE_WRITE:
+		zio->io_target_timestamp = zio_handle_io_delay(zio);
+		bp->bio_offset = zio->io_offset;
+		bp->bio_bcount = bp->bio_length = zio->io_size;
+		if (zio->io_type == ZIO_TYPE_READ)
+			bp->bio_cmd = BIO_READ;
+		else
+			bp->bio_cmd = BIO_WRITE;
+
+		/*
+		 * If possible, represent a scattered and/or gang ABD buffer
+		 * as an array of physical pages. This satisfies the
+		 * requirement for a virtually contiguous buffer without
+		 * copying.
+		 */
+		int pgs = zfs_file_check_unmapped(zio);
+		if (pgs > 0) {
+			bp->bio_ma = malloc(sizeof (struct vm_page *) * pgs,
+			    M_DEVBUF, M_WAITOK);
+			bp->bio_ma_n = 0;
+			bp->bio_ma_offset = 0;
+			abd_iterate_func(zio->io_abd, 0, zio->io_size,
+			    zfs_file_fill_unmap_cb, bp);
+			bp->bio_data = unmapped_buf;
+			bp->bio_flags |= BIO_UNMAPPED;
+		} else {
+			if (zio->io_type == ZIO_TYPE_READ) {
+				bp->bio_data = abd_borrow_buf(zio->io_abd,
+				    zio->io_size);
+			} else {
+				bp->bio_data = abd_borrow_buf_copy(zio->io_abd,
+				    zio->io_size);
+			}
+		}
+		break;
+	case ZIO_TYPE_TRIM:
+		bp->bio_cmd = BIO_DELETE;
+		bp->bio_data = NULL;
+		bp->bio_offset = zio->io_offset;
+		bp->bio_length = zio->io_size;
+		break;
+	case ZIO_TYPE_FLUSH:
+		bp->bio_cmd = BIO_FLUSH;
+		bp->bio_data = NULL;
+		bp->bio_offset = vd->vdev_asize;
+		bp->bio_length = 0;
+		break;
+	default:
+		panic("invalid zio->io_type: %d\n", zio->io_type);
+	}
+	bp->bio_done = zfs_file_io_intr;
+	zio->io_bio = bp;
+
+	csw = devvn_refthread(fp->f_vnode, &dev, &ref);
+	if (csw == NULL) {
+		zio->io_error = SET_ERROR(ENXIO);
+		zio_interrupt(zio);
+		return;
+	}
+	bp->bio_dev = dev;
+	csw->d_strategy(bp);
+	dev_relthread(dev, ref);
+}
+
+void
+zfs_file_io_strategy_done(zfs_file_t *fp, void *arg)
+{
+	zio_t *zio = arg;
+	struct bio *bp = zio->io_bio;
+
+	if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) {
+		ASSERT3P(bp, ==, NULL);
+		return;
+	}
+
+	if (bp == NULL) {
+		ASSERT3S(zio->io_error, ==, ENXIO);
+		return;
+	}
+
+	if (bp->bio_ma != NULL) {
+		free(bp->bio_ma, M_DEVBUF);
+	} else {
+		if (zio->io_type == ZIO_TYPE_READ) {
+			abd_return_buf_copy(zio->io_abd, bp->bio_data,
+			    zio->io_size);
+		} else {
+			abd_return_buf(zio->io_abd, bp->bio_data,
+			    zio->io_size);
+		}
+	}
+
+	g_destroy_bio(bp);
+	zio->io_bio = NULL;
+}
+
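The completion routine must mirror the ABD borrowing done in
zfs_file_io_strategy(). The pairing rule the two functions above
follow (an illustrative summary, not part of the patch):

	/*
	 * READ:   abd_borrow_buf()      paired with  abd_return_buf_copy()
	 *         (no copy in; device data is copied back into the ABD)
	 * WRITE:  abd_borrow_buf_copy() paired with  abd_return_buf()
	 *         (ABD data is copied to the bounce buffer; no copy back)
	 * Unmapped I/O (bio_ma != NULL): nothing is borrowed; only the
	 * page array is freed on completion.
	 */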
 int
 zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
 {
 	struct thread *td;
 	struct stat sb;
+	off_t stripesize = 0;
+	off_t stripeoffset = 0;
+	off_t mediasize;
+	uint_t sectorsize;
 	int rc;
 
 	td = curthread;
@@ -251,8 +529,50 @@ zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
 #endif
 	if (rc)
 		return (SET_ERROR(rc));
+
 	zfattr->zfa_size = sb.st_size;
 	zfattr->zfa_mode = sb.st_mode;
+	zfattr->zfa_logical_block_size = 0;
+	zfattr->zfa_physical_block_size = 0;
+
+	if (fp->f_vnode->v_type == VREG)
+		return (0);
+
+	rc = fo_ioctl(fp, DIOCGMEDIASIZE, (caddr_t)&mediasize,
+	    td->td_ucred, td);
+	if (rc) {
+		zfs_dbgmsg("zfs file open: cannot get media size");
+		vrele(fp->f_vnode);
+		fdrop(fp, curthread);
+		return (SET_ERROR(EINVAL));
+	}
+
+	zfattr->zfa_size = mediasize;
+
+	rc = fo_ioctl(fp, DIOCGSECTORSIZE, (caddr_t)&sectorsize,
+	    td->td_ucred, td);
+	if (rc) {
+		zfs_dbgmsg("zfs file open: cannot get sector size");
+		vrele(fp->f_vnode);
+		fdrop(fp, curthread);
+		return (SET_ERROR(EINVAL));
+	}
+
+	rc = fo_ioctl(fp, DIOCGSTRIPESIZE, (caddr_t)&stripesize,
+	    td->td_ucred, td);
+	if (rc)
+		zfs_dbgmsg("zfs file open: cannot get stripe size");
+
+	rc = fo_ioctl(fp, DIOCGSTRIPEOFFSET, (caddr_t)&stripeoffset,
+	    td->td_ucred, td);
+	if (rc)
+		zfs_dbgmsg("zfs file open: cannot get stripe offset");
+
+	zfattr->zfa_logical_block_size = MAX(sectorsize, SPA_MINBLOCKSIZE);
+	zfattr->zfa_physical_block_size = 0;
+	/*
+	 * Report a larger physical block size only if the device advertises
+	 * a power-of-two stripe size and no misalignment, e.g. a 512e NVMe
+	 * namespace reporting a 512-byte sector size and a 4096-byte stripe.
+	 */
+	if (stripesize && stripesize > zfattr->zfa_logical_block_size &&
+	    ISP2(stripesize) && stripeoffset == 0)
+		zfattr->zfa_physical_block_size = stripesize;
 
 	return (0);
 }
diff --git a/module/os/linux/zfs/zfs_file_os.c b/module/os/linux/zfs/zfs_file_os.c
index d193eb80dca2..faf1d34c0499 100644
--- a/module/os/linux/zfs/zfs_file_os.c
+++ b/module/os/linux/zfs/zfs_file_os.c
@@ -220,6 +220,36 @@ zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
 	return (0);
 }
 
+/*
+ * The file I/O strategy routine.
+ *
+ * fp - pointer to the file (regular, blk or chr)
+ * arg - data to transfer from/to the file, typically a ZFS zio
+ *
+ * Used only on the FreeBSD kernel side for now.
+ */
+void
+zfs_file_io_strategy(zfs_file_t *fp, void *arg)
+{
+	(void) fp;
+	(void) arg;
+}
+
+/*
+ * The file I/O strategy completion routine.
+ *
+ * fp - pointer to the file (regular, blk or chr)
+ * arg - data to transfer from/to the file, typically a ZFS zio
+ *
+ * Used only on the FreeBSD kernel side for now.
+ */
+void
+zfs_file_io_strategy_done(zfs_file_t *fp, void *arg)
+{
+	(void) fp;
+	(void) arg;
+}
+
 /*
  * Get file attributes
  *
@@ -243,6 +273,8 @@ zfs_file_getattr(zfs_file_t *filp, zfs_file_attr_t *zfattr)
 
 	zfattr->zfa_size = stat.size;
 	zfattr->zfa_mode = stat.mode;
+	zfattr->zfa_logical_block_size = 0;
+	zfattr->zfa_physical_block_size = 0;
 
 	return (0);
 }
diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c
index a2cb6f9b9ef9..604ef3148b6b 100644
--- a/module/zfs/vdev_file.c
+++ b/module/zfs/vdev_file.c
@@ -35,6 +35,23 @@
 #include <sys/stat.h>
 #include <sys/fcntl.h>
 
+/* File device access types */
+typedef enum access_type {
+	ACCESS_REGULAR,
+	ACCESS_BLKDEV
+} access_type_t;
+
+/*
+ * This structure adds an indirection layer that makes it possible to
+ * split the access logic between regular files and special device
+ * files.
+ */
+typedef struct vdev_file_data {
+	access_type_t type;
+	vdev_file_t vf;
+} vdev_file_data_t;
+
 /*
  * Virtual device vector for files.
  */
@@ -99,8 +116,7 @@ static int
 vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
     uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
-	vdev_file_t *vf;
-	zfs_file_t *fp;
+	vdev_file_data_t *vfd;
 	zfs_file_attr_t zfa;
 	int error;
 
@@ -136,11 +152,11 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
 	 */
 	if (vd->vdev_tsd != NULL) {
 		ASSERT(vd->vdev_reopening);
-		vf = vd->vdev_tsd;
+		vfd = vd->vdev_tsd;
 		goto skip_open;
 	}
 
-	vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
+	vfd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_data_t), KM_SLEEP);
 
 	/*
 	 * We always open the files from the root of the global zone, even if
@@ -152,38 +168,54 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
 	ASSERT3S(vd->vdev_path[0], ==, '/');
 
 	error = zfs_file_open(vd->vdev_path,
-	    vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp);
+	    vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &vfd->vf.vf_file);
 	if (error) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
 		return (error);
 	}
 
-	vf->vf_file = fp;
-
+	/*
+	 * In userspace, chr/blk devices are handled the same way as regular
+	 * files. Different logic is used on the FreeBSD kernel side.
+	 */
+	vfd->type = ACCESS_REGULAR;
 #ifdef _KERNEL
 	/*
 	 * Make sure it's a regular file.
 	 */
-	if (zfs_file_getattr(fp, &zfa)) {
+	if (zfs_file_getattr(vfd->vf.vf_file, &zfa)) {
 		return (SET_ERROR(ENODEV));
 	}
+#ifdef __linux__
 	if (!S_ISREG(zfa.zfa_mode)) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
 		return (SET_ERROR(ENODEV));
 	}
+#elif defined(__FreeBSD__)
+	if (S_ISCHR(zfa.zfa_mode) || S_ISBLK(zfa.zfa_mode)) {
+		vfd->type = ACCESS_BLKDEV;
+	} else if (!S_ISREG(zfa.zfa_mode)) {
+		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
+		return (SET_ERROR(ENODEV));
+	}
 #endif
+#endif	/* _KERNEL */
 
 skip_open:
 
-	error = zfs_file_getattr(vf->vf_file, &zfa);
+	error = zfs_file_getattr(vfd->vf.vf_file, &zfa);
 	if (error) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
 		return (error);
 	}
 
 	*max_psize = *psize = zfa.zfa_size;
-	*logical_ashift = vdev_file_logical_ashift;
-	*physical_ashift = vdev_file_physical_ashift;
+	*logical_ashift = zfa.zfa_logical_block_size ?
+	    highbit64(zfa.zfa_logical_block_size) - 1 :
+	    vdev_file_logical_ashift;
+	*physical_ashift = zfa.zfa_physical_block_size ?
+	    highbit64(zfa.zfa_physical_block_size) - 1 :
+	    vdev_file_physical_ashift;
 
 	return (0);
 }
@@ -191,17 +223,17 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
 static void
 vdev_file_close(vdev_t *vd)
 {
-	vdev_file_t *vf = vd->vdev_tsd;
+	vdev_file_data_t *vfd = vd->vdev_tsd;
 
-	if (vd->vdev_reopening || vf == NULL)
+	if (vd->vdev_reopening || vfd == NULL)
 		return;
 
-	if (vf->vf_file != NULL) {
-		(void) zfs_file_close(vf->vf_file);
+	if (vfd->vf.vf_file != NULL) {
+		(void) zfs_file_close(vfd->vf.vf_file);
 	}
 
 	vd->vdev_delayed_close = B_FALSE;
-	kmem_free(vf, sizeof (vdev_file_t));
+	kmem_free(vfd, sizeof (vdev_file_data_t));
 	vd->vdev_tsd = NULL;
 }
 
@@ -210,7 +242,7 @@ vdev_file_io_strategy(void *arg)
 {
 	zio_t *zio = (zio_t *)arg;
 	vdev_t *vd = zio->io_vd;
-	vdev_file_t *vf = vd->vdev_tsd;
+	vdev_file_data_t *vfd = vd->vdev_tsd;
 	void *buf;
 	ssize_t resid;
 	loff_t off;
@@ -221,14 +253,19 @@ vdev_file_io_strategy(void *arg)
 	size = zio->io_size;
 	resid = 0;
 
+	if (vfd->type == ACCESS_BLKDEV) {
+		zfs_file_io_strategy(vfd->vf.vf_file, arg);
+		return;
+	}
+
 	ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
 	if (zio->io_type == ZIO_TYPE_READ) {
 		buf = abd_borrow_buf(zio->io_abd, zio->io_size);
-		err = zfs_file_pread(vf->vf_file, buf, size, off, &resid);
+		err = zfs_file_pread(vfd->vf.vf_file, buf, size, off, &resid);
 		abd_return_buf_copy(zio->io_abd, buf, size);
 	} else {
 		buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
-		err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid);
+		err = zfs_file_pwrite(vfd->vf.vf_file, buf, size, off, &resid);
 		abd_return_buf(zio->io_abd, buf, size);
 	}
 	zio->io_error = err;
@@ -242,9 +279,9 @@ static void
 vdev_file_io_fsync(void *arg)
 {
 	zio_t *zio = (zio_t *)arg;
-	vdev_file_t *vf = zio->io_vd->vdev_tsd;
+	vdev_file_data_t *vfd = zio->io_vd->vdev_tsd;
 
-	zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC | O_DSYNC);
+	zio->io_error = zfs_file_fsync(vfd->vf.vf_file, O_SYNC | O_DSYNC);
 
 	zio_interrupt(zio);
 }
@@ -253,9 +290,9 @@ static void
 vdev_file_io_deallocate(void *arg)
 {
 	zio_t *zio = (zio_t *)arg;
-	vdev_file_t *vf = zio->io_vd->vdev_tsd;
+	vdev_file_data_t *vfd = zio->io_vd->vdev_tsd;
 
-	zio->io_error = zfs_file_deallocate(vf->vf_file,
+	zio->io_error = zfs_file_deallocate(vfd->vf.vf_file,
 	    zio->io_offset, zio->io_size);
 
 	zio_interrupt(zio);
@@ -265,6 +302,7 @@ static void
 vdev_file_io_start(zio_t *zio)
 {
 	vdev_t *vd = zio->io_vd;
+	vdev_file_data_t *vfd = vd->vdev_tsd;
 
 	if (zio->io_type == ZIO_TYPE_FLUSH) {
 		/* XXPOLICY */
@@ -279,9 +317,19 @@ vdev_file_io_start(zio_t *zio)
 			return;
 		}
 
-		VERIFY3U(taskq_dispatch(vdev_file_taskq,
-		    vdev_file_io_fsync, zio, TQ_SLEEP), !=, TASKQID_INVALID);
+		if (vfd->type == ACCESS_BLKDEV) {
+			vdev_file_io_strategy(zio);
+		} else {
+			VERIFY3U(taskq_dispatch(vdev_file_taskq,
+			    vdev_file_io_fsync, zio, TQ_SLEEP), !=,
+			    TASKQID_INVALID);
+		}
+
+		return;
+	}
 
+	if (vfd->type == ACCESS_BLKDEV) {
+		vdev_file_io_strategy(zio);
 		return;
 	}
 
@@ -305,7 +353,14 @@ static void
 vdev_file_io_done(zio_t *zio)
 {
-	(void) zio;
+	vdev_t *vd = zio->io_vd;
+	vdev_file_data_t *vfd = vd->vdev_tsd;
+
+	if (vfd != NULL && vfd->type == ACCESS_REGULAR) {
+		return;
+	}
+
+	/* The FreeBSD implementation does not use the fp argument. */
+	zfs_file_io_strategy_done(NULL, zio);
 }
 
 vdev_ops_t vdev_file_ops = {