Skip to content

Commit 0382188

Browse files
committed
zvol_os: add support for zvol block cloning
Signed-off-by: Ameer Hamza <[email protected]>
1 parent d7a872f commit 0382188

File tree

7 files changed

+460
-4
lines changed

7 files changed

+460
-4
lines changed

config/kernel-blkdev.m4

+32
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,36 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T], [
132132
])
133133
])
134134

135+
dnl #
136+
dnl # Upstream patch for blkdev copy offload support
dnl #
dnl # Compile-test source for blkdev_copy_offload().  The probe calls it
dnl # with eight arguments: source block device, input offset, output
dnl # offset, length, completion callback, callback private data, GFP
dnl # mask and destination block device.  The result is evaluated by
dnl # ZFS_AC_KERNEL_BLKDEV_COPY_OFFLOAD.
137+
dnl #
138+
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_COPY_OFFLOAD], [
139+
ZFS_LINUX_TEST_SRC([blkdev_copy_offload], [
140+
#include <linux/bio.h>
141+
#include <linux/blkdev.h>
142+
], [
143+
struct block_device *bdev_in = NULL, *bdev_out = NULL;
144+
loff_t pos_in = 0, pos_out = 0;
145+
ssize_t ret __attribute__ ((unused));
146+
ssize_t len = 0;
147+
void *private = NULL;
148+
void (*endio)(void *, int, ssize_t) = NULL;
149+
ret = blkdev_copy_offload(bdev_in, pos_in, pos_out, len,
150+
endio, private, GFP_KERNEL, bdev_out);
151+
])
152+
])
153+
154+
dnl #
dnl # Report the outcome of the blkdev_copy_offload compile test and
dnl # define HAVE_BLKDEV_COPY_OFFLOAD when the test succeeded.  Nothing
dnl # is defined on failure.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_COPY_OFFLOAD], [
155+
AC_MSG_CHECKING([whether blkdev_copy_offload exists])
156+
ZFS_LINUX_TEST_RESULT([blkdev_copy_offload], [
157+
AC_MSG_RESULT(yes)
158+
AC_DEFINE(HAVE_BLKDEV_COPY_OFFLOAD, 1,
159+
[blkdev_copy_offload exists])
160+
], [
161+
AC_MSG_RESULT(no)
162+
])
163+
])
164+
135165
dnl #
136166
dnl # 2.6.38 API change,
137167
dnl # Added blkdev_put()
@@ -759,6 +789,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
759789
ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
760790
ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT
761791
ZFS_AC_KERNEL_SRC_BLKDEV_BLK_MODE_T
792+
ZFS_AC_KERNEL_SRC_BLKDEV_COPY_OFFLOAD
762793
])
763794

764795
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
@@ -781,4 +812,5 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
781812
ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
782813
ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT
783814
ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T
815+
ZFS_AC_KERNEL_BLKDEV_COPY_OFFLOAD
784816
])

include/sys/zvol.h

+2
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ extern int zvol_set_ro(const char *, boolean_t);
5656
extern zvol_state_handle_t *zvol_suspend(const char *);
5757
extern int zvol_resume(zvol_state_handle_t *);
5858
extern void *zvol_tag(zvol_state_handle_t *);
59+
/*
 * Clone a byte range from one zvol into another.  As called from the
 * Linux copy-offload path, the arguments are: source zvol, source
 * offset, destination zvol, destination offset, length.  The caller
 * negates the return value before completing the I/O, so this
 * presumably returns 0 or a positive errno -- confirm against the
 * definition.
 */
extern int zvol_clone_range(zvol_state_handle_t *, uint64_t,
60+
zvol_state_handle_t *, uint64_t, uint64_t);
5961

6062
extern int zvol_init(void);
6163
extern void zvol_fini(void);

include/sys/zvol_impl.h

+3
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off,
8383
uint64_t len);
8484
void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
8585
uint64_t size, boolean_t commit);
86+
/*
 * Declaration only; the definition is not visible here.  Judging by the
 * name and parameters this presumably records a clone of `len` bytes at
 * offset `off` in `zilog` under transaction `tx`, described by the
 * `nbps` block pointers in `bps` of block size `blksz` -- TODO confirm
 * against the implementation.
 */
void zvol_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype,
87+
uint64_t off, uint64_t len, uint64_t blksz, const blkptr_t *bps,
88+
size_t nbps);
8689
int zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
8790
struct lwb *lwb, zio_t *zio);
8891
int zvol_init_impl(void);

module/os/linux/zfs/zvol_os.c

+137
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include <sys/zvol.h>
4141
#include <sys/zvol_impl.h>
4242
#include <cityhash.h>
43+
#include <sys/zfs_znode.h>
4344

4445
#include <linux/blkdev_compat.h>
4546
#include <linux/task_io_accounting_ops.h>
@@ -68,6 +69,8 @@ static unsigned int zvol_threads = 0;
6869
static unsigned int zvol_blk_mq_threads = 0;
6970
static unsigned int zvol_blk_mq_actual_threads;
7071
static boolean_t zvol_use_blk_mq = B_FALSE;
72+
/*
 * Non-zero (the default) allows zvol block cloning.  When zero, copy
 * requests are completed with EOPNOTSUPP and newly created minors get
 * their queue copy limits set to 0.
 */
static boolean_t zvol_bclone_enabled = B_TRUE;
73+
/*
 * Upper bound, in bytes, for a single copy request on a zvol queue.  It
 * is clamped to BLK_COPY_MAX_BYTES when the minor is created.  A value
 * of 0 (the default) makes zvol_os_create_minor() derive the limit from
 * the volume block size and the ZIL record capacity instead.
 */
static unsigned long zvol_max_copy_bytes = 0;
7174

7275
/*
7376
* The maximum number of volblocksize blocks to process per thread. Typically,
@@ -496,6 +499,85 @@ zvol_read_task(void *arg)
496499
zv_request_task_free(task);
497500
}
498501

502+
#ifdef HAVE_BLKDEV_COPY_OFFLOAD
503+
/*
 * Service a block-layer copy-offload request by cloning a range from the
 * source zvol (zvr->zv) into the destination zvol carried in the
 * request's blkdev_copy_offload_io context.
 *
 * The request arrives either as a single bio (zvr->bio) or as a blk-mq
 * request (zvr->rq).  It is always completed through zvol_end_io():
 *   - EOPNOTSUPP when zvol_bclone_enabled is clear,
 *   - EINVAL when the request is malformed (missing offload context or
 *     destination zvol, or source/destination lengths that differ),
 *   - otherwise the result of zvol_clone_range().
 */
static void
zvol_clone_range_impl(zv_request_t *zvr)
{
	zvol_state_t *zv_src = zvr->zv, *zv_dst = NULL;
	struct request *req = zvr->rq;
	struct bio *bio = zvr->bio;
	struct blkdev_copy_offload_io *offload_io = NULL;
	zfs_uio_t uio_src, uio_dst;
	uint64_t len = 0;
	int error, seg = 1;

	if (!zvol_bclone_enabled) {
		zvol_end_io(bio, req, -SET_ERROR(EOPNOTSUPP));
		return;
	}

	memset(&uio_src, 0, sizeof (zfs_uio_t));
	memset(&uio_dst, 0, sizeof (zfs_uio_t));

	if (bio) {
		/*
		 * Single-Queue Request: driver_private contains the
		 * destination ZVOL.
		 *
		 * Validate the whole offload context before dereferencing
		 * dst_bio; a missing context, destination bio or
		 * destination zvol is an invalid request, not a crash.
		 */
		offload_io = bio->bi_private;
		if (offload_io == NULL || offload_io->dst_bio == NULL ||
		    offload_io->driver_private == NULL)
			goto invalid;
		if (bio->bi_iter.bi_size !=
		    offload_io->dst_bio->bi_iter.bi_size)
			goto invalid;

		zv_dst = offload_io->driver_private;
		zfs_uio_bvec_init(&uio_src, bio, NULL);
		zfs_uio_bvec_init(&uio_dst, offload_io->dst_bio, NULL);
		len = bio->bi_iter.bi_size;
	} else {
		/*
		 * Multi-Queue (MQ) Request: First bio contains information
		 * about destination and the second contains information
		 * about the source.  The source is recognised as the bio
		 * whose ordinal equals blk_rq_nr_phys_segments(req); every
		 * earlier bio is treated as the destination.
		 */
		struct bio *bio_temp;
		__rq_for_each_bio(bio_temp, req) {
			if (seg == blk_rq_nr_phys_segments(req)) {
				offload_io = bio_temp->bi_private;
				zfs_uio_bvec_init(&uio_src, bio_temp, NULL);
				/* len was taken from the destination bio. */
				if (len != bio_temp->bi_iter.bi_size)
					goto invalid;
				if (offload_io && offload_io->driver_private)
					zv_dst = offload_io->driver_private;
			} else {
				zfs_uio_bvec_init(&uio_dst, bio_temp, NULL);
				len = bio_temp->bi_iter.bi_size;
			}
			seg++;
		}
	}

	if (!zv_src || !zv_dst)
		goto invalid;

	error = zvol_clone_range(zv_src, uio_src.uio_loffset, zv_dst,
	    uio_dst.uio_loffset, len);
	zvol_end_io(bio, req, -error);
	return;

invalid:
	zvol_end_io(bio, req, -SET_ERROR(EINVAL));
}
}
572+
573+
static void
574+
zvol_clone_range_task(void *arg)
575+
{
576+
zv_request_task_t *task = arg;
577+
zvol_clone_range_impl(&task->zvr);
578+
zv_request_task_free(task);
579+
}
580+
#endif
499581

500582
/*
501583
* Process a BIO or request
@@ -555,6 +637,24 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
555637
blk_mq_hw_queue);
556638
tq_idx = taskq_hash % ztqs->tqs_cnt;
557639

640+
#ifdef HAVE_BLKDEV_COPY_OFFLOAD
641+
if ((bio && op_is_copy(bio_op(bio))) ||
642+
(rq && op_is_copy(req_op(rq)))) {
643+
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
644+
zvol_end_io(bio, rq, -SET_ERROR(EROFS));
645+
goto out;
646+
}
647+
if (force_sync) {
648+
zvol_clone_range_impl(&zvr);
649+
} else {
650+
task = zv_request_task_create(zvr);
651+
taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
652+
zvol_clone_range_task, task, 0, &task->ent);
653+
}
654+
goto out;
655+
}
656+
#endif
657+
558658
if (rw == WRITE) {
559659
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
560660
zvol_end_io(bio, rq, -SET_ERROR(EROFS));
@@ -1607,6 +1707,10 @@ zvol_os_create_minor(const char *name)
16071707
uint64_t hash = zvol_name_hash(name);
16081708
uint64_t volthreading;
16091709
bool replayed_zil = B_FALSE;
1710+
#ifdef HAVE_BLKDEV_COPY_OFFLOAD
1711+
struct queue_limits *lim;
1712+
uint64_t max_clone_blocks = 1022;
1713+
#endif
16101714

16111715
if (zvol_inhibit_dev)
16121716
return (0);
@@ -1693,6 +1797,33 @@ zvol_os_create_minor(const char *name)
16931797
else
16941798
replayed_zil = zil_replay(os, zv, zvol_replay_vector);
16951799
}
1800+
#ifdef HAVE_BLKDEV_COPY_OFFLOAD
1801+
lim = &zv->zv_zso->zvo_queue->limits;
1802+
lim->max_user_copy_sectors = UINT_MAX;
1803+
1804+
/*
1805+
* When zvol_bclone_enabled is unset, blkdev_copy_offload() should
1806+
* return early and fall back to the default path. Existing zvols
1807+
* would require export/import to make this applicable.
1808+
*/
1809+
if (!zvol_bclone_enabled) {
1810+
lim->max_copy_hw_sectors = 0;
1811+
lim->max_copy_sectors = 0;
1812+
} else if (!zvol_max_copy_bytes) {
1813+
if (zv->zv_zilog)
1814+
max_clone_blocks = zil_max_log_data(zv->zv_zilog,
1815+
sizeof (lr_clone_range_t)) / sizeof (blkptr_t);
1816+
lim->max_copy_hw_sectors = MIN((doi->doi_data_block_size *
1817+
max_clone_blocks), BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT;
1818+
lim->max_copy_sectors = MIN((doi->doi_data_block_size *
1819+
max_clone_blocks), BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT;
1820+
} else {
1821+
lim->max_copy_hw_sectors = MIN(zvol_max_copy_bytes,
1822+
BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT;
1823+
lim->max_copy_sectors = MIN(zvol_max_copy_bytes,
1824+
BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT;
1825+
}
1826+
#endif
16961827
if (replayed_zil)
16971828
zil_close(zv->zv_zilog);
16981829
zv->zv_zilog = NULL;
@@ -1934,6 +2065,12 @@ module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
19342065
MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
19352066
"Process volblocksize blocks per thread");
19362067

2068+
module_param(zvol_max_copy_bytes, ulong, 0644);
2069+
MODULE_PARM_DESC(zvol_max_copy_bytes, "max copy bytes for zvol block cloning");
2070+
2071+
module_param(zvol_bclone_enabled, uint, 0644);
2072+
MODULE_PARM_DESC(zvol_bclone_enabled, "Disable block cloning for zvols");
2073+
19372074
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
19382075
module_param(zvol_open_timeout_ms, uint, 0644);
19392076
MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");

module/zfs/zfs_vnops.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ int zfs_bclone_enabled = 1;
7171
* a copy of the file and is therefore not the default. However, in certain
7272
* scenarios this behavior may be desirable so a tunable is provided.
7373
*/
74-
static int zfs_bclone_wait_dirty = 0;
74+
int zfs_bclone_wait_dirty = 0;
7575

7676
/*
7777
* Enable Direct I/O. If this setting is 0, then all I/O requests will be

0 commit comments

Comments
 (0)