|
40 | 40 | #include <sys/zvol.h>
|
41 | 41 | #include <sys/zvol_impl.h>
|
42 | 42 | #include <cityhash.h>
|
| 43 | +#include <sys/zfs_znode.h> |
43 | 44 |
|
44 | 45 | #include <linux/blkdev_compat.h>
|
45 | 46 | #include <linux/task_io_accounting_ops.h>
|
@@ -68,6 +69,8 @@ static unsigned int zvol_threads = 0;
|
68 | 69 | static unsigned int zvol_blk_mq_threads = 0;
|
69 | 70 | static unsigned int zvol_blk_mq_actual_threads;
|
70 | 71 | static boolean_t zvol_use_blk_mq = B_FALSE;
|
| 72 | +static boolean_t zvol_bclone_enabled = B_TRUE; |
| 73 | +static unsigned long zvol_max_copy_bytes = 0; |
71 | 74 |
|
72 | 75 | /*
|
73 | 76 | * The maximum number of volblocksize blocks to process per thread. Typically,
|
@@ -496,6 +499,85 @@ zvol_read_task(void *arg)
|
496 | 499 | zv_request_task_free(task);
|
497 | 500 | }
|
498 | 501 |
|
| 502 | +#ifdef HAVE_BLKDEV_COPY_OFFLOAD |
| 503 | +static void zvol_clone_range_impl(zv_request_t *zvr) |
| 504 | +{ |
| 505 | + zvol_state_t *zv_src = zvr->zv, *zv_dst = NULL; |
| 506 | + struct request *req = zvr->rq; |
| 507 | + struct bio *bio = zvr->bio; |
| 508 | + zfs_uio_t uio_src, uio_dst; |
| 509 | + uint64_t len = 0; |
| 510 | + int error = EINVAL, seg = 1; |
| 511 | + struct blkdev_copy_offload_io *offload_io; |
| 512 | + |
| 513 | + if (!zvol_bclone_enabled) { |
| 514 | + zvol_end_io(bio, req, -SET_ERROR(EOPNOTSUPP)); |
| 515 | + return; |
| 516 | + } |
| 517 | + |
| 518 | + memset(&uio_src, 0, sizeof (zfs_uio_t)); |
| 519 | + memset(&uio_dst, 0, sizeof (zfs_uio_t)); |
| 520 | + |
| 521 | + if (bio) { |
| 522 | + /* |
| 523 | + * Single-Queue Request: driver_private contains the |
| 524 | + * destination ZVOL. |
| 525 | + */ |
| 526 | + offload_io = bio->bi_private; |
| 527 | + if (offload_io && offload_io->driver_private) |
| 528 | + zv_dst = offload_io->driver_private; |
| 529 | + if (bio->bi_iter.bi_size != |
| 530 | + offload_io->dst_bio->bi_iter.bi_size) { |
| 531 | + zvol_end_io(bio, req, -SET_ERROR(error)); |
| 532 | + return; |
| 533 | + } |
| 534 | + zfs_uio_bvec_init(&uio_src, bio, NULL); |
| 535 | + zfs_uio_bvec_init(&uio_dst, offload_io->dst_bio, NULL); |
| 536 | + len = bio->bi_iter.bi_size; |
| 537 | + } else { |
| 538 | + /* |
| 539 | + * Multi-Queue (MQ) Request: First bio contains information |
| 540 | + * about destination and the second contains information |
| 541 | + * about the source |
| 542 | + */ |
| 543 | + struct bio *bio_temp; |
| 544 | + __rq_for_each_bio(bio_temp, req) { |
| 545 | + if (seg == blk_rq_nr_phys_segments(req)) { |
| 546 | + offload_io = bio_temp->bi_private; |
| 547 | + zfs_uio_bvec_init(&uio_src, bio_temp, NULL); |
| 548 | + if (len != bio_temp->bi_iter.bi_size) { |
| 549 | + zvol_end_io(bio, req, |
| 550 | + -SET_ERROR(error)); |
| 551 | + return; |
| 552 | + } |
| 553 | + if (offload_io && offload_io->driver_private) |
| 554 | + zv_dst = offload_io->driver_private; |
| 555 | + } else { |
| 556 | + zfs_uio_bvec_init(&uio_dst, bio_temp, NULL); |
| 557 | + len = bio_temp->bi_iter.bi_size; |
| 558 | + } |
| 559 | + seg++; |
| 560 | + } |
| 561 | + } |
| 562 | + |
| 563 | + if (!zv_src || !zv_dst) { |
| 564 | + zvol_end_io(bio, req, -SET_ERROR(error)); |
| 565 | + return; |
| 566 | + } |
| 567 | + |
| 568 | + error = zvol_clone_range(zv_src, uio_src.uio_loffset, zv_dst, |
| 569 | + uio_dst.uio_loffset, len); |
| 570 | + zvol_end_io(bio, req, -error); |
| 571 | +} |
| 572 | + |
| 573 | +static void |
| 574 | +zvol_clone_range_task(void *arg) |
| 575 | +{ |
| 576 | + zv_request_task_t *task = arg; |
| 577 | + zvol_clone_range_impl(&task->zvr); |
| 578 | + zv_request_task_free(task); |
| 579 | +} |
| 580 | +#endif |
499 | 581 |
|
500 | 582 | /*
|
501 | 583 | * Process a BIO or request
|
@@ -555,6 +637,24 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
|
555 | 637 | blk_mq_hw_queue);
|
556 | 638 | tq_idx = taskq_hash % ztqs->tqs_cnt;
|
557 | 639 |
|
| 640 | +#ifdef HAVE_BLKDEV_COPY_OFFLOAD |
| 641 | + if ((bio && op_is_copy(bio_op(bio))) || |
| 642 | + (rq && op_is_copy(req_op(rq)))) { |
| 643 | + if (unlikely(zv->zv_flags & ZVOL_RDONLY)) { |
| 644 | + zvol_end_io(bio, rq, -SET_ERROR(EROFS)); |
| 645 | + goto out; |
| 646 | + } |
| 647 | + if (force_sync) { |
| 648 | + zvol_clone_range_impl(&zvr); |
| 649 | + } else { |
| 650 | + task = zv_request_task_create(zvr); |
| 651 | + taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx], |
| 652 | + zvol_clone_range_task, task, 0, &task->ent); |
| 653 | + } |
| 654 | + goto out; |
| 655 | + } |
| 656 | +#endif |
| 657 | + |
558 | 658 | if (rw == WRITE) {
|
559 | 659 | if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
|
560 | 660 | zvol_end_io(bio, rq, -SET_ERROR(EROFS));
|
@@ -1607,6 +1707,10 @@ zvol_os_create_minor(const char *name)
|
1607 | 1707 | uint64_t hash = zvol_name_hash(name);
|
1608 | 1708 | uint64_t volthreading;
|
1609 | 1709 | bool replayed_zil = B_FALSE;
|
| 1710 | +#ifdef HAVE_BLKDEV_COPY_OFFLOAD |
| 1711 | + struct queue_limits *lim; |
| 1712 | + uint64_t max_clone_blocks = 1022; |
| 1713 | +#endif |
1610 | 1714 |
|
1611 | 1715 | if (zvol_inhibit_dev)
|
1612 | 1716 | return (0);
|
@@ -1693,6 +1797,33 @@ zvol_os_create_minor(const char *name)
|
1693 | 1797 | else
|
1694 | 1798 | replayed_zil = zil_replay(os, zv, zvol_replay_vector);
|
1695 | 1799 | }
|
| 1800 | +#ifdef HAVE_BLKDEV_COPY_OFFLOAD |
| 1801 | + lim = &zv->zv_zso->zvo_queue->limits; |
| 1802 | + lim->max_user_copy_sectors = UINT_MAX; |
| 1803 | + |
| 1804 | + /* |
| 1805 | + * When zvol_bclone_enabled is unset, blkdev_copy_offload() should |
| 1806 | + * return early and fall back to the default path. Existing zvols |
| 1807 | + * would require export/import to make this applicable. |
| 1808 | + */ |
| 1809 | + if (!zvol_bclone_enabled) { |
| 1810 | + lim->max_copy_hw_sectors = 0; |
| 1811 | + lim->max_copy_sectors = 0; |
| 1812 | + } else if (!zvol_max_copy_bytes) { |
| 1813 | + if (zv->zv_zilog) |
| 1814 | + max_clone_blocks = zil_max_log_data(zv->zv_zilog, |
| 1815 | + sizeof (lr_clone_range_t)) / sizeof (blkptr_t); |
| 1816 | + lim->max_copy_hw_sectors = MIN((doi->doi_data_block_size * |
| 1817 | + max_clone_blocks), BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT; |
| 1818 | + lim->max_copy_sectors = MIN((doi->doi_data_block_size * |
| 1819 | + max_clone_blocks), BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT; |
| 1820 | + } else { |
| 1821 | + lim->max_copy_hw_sectors = MIN(zvol_max_copy_bytes, |
| 1822 | + BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT; |
| 1823 | + lim->max_copy_sectors = MIN(zvol_max_copy_bytes, |
| 1824 | + BLK_COPY_MAX_BYTES) >> SECTOR_SHIFT; |
| 1825 | + } |
| 1826 | +#endif |
1696 | 1827 | if (replayed_zil)
|
1697 | 1828 | zil_close(zv->zv_zilog);
|
1698 | 1829 | zv->zv_zilog = NULL;
|
@@ -1934,6 +2065,12 @@ module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
|
1934 | 2065 | MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
|
1935 | 2066 | "Process volblocksize blocks per thread");
|
1936 | 2067 |
|
| 2068 | +module_param(zvol_max_copy_bytes, ulong, 0644); |
| 2069 | +MODULE_PARM_DESC(zvol_max_copy_bytes, "max copy bytes for zvol block cloning"); |
| 2070 | + |
| 2071 | +module_param(zvol_bclone_enabled, uint, 0644); |
| 2072 | +MODULE_PARM_DESC(zvol_bclone_enabled, "Disable block cloning for zvols"); |
| 2073 | + |
1937 | 2074 | #ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
1938 | 2075 | module_param(zvol_open_timeout_ms, uint, 0644);
|
1939 | 2076 | MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
|
|
0 commit comments