From 0594de3100847d6980278eee30711e2866156d3e Mon Sep 17 00:00:00 2001
From: Igor Ostapenko
Date: Tue, 4 Mar 2025 13:02:14 +0000
Subject: [PATCH] range_tree: Add zfs_recover_rt parameter and extra debug info

There are production cases where an unexpected range tree segment
addition or removal leads to a panic. Root cause investigation requires
more debug info about the range tree and the segments in question when
it happens. In addition, the zfs_recover_rt parameter allows converting
such panics into warnings, with a potential space leak as a trade-off.

Signed-off-by: Igor Ostapenko
---
 include/sys/range_tree.h  |  32 ++++++++
 man/man4/zfs.4            |   6 ++
 module/zfs/dnode.c        |   6 +-
 module/zfs/metaslab.c     |  73 ++++++++++++-----
 module/zfs/range_tree.c   | 150 ++++++++++++++++++++++++++++++--------
 module/zfs/vdev.c         |  14 ++--
 module/zfs/vdev_rebuild.c |   5 +-
 module/zfs/vdev_removal.c |  20 +++--
 module/zfs/vdev_trim.c    |  22 ++++--
 9 files changed, 257 insertions(+), 71 deletions(-)

diff --git a/include/sys/range_tree.h b/include/sys/range_tree.h
index 23eea3210c98..1938a6587848 100644
--- a/include/sys/range_tree.h
+++ b/include/sys/range_tree.h
@@ -48,6 +48,32 @@ typedef enum zfs_range_seg_type {
 	ZFS_RANGE_SEG_NUM_TYPES,
 } zfs_range_seg_type_t;
 
+/*
+ * Range tree behavior flags.
+ *
+ * The UC (use case) flags are intended to support the zfs_recover_rt mode.
+ * The range tree's logic needs to know the context in order to correctly
+ * recover from an unexpected situation by exchanging potential data loss for
+ * a potential space leak:
+ *
+ * - If it knows that the tree represents allocated space, then it is better
+ *   to perform the unexpected addition to the tree anyway.
+ *
+ * - Similarly, if the tree represents free (allocatable) space, then it is
+ *   better to perform the unexpected removal than to silently ignore it.
+ *
+ * The generic case simply ignores unexpected additions and removals as a
+ * recovery mechanism, without special treatment.
+ *
+ * Unexpected actions are logged with extra details such as the range tree
+ * name string, which can be marked as dynamic so that it is freed when the
+ * tree instance is destroyed.
+ */
+#define	ZFS_RANGE_TREE_F_UC_GENERIC		(1 << 0)
+#define	ZFS_RANGE_TREE_F_UC_ALLOCATED_SPACE	(1 << 1)
+#define	ZFS_RANGE_TREE_F_UC_FREE_SPACE		(1 << 2)
+#define	ZFS_RANGE_TREE_F_DYN_NAME		(1 << 3)
+
 /*
  * Note: the range_tree may not be accessed concurrently; consumers
  * must provide external locking if required.
@@ -67,6 +93,9 @@ typedef struct zfs_range_tree {
 	void *rt_arg;
 	uint64_t rt_gap;	/* allowable inter-segment gap */
 
+	uint64_t rt_flags;
+	const char *rt_name;	/* details for debugging */
+
 	/*
 	 * The rt_histogram maintains a histogram of ranges. Each bucket,
 	 * rt_histogram[i], contains the number of ranges whose size is:
@@ -280,6 +309,9 @@ zfs_range_tree_t *zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
     uint64_t gap);
 zfs_range_tree_t *zfs_range_tree_create(const zfs_range_tree_ops_t *ops,
     zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift);
+zfs_range_tree_t *zfs_range_tree_create_flags(const zfs_range_tree_ops_t *ops,
+    zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
+    uint64_t flags, const char *name);
 void zfs_range_tree_destroy(zfs_range_tree_t *rt);
 boolean_t zfs_range_tree_contains(zfs_range_tree_t *rt, uint64_t start,
     uint64_t size);
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index 9d83357fcc6d..7b216c5da2c0 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -1987,6 +1987,12 @@ Set to attempt to recover from fatal errors.
 This should only be used as a last resort,
 as it typically results in leaked space, or worse.
 .
+.It Sy zfs_recover_rt Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Set to attempt to recover from fatal errors when unexpected segments are
+added to or removed from a range tree.
+This should only be used as a last resort,
+as it typically results in leaked space.
+.
 .It Sy zfs_removal_ignore_errors Ns = Ns Sy 0 Ns | Ns 1 Pq int
 Ignore hard I/O errors during device removal.
 When set, if a device encounters a hard I/O error during the removal process
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index ce2c79dbfaa3..ac513f41aa36 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -2435,8 +2435,10 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
 	{
 		int txgoff = tx->tx_txg & TXG_MASK;
 		if (dn->dn_free_ranges[txgoff] == NULL) {
-			dn->dn_free_ranges[txgoff] = zfs_range_tree_create(NULL,
-			    ZFS_RANGE_SEG64, NULL, 0, 0);
+			dn->dn_free_ranges[txgoff] =
+			    zfs_range_tree_create_flags(
+			    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+			    ZFS_RANGE_TREE_F_UC_FREE_SPACE, "dn_free_ranges");
 		}
 		zfs_range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks);
 		zfs_range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks);
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index c1424a81bf7b..a6e3259663f4 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -368,6 +368,17 @@ static metaslab_stats_t metaslab_stats = {
 #define	METASLABSTAT_BUMP(stat) \
 	atomic_inc_64(&metaslab_stats.stat.value.ui64);
 
+static inline char *
+zfs_rt_name(metaslab_group_t *mg, metaslab_t *ms,
+    const char *name)
+{
+	return (kmem_asprintf("{spa=%s vdev_guid=%llu ms_id=%llu %s}",
+	    mg->mg_vd->vdev_spa->spa_name,
+	    (u_longlong_t)mg->mg_vd->vdev_guid,
+	    (u_longlong_t)ms->ms_id,
+	    name));
+}
+
 
 static kstat_t *metaslab_ksp;
 
@@ -2753,30 +2764,53 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object,
 	zfs_range_seg_type_t type =
 	    metaslab_calculate_range_tree_type(vd, ms, &start, &shift);
 
-	ms->ms_allocatable = zfs_range_tree_create(NULL, type, NULL, start,
-	    shift);
+	ms->ms_allocatable = zfs_range_tree_create_flags(
+	    NULL, type, NULL, start, shift,
+	    ZFS_RANGE_TREE_F_UC_FREE_SPACE | ZFS_RANGE_TREE_F_DYN_NAME,
+	    zfs_rt_name(mg, ms, "ms_allocatable"));
 	for (int t = 0; t < TXG_SIZE; t++) {
-		ms->ms_allocating[t] = zfs_range_tree_create(NULL, type,
-		    NULL, start, shift);
-	}
-	ms->ms_freeing = zfs_range_tree_create(NULL, type, NULL, start, shift);
-	ms->ms_freed = zfs_range_tree_create(NULL, type, NULL, start, shift);
+		ms->ms_allocating[t] = zfs_range_tree_create_flags(
+		    NULL, type, NULL, start, shift,
+		    ZFS_RANGE_TREE_F_UC_ALLOCATED_SPACE |
+		    ZFS_RANGE_TREE_F_DYN_NAME,
+		    zfs_rt_name(mg, ms, "ms_allocating"));
+	}
+	ms->ms_freeing = zfs_range_tree_create_flags(
+	    NULL, type, NULL, start, shift,
+	    ZFS_RANGE_TREE_F_UC_FREE_SPACE | ZFS_RANGE_TREE_F_DYN_NAME,
+	    zfs_rt_name(mg, ms, "ms_freeing"));
+	ms->ms_freed = zfs_range_tree_create_flags(
+	    NULL, type, NULL, start, shift,
+	    ZFS_RANGE_TREE_F_UC_FREE_SPACE | ZFS_RANGE_TREE_F_DYN_NAME,
+	    zfs_rt_name(mg, ms, "ms_freed"));
 	for (int t = 0; t < TXG_DEFER_SIZE; t++) {
-		ms->ms_defer[t] = zfs_range_tree_create(NULL, type, NULL,
-		    start, shift);
-	}
-	ms->ms_checkpointing =
-	    zfs_range_tree_create(NULL, type, NULL, start, shift);
-	ms->ms_unflushed_allocs =
-	    zfs_range_tree_create(NULL, type, NULL, start, shift);
+		ms->ms_defer[t] = zfs_range_tree_create_flags(
+		    NULL, type, NULL, start, shift,
+		    ZFS_RANGE_TREE_F_UC_FREE_SPACE |
+		    ZFS_RANGE_TREE_F_DYN_NAME,
+		    zfs_rt_name(mg, ms, "ms_defer"));
+	}
+	ms->ms_checkpointing = zfs_range_tree_create_flags(
+	    NULL, type, NULL, start, shift,
+	    ZFS_RANGE_TREE_F_UC_FREE_SPACE | ZFS_RANGE_TREE_F_DYN_NAME,
+	    zfs_rt_name(mg, ms, "ms_checkpointing"));
+	ms->ms_unflushed_allocs = zfs_range_tree_create_flags(
+	    NULL, type, NULL, start, shift,
+	    ZFS_RANGE_TREE_F_UC_ALLOCATED_SPACE | ZFS_RANGE_TREE_F_DYN_NAME,
+	    zfs_rt_name(mg, ms, "ms_unflushed_allocs"));
 
 	metaslab_rt_arg_t *mrap = kmem_zalloc(sizeof (*mrap), KM_SLEEP);
 	mrap->mra_bt = &ms->ms_unflushed_frees_by_size;
 	mrap->mra_floor_shift = metaslab_by_size_min_shift;
-	ms->ms_unflushed_frees = zfs_range_tree_create(&metaslab_rt_ops,
-	    type, mrap, start, shift);
+	ms->ms_unflushed_frees = zfs_range_tree_create_flags(
+	    &metaslab_rt_ops, type, mrap, start, shift,
+	    ZFS_RANGE_TREE_F_UC_FREE_SPACE | ZFS_RANGE_TREE_F_DYN_NAME,
+	    zfs_rt_name(mg, ms, "ms_unflushed_frees"));
 
-	ms->ms_trim = zfs_range_tree_create(NULL, type, NULL, start, shift);
+	ms->ms_trim = zfs_range_tree_create_flags(
+	    NULL, type, NULL, start, shift,
+	    ZFS_RANGE_TREE_F_UC_FREE_SPACE | ZFS_RANGE_TREE_F_DYN_NAME,
+	    zfs_rt_name(mg, ms, "ms_trim"));
 
 	metaslab_group_add(mg, ms);
 	metaslab_set_fragmentation(ms, B_FALSE);
@@ -3750,7 +3784,10 @@ metaslab_condense(metaslab_t *msp, dmu_tx_t *tx)
 
 	type = metaslab_calculate_range_tree_type(msp->ms_group->mg_vd,
 	    msp, &start, &shift);
-	condense_tree = zfs_range_tree_create(NULL, type, NULL, start, shift);
+	condense_tree = zfs_range_tree_create_flags(
+	    NULL, type, NULL, start, shift,
+	    ZFS_RANGE_TREE_F_UC_FREE_SPACE | ZFS_RANGE_TREE_F_DYN_NAME,
+	    zfs_rt_name(msp->ms_group, msp, "condense_tree"));
 
 	for (int t = 0; t < TXG_DEFER_SIZE; t++) {
 		zfs_range_tree_walk(msp->ms_defer[t],
diff --git a/module/zfs/range_tree.c b/module/zfs/range_tree.c
index 8bb9a0724e61..5f350ade106a 100644
--- a/module/zfs/range_tree.c
+++ b/module/zfs/range_tree.c
@@ -75,6 +75,28 @@
  * support removing complete segments.
  */
 
+/*
+ * zfs_recover_rt can be set to nonzero to attempt to recover from
+ * otherwise-fatal errors when unexpected segments are added to or removed
+ * from a tree. When set, calls to zfs_panic_recover_rt() turn into warning
+ * messages. This should only be used as a last resort, as it typically
+ * results in leaked space.
+ */
+static int zfs_recover_rt = B_FALSE;
+
+#define	RT_NAME(rt)	(((rt)->rt_name != NULL) ? (rt)->rt_name : "")
+
+static void
+zfs_panic_recover_rt(const char *fmt, ...)
+{
+	va_list adx;
+
+	va_start(adx, fmt);
+	vcmn_err((zfs_recover || zfs_recover_rt) ? CE_WARN : CE_PANIC,
+	    fmt, adx);
+	va_end(adx);
+}
+
 static inline void
 zfs_rs_copy(zfs_range_seg_t *src, zfs_range_seg_t *dest, zfs_range_tree_t *rt)
 {
@@ -200,15 +222,16 @@ ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg64_find_in_buf, zfs_range_seg64_t,
     zfs_range_tree_seg64_compare)
 ZFS_BTREE_FIND_IN_BUF_FUNC(zfs_range_tree_seg_gap_find_in_buf,
     zfs_range_seg_gap_t, zfs_range_tree_seg_gap_compare)
-zfs_range_tree_t *
-zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
+static zfs_range_tree_t *
+zfs_range_tree_create_impl(const zfs_range_tree_ops_t *ops,
     zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
-    uint64_t gap)
+    uint64_t gap, uint64_t flags, const char *name)
 {
 	zfs_range_tree_t *rt = kmem_zalloc(sizeof (zfs_range_tree_t), KM_SLEEP);
 
 	ASSERT3U(shift, <, 64);
 	ASSERT3U(type, <=, ZFS_RANGE_SEG_NUM_TYPES);
+	ASSERT(flags);
 	size_t size;
 	int (*compare) (const void *, const void *);
 	bt_find_in_buf_f bt_find;
@@ -235,6 +258,8 @@ zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
 
 	rt->rt_ops = ops;
 	rt->rt_gap = gap;
+	rt->rt_flags = flags;
+	rt->rt_name = name;
 	rt->rt_arg = arg;
 	rt->rt_type = type;
 	rt->rt_start = start;
@@ -246,11 +271,30 @@ zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
 	return (rt);
 }
 
+zfs_range_tree_t *
+zfs_range_tree_create_gap(const zfs_range_tree_ops_t *ops,
+    zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
+    uint64_t gap)
+{
+	return (zfs_range_tree_create_impl(ops, type, arg, start, shift, gap,
+	    ZFS_RANGE_TREE_F_UC_GENERIC, NULL));
+}
+
 zfs_range_tree_t *
 zfs_range_tree_create(const zfs_range_tree_ops_t *ops,
     zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift)
 {
-	return (zfs_range_tree_create_gap(ops, type, arg, start, shift, 0));
+	return (zfs_range_tree_create_impl(ops, type, arg, start, shift, 0,
+	    ZFS_RANGE_TREE_F_UC_GENERIC, NULL));
+}
+
+zfs_range_tree_t *
+zfs_range_tree_create_flags(const zfs_range_tree_ops_t *ops,
+    zfs_range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
+    uint64_t flags, const char *name)
+{
+	return (zfs_range_tree_create_impl(ops, type, arg, start, shift, 0,
+	    flags, name));
+}
 
 void
@@ -261,6 +305,9 @@ zfs_range_tree_destroy(zfs_range_tree_t *rt)
 	if (rt->rt_ops != NULL && rt->rt_ops->rtop_destroy != NULL)
 		rt->rt_ops->rtop_destroy(rt, rt->rt_arg);
 
+	if (rt->rt_name != NULL && (rt->rt_flags & ZFS_RANGE_TREE_F_DYN_NAME))
+		kmem_strfree((char *)(uintptr_t)rt->rt_name);
+
 	zfs_btree_destroy(&rt->rt_root);
 	kmem_free(rt, sizeof (*rt));
 }
@@ -270,15 +317,19 @@ zfs_range_tree_adjust_fill(zfs_range_tree_t *rt, zfs_range_seg_t *rs,
     int64_t delta)
 {
 	if (delta < 0 && delta * -1 >= zfs_rs_get_fill(rs, rt)) {
-		zfs_panic_recover("zfs: attempting to decrease fill to or "
-		    "below 0; probable double remove in segment [%llx:%llx]",
+		zfs_panic_recover_rt("zfs: rt_instance=%s: attempting to "
+		    "decrease fill to or below 0; probable double remove in "
+		    "segment [%llx:%llx]",
+		    RT_NAME(rt),
 		    (longlong_t)zfs_rs_get_start(rs, rt),
 		    (longlong_t)zfs_rs_get_end(rs, rt));
 	}
 	if (zfs_rs_get_fill(rs, rt) + delta > zfs_rs_get_end(rs, rt) -
 	    zfs_rs_get_start(rs, rt)) {
-		zfs_panic_recover("zfs: attempting to increase fill beyond "
-		    "max; probable double add in segment [%llx:%llx]",
+		zfs_panic_recover_rt("zfs: rt_instance=%s: attempting to "
+		    "increase fill beyond max; probable double add in "
+		    "segment [%llx:%llx]",
+		    RT_NAME(rt),
 		    (longlong_t)zfs_rs_get_start(rs, rt),
 		    (longlong_t)zfs_rs_get_end(rs, rt));
 	}
@@ -318,14 +369,28 @@ zfs_range_tree_add_impl(void *arg, uint64_t start, uint64_t size, uint64_t fill)
 	 * the normal code paths.
 	 */
 	if (rs != NULL) {
+		uint64_t rstart = zfs_rs_get_start(rs, rt);
+		uint64_t rend = zfs_rs_get_end(rs, rt);
 		if (gap == 0) {
-			zfs_panic_recover("zfs: adding existent segment to "
-			    "range tree (offset=%llx size=%llx)",
-			    (longlong_t)start, (longlong_t)size);
+			zfs_panic_recover_rt("zfs: rt_instance=%s: "
+			    "adding segment (offset=%llx size=%llx) "
+			    "overlapping with existing one "
+			    "(offset=%llx size=%llx)",
+			    RT_NAME(rt),
+			    (longlong_t)start, (longlong_t)size,
+			    (longlong_t)rstart, (longlong_t)(rend - rstart));
+			if (!(rt->rt_flags &
+			    ZFS_RANGE_TREE_F_UC_ALLOCATED_SPACE))
+				return;
+			/* add non-overlapping chunks */
+			if (rstart > start)
+				zfs_range_tree_add_impl(rt, start,
+				    rstart - start, rstart - start);
+			if (rend < end)
+				zfs_range_tree_add_impl(rt, rend, end - rend,
+				    end - rend);
 			return;
 		}
-		uint64_t rstart = zfs_rs_get_start(rs, rt);
-		uint64_t rend = zfs_rs_get_end(rs, rt);
 		if (rstart <= start && rend >= end) {
 			zfs_range_tree_adjust_fill(rt, rs, fill);
 			return;
 		}
@@ -450,6 +515,7 @@ zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size,
 	zfs_range_seg_t *rs;
 	zfs_range_seg_max_t rsearch, rs_tmp;
 	uint64_t end = start + size;
+	uint64_t rstart, rend;
 	boolean_t left_over, right_over;
 
 	VERIFY3U(size, !=, 0);
@@ -463,12 +529,16 @@ zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size,
 
 	/* Make sure we completely overlap with someone */
 	if (rs == NULL) {
-		zfs_panic_recover("zfs: removing nonexistent segment from "
-		    "range tree (offset=%llx size=%llx)",
-		    (longlong_t)start, (longlong_t)size);
+		zfs_panic_recover_rt("zfs: rt_instance=%s: removing "
+		    "nonexistent segment from range tree "
+		    "(offset=%llx size=%llx)",
+		    RT_NAME(rt), (longlong_t)start, (longlong_t)size);
 		return;
 	}
 
+	rstart = zfs_rs_get_start(rs, rt);
+	rend = zfs_rs_get_end(rs, rt);
+
 	/*
 	 * Range trees with gap support must only remove complete segments
 	 * from the tree. This allows us to maintain accurate fill accounting
@@ -478,31 +548,49 @@ zfs_range_tree_remove_impl(zfs_range_tree_t *rt, uint64_t start, uint64_t size,
 	if (rt->rt_gap != 0) {
 		if (do_fill) {
 			if (zfs_rs_get_fill(rs, rt) == size) {
-				start = zfs_rs_get_start(rs, rt);
-				end = zfs_rs_get_end(rs, rt);
+				start = rstart;
+				end = rend;
 				size = end - start;
 			} else {
 				zfs_range_tree_adjust_fill(rt, rs, -size);
 				return;
 			}
-		} else if (zfs_rs_get_start(rs, rt) != start ||
-		    zfs_rs_get_end(rs, rt) != end) {
-			zfs_panic_recover("zfs: freeing partial segment of "
-			    "gap tree (offset=%llx size=%llx) of "
+		} else if (rstart != start || rend != end) {
+			zfs_panic_recover_rt("zfs: rt_instance=%s: freeing "
+			    "partial segment of gap tree "
+			    "(offset=%llx size=%llx) of "
 			    "(offset=%llx size=%llx)",
+			    RT_NAME(rt),
 			    (longlong_t)start, (longlong_t)size,
-			    (longlong_t)zfs_rs_get_start(rs, rt),
-			    (longlong_t)zfs_rs_get_end(rs, rt) -
-			    zfs_rs_get_start(rs, rt));
+			    (longlong_t)rstart, (longlong_t)(rend - rstart));
 			return;
 		}
 	}
 
-	VERIFY3U(zfs_rs_get_start(rs, rt), <=, start);
-	VERIFY3U(zfs_rs_get_end(rs, rt), >=, end);
+	if (!(rstart <= start && rend >= end)) {
+		zfs_panic_recover_rt("zfs: rt_instance=%s: removing segment "
+		    "(offset=%llx size=%llx) not completely overlapped by "
+		    "existing one (offset=%llx size=%llx)",
+		    RT_NAME(rt),
+		    (longlong_t)start, (longlong_t)size,
+		    (longlong_t)rstart, (longlong_t)(rend - rstart));
+		if (!(rt->rt_flags & ZFS_RANGE_TREE_F_UC_FREE_SPACE))
+			return;
+		/* perform removal of the chunks */
+		if (rstart > start)
+			zfs_range_tree_remove_impl(rt, start, rstart - start,
+			    do_fill);
+		uint64_t mstart = MAX(rstart, start);
+		uint64_t mend = MIN(rend, end);
+		zfs_range_tree_remove_impl(rt, mstart, mend - mstart, do_fill);
+		if (rend < end)
+			zfs_range_tree_remove_impl(rt, rend, end - rend,
+			    do_fill);
+		return;
+	}
 
-	left_over = (zfs_rs_get_start(rs, rt) != start);
-	right_over = (zfs_rs_get_end(rs, rt) != end);
+	left_over = (rstart != start);
+	right_over = (rend != end);
 
 	zfs_range_tree_stat_decr(rt, rs);
@@ -873,3 +961,7 @@ zfs_range_tree_span(zfs_range_tree_t *rt)
 {
 	return (zfs_range_tree_max(rt) - zfs_range_tree_min(rt));
 }
+
+ZFS_MODULE_PARAM(zfs, zfs_, recover_rt, INT, ZMOD_RW,
+	"Set to attempt to recover from fatal errors upon modification of "
+	"a range tree");
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 74e36c0300f0..4b0886797a15 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -677,8 +677,9 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
 
 	rw_init(&vd->vdev_indirect_rwlock, NULL, RW_DEFAULT, NULL);
 	mutex_init(&vd->vdev_obsolete_lock, NULL, MUTEX_DEFAULT, NULL);
-	vd->vdev_obsolete_segments = zfs_range_tree_create(NULL,
-	    ZFS_RANGE_SEG64, NULL, 0, 0);
+	vd->vdev_obsolete_segments = zfs_range_tree_create_flags(
+	    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+	    ZFS_RANGE_TREE_F_UC_GENERIC, "vdev_obsolete_segments");
 
 	/*
 	 * Initialize rate limit structs for events. We rate limit ZIO delay
@@ -732,8 +733,9 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
 	cv_init(&vd->vdev_rebuild_cv, NULL, CV_DEFAULT, NULL);
 
 	for (int t = 0; t < DTL_TYPES; t++) {
-		vd->vdev_dtl[t] = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
-		    NULL, 0, 0);
+		vd->vdev_dtl[t] = zfs_range_tree_create_flags(
+		    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+		    ZFS_RANGE_TREE_F_UC_GENERIC, "vdev_dtl");
 	}
 
 	txg_list_create(&vd->vdev_ms_list, spa,
@@ -3394,7 +3396,9 @@ vdev_dtl_load(vdev_t *vd)
 			return (error);
 		ASSERT(vd->vdev_dtl_sm != NULL);
 
-		rt = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+		rt = zfs_range_tree_create_flags(
+		    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+		    ZFS_RANGE_TREE_F_UC_ALLOCATED_SPACE, "vdev_dtl_load:rt");
 		error = space_map_load(vd->vdev_dtl_sm, rt, SM_ALLOC);
 		if (error == 0) {
 			mutex_enter(&vd->vdev_dtl_lock);
diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c
index 7ca1b1f846b6..e358c41729b0 100644
--- a/module/zfs/vdev_rebuild.c
+++ b/module/zfs/vdev_rebuild.c
@@ -786,8 +786,9 @@ vdev_rebuild_thread(void *arg)
 	vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
 	vr->vr_top_vdev = vd;
 	vr->vr_scan_msp = NULL;
-	vr->vr_scan_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL,
-	    0, 0);
+	vr->vr_scan_tree = zfs_range_tree_create_flags(
+	    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+	    ZFS_RANGE_TREE_F_UC_GENERIC, "vr_scan_tree");
 	mutex_init(&vr->vr_io_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&vr->vr_io_cv, NULL, CV_DEFAULT, NULL);
 
diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c
index d3351555ced5..8a8891716880 100644
--- a/module/zfs/vdev_removal.c
+++ b/module/zfs/vdev_removal.c
@@ -369,13 +369,15 @@ spa_vdev_removal_create(vdev_t *vd)
 	spa_vdev_removal_t *svr = kmem_zalloc(sizeof (*svr), KM_SLEEP);
 	mutex_init(&svr->svr_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&svr->svr_cv, NULL, CV_DEFAULT, NULL);
-	svr->svr_allocd_segs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
-	    NULL, 0, 0);
+	svr->svr_allocd_segs = zfs_range_tree_create_flags(
+	    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+	    ZFS_RANGE_TREE_F_UC_GENERIC, "svr_allocd_segs");
 	svr->svr_vdev_id = vd->vdev_id;
 
 	for (int i = 0; i < TXG_SIZE; i++) {
-		svr->svr_frees[i] = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
-		    NULL, 0, 0);
+		svr->svr_frees[i] = zfs_range_tree_create_flags(
+		    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+		    ZFS_RANGE_TREE_F_UC_GENERIC, "svr_frees");
 		list_create(&svr->svr_new_segments[i],
 		    sizeof (vdev_indirect_mapping_entry_t),
 		    offsetof(vdev_indirect_mapping_entry_t, vime_node));
@@ -1184,8 +1186,9 @@ spa_vdev_copy_segment(vdev_t *vd, zfs_range_tree_t *segs,
 	 * relative to the start of the range to be copied (i.e. relative to the
 	 * local variable "start").
 	 */
-	zfs_range_tree_t *obsolete_segs = zfs_range_tree_create(NULL,
-	    ZFS_RANGE_SEG64, NULL, 0, 0);
+	zfs_range_tree_t *obsolete_segs = zfs_range_tree_create_flags(
+	    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+	    ZFS_RANGE_TREE_F_UC_GENERIC, "vcsa_obsolete_segs");
 	zfs_btree_index_t where;
 	zfs_range_seg_t *rs = zfs_btree_first(&segs->rt_root, &where);
 
@@ -1458,8 +1461,9 @@ spa_vdev_copy_impl(vdev_t *vd, spa_vdev_removal_t *svr, vdev_copy_arg_t *vca,
 	 * allocated segments that we are copying. We may also be copying
 	 * free segments (of up to vdev_removal_max_span bytes).
 	 */
-	zfs_range_tree_t *segs = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
-	    NULL, 0, 0);
+	zfs_range_tree_t *segs = zfs_range_tree_create_flags(
+	    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+	    ZFS_RANGE_TREE_F_UC_ALLOCATED_SPACE, "spa_vdev_copy_impl:segs");
 	for (;;) {
 		zfs_range_tree_t *rt = svr->svr_allocd_segs;
 		zfs_range_seg_t *rs = zfs_range_tree_first(rt);
diff --git a/module/zfs/vdev_trim.c b/module/zfs/vdev_trim.c
index 1ca0b23c0ee4..0ee7bf3655a7 100644
--- a/module/zfs/vdev_trim.c
+++ b/module/zfs/vdev_trim.c
@@ -901,7 +901,9 @@ vdev_trim_thread(void *arg)
 	ta.trim_vdev = vd;
 	ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
 	ta.trim_extent_bytes_min = zfs_trim_extent_bytes_min;
-	ta.trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+	ta.trim_tree = zfs_range_tree_create_flags(
+	    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+	    ZFS_RANGE_TREE_F_UC_GENERIC, "trim_tree");
 	ta.trim_type = TRIM_TYPE_MANUAL;
 	ta.trim_flags = 0;
 
@@ -1304,8 +1306,9 @@ vdev_autotrim_thread(void *arg)
 			 * Allocate an empty range tree which is swapped in
 			 * for the existing ms_trim tree while it is processed.
 			 */
-			trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64,
-			    NULL, 0, 0);
+			trim_tree = zfs_range_tree_create_flags(
+			    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+			    ZFS_RANGE_TREE_F_UC_GENERIC, "trim_tree");
 			zfs_range_tree_swap(&msp->ms_trim, &trim_tree);
 			ASSERT(zfs_range_tree_is_empty(msp->ms_trim));
 
@@ -1359,8 +1362,9 @@ vdev_autotrim_thread(void *arg)
 				if (!cvd->vdev_ops->vdev_op_leaf)
 					continue;
 
-				ta->trim_tree = zfs_range_tree_create(NULL,
-				    ZFS_RANGE_SEG64, NULL, 0, 0);
+				ta->trim_tree = zfs_range_tree_create_flags(
+				    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+				    ZFS_RANGE_TREE_F_UC_GENERIC, "trim_tree");
 				zfs_range_tree_walk(trim_tree,
 				    vdev_trim_range_add, ta);
 			}
@@ -1599,7 +1603,9 @@ vdev_trim_l2arc_thread(void *arg)
 	vd->vdev_trim_secure = 0;
 
 	ta.trim_vdev = vd;
-	ta.trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+	ta.trim_tree = zfs_range_tree_create_flags(
+	    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+	    ZFS_RANGE_TREE_F_UC_GENERIC, "trim_tree");
 	ta.trim_type = TRIM_TYPE_MANUAL;
 	ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
 	ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
@@ -1734,7 +1740,9 @@ vdev_trim_simple(vdev_t *vd, uint64_t start, uint64_t size)
 	ASSERT(!vd->vdev_top->vdev_rz_expanding);
 
 	ta.trim_vdev = vd;
-	ta.trim_tree = zfs_range_tree_create(NULL, ZFS_RANGE_SEG64, NULL, 0, 0);
+	ta.trim_tree = zfs_range_tree_create_flags(
+	    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
+	    ZFS_RANGE_TREE_F_UC_GENERIC, "trim_tree");
 	ta.trim_type = TRIM_TYPE_SIMPLE;
 	ta.trim_extent_bytes_max = zfs_trim_extent_bytes_max;
 	ta.trim_extent_bytes_min = SPA_MINBLOCKSIZE;
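
Usage note (editorial illustration, not part of the patch): a minimal sketch
of how a caller might use the new zfs_range_tree_create_flags() interface
introduced above. Only the functions and flags added by this patch are
assumed; the vdev_guid variable and the "my_free_space" label are
hypothetical placeholders.

	/* Statically named tree: the name string is never freed. */
	zfs_range_tree_t *rt = zfs_range_tree_create_flags(
	    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
	    ZFS_RANGE_TREE_F_UC_FREE_SPACE, "my_free_space");

	/*
	 * Dynamically named tree: ZFS_RANGE_TREE_F_DYN_NAME makes
	 * zfs_range_tree_destroy() kmem_strfree() the name built here.
	 */
	char *name = kmem_asprintf("{vdev_guid=%llu my_free_space}",
	    (u_longlong_t)vdev_guid);
	zfs_range_tree_t *rt2 = zfs_range_tree_create_flags(
	    NULL, ZFS_RANGE_SEG64, NULL, 0, 0,
	    ZFS_RANGE_TREE_F_UC_FREE_SPACE | ZFS_RANGE_TREE_F_DYN_NAME, name);

	zfs_range_tree_destroy(rt2);
	zfs_range_tree_destroy(rt);

With zfs_recover_rt=1, an unexpected removal from either tree is reported as
a warning that includes the rt_name string, and, because both trees are
marked ZFS_RANGE_TREE_F_UC_FREE_SPACE, the overlapping chunks are still
removed rather than silently ignored. A double add, by contrast, is warned
about and skipped, since neither tree carries
ZFS_RANGE_TREE_F_UC_ALLOCATED_SPACE.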