Skip to content

Commit e2642ed

Browse files
behlendorf authored and tonyhutter committed
Add spa_get_min_alloc_range() interface function
Provide an interface to retrieve the lowest and highest minimum allocation size for the normal allocation class. This can be used by external consumers of the DMU to estimate potential wasted capacity when setting the recordsize for an object. The new "min_alloc" and "max_alloc" keys are added to the pool configuration and used by default_volblocksize() to warn when an inefficient block size is requested. For older kmods which don't yet include the new keys, fall back to the previous logic. Reviewed-by: Tony Hutter <[email protected]> Reviewed-by: Alexander Motin <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes openzfs#17758
1 parent 54198a6 commit e2642ed

File tree

8 files changed

+52
-14
lines changed

8 files changed

+52
-14
lines changed

cmd/zfs/zfs_main.c

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -930,19 +930,15 @@ zfs_do_clone(int argc, char **argv)
930930
}
931931

932932
/*
933-
* Return a default volblocksize for the pool which always uses more than
934-
* half of the data sectors. This primarily applies to dRAID which always
935-
* writes full stripe widths.
933+
* Calculate the minimum allocation size based on the top-level vdevs.
936934
*/
937935
static uint64_t
938-
default_volblocksize(zpool_handle_t *zhp, nvlist_t *props)
936+
calculate_volblocksize(nvlist_t *config)
939937
{
940-
uint64_t volblocksize, asize = SPA_MINBLOCKSIZE;
938+
uint64_t asize = SPA_MINBLOCKSIZE;
941939
nvlist_t *tree, **vdevs;
942940
uint_t nvdevs;
943941

944-
nvlist_t *config = zpool_get_config(zhp, NULL);
945-
946942
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) != 0 ||
947943
nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN,
948944
&vdevs, &nvdevs) != 0) {
@@ -973,6 +969,24 @@ default_volblocksize(zpool_handle_t *zhp, nvlist_t *props)
973969
}
974970
}
975971

972+
return (asize);
973+
}
974+
975+
/*
976+
* Return a default volblocksize for the pool which always uses more than
977+
* half of the data sectors. This primarily applies to dRAID which always
978+
* writes full stripe widths.
979+
*/
980+
static uint64_t
981+
default_volblocksize(zpool_handle_t *zhp, nvlist_t *props)
982+
{
983+
uint64_t volblocksize, asize = SPA_MINBLOCKSIZE;
984+
985+
nvlist_t *config = zpool_get_config(zhp, NULL);
986+
987+
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_MAX_ALLOC, &asize) != 0)
988+
asize = calculate_volblocksize(config);
989+
976990
/*
977991
* Calculate the target volblocksize such that more than half
978992
* of the asize is used. The following table is for 4k sectors.

include/sys/fs/zfs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,8 @@ typedef struct zpool_load_policy {
740740
#define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift"
741741
#define ZPOOL_CONFIG_ASHIFT "ashift"
742742
#define ZPOOL_CONFIG_ASIZE "asize"
743+
#define ZPOOL_CONFIG_MIN_ALLOC "min_alloc"
744+
#define ZPOOL_CONFIG_MAX_ALLOC "max_alloc"
743745
#define ZPOOL_CONFIG_DTL "DTL"
744746
#define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */
745747
#define ZPOOL_CONFIG_REMOVAL_STATS "removal_stats" /* not stored on disk */

include/sys/spa.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,6 +1055,7 @@ extern pool_state_t spa_state(spa_t *spa);
10551055
extern spa_load_state_t spa_load_state(spa_t *spa);
10561056
extern uint64_t spa_freeze_txg(spa_t *spa);
10571057
extern uint64_t spa_get_worst_case_asize(spa_t *spa, uint64_t lsize);
1058+
extern void spa_get_min_alloc_range(spa_t *spa, uint64_t *min, uint64_t *max);
10581059
extern uint64_t spa_get_dspace(spa_t *spa);
10591060
extern uint64_t spa_get_checkpoint_space(spa_t *spa);
10601061
extern uint64_t spa_get_slop_space(spa_t *spa);

include/sys/spa_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ struct spa {
267267
uint64_t spa_min_ashift; /* of vdevs in normal class */
268268
uint64_t spa_max_ashift; /* of vdevs in normal class */
269269
uint64_t spa_min_alloc; /* of vdevs in normal class */
270+
uint64_t spa_max_alloc; /* of vdevs in normal class */
270271
uint64_t spa_gcd_alloc; /* of vdevs in normal class */
271272
uint64_t spa_config_guid; /* config pool guid */
272273
uint64_t spa_load_guid; /* spa_load initialized guid */

module/zfs/spa_config.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,8 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
461461
fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, txg);
462462
fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa));
463463
fnvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, spa->spa_errata);
464+
fnvlist_add_uint64(config, ZPOOL_CONFIG_MIN_ALLOC, spa->spa_min_alloc);
465+
fnvlist_add_uint64(config, ZPOOL_CONFIG_MAX_ALLOC, spa->spa_max_alloc);
464466
if (spa->spa_comment != NULL)
465467
fnvlist_add_string(config, ZPOOL_CONFIG_COMMENT,
466468
spa->spa_comment);

module/zfs/spa_misc.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
814814
spa->spa_min_ashift = INT_MAX;
815815
spa->spa_max_ashift = 0;
816816
spa->spa_min_alloc = INT_MAX;
817+
spa->spa_max_alloc = 0;
817818
spa->spa_gcd_alloc = INT_MAX;
818819

819820
/* Reset cached value */
@@ -1876,6 +1877,19 @@ spa_get_worst_case_asize(spa_t *spa, uint64_t lsize)
18761877
return (MAX(lsize, 1 << spa->spa_max_ashift) * spa_asize_inflation);
18771878
}
18781879

1880+
/*
1881+
* Return the range of minimum allocation sizes for the normal allocation
1882+
* class. This can be used by external consumers of the DMU to estimate
1883+
* potential wasted capacity when setting the recordsize for an object.
1884+
* This is mainly for dRAID pools which always pad to a full stripe width.
1885+
*/
1886+
void
1887+
spa_get_min_alloc_range(spa_t *spa, uint64_t *min_alloc, uint64_t *max_alloc)
1888+
{
1889+
*min_alloc = spa->spa_min_alloc;
1890+
*max_alloc = spa->spa_max_alloc;
1891+
}
1892+
18791893
/*
18801894
* Return the amount of slop space in bytes. It is typically 1/32 of the pool
18811895
* (3.2%), minus the embedded log space. On very small pools, it may be
@@ -3083,6 +3097,7 @@ EXPORT_SYMBOL(spa_version);
30833097
EXPORT_SYMBOL(spa_state);
30843098
EXPORT_SYMBOL(spa_load_state);
30853099
EXPORT_SYMBOL(spa_freeze_txg);
3100+
EXPORT_SYMBOL(spa_get_min_alloc_range); /* for Lustre */
30863101
EXPORT_SYMBOL(spa_get_dspace);
30873102
EXPORT_SYMBOL(spa_update_dspace);
30883103
EXPORT_SYMBOL(spa_deflate);

module/zfs/vdev.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1474,12 +1474,14 @@ vdev_spa_set_alloc(spa_t *spa, uint64_t min_alloc)
14741474
{
14751475
if (min_alloc < spa->spa_min_alloc)
14761476
spa->spa_min_alloc = min_alloc;
1477-
if (spa->spa_gcd_alloc == INT_MAX) {
1477+
1478+
if (min_alloc > spa->spa_max_alloc)
1479+
spa->spa_max_alloc = min_alloc;
1480+
1481+
if (spa->spa_gcd_alloc == INT_MAX)
14781482
spa->spa_gcd_alloc = min_alloc;
1479-
} else {
1480-
spa->spa_gcd_alloc = vdev_gcd(min_alloc,
1481-
spa->spa_gcd_alloc);
1482-
}
1483+
else
1484+
spa->spa_gcd_alloc = vdev_gcd(min_alloc, spa->spa_gcd_alloc);
14831485
}
14841486

14851487
void
@@ -1533,8 +1535,7 @@ vdev_metaslab_group_create(vdev_t *vd)
15331535
if (vd->vdev_ashift < spa->spa_min_ashift)
15341536
spa->spa_min_ashift = vd->vdev_ashift;
15351537

1536-
uint64_t min_alloc = vdev_get_min_alloc(vd);
1537-
vdev_spa_set_alloc(spa, min_alloc);
1538+
vdev_spa_set_alloc(spa, vdev_get_min_alloc(vd));
15381539
}
15391540
}
15401541
}

module/zfs/vdev_label.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,8 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
511511
fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift);
512512
fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE,
513513
vd->vdev_asize);
514+
fnvlist_add_uint64(nv, ZPOOL_CONFIG_MIN_ALLOC,
515+
vdev_get_min_alloc(vd));
514516
fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, vd->vdev_islog);
515517
if (vd->vdev_noalloc) {
516518
fnvlist_add_uint64(nv, ZPOOL_CONFIG_NONALLOCATING,

0 commit comments

Comments
 (0)