Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

zio_flush: propagate flush errors to the ZIL #16314

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,

extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg,
blkptr_t *new_bp, uint64_t size, boolean_t *slog);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern void zio_flush(zio_t *zio, vdev_t *vd, boolean_t propagate);
extern void zio_shrink(zio_t *zio, uint64_t size);

extern int zio_wait(zio_t *zio);
Expand Down
15 changes: 0 additions & 15 deletions module/os/freebsd/zfs/vdev_geom.c
Original file line number Diff line number Diff line change
Expand Up @@ -1014,21 +1014,6 @@ vdev_geom_io_intr(struct bio *bp)
zio->io_error = SET_ERROR(EIO);

switch (zio->io_error) {
case ENOTSUP:
/*
* If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
* that future attempts will never succeed. In this case
* we set a persistent flag so that we don't bother with
* requests in the future.
*/
switch (bp->bio_cmd) {
case BIO_FLUSH:
vd->vdev_nowritecache = B_TRUE;
break;
case BIO_DELETE:
break;
}
break;
case ENXIO:
if (!vd->vdev_remove_wanted) {
/*
Expand Down
3 changes: 0 additions & 3 deletions module/os/linux/zfs/vdev_disk.c
Original file line number Diff line number Diff line change
Expand Up @@ -1232,9 +1232,6 @@ BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
zio->io_error = -error;
#endif

if (zio->io_error && (zio->io_error == EOPNOTSUPP))
zio->io_vd->vdev_nowritecache = B_TRUE;

bio_put(bio);
ASSERT3S(zio->io_error, >=, 0);
if (zio->io_error)
Expand Down
14 changes: 8 additions & 6 deletions module/zfs/vdev_label.c
Original file line number Diff line number Diff line change
Expand Up @@ -1830,19 +1830,21 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags)

for (int v = 0; v < svdcount; v++) {
if (vdev_writeable(svd[v])) {
zio_flush(zio, svd[v]);
zio_flush(zio, svd[v], B_FALSE);
}
}
if (spa->spa_aux_sync_uber) {
spa->spa_aux_sync_uber = B_FALSE;
for (int v = 0; v < spa->spa_spares.sav_count; v++) {
if (vdev_writeable(spa->spa_spares.sav_vdevs[v])) {
zio_flush(zio, spa->spa_spares.sav_vdevs[v]);
zio_flush(zio, spa->spa_spares.sav_vdevs[v],
B_FALSE);
}
}
for (int v = 0; v < spa->spa_l2cache.sav_count; v++) {
if (vdev_writeable(spa->spa_l2cache.sav_vdevs[v])) {
zio_flush(zio, spa->spa_l2cache.sav_vdevs[v]);
zio_flush(zio, spa->spa_l2cache.sav_vdevs[v],
B_FALSE);
}
}
}
Expand Down Expand Up @@ -2007,13 +2009,13 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags)
zio = zio_root(spa, NULL, NULL, flags);

for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd))
zio_flush(zio, vd);
zio_flush(zio, vd, B_FALSE);

for (int i = 0; i < 2; i++) {
if (!sav[i]->sav_label_sync)
continue;
for (int v = 0; v < sav[i]->sav_count; v++)
zio_flush(zio, sav[i]->sav_vdevs[v]);
zio_flush(zio, sav[i]->sav_vdevs[v], B_FALSE);
if (l == 1)
sav[i]->sav_label_sync = B_FALSE;
}
Expand Down Expand Up @@ -2091,7 +2093,7 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
for (vdev_t *vd =
txg_list_head(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)); vd != NULL;
vd = txg_list_next(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg)))
zio_flush(zio, vd);
zio_flush(zio, vd, B_FALSE);

(void) zio_wait(zio);

Expand Down
6 changes: 3 additions & 3 deletions module/zfs/vdev_raidz.c
Original file line number Diff line number Diff line change
Expand Up @@ -4172,7 +4172,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx)
goto io_error_exit;
}
pio = zio_root(spa, NULL, NULL, 0);
zio_flush(pio, raidvd);
zio_flush(pio, raidvd, B_FALSE);
zio_wait(pio);

zfs_dbgmsg("reflow: wrote %llu bytes (logical) to scratch area",
Expand Down Expand Up @@ -4231,7 +4231,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx)
goto io_error_exit;
}
pio = zio_root(spa, NULL, NULL, 0);
zio_flush(pio, raidvd);
zio_flush(pio, raidvd, B_FALSE);
zio_wait(pio);

zfs_dbgmsg("reflow: overwrote %llu bytes (logical) to real location",
Expand Down Expand Up @@ -4339,7 +4339,7 @@ vdev_raidz_reflow_copy_scratch(spa_t *spa)
}
zio_wait(pio);
pio = zio_root(spa, NULL, NULL, 0);
zio_flush(pio, raidvd);
zio_flush(pio, raidvd, B_FALSE);
zio_wait(pio);

zfs_dbgmsg("reflow recovery: overwrote %llu bytes (logical) "
Expand Down
20 changes: 3 additions & 17 deletions module/zfs/zil.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright (c) 2018 Datto Inc.
* Copyright (c) 2024, Klara, Inc.
*/

/* Portions Copyright 2010 Robert Milkowski */
Expand Down Expand Up @@ -1495,12 +1496,6 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
* includes ZIO errors from either this LWB's write or
* flush, as well as any errors from other dependent LWBs
* (e.g. a root LWB ZIO that might be a child of this LWB).
*
* With that said, it's important to note that LWB flush
* errors are not propagated up to the LWB root ZIO.
* This is incorrect behavior, and results in VDEV flush
* errors not being handled correctly here. See the
* comment above the call to "zio_flush" for details.
*/

zcw->zcw_zio_error = zio->io_error;
Expand Down Expand Up @@ -1650,17 +1645,8 @@ zil_lwb_write_done(zio_t *zio)

while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) {
vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev);
if (vd != NULL) {
/*
* The "ZIO_FLAG_DONT_PROPAGATE" is currently
* always used within "zio_flush". This means,
* any errors when flushing the vdev(s), will
* (unfortunately) not be handled correctly,
* since these "zio_flush" errors will not be
* propagated up to "zil_lwb_flush_vdevs_done".
*/
zio_flush(lwb->lwb_root_zio, vd);
}
if (vd != NULL)
zio_flush(lwb->lwb_root_zio, vd, B_TRUE);
kmem_free(zv, sizeof (*zv));
}
}
Expand Down
15 changes: 9 additions & 6 deletions module/zfs/zio.c
Original file line number Diff line number Diff line change
Expand Up @@ -1633,10 +1633,10 @@ zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, abd_t *data, uint64_t size,
* the flushes complete.
*/
void
zio_flush(zio_t *pio, vdev_t *vd)
zio_flush(zio_t *pio, vdev_t *vd, boolean_t propagate)
{
const zio_flag_t flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY;
const zio_flag_t flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY |
(propagate ? 0 : ZIO_FLAG_DONT_PROPAGATE);

if (vd->vdev_nowritecache)
return;
Expand All @@ -1647,7 +1647,7 @@ zio_flush(zio_t *pio, vdev_t *vd)
NULL, ZIO_STAGE_OPEN, ZIO_FLUSH_PIPELINE));
} else {
for (uint64_t c = 0; c < vd->vdev_children; c++)
zio_flush(pio, vd->vdev_child[c]);
zio_flush(pio, vd->vdev_child[c], propagate);
}
}

Expand Down Expand Up @@ -4532,11 +4532,14 @@ zio_vdev_io_assess(zio_t *zio)
/*
* If a cache flush returns ENOTSUP or ENOTTY, we know that no future
* attempts will ever succeed. In this case we set a persistent
* boolean flag so that we don't bother with it in the future.
* boolean flag so that we don't bother with it in the future, and
* then we act like the flush succeeded.
*/
if ((zio->io_error == ENOTSUP || zio->io_error == ENOTTY) &&
zio->io_type == ZIO_TYPE_FLUSH && vd != NULL)
zio->io_type == ZIO_TYPE_FLUSH && vd != NULL) {
vd->vdev_nowritecache = B_TRUE;
zio->io_error = 0;
}

if (zio->io_error)
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
Expand Down
4 changes: 4 additions & 0 deletions tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
'scrub_after_resilver', 'suspend_resume_single', 'zpool_status_-s']
tags = ['functional', 'fault']

[tests/functional/flush:Linux]
tests = ['zil_flush_error']
tags = ['functional', 'flush']

[tests/functional/features/large_dnode:Linux]
tests = ['large_dnode_002_pos', 'large_dnode_006_pos', 'large_dnode_008_pos']
tags = ['functional', 'features', 'large_dnode']
Expand Down
1 change: 1 addition & 0 deletions tests/test-runner/bin/zts-report.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ if os.environ.get('CI') == 'true':
'fault/auto_spare_ashift': ['SKIP', ci_reason],
'fault/auto_spare_shared': ['SKIP', ci_reason],
'fault/suspend_resume_single': ['SKIP', ci_reason],
'flush/zil_flush_error': ['SKIP', ci_reason],
'procfs/pool_state': ['SKIP', ci_reason],
})

Expand Down
9 changes: 6 additions & 3 deletions tests/zfs-tests/include/blkdev.shlib
Original file line number Diff line number Diff line change
Expand Up @@ -462,13 +462,16 @@ function unload_scsi_debug
# Get scsi_debug device name.
# Returns basename of scsi_debug device (for example "sdb").
#
function get_debug_device
# $1 (optional): Return the first $1 number of SCSI debug device names.
function get_debug_device #num
{
typeset num=${1:-1}

for i in {1..10} ; do
val=$(lsscsi | awk '/scsi_debug/ {print $6; exit}' | cut -d/ -f3)
val=$(lsscsi | awk '/scsi_debug/ {print $6}' | cut -d/ -f3 | head -n$num)

# lsscsi can take time to settle
if [ "$val" != "-" ] ; then
if [[ ! "$val" =~ "-" ]] ; then
break
fi
sleep 1
Expand Down
3 changes: 3 additions & 0 deletions tests/zfs-tests/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -1516,6 +1516,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/features/large_dnode/large_dnode_008_pos.ksh \
functional/features/large_dnode/large_dnode_009_pos.ksh \
functional/features/large_dnode/setup.ksh \
functional/flush/cleanup.ksh \
functional/flush/zil_flush_error.ksh \
functional/flush/setup.ksh \
functional/grow/grow_pool_001_pos.ksh \
functional/grow/grow_replicas_001_pos.ksh \
functional/history/cleanup.ksh \
Expand Down
28 changes: 28 additions & 0 deletions tests/zfs-tests/tests/functional/flush/cleanup.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END

#
# Copyright (c) 2024, Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib

default_cleanup
30 changes: 30 additions & 0 deletions tests/zfs-tests/tests/functional/flush/setup.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END

#
# Copyright (c) 2024, Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib

verify_runnable "global"

log_pass
Loading
Loading