Skip to content

Commit

Permalink
vflush: Speed up reclaim by doing less in the loop (#328)
Browse files Browse the repository at this point in the history
* spl-time: Use KeQueryPerformanceCounter instead of KeQueryTickCount

`KeQueryTickCount` seems to only have a 15.625ms resolution unless the
interrupt timer frequency is increased, which should be avoided due to
power usage.

Instead, this switches the `zfs_lbolt`, `gethrtime` and
`random_get_bytes` to use `KeQueryPerformanceCounter`.

On my system this gives a 100ns resolution.

Signed-off-by: Axel Gembe <[email protected]>

* spl-time: Add assertion to gethrtime and cache NANOSEC / freq division

One less division for each call.

Signed-off-by: Axel Gembe <[email protected]>

* vflush: Print reclaim statistics

This prints, once every one thousand reclaims, the total number of reclaims
processed so far and the current rate of reclaims per second.

Signed-off-by: Axel Gembe <[email protected]>

* vflush: Speed up reclaim by doing less in the loop

This removes dropping and re-acquiring the `vnode_all_list_lock` inside the
loop, as that is not needed. It also only enters the `v_mutex` of nodes that
are not yet marked `VNODE_DEAD`.

This converts part of the loop to a new function called
`flush_file_objects` to make it more readable.

This also removes the restart of the loop, which is safe because
`vnode_all_list_lock` is never unlocked.

Signed-off-by: Axel Gembe <[email protected]>

---------

Signed-off-by: Axel Gembe <[email protected]>
  • Loading branch information
EchterAgo authored Nov 10, 2023
1 parent 3c86648 commit aa83204
Showing 1 changed file with 123 additions and 104 deletions.
227 changes: 123 additions & 104 deletions module/os/windows/spl/spl-vnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1537,6 +1537,95 @@ mount_count_nodes(struct mount *mp, int flags)
return (count);
}

static void
flush_file_objects(struct vnode *rvp)
{
// Release the AVL tree
// Attempt to flush out any caches;

FILE_OBJECT *fileobject;
vnode_fileobjects_t *node;
int Status;

// Make sure we don't call vnode_flushcache() again from IRP_MJ_CLOSE.
rvp->v_flags |= VNODE_FLUSHING;

if (avl_is_empty(&rvp->v_fileobjects))
return;

for (node = avl_first(&rvp->v_fileobjects); node != NULL;
node = AVL_NEXT(&rvp->v_fileobjects, node)) {
fileobject = node->fileobject;

// Because the CC* calls can re-enter ZFS, we need to
// release the lock, and because we release the lock the
// while has to start from the top each time. We release
// the node at end of this while.

try {
Status = ObReferenceObjectByPointer(fileobject, 0,
*IoFileObjectType, KernelMode);
} except(EXCEPTION_EXECUTE_HANDLER) {
Status = GetExceptionCode();
}

// Try to lock fileobject before we use it.
if (NT_SUCCESS(Status)) {
// Let go of mutex, as flushcache will re-enter
// (IRP_MJ_CLEANUP)
mutex_exit(&rvp->v_mutex);
node->remove = vnode_flushcache(rvp, fileobject, TRUE);
ObDereferenceObject(fileobject);
mutex_enter(&rvp->v_mutex);
} // if ObReferenceObjectByPointer
} // for

// Remove any nodes we successfully closed.
restart_remove_closed:
for (node = avl_first(&rvp->v_fileobjects); node != NULL;
node = AVL_NEXT(&rvp->v_fileobjects, node)) {
if (node->remove) {
avl_remove(&rvp->v_fileobjects, node);
kmem_free(node, sizeof (*node));
goto restart_remove_closed;
}
}

dprintf("vp %p has %d fileobject(s) remaining\n", rvp,
avl_numnodes(&rvp->v_fileobjects));
}

static void
print_reclaim_stats(boolean_t init, int reclaims)
{
static int last_reclaims = 0;
int reclaims_delta;
int reclaims_per_second;
static hrtime_t last_stats_time = 0;
hrtime_t last_stats_time_delta;

if (init) {
last_stats_time = gethrtime();
return;
}

if ((reclaims % 1000) != 0) {
return;
}

reclaims_delta = reclaims - last_reclaims;
last_stats_time_delta = gethrtime() - last_stats_time;

reclaims_per_second = (((int64_t)reclaims_delta) * NANOSEC) /
MAX(last_stats_time_delta, 1);

dprintf("%s: %d reclaims processed (%d/s).\n", __func__, reclaims,
reclaims_per_second);

last_reclaims = reclaims;
last_stats_time = gethrtime();
}


/*
* Let's try something new. If we are to vflush, lets do everything we can
Expand All @@ -1555,126 +1644,56 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags)
// FORCECLOSE : release everything, force unmount

// if mp is NULL, we are reclaiming nodes, until threshold
int isbusy = 0;
int reclaims = 0;
vnode_fileobjects_t *node;
struct vnode *rvp;
int Status;
boolean_t filesonly = B_TRUE;

dprintf("vflush start\n");

mutex_enter(&vnode_all_list_lock);

filesanddirs:
while (1) {
for (rvp = list_head(&vnode_all_list);
rvp;
rvp = list_next(&vnode_all_list, rvp)) {

// skip vnodes not belonging to this mount
if (mp && rvp->v_mount != mp)
continue;

if (filesonly && vnode_isdir(rvp))
continue;

// If we aren't FORCE and asked to SKIPROOT, and node
// is MARKROOT, then go to next.
if (!(flags & FORCECLOSE)) {
if ((flags & SKIPROOT))
if (rvp->v_flags & VNODE_MARKROOT)
continue;
#if 0 // when we use SYSTEM vnodes
if ((flags & SKIPSYSTEM))
if (rvp->v_flags & VNODE_MARKSYSTEM)
continue;
#endif
}
// We are to remove this node, even if ROOT - unmark it.
mutex_exit(&vnode_all_list_lock);

// Release the AVL tree
// KIRQL OldIrql;

// Attempt to flush out any caches;
mutex_enter(&rvp->v_mutex);
// Make sure we don't call vnode_cacheflush() again
// from IRP_MJ_CLOSE.
rvp->v_flags |= VNODE_FLUSHING;

for (node = avl_first(&rvp->v_fileobjects);
node != NULL;
node = AVL_NEXT(&rvp->v_fileobjects, node)) {
FILE_OBJECT *fileobject = node->fileobject;

// Because the CC* calls can re-enter ZFS, we need to
// release the lock, and because we release the lock the
// while has to start from the top each time. We release
// the node at end of this while.

try {
Status = ObReferenceObjectByPointer(
fileobject,
0,
*IoFileObjectType,
KernelMode);
} except(EXCEPTION_EXECUTE_HANDLER) {
Status = GetExceptionCode();
}

// Try to lock fileobject before we use it.
if (NT_SUCCESS(Status)) {
int ok;
print_reclaim_stats(B_TRUE, 0);

// Let go of mutex, as flushcache will re-enter
// (IRP_MJ_CLEANUP)
mutex_exit(&rvp->v_mutex);
node->remove = vnode_flushcache(rvp,
fileobject, TRUE);
filesanddirs:
for (rvp = list_head(&vnode_all_list); rvp;
rvp = list_next(&vnode_all_list, rvp)) {
// skip vnodes not belonging to this mount
if (mp && rvp->v_mount != mp)
continue;

ObDereferenceObject(fileobject);
if (filesonly && vnode_isdir(rvp))
continue;

mutex_enter(&rvp->v_mutex);
// If we aren't FORCE and asked to SKIPROOT, and node
// is MARKROOT, then go to next.
if (!(flags & FORCECLOSE)) {
if ((flags & SKIPROOT))
if (rvp->v_flags & VNODE_MARKROOT)
continue;
#if 0 // when we use SYSTEM vnodes
if ((flags & SKIPSYSTEM))
if (rvp->v_flags & VNODE_MARKSYSTEM)
continue;
#endif
}
// We are to remove this node, even if ROOT - unmark it.

} // if ObReferenceObjectByPointer
} // for
if (rvp->v_flags & VNODE_DEAD) {
continue;
}

// Remove any nodes we successfully closed.
restart:
for (node = avl_first(&rvp->v_fileobjects);
node != NULL;
node = AVL_NEXT(&rvp->v_fileobjects, node)) {
if (node->remove) {
avl_remove(&rvp->v_fileobjects, node);
kmem_free(node, sizeof (*node));
goto restart;
}
}
mutex_enter(&rvp->v_mutex);

dprintf("vp %p has %d fileobject(s) remaining\n", rvp,
avl_numnodes(&rvp->v_fileobjects));
flush_file_objects(rvp);

// vnode_recycle_int() will call mutex_exit(&rvp->v_mutex);
// vnode_recycle_int() will exit v_mutex
// re-check flags, due to releasing locks
isbusy = 1;
if (!(rvp->v_flags & VNODE_DEAD))
isbusy = vnode_recycle_int(rvp,
(flags & FORCECLOSE) | VNODELOCKED);
else
mutex_exit(&rvp->v_mutex);

mutex_enter(&vnode_all_list_lock);

if (!isbusy) {
reclaims++;
break; // must restart loop if unlinked node
}
if (!vnode_recycle_int(rvp, (flags & FORCECLOSE) |
VNODELOCKED)) {
reclaims++;
print_reclaim_stats(B_FALSE, reclaims);
}

// If the end of the list was reached, stop entirely
if (!rvp)
break;
}

if (filesonly) {
Expand All @@ -1684,8 +1703,8 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags)

mutex_exit(&vnode_all_list_lock);

if (mp == NULL && reclaims > 0) {
dprintf("%s: %llu reclaims processed.\n", __func__, reclaims);
if (reclaims > 0) {
dprintf("%s: %d reclaims processed.\n", __func__, reclaims);
}


Expand Down

0 comments on commit aa83204

Please sign in to comment.