Skip to content

Commit

Permalink
vflush: Speed up reclaim by doing less in the loop (#328)
Browse files Browse the repository at this point in the history
* spl-time: Use KeQueryPerformanceCounter instead of KeQueryTickCount

`KeQueryTickCount` seems to only have a 15.625ms resolution unless the
interrupt timer frequency is increased, which should be avoided due to
power usage.

Instead, this switches the `zfs_lbolt`, `gethrtime` and
`random_get_bytes` to use `KeQueryPerformanceCounter`.

On my system this gives a 100ns resolution.

Signed-off-by: Axel Gembe <[email protected]>

* spl-time: Add assertion to gethrtime and cache NANOSEC / freq division

One less division for each call.

Signed-off-by: Axel Gembe <[email protected]>

* vflush: Print reclaim statistics

This prints, once every one thousand reclaims, the total number of reclaims
processed so far and the current rate of reclaims per second.

Signed-off-by: Axel Gembe <[email protected]>

* vflush: Speed up reclaim by doing less in the loop

This removes dropping and re-acquiring the `vnode_all_list_lock` inside the
loop, as that is not needed. It also only enters the `v_mutex` of nodes that
are not yet marked `VNODE_DEAD`.

This converts part of the loop to a new function called
`flush_file_objects` to make it more readable.

This also removes the restart of the loop, which is safe because
`vnode_all_list_lock` is never unlocked.

Signed-off-by: Axel Gembe <[email protected]>

---------

Signed-off-by: Axel Gembe <[email protected]>
  • Loading branch information
EchterAgo authored Nov 10, 2023
1 parent 3c86648 commit aa83204
Showing 1 changed file with 123 additions and 104 deletions.
227 changes: 123 additions & 104 deletions module/os/windows/spl/spl-vnode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1537,6 +1537,95 @@ mount_count_nodes(struct mount *mp, int flags)
return (count);
}

static void
flush_file_objects(struct vnode *rvp)
{
// Release the AVL tree
// Attempt to flush out any caches;

FILE_OBJECT *fileobject;
vnode_fileobjects_t *node;
int Status;

// Make sure we don't call vnode_flushcache() again from IRP_MJ_CLOSE.
rvp->v_flags |= VNODE_FLUSHING;

if (avl_is_empty(&rvp->v_fileobjects))
return;

for (node = avl_first(&rvp->v_fileobjects); node != NULL;
node = AVL_NEXT(&rvp->v_fileobjects, node)) {
fileobject = node->fileobject;

// Because the CC* calls can re-enter ZFS, we need to
// release the lock, and because we release the lock the
// while has to start from the top each time. We release
// the node at end of this while.

try {
Status = ObReferenceObjectByPointer(fileobject, 0,
*IoFileObjectType, KernelMode);
} except(EXCEPTION_EXECUTE_HANDLER) {
Status = GetExceptionCode();
}

// Try to lock fileobject before we use it.
if (NT_SUCCESS(Status)) {
// Let go of mutex, as flushcache will re-enter
// (IRP_MJ_CLEANUP)
mutex_exit(&rvp->v_mutex);
node->remove = vnode_flushcache(rvp, fileobject, TRUE);
ObDereferenceObject(fileobject);
mutex_enter(&rvp->v_mutex);
} // if ObReferenceObjectByPointer
} // for

// Remove any nodes we successfully closed.
restart_remove_closed:
for (node = avl_first(&rvp->v_fileobjects); node != NULL;
node = AVL_NEXT(&rvp->v_fileobjects, node)) {
if (node->remove) {
avl_remove(&rvp->v_fileobjects, node);
kmem_free(node, sizeof (*node));
goto restart_remove_closed;
}
}

dprintf("vp %p has %d fileobject(s) remaining\n", rvp,
avl_numnodes(&rvp->v_fileobjects));
}

static void
print_reclaim_stats(boolean_t init, int reclaims)
{
static int last_reclaims = 0;
int reclaims_delta;
int reclaims_per_second;
static hrtime_t last_stats_time = 0;
hrtime_t last_stats_time_delta;

if (init) {
last_stats_time = gethrtime();
return;
}

if ((reclaims % 1000) != 0) {
return;
}

reclaims_delta = reclaims - last_reclaims;
last_stats_time_delta = gethrtime() - last_stats_time;

reclaims_per_second = (((int64_t)reclaims_delta) * NANOSEC) /
MAX(last_stats_time_delta, 1);

dprintf("%s: %d reclaims processed (%d/s).\n", __func__, reclaims,
reclaims_per_second);

last_reclaims = reclaims;
last_stats_time = gethrtime();
}


/*
* Let's try something new. If we are to vflush, lets do everything we can
Expand All @@ -1555,126 +1644,56 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags)
// FORCECLOSE : release everything, force unmount

// if mp is NULL, we are reclaiming nodes, until threshold
int isbusy = 0;
int reclaims = 0;
vnode_fileobjects_t *node;
struct vnode *rvp;
int Status;
boolean_t filesonly = B_TRUE;

dprintf("vflush start\n");

mutex_enter(&vnode_all_list_lock);

filesanddirs:
while (1) {
for (rvp = list_head(&vnode_all_list);
rvp;
rvp = list_next(&vnode_all_list, rvp)) {

// skip vnodes not belonging to this mount
if (mp && rvp->v_mount != mp)
continue;

if (filesonly && vnode_isdir(rvp))
continue;

// If we aren't FORCE and asked to SKIPROOT, and node
// is MARKROOT, then go to next.
if (!(flags & FORCECLOSE)) {
if ((flags & SKIPROOT))
if (rvp->v_flags & VNODE_MARKROOT)
continue;
#if 0 // when we use SYSTEM vnodes
if ((flags & SKIPSYSTEM))
if (rvp->v_flags & VNODE_MARKSYSTEM)
continue;
#endif
}
// We are to remove this node, even if ROOT - unmark it.
mutex_exit(&vnode_all_list_lock);

// Release the AVL tree
// KIRQL OldIrql;

// Attempt to flush out any caches;
mutex_enter(&rvp->v_mutex);
// Make sure we don't call vnode_cacheflush() again
// from IRP_MJ_CLOSE.
rvp->v_flags |= VNODE_FLUSHING;

for (node = avl_first(&rvp->v_fileobjects);
node != NULL;
node = AVL_NEXT(&rvp->v_fileobjects, node)) {
FILE_OBJECT *fileobject = node->fileobject;

// Because the CC* calls can re-enter ZFS, we need to
// release the lock, and because we release the lock the
// while has to start from the top each time. We release
// the node at end of this while.

try {
Status = ObReferenceObjectByPointer(
fileobject,
0,
*IoFileObjectType,
KernelMode);
} except(EXCEPTION_EXECUTE_HANDLER) {
Status = GetExceptionCode();
}

// Try to lock fileobject before we use it.
if (NT_SUCCESS(Status)) {
int ok;
print_reclaim_stats(B_TRUE, 0);

// Let go of mutex, as flushcache will re-enter
// (IRP_MJ_CLEANUP)
mutex_exit(&rvp->v_mutex);
node->remove = vnode_flushcache(rvp,
fileobject, TRUE);
filesanddirs:
for (rvp = list_head(&vnode_all_list); rvp;
rvp = list_next(&vnode_all_list, rvp)) {
// skip vnodes not belonging to this mount
if (mp && rvp->v_mount != mp)
continue;

ObDereferenceObject(fileobject);
if (filesonly && vnode_isdir(rvp))
continue;

mutex_enter(&rvp->v_mutex);
// If we aren't FORCE and asked to SKIPROOT, and node
// is MARKROOT, then go to next.
if (!(flags & FORCECLOSE)) {
if ((flags & SKIPROOT))
if (rvp->v_flags & VNODE_MARKROOT)
continue;
#if 0 // when we use SYSTEM vnodes
if ((flags & SKIPSYSTEM))
if (rvp->v_flags & VNODE_MARKSYSTEM)
continue;
#endif
}
// We are to remove this node, even if ROOT - unmark it.

} // if ObReferenceObjectByPointer
} // for
if (rvp->v_flags & VNODE_DEAD) {
continue;
}

// Remove any nodes we successfully closed.
restart:
for (node = avl_first(&rvp->v_fileobjects);
node != NULL;
node = AVL_NEXT(&rvp->v_fileobjects, node)) {
if (node->remove) {
avl_remove(&rvp->v_fileobjects, node);
kmem_free(node, sizeof (*node));
goto restart;
}
}
mutex_enter(&rvp->v_mutex);

dprintf("vp %p has %d fileobject(s) remaining\n", rvp,
avl_numnodes(&rvp->v_fileobjects));
flush_file_objects(rvp);

// vnode_recycle_int() will call mutex_exit(&rvp->v_mutex);
// vnode_recycle_int() will exit v_mutex
// re-check flags, due to releasing locks
isbusy = 1;
if (!(rvp->v_flags & VNODE_DEAD))
isbusy = vnode_recycle_int(rvp,
(flags & FORCECLOSE) | VNODELOCKED);
else
mutex_exit(&rvp->v_mutex);

mutex_enter(&vnode_all_list_lock);

if (!isbusy) {
reclaims++;
break; // must restart loop if unlinked node
}
if (!vnode_recycle_int(rvp, (flags & FORCECLOSE) |
VNODELOCKED)) {
reclaims++;
print_reclaim_stats(B_FALSE, reclaims);
}

// If the end of the list was reached, stop entirely
if (!rvp)
break;
}

if (filesonly) {
Expand All @@ -1684,8 +1703,8 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags)

mutex_exit(&vnode_all_list_lock);

if (mp == NULL && reclaims > 0) {
dprintf("%s: %llu reclaims processed.\n", __func__, reclaims);
if (reclaims > 0) {
dprintf("%s: %d reclaims processed.\n", __func__, reclaims);
}


Expand Down

0 comments on commit aa83204

Please sign in to comment.