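Update GpuSolver.java: snap multi-GPU workload splits to jkl-change boundaries, scale queued workloads by GPU portion (multiply instead of divide), and cap each workload at the GPU's buffer size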

olepoeschl · Feb 16, 2024 · 3df930f · 3df930f
1 parent e24a8e3
commit 3df930f
Showing 1 changed file with 18 additions and 6 deletions.
diff --git a/src/main/java/de/nqueensfaf/impl/GpuSolver.java b/src/main/java/de/nqueensfaf/impl/GpuSolver.java
@@ -470,10 +470,13 @@ private void multiGpu(List<Constellation> constellations) {
 	}
 
 	int firstWorkloadToIndex = (int) (constellations.size() * 0.6);
+	int firstWorkloadJklChangeIndex = findNextJklChangeIndex(constellations, firstWorkloadToIndex);
+	if(firstWorkloadJklChangeIndex - firstWorkloadToIndex <= (int) (constellations.size() * 0.2))
+	    firstWorkloadToIndex = firstWorkloadJklChangeIndex;
 	if(constellations.size() < 10_000 * selectedGpus.size())
 	    firstWorkloadToIndex = constellations.size();
 
-	var firstWorkload = constellations.subList(0, findNextJklChangeIndex(constellations, firstWorkloadToIndex));
+	var firstWorkload = constellations.subList(0, firstWorkloadToIndex);
 
 	int fromIndex = 0;
 	HashMap<Gpu, List<Constellation>> gpuConstellations = new HashMap<Gpu, List<Constellation>>();
@@ -485,13 +488,15 @@ private void multiGpu(List<Constellation> constellations) {
 
 	    if(toIndex < firstWorkload.size() && i < selectedGpus.size() - 1) {
 		int nextJklChangeIndex = findNextJklChangeIndex(firstWorkload, toIndex);
-		if(nextJklChangeIndex - toIndex <= (int) firstWorkload.size() * 0.1)
+		if(nextJklChangeIndex - toIndex <= (int) (firstWorkload.size() * 0.2 / selectedGpus.size()))
 		    toIndex = nextJklChangeIndex;
 	    } else
 		toIndex = firstWorkload.size();
 
 	    var gpuWorkload = firstWorkload.subList(fromIndex, toIndex);
 	    gpuWorkload = fillWithPseudoConstellations(gpuWorkload, gpu.workgroupSize);
+
+	    gpu.bufferSize = gpuWorkload.size();
 
 	    gpuConstellations.put(gpu, gpuWorkload);
 
@@ -521,7 +526,7 @@ private void multiGpu(List<Constellation> constellations) {
 
 		int remaining;
 		while((remaining = queue.size()) > 0) {
-		    int workloadSize = (int) (remaining / gpuPortions[finalGpuIdx]);
+		    int workloadSize = (int) (remaining * gpuPortions[finalGpuIdx]);
 		    if(workloadSize < 4096)
 			workloadSize = 4096;
 
@@ -534,10 +539,16 @@ private void multiGpu(List<Constellation> constellations) {
 		    }
 
 		    if(workload.size() > 0) {
-			while(workload.size() > firstWorkload.size() * gpuPortions[finalGpuIdx])
-			    workload.remove(workload.size() - 1);
-
 			workload = new ArrayList<>(fillWithPseudoConstellations(workload, gpu.workgroupSize));
+
+			while(workload.size() > gpu.bufferSize) {
+			    for(int i = 0; i < gpu.workgroupSize; i++) {
+				var c = workload.remove(workload.size() - 1);
+				if(c.extractStart() != 69)
+				    queue.add(c);
+			    }
+			}
+
 			runGpu(gpu, workload);
 		    }
 		}
@@ -688,6 +699,7 @@ private class Gpu {
 
 	// measured kernel duration
 	private long duration;
+	private int bufferSize;
 
 	// related opencl objects
 	private long platform, context, program, kernel, xQueue, memQueue, constellationsMem, resMem;