Skip to content

Commit

Permalink
re-add memory fences
Browse files Browse the repository at this point in the history
  • Loading branch information
timmitohnetim committed Feb 19, 2024
1 parent 5bce312 commit d2864dd
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 13 deletions.
26 changes: 13 additions & 13 deletions src/main/java/de/nqueensfaf/Demo.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@ public static void main(String[] args) {
gpu();
}

/**
 * Demo run of the CPU solver.
 *
 * <p>Builds a {@code CpuSolver} through its fluent setters, registers callbacks that
 * print to standard output, sets the board size to 16 and starts solving.
 */
static void cpu() {
new CpuSolver()
.setPresetQueens(5)
.setThreadCount(1)
// NOTE(review): the unit of the update interval is not visible here (presumably ms) — confirm
.setUpdateInterval(800)
// announce the configured board size when the solver initializes
.onInit(self -> System.out.println("Starting Solver for board size " + self.getN() + "..."))
// print the progress, solution count and duration values passed to each update callback
.onUpdate((self, progress, solutions, duration) -> System.out.println("progress: " + progress + " solutions: " + solutions + " duration: " + duration))
// print the final solution count and the duration (labelled "ms") when the solver finishes
.onFinish(self -> System.out.println("Found " + self.getSolutions() + " solutions in " + self.getDuration() + " ms"))
.setN(16)
.solve();
}
// static void cpu() {
// new CpuSolver()
// .setPresetQueens(5)
// .setThreadCount(1)
// .setUpdateInterval(800)
// .onInit(self -> System.out.println("Starting Solver for board size " + self.getN() + "..."))
// .onUpdate((self, progress, solutions, duration) -> System.out.println("progress: " + progress + " solutions: " + solutions + " duration: " + duration))
// .onFinish(self -> System.out.println("Found " + self.getSolutions() + " solutions in " + self.getDuration() + " ms"))
// .setN(16)
// .solve();
// }

static void gpu() {
GpuSolver g = new GpuSolver();
Expand All @@ -31,14 +31,14 @@ static void gpu() {
if(gpu.vendor().toLowerCase().contains("nvidia")) {
g.gpuSelection().add(gpu.id(), 5, 64);
} else {
// g.gpuSelection().add(gpu.id(), 50, 64);
g.gpuSelection().add(gpu.id(), 50, 24);
}
}
g.setUpdateInterval(400);
g.onInit(self -> System.out.println("Starting Solver for board size " + self.getN() + "..."))
.onUpdate((self, progress, solutions, duration) -> System.out.println("progress: " + progress + " solutions: " + solutions + " duration: " + duration))
.onFinish(self -> System.out.println("Found " + self.getSolutions() + " solutions in " + self.getDuration() + " ms"))
.setN(18)
.setN(20)
.solve();

// new GPUSolver()
Expand Down
3 changes: 3 additions & 0 deletions src/main/resources/kernels.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ kernel void nqfaf_nvidia(global struct constellation *constellation_arr, global
// in row k only L is free and in row l only 1 is free
local uint jkl_queens[N];
jkl_queens[l_id % N] = jkl_queens_arr[get_group_id(0) * N + l_id % N];
barrier(CLK_LOCAL_MEM_FENCE);

uint ldiag = L >> ((c.start_ijkl >> 5) & 31); // ld from queen l with respect to the first row
uint rdiag = 1 << (c.start_ijkl & 31); // ld from queen k with respect to the first row
Expand Down Expand Up @@ -205,6 +206,7 @@ kernel void nqfaf_amd(constant struct constellation *constellation_arr, global u
// in row k only L is free and in row l only 1 is free
local uint jkl_queens[N];
jkl_queens[l_id % N] = jkl_queens_arr[get_group_id(0) * N + l_id % N];
barrier(CLK_LOCAL_MEM_FENCE);

uint ldiag = L >> ((c.start_ijkl >> 5) & 31); // ld from queen l with respect to the first row
uint rdiag = 1 << (c.start_ijkl & 31); // ld from queen k with respect to the first row
Expand Down Expand Up @@ -376,6 +378,7 @@ kernel void nqfaf_intel(global struct constellation *constellation_arr, global u
// in row k only L is free and in row l only 1 is free
local uint jkl_queens[N];
jkl_queens[l_id % N] = jkl_queens_arr[get_group_id(0) * N + l_id % N];
barrier(CLK_LOCAL_MEM_FENCE);

uint ldiag = L >> ((c.start_ijkl >> 5) & 31); // ld from queen l with respect to the first row
uint rdiag = 1 << (c.start_ijkl & 31); // ld from queen k with respect to the first row
Expand Down

0 comments on commit d2864dd

Please sign in to comment.