From ef8b40d8f193de5b647da143b1e9e75d7c68c178 Mon Sep 17 00:00:00 2001 From: "Fangrui.Liu" Date: Sun, 17 Mar 2024 23:34:11 +0800 Subject: [PATCH 1/5] add offset generator --- src/main/scala/isa/backend/memMicroCode.scala | 21 ++++++++ src/main/scala/isa/instSetArch.scala | 10 +++- src/main/scala/isa/memMicroCode.scala | 44 ----------------- src/main/scala/ncore/mmu/memMngUnit.scala | 48 +++++++++++++++++++ src/main/scala/ncore/tcm/tightCpldMem.scala | 46 ++++++++---------- src/test/scala/ncore/mmu/MMUSpec.scala | 37 ++++++++++++++ src/test/scala/utils/printHelper.scala | 8 ++++ 7 files changed, 142 insertions(+), 72 deletions(-) create mode 100644 src/main/scala/isa/backend/memMicroCode.scala delete mode 100644 src/main/scala/isa/memMicroCode.scala create mode 100644 src/main/scala/ncore/mmu/memMngUnit.scala create mode 100644 src/test/scala/ncore/mmu/MMUSpec.scala diff --git a/src/main/scala/isa/backend/memMicroCode.scala b/src/main/scala/isa/backend/memMicroCode.scala new file mode 100644 index 0000000..2f243f4 --- /dev/null +++ b/src/main/scala/isa/backend/memMicroCode.scala @@ -0,0 +1,21 @@ +// See README.md for license details. + +package isa.backend +import chisel3._ +import chisel3.util._ + +object MemLayout extends ChiselEnum { + val bit8 = Value(0x0.U) + val bit16 = Value(0x1.U) + val bit32 = Value(0x2.U) +} + +object MemChannel extends ChiselEnum { + val ch0 = Value(0x0.U) + // 16/32 bits will have no ch1 + val ch1 = Value(0x1.U) + // 32 bits will have no ch2 + val ch2 = Value(0x2.U) + // 16/32 bits will have no ch3 + val ch3 = Value(0x3.U) +} diff --git a/src/main/scala/isa/instSetArch.scala b/src/main/scala/isa/instSetArch.scala index f4b5359..84fd512 100644 --- a/src/main/scala/isa/instSetArch.scala +++ b/src/main/scala/isa/instSetArch.scala @@ -8,4 +8,12 @@ object NeuralISA extends ChiselEnum { val st = Value(0x2.U(4.W)) val mma = Value(0x3.U(4.W)) val ip = Value (0x4.U(4.W)) -} \ No newline at end of file +} + +object DType extends ChiselEnum { + val uint = Value(0x0.U) + val int = Value(0x1.U) + val fp = Value(0x2.U) + // no bfp32c0 + val bfp = Value(0x3.U) +} diff --git a/src/main/scala/isa/memMicroCode.scala b/src/main/scala/isa/memMicroCode.scala deleted file mode 100644 index a47e1e2..0000000 --- a/src/main/scala/isa/memMicroCode.scala +++ /dev/null @@ -1,44 +0,0 @@ -// See README.md for license details. - -package isa -import chisel3._ -import chisel3.util._ - - -object OffsetPattern extends ChiselEnum { - val not_def = Value(0x0.U) - val sca_0d = Value(0x1.U) - val vec_1d = Value(0x2.U) - val mat_2d = Value(0x3.U) -} - -object AddressMode extends ChiselEnum { - val immd = Value(0x0.U) - val addr = Value(0x1.U) - val addr_immd = Value(0x2.U) -} - - -object MemLayout extends ChiselEnum { - val bit8 = Value(0x0.U) - val bit16 = Value(0x1.U) - val bit32 = Value(0x2.U) -} - -object DType extends ChiselEnum { - val uint = Value(0x0.U) - val int = Value(0x1.U) - val fp = Value(0x2.U) - // no bfp32c0 - val bfp = Value(0x3.U) -} - -object MemChannel extends ChiselEnum { - val ch0 = Value(0x1.U) - // 16/32 bits will have no ch1 - val ch1 = Value(0x2.U) - // 32 bits will have no ch2 - val ch2 = Value(0x4.U) - // 16/32 bits will have no ch3 - val ch3 = Value(0x8.U) -} \ No newline at end of file diff --git a/src/main/scala/ncore/mmu/memMngUnit.scala b/src/main/scala/ncore/mmu/memMngUnit.scala new file mode 100644 index 0000000..62f0149 --- /dev/null +++ b/src/main/scala/ncore/mmu/memMngUnit.scala @@ -0,0 +1,48 @@ +// See README.md for license details. + +package ncore.mmu + +import chisel3._ +import chisel3.util._ +import isa.backend._ +import ncore._ + +class MMUBundle extends Bundle { + val mem_ch = MemChannel() + val mem_lo = MemLayout() +} + +class OffsetGenerator(val n: Int = 8) extends Module { + val io = IO(new Bundle { + val inc = Input(Vec(n, Bool())) + val out = Output(Vec(n, UInt(log2Ceil(n * n).W))) + }) + val init_value = Seq.tabulate(n)(i => (n * i).U(log2Ceil(n * n).W)) + val regs = RegInit(VecInit(init_value)) + + for (i <- 0 until n){ + when (io.inc(i)) { + regs(i) := (regs(i) + 1.U) % (n * n).U + }.otherwise { + regs(i) := init_value(i) + } + io.out(i) := regs(i) + } +} + +/** + * This is the neural core design + */ +class MemoryManageUnit( + val n: Int = 8, val nbits: Int = 8, val addr_width: Int = 24 + ) extends Module { + val io = IO(new Bundle { + val base_addr = Input(Vec(n, UInt(24.W))) + val ctrl = Input(Vec(n * n, new MMUBundle())) + val out_a = Output(Vec(n * n, UInt(32.W))) + val out_b = Output(Vec(n * n, UInt(32.W))) + }) + + val offsetgen_a = new OffsetGenerator(n) + val offsetgen_b = new OffsetGenerator(n) +} \ No newline at end of file diff --git a/src/main/scala/ncore/tcm/tightCpldMem.scala b/src/main/scala/ncore/tcm/tightCpldMem.scala index f8fea8a..c0b8338 100644 --- a/src/main/scala/ncore/tcm/tightCpldMem.scala +++ b/src/main/scala/ncore/tcm/tightCpldMem.scala @@ -4,7 +4,6 @@ package ncore.tcm import chisel3._ import chisel3.util._ -import isa._ class TCMCell(val nbits: Int = 8) extends Module { val io = IO( @@ -25,16 +24,14 @@ class TCMCell(val nbits: Int = 8) extends Module { class TCMBlock(val n: Int = 8, val size: Int = 4096, - val r_addr_width: Int = 12, - val w_addr_width: Int = 12, val nbits: Int = 8 ) extends Module { val io = IO( new Bundle { val d_in = Input(Vec(n * n, UInt(nbits.W))) val d_out = Output(Vec(n * n, UInt(nbits.W))) - val r_addr = Input(Vec(n * n, UInt(r_addr_width.W))) - val w_addr = Input(Vec(n * n, UInt(w_addr_width.W))) + val r_addr = Input(Vec(n * n, UInt(log2Ceil(size).W))) + val w_addr = Input(Vec(n * n, UInt(log2Ceil(size).W))) val en_wr = Input(Bool()) } ) @@ -60,33 +57,28 @@ class TCMBlock(val n: Int = 8, class DetachableTCM( - val n: Int = 8, + val n: Int = 8, + val nblocks: Int = 4, val size: Int = 4096, - val r_addr_width: Int = 12, - val w_addr_width: Int = 12, - val mlayout_width: Int = 6, ) extends Module { val io = IO(new Bundle { - val d_in = Input(Vec(n * n, UInt(32.W))) - val d_out = Output(Vec(n * n, UInt(32.W))) - // read address will have channel selection for last 2 bits - val r_addr = Input(Vec(n * n, UInt((r_addr_width + 2).W))) - // write address will have channel selection for last 2 bits - val w_addr = Input(Vec(n * n, UInt((w_addr_width + 2).W))) - val mem_ch = Input(MemChannel()) - val mem_lo = Input(MemLayout()) - val en_wr = Input(Bool()) + val d_in = Input(Vec(n * n, Vec(nblocks, UInt(8.W)))) + val d_out = Output(Vec(n * n, Vec(nblocks, UInt(8.W)))) + val r_addr = Input(Vec(n * n, UInt(log2Ceil(size).W))) + val w_addr = Input(Vec(n * n, UInt(log2Ceil(size).W))) + val en_wr = Input(Bool()) }) - switch (io.mem_lo) { - is (MemLayout.bit8) { - - } - is (MemLayout.bit16) { - - } - is (MemLayout.bit32) { - + val tcm_blocks_io = VecInit(Seq.fill(nblocks) { + Module(new TCMBlock(n, size, 8)).io}) + + for (i <- 0 until nblocks) { + tcm_blocks_io(i).en_wr := io.en_wr + for (j <- 0 until n) { + tcm_blocks_io(i).r_addr(j) := io.r_addr(j) + tcm_blocks_io(i).w_addr(j) := io.w_addr(j) + tcm_blocks_io(i).d_in(j) := io.d_in(j)(i) + io.d_out(j)(i) := tcm_blocks_io(i).d_out(j) } } diff --git a/src/test/scala/ncore/mmu/MMUSpec.scala b/src/test/scala/ncore/mmu/MMUSpec.scala new file mode 100644 index 0000000..b04e323 --- /dev/null +++ b/src/test/scala/ncore/mmu/MMUSpec.scala @@ -0,0 +1,37 @@ +//// See README.md for license details. + +package ncore.mmu + +import testUtil._ +import scala.util.Random +import chisel3._ +import chiseltest._ +import org.scalatest.flatspec.AnyFlatSpec +import chisel3.experimental.BundleLiterals._ + +class MMUSpec extends AnyFlatSpec with ChiselScalatestTester { + + "OffsetGenerator" should "provide correct offset" in { + test(new OffsetGenerator(4)) { dut => + val print_helper = new testUtil.PrintHelper() + val _n = dut.n + val _array = List(List(false, false, false, false), + List(true, false, false, false), + List(true, true, false, false), + List(true, true, true, false), + List(false, true, true, true), + List(true, false, true, true), + List(false, true, false, true), + List(false, false, true, false), + List(false, false, false, true), + ) + for (i <- 0 until 16) { + for (j <- 0 until _n){ + dut.io.inc(j).poke(_array(i%_array.length)(j)) + } + dut.clock.step() + print_helper.printVectorChisel(dut.io.out, _n) + } + } + } +} \ No newline at end of file diff --git a/src/test/scala/utils/printHelper.scala b/src/test/scala/utils/printHelper.scala index c520bda..cf71ece 100644 --- a/src/test/scala/utils/printHelper.scala +++ b/src/test/scala/utils/printHelper.scala @@ -28,4 +28,12 @@ class PrintHelper(){ } println("]") } + + def printVectorChisel(vec: chisel3.Vec[chisel3.UInt], n: Int): Unit = { + var _row = "" + for (i <- 0 until n) { + _row += vec(i).peekInt().toString() + ", " + } + println("[" + _row + "]") + } } \ No newline at end of file From 2fd67a906c82ddd69a4a61603b409ca3263c63b7 Mon Sep 17 00:00:00 2001 From: "Fangrui.Liu" Date: Sun, 17 Mar 2024 23:49:20 +0800 Subject: [PATCH 2/5] fix expect --- src/test/scala/ncore/mmu/MMUSpec.scala | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/test/scala/ncore/mmu/MMUSpec.scala b/src/test/scala/ncore/mmu/MMUSpec.scala index b04e323..537328f 100644 --- a/src/test/scala/ncore/mmu/MMUSpec.scala +++ b/src/test/scala/ncore/mmu/MMUSpec.scala @@ -23,13 +23,26 @@ class MMUSpec extends AnyFlatSpec with ChiselScalatestTester { List(true, false, true, true), List(false, true, false, true), List(false, false, true, false), - List(false, false, false, true), + List(false, false, false, true), ) - for (i <- 0 until 16) { - for (j <- 0 until _n){ + val _expected = List(List(0, 4, 8, 12), + List(1, 4, 8, 12), + List(2, 5, 8, 12), + List(3, 6, 9, 12), + List(0, 7, 10, 13), + List(1, 4, 11, 14), + List(0, 5, 8, 15), + List(0, 4, 9, 12), + List(0, 4, 8, 13), + ) + for (i <- 0 until _array.length) { + for (j <- 0 until _n) { dut.io.inc(j).poke(_array(i%_array.length)(j)) } dut.clock.step() + for (j <- 0 until _n) { + dut.io.out(j).expect(_expected(i)(j)) + } print_helper.printVectorChisel(dut.io.out, _n) } } From 0adaeca83205705c90c63d3ab735d81792d45ba1 Mon Sep 17 00:00:00 2001 From: "Fangrui.Liu" Date: Sun, 17 Mar 2024 23:59:43 +0800 Subject: [PATCH 3/5] update ci --- .github/workflows/actions.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index e1865e1..2fb383c 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -5,8 +5,9 @@ on: - main - releases/** pull_request: - types: - - opened + branches: + - main + - 'releases/**' jobs: Lint: From 9b12a92acc5894e958166b7fd3a206b3aa3e5dbb Mon Sep 17 00:00:00 2001 From: "Fangrui.Liu" Date: Tue, 26 Mar 2024 23:49:38 +0800 Subject: [PATCH 4/5] multi channel input --- src/main/scala/ncore/mmu/memMngUnit.scala | 76 ++++++++++++++++++++- src/main/scala/ncore/tcm/tightCpldMem.scala | 20 ++++-- src/test/scala/ncore/tcm/TCMSpec.scala | 60 +++++++++++++--- 3 files changed, 139 insertions(+), 17 deletions(-) diff --git a/src/main/scala/ncore/mmu/memMngUnit.scala b/src/main/scala/ncore/mmu/memMngUnit.scala index 62f0149..cfad5f3 100644 --- a/src/main/scala/ncore/mmu/memMngUnit.scala +++ b/src/main/scala/ncore/mmu/memMngUnit.scala @@ -6,6 +6,7 @@ import chisel3._ import chisel3.util._ import isa.backend._ import ncore._ +import ncore.tcm._ class MMUBundle extends Bundle { val mem_ch = MemChannel() @@ -30,14 +31,55 @@ class OffsetGenerator(val n: Int = 8) extends Module { } } + +class MemoryControlArray(val n: Int = 8) extends Module { + val io = IO(new Bundle { + val ctrl_in_a = Input(Bool()) + val ctrl_in_b = Input(Bool()) + val offset_inc_in = Input(Bool()) + val ctrl_out_a = Output(Vec(n, Bool())) + val ctrl_out_b = Output(Vec(n, Bool())) + val offset_inc_out = Output(Vec((n-1) * (n-1), Bool())) + }) + // Assign each element with diagnal control signal + val reg_inc = RegInit(VecInit(Seq.fill(2*n - 3)(0.B))) + val reg_a = RegInit(VecInit(Seq.fill(n)(0.B))) + val reg_b = RegInit(VecInit(Seq.fill(n)(0.B))) + + reg_a(0) := io.ctrl_in_a(0) + reg_b(0) := io.ctrl_in_b(0) + for (i <- 1 until n - 1) { + reg_a(i) := reg_a(i-1) + reg_b(i) := reg_b(i-1) + } + + for (i <- 0 until n) { + io.ctrl_out_a(i) := reg_a(i) + io.ctrl_out_b(i) := reg_b(i) + } + + reg_inc(0) := io.offset_inc_in + for (i <- 0 until 2 * n - 3) { + reg_inc(i) := reg_inc(i - 1) + } + for (i <- 0 until n - 1) { + for (j <- 0 until n - 1) { + io.offset_inc_out(n * i + j) := reg_inc(i + j) + } + } +} + /** * This is the neural core design */ class MemoryManageUnit( - val n: Int = 8, val nbits: Int = 8, val addr_width: Int = 24 + val n: Int = 8, + val nbits: Int = 8, + val word_size: Int = 4, + val size: Int = 4096 ) extends Module { val io = IO(new Bundle { - val base_addr = Input(Vec(n, UInt(24.W))) + val base_addr = Input(UInt(log2Ceil(size).W)) val ctrl = Input(Vec(n * n, new MMUBundle())) val out_a = Output(Vec(n * n, UInt(32.W))) val out_b = Output(Vec(n * n, UInt(32.W))) @@ -45,4 +87,34 @@ class MemoryManageUnit( val offsetgen_a = new OffsetGenerator(n) val offsetgen_b = new OffsetGenerator(n) + + val mem = new DetachableTCM(n, word_size, size, 2) + + // Create 2d register for horizontal & vertical + val reg_h = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W)))) + val reg_v = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W)))) + + for (i <- 0 until n){ + for (j <- 0 until n) { + // ==== INPUT ==== + // vertical + if (i==0) { + mem.io.r_addr(0)(j) := io.base_addr + offsetgen_b.io.out(j) + } else { + mem.io.r_addr(0)(n * i + j) := reg_v(n * (i - 1) + j) + } + if (i < n - 1 && j < n) + reg_v(n * i + j) := mem.io.r_addr(0)(n * i + j) + + // horizontal + if (j==0) { + mem.io.r_addr(1)(n * i) := io.base_addr + offsetgen_a.io.out(i) + } else { + mem.io.r_addr(1)(n * i + j) := reg_h((n - 1) * i + (j - 1)) + } + if (i < n && j < n - 1) + reg_h((n - 1) * i + j) := mem.io.r_addr(1)(n * i + j) + } + } + } \ No newline at end of file diff --git a/src/main/scala/ncore/tcm/tightCpldMem.scala b/src/main/scala/ncore/tcm/tightCpldMem.scala index c0b8338..b036255 100644 --- a/src/main/scala/ncore/tcm/tightCpldMem.scala +++ b/src/main/scala/ncore/tcm/tightCpldMem.scala @@ -24,13 +24,14 @@ class TCMCell(val nbits: Int = 8) extends Module { class TCMBlock(val n: Int = 8, val size: Int = 4096, + val rd_ch_num: Int = 2, val nbits: Int = 8 ) extends Module { val io = IO( new Bundle { val d_in = Input(Vec(n * n, UInt(nbits.W))) - val d_out = Output(Vec(n * n, UInt(nbits.W))) - val r_addr = Input(Vec(n * n, UInt(log2Ceil(size).W))) + val d_out = Output(Vec(rd_ch_num, Vec(n * n, UInt(nbits.W)))) + val r_addr = Input(Vec(rd_ch_num, Vec(n * n, UInt(log2Ceil(size).W)))) val w_addr = Input(Vec(n * n, UInt(log2Ceil(size).W))) val en_wr = Input(Bool()) } @@ -47,7 +48,9 @@ class TCMBlock(val n: Int = 8, //TODO: add read & write conflict check for (i <- 0 until n * n) { - io.d_out(i) := cells_io(io.r_addr(i)).d_out + for (k <- 0 until rd_ch_num) { + io.d_out(k)(i) := cells_io(io.r_addr(k)(i)).d_out + } when (io.en_wr) { cells_io(io.w_addr(i)).en_wr := io.en_wr cells_io(io.w_addr(i)).d_in := io.d_in(i) @@ -60,22 +63,25 @@ class DetachableTCM( val n: Int = 8, val nblocks: Int = 4, val size: Int = 4096, + val rd_ch_num: Int = 2, ) extends Module { val io = IO(new Bundle { val d_in = Input(Vec(n * n, Vec(nblocks, UInt(8.W)))) - val d_out = Output(Vec(n * n, Vec(nblocks, UInt(8.W)))) - val r_addr = Input(Vec(n * n, UInt(log2Ceil(size).W))) + val d_out = Output(Vec(rd_ch_num, Vec(n * n, Vec(nblocks, UInt(8.W))))) + val r_addr = Input(Vec(rd_ch_num, Vec(n * n, UInt(log2Ceil(size).W)))) val w_addr = Input(Vec(n * n, UInt(log2Ceil(size).W))) val en_wr = Input(Bool()) }) val tcm_blocks_io = VecInit(Seq.fill(nblocks) { - Module(new TCMBlock(n, size, 8)).io}) + Module(new TCMBlock(n, size, rd_ch_num, 8)).io}) for (i <- 0 until nblocks) { tcm_blocks_io(i).en_wr := io.en_wr for (j <- 0 until n) { - tcm_blocks_io(i).r_addr(j) := io.r_addr(j) + for (k <- 0 until rd_ch_num) { + tcm_blocks_io(i).r_addr(k)(j) := io.r_addr(k)(j) + } tcm_blocks_io(i).w_addr(j) := io.w_addr(j) tcm_blocks_io(i).d_in(j) := io.d_in(j)(i) io.d_out(j)(i) := tcm_blocks_io(i).d_out(j) diff --git a/src/test/scala/ncore/tcm/TCMSpec.scala b/src/test/scala/ncore/tcm/TCMSpec.scala index 01bc2b5..6477973 100644 --- a/src/test/scala/ncore/tcm/TCMSpec.scala +++ b/src/test/scala/ncore/tcm/TCMSpec.scala @@ -30,7 +30,7 @@ class TCMSpec extends AnyFlatSpec with ChiselScalatestTester { } "TCM Block" should "write on signal and read anytime" in { - test(new TCMBlock(3, 192)) { dut => + test(new TCMBlock(3, 192, 1)) { dut => val _n = dut.n val _cells = dut.size val rand = new Random @@ -46,20 +46,20 @@ class TCMSpec extends AnyFlatSpec with ChiselScalatestTester { dut.io.en_wr.poke(true) dut.clock.step() for (i <- 0 until _n * _n) { - dut.io.r_addr(i).poke(_in_addr(i)) + dut.io.r_addr(0)(i).poke(_in_addr(i)) } for (i <- 0 until _n * _n){ - dut.io.d_out(i).expect(_in_data(i)) + dut.io.d_out(0)(i).expect(_in_data(i)) } println("Result tick @ " + _i + ": ") print_helper.printMatrix(_in_data, _n) - print_helper.printMatrixChisel(dut.io.d_out, _n) + print_helper.printMatrixChisel(dut.io.d_out(0), _n) } } } "TCM Block" should "read anytime" in { - test(new TCMBlock(2, 64)) { dut => + test(new TCMBlock(2, 64, 1)) { dut => val _n = dut.n val _cells = dut.size val rand = new Random @@ -81,16 +81,60 @@ class TCMSpec extends AnyFlatSpec with ChiselScalatestTester { val _r_addr = rand.shuffle((0 until _cells).toList).take(_n * _n) val _expected = new Array[Int](_n * _n) for (i <- 0 until _n * _n) { - dut.io.r_addr(i).poke(_r_addr(i)) + dut.io.r_addr(0)(i).poke(_r_addr(i)) } for (i <- 0 until _n * _n) { _expected(i) = _data(_r_addr(i)) } println("Result tick @ " + _i + ": ") print_helper.printMatrix(_expected, _n) - print_helper.printMatrixChisel(dut.io.d_out, _n) + print_helper.printMatrixChisel(dut.io.d_out(0), _n) for (i <- 0 until _n * _n){ - dut.io.d_out(i).expect(_data(_r_addr(i))) + dut.io.d_out(0)(i).expect(_data(_r_addr(i))) + } + } + } + } + + "TCM Block" should "read anytime on different channels" in { + test(new TCMBlock(2, 64, 2)) { dut => + val _n = dut.n + val _cells = dut.size + val _rd_ch_num = dut.rd_ch_num + val rand = new Random + val print_helper = new testUtil.PrintHelper() + val _data = new Array[Int](_cells) + for (_i <- 0 until 10) { + val _in_data = new Array[Int](_rd_ch_num * _n * _n) + val _in_addr = rand.shuffle((0 until _cells).toList).take(_rd_ch_num * _n * _n) + for (k <- 0 until _rd_ch_num){ + for (i <- 0 until _n * _n) { + val _ind = k * _n * _n + i + _in_data(_ind) = rand.between(0, 255) + dut.io.d_in(i).poke(_in_data(_ind)) + dut.io.w_addr(i).poke(_in_addr(_ind)) + _data(_in_addr(_ind)) = _in_data(_ind) + } + dut.io.en_wr.poke(true) + dut.clock.step() + } + } + for(_i <- 0 until 10){ + val _r_addr = rand.shuffle((0 until _cells).toList).take(_rd_ch_num * _n * _n) + val _expected = new Array[Int](_rd_ch_num * _n * _n) + for (k <- 0 until _rd_ch_num){ + for (i <- 0 until _n * _n) { + val _ind = k * _n * _n + i + dut.io.r_addr(k)(i).poke(_r_addr(_ind)) + _expected(_ind) = _data(_r_addr(_ind)) + } + } + println("Result tick @ " + _i + ": ") + for (k <- 0 until _rd_ch_num){ + for (i <- 0 until _n * _n){ + val _ind = k * _n * _n + i + dut.io.d_out(k)(i).expect(_data(_r_addr(_ind))) + } } } } From e3c9217011e55a4c3647756060799887b36ce623 Mon Sep 17 00:00:00 2001 From: "Fangrui.Liu" Date: Sat, 6 Apr 2024 16:22:48 +0800 Subject: [PATCH 5/5] clean code --- .../scala/isa/backend/controlMicroCode.scala | 9 ++ src/main/scala/isa/backend/memMicroCode.scala | 7 + src/main/scala/ncore/cu/controlUnit.scala | 27 +++- src/main/scala/ncore/mmu/memMngUnit.scala | 120 ------------------ src/main/scala/ncore/neuralCore.scala | 48 ++----- src/main/scala/ncore/sa/systolicArray.scala | 46 +++++++ src/main/scala/npu/npu.scala | 36 ------ src/main/scala/{ncore => }/pe/procElem.scala | 7 +- .../tightCpldMem.scala => sram/SRAM.scala} | 24 ++-- src/main/scala/top/top.scala | 18 +++ src/main/scala/vcore/mmu/memMngUnit.scala | 84 ++++++++++++ src/test/scala/ncore/CoreSpec.scala | 8 +- src/test/scala/ncore/cu/CUSpec.scala | 8 +- src/test/scala/ncore/mmu/MMUSpec.scala | 92 +++++++------- src/test/scala/{ncore => }/pe/PESpec.scala | 8 +- .../tcm/TCMSpec.scala => sram/SRAMSpec.scala} | 20 +-- 16 files changed, 287 insertions(+), 275 deletions(-) create mode 100644 src/main/scala/isa/backend/controlMicroCode.scala delete mode 100644 src/main/scala/ncore/mmu/memMngUnit.scala create mode 100644 src/main/scala/ncore/sa/systolicArray.scala delete mode 100644 src/main/scala/npu/npu.scala rename src/main/scala/{ncore => }/pe/procElem.scala (86%) rename src/main/scala/{ncore/tcm/tightCpldMem.scala => sram/SRAM.scala} (78%) create mode 100644 src/main/scala/top/top.scala create mode 100644 src/main/scala/vcore/mmu/memMngUnit.scala rename src/test/scala/{ncore => }/pe/PESpec.scala (93%) rename src/test/scala/{ncore/tcm/TCMSpec.scala => sram/SRAMSpec.scala} (90%) diff --git a/src/main/scala/isa/backend/controlMicroCode.scala b/src/main/scala/isa/backend/controlMicroCode.scala new file mode 100644 index 0000000..d517852 --- /dev/null +++ b/src/main/scala/isa/backend/controlMicroCode.scala @@ -0,0 +1,9 @@ +// See README.md for license details. + +package isa.backend +import chisel3._ +import chisel3.util._ + +class NCoreCUBundle (val size: Int = 4096) extends Bundle { + val accum = Bool() +} \ No newline at end of file diff --git a/src/main/scala/isa/backend/memMicroCode.scala b/src/main/scala/isa/backend/memMicroCode.scala index 2f243f4..f1a1a9f 100644 --- a/src/main/scala/isa/backend/memMicroCode.scala +++ b/src/main/scala/isa/backend/memMicroCode.scala @@ -19,3 +19,10 @@ object MemChannel extends ChiselEnum { // 16/32 bits will have no ch3 val ch3 = Value(0x3.U) } + +class MMUCtrlBundle (val n: Int = 8, val size: Int = 4096) extends Bundle { + val offset_keep = Bool() + val h_only = Bool() + val in_addr = Vec(n * n, UInt(log2Ceil(size).W)) + val out_addr = Vec(n * n, UInt(log2Ceil(size).W)) +} \ No newline at end of file diff --git a/src/main/scala/ncore/cu/controlUnit.scala b/src/main/scala/ncore/cu/controlUnit.scala index d71189f..f5d8fe4 100644 --- a/src/main/scala/ncore/cu/controlUnit.scala +++ b/src/main/scala/ncore/cu/controlUnit.scala @@ -2,11 +2,12 @@ package ncore.cu import chisel3._ +import isa.backend._ /** * Control unit also uses systolic array to pass instructions */ -class ControlUnit(val n: Int = 8, val ctrl_width: Int = 8) extends Module { +class ControlUnitforTest(val n: Int = 8, val ctrl_width: Int = 8) extends Module { val io = IO(new Bundle { val cbus_in = Input(UInt(ctrl_width.W)) val cbus_out = Output(Vec(n * n, UInt(ctrl_width.W))) @@ -14,6 +15,30 @@ class ControlUnit(val n: Int = 8, val ctrl_width: Int = 8) extends Module { // Assign each element with diagnal control signal val reg = RegInit(VecInit(Seq.fill(2*n-1)(0.U(ctrl_width.W)))) + // 1D systolic array for control + reg(0) := io.cbus_in + for(i<- 1 until 2*n-1){ + reg(i) := reg(i-1) + } + // Boardcast to all elements in the array + for(i <- 0 until n){ + for(j <- 0 until n){ + io.cbus_out(n*i+j) := reg(i+j) + } + } +} + +/** + * Control unit also uses systolic array to pass instructions + */ +class ControlUnit(val n: Int = 8, val sram_size: Int = 4096) extends Module { + val io = IO(new Bundle { + val cbus_in = Input(new NCoreCUBundle(sram_size)) + val cbus_out = Output(Vec(n * n, new NCoreCUBundle(sram_size))) + }) + // Assign each element with diagnal control signal + val reg = RegInit(VecInit(Seq.fill(2*n-1)(0.U.asTypeOf(new NCoreCUBundle(sram_size))))) + // 1D systolic array for control reg(0) := io.cbus_in for(i<- 1 until 2*n-1){ diff --git a/src/main/scala/ncore/mmu/memMngUnit.scala b/src/main/scala/ncore/mmu/memMngUnit.scala deleted file mode 100644 index cfad5f3..0000000 --- a/src/main/scala/ncore/mmu/memMngUnit.scala +++ /dev/null @@ -1,120 +0,0 @@ -// See README.md for license details. - -package ncore.mmu - -import chisel3._ -import chisel3.util._ -import isa.backend._ -import ncore._ -import ncore.tcm._ - -class MMUBundle extends Bundle { - val mem_ch = MemChannel() - val mem_lo = MemLayout() -} - -class OffsetGenerator(val n: Int = 8) extends Module { - val io = IO(new Bundle { - val inc = Input(Vec(n, Bool())) - val out = Output(Vec(n, UInt(log2Ceil(n * n).W))) - }) - val init_value = Seq.tabulate(n)(i => (n * i).U(log2Ceil(n * n).W)) - val regs = RegInit(VecInit(init_value)) - - for (i <- 0 until n){ - when (io.inc(i)) { - regs(i) := (regs(i) + 1.U) % (n * n).U - }.otherwise { - regs(i) := init_value(i) - } - io.out(i) := regs(i) - } -} - - -class MemoryControlArray(val n: Int = 8) extends Module { - val io = IO(new Bundle { - val ctrl_in_a = Input(Bool()) - val ctrl_in_b = Input(Bool()) - val offset_inc_in = Input(Bool()) - val ctrl_out_a = Output(Vec(n, Bool())) - val ctrl_out_b = Output(Vec(n, Bool())) - val offset_inc_out = Output(Vec((n-1) * (n-1), Bool())) - }) - // Assign each element with diagnal control signal - val reg_inc = RegInit(VecInit(Seq.fill(2*n - 3)(0.B))) - val reg_a = RegInit(VecInit(Seq.fill(n)(0.B))) - val reg_b = RegInit(VecInit(Seq.fill(n)(0.B))) - - reg_a(0) := io.ctrl_in_a(0) - reg_b(0) := io.ctrl_in_b(0) - for (i <- 1 until n - 1) { - reg_a(i) := reg_a(i-1) - reg_b(i) := reg_b(i-1) - } - - for (i <- 0 until n) { - io.ctrl_out_a(i) := reg_a(i) - io.ctrl_out_b(i) := reg_b(i) - } - - reg_inc(0) := io.offset_inc_in - for (i <- 0 until 2 * n - 3) { - reg_inc(i) := reg_inc(i - 1) - } - for (i <- 0 until n - 1) { - for (j <- 0 until n - 1) { - io.offset_inc_out(n * i + j) := reg_inc(i + j) - } - } -} - -/** - * This is the neural core design - */ -class MemoryManageUnit( - val n: Int = 8, - val nbits: Int = 8, - val word_size: Int = 4, - val size: Int = 4096 - ) extends Module { - val io = IO(new Bundle { - val base_addr = Input(UInt(log2Ceil(size).W)) - val ctrl = Input(Vec(n * n, new MMUBundle())) - val out_a = Output(Vec(n * n, UInt(32.W))) - val out_b = Output(Vec(n * n, UInt(32.W))) - }) - - val offsetgen_a = new OffsetGenerator(n) - val offsetgen_b = new OffsetGenerator(n) - - val mem = new DetachableTCM(n, word_size, size, 2) - - // Create 2d register for horizontal & vertical - val reg_h = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W)))) - val reg_v = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W)))) - - for (i <- 0 until n){ - for (j <- 0 until n) { - // ==== INPUT ==== - // vertical - if (i==0) { - mem.io.r_addr(0)(j) := io.base_addr + offsetgen_b.io.out(j) - } else { - mem.io.r_addr(0)(n * i + j) := reg_v(n * (i - 1) + j) - } - if (i < n - 1 && j < n) - reg_v(n * i + j) := mem.io.r_addr(0)(n * i + j) - - // horizontal - if (j==0) { - mem.io.r_addr(1)(n * i) := io.base_addr + offsetgen_a.io.out(i) - } else { - mem.io.r_addr(1)(n * i + j) := reg_h((n - 1) * i + (j - 1)) - } - if (i < n && j < n - 1) - reg_h((n - 1) * i + j) := mem.io.r_addr(1)(n * i + j) - } - } - -} \ No newline at end of file diff --git a/src/main/scala/ncore/neuralCore.scala b/src/main/scala/ncore/neuralCore.scala index 7ec0ddb..381a3a4 100644 --- a/src/main/scala/ncore/neuralCore.scala +++ b/src/main/scala/ncore/neuralCore.scala @@ -1,63 +1,41 @@ // See README.md for license details package ncore +import isa.backend._ +import pe._ import chisel3._ + /** * This is the neural core design */ - class NeuralCoreforTest(val n: Int = 8, val nbits: Int = 8, val ctrl_width: Int = 8) extends Module { + class NeuralCore(val n: Int = 8, val nbits: Int = 8, val sram_size: Int = 4096) extends Module { val io = IO(new Bundle { val vec_a = Input(Vec(n, UInt(nbits.W))) // vector `a` is the left input val vec_b = Input(Vec(n, UInt(nbits.W))) // vector `b` is the top input - val ctrl = Input(UInt(ctrl_width.W)) + val ctrl = Input(new NCoreCUBundle()) val out = Output(Vec(n * n, UInt((2 * nbits + 12).W))) }) // Create n x n pe blocks val pe_io = VecInit(Seq.fill(n * n) {Module(new pe.PE(nbits)).io}) - // Create 2d register for horizontal & vertical - val pe_reg_h = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W)))) - val pe_reg_v = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W)))) // we use systolic array to pipeline the instructions // this will avoid bubble and inst complexity // while simplifying design with higher efficiency - val ctrl_array = Module(new cu.ControlUnit(n, ctrl_width)) + val ctrl_array = Module(new cu.ControlUnit(n, sram_size)) ctrl_array.io.cbus_in := io.ctrl + val sarray = Module(new sa.SystolicArray2D(n, nbits)) + sarray.io.vec_a := io.vec_a + sarray.io.vec_b := io.vec_b + for (i <- 0 until n){ for (j <- 0 until n) { - // ==== OUTPUT ==== - // pe array's output mapped to the matrix position + pe_io(n * i + j).in_a := sarray.io.out_a(n * i + j) + pe_io(n * i + j).in_b := sarray.io.out_b(n * i + j) + pe_io(n * i + j).ctrl := ctrl_array.io.cbus_out(n * i + j) io.out(n * i + j) := pe_io(n * i + j).out - - // ==== INPUT ==== - // vertical - if (i==0) { - pe_io(j).in_b := io.vec_b(j) - } else { - pe_io(n * i + j).in_b := pe_reg_v(n * (i - 1) + j) - } - if (i < n - 1 && j < n) - pe_reg_v(n * i + j) := pe_io(n * i + j).in_b - - // horizontal - if (j==0) { - pe_io(n * i).in_a := io.vec_a(i) - } else { - pe_io(n * i + j).in_a := pe_reg_h((n - 1) * i + (j - 1)) - } - if (i < n && j < n - 1) - pe_reg_h((n - 1) * i + j) := pe_io(n * i + j).in_a - - // ==== CONTROL ==== - // Currently we only have one bit control - // which is `ACCUM` - // TODO: - // Add ALU control to pe elements - val ctrl = ctrl_array.io.cbus_out(n * i + j).asBools - pe_io(n * i + j).accum := ctrl(0) } } } \ No newline at end of file diff --git a/src/main/scala/ncore/sa/systolicArray.scala b/src/main/scala/ncore/sa/systolicArray.scala new file mode 100644 index 0000000..cd1213d --- /dev/null +++ b/src/main/scala/ncore/sa/systolicArray.scala @@ -0,0 +1,46 @@ +// See README.md for license details +package ncore.sa + +import chisel3._ + + +/** + * This is the neural core design + */ + class SystolicArray2D(val n: Int = 8, val nbits: Int = 8) extends Module { + val io = IO(new Bundle { + val vec_a = Input(Vec(n, UInt(nbits.W))) // vector `a` is the left input + val vec_b = Input(Vec(n, UInt(nbits.W))) // vector `b` is the top input + val out_a = Output(Vec(n * n, UInt(nbits.W))) + val out_b = Output(Vec(n * n, UInt(nbits.W))) + + }) + + // Create 2d register for horizontal & vertical + val reg_h = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W)))) + val reg_v = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W)))) + + for (i <- 0 until n){ + for (j <- 0 until n) { + + // ==== INPUT ==== + // vertical + if (i==0) { + io.out_b(j) := io.vec_b(j) + } else { + io.out_b(n * i + j) := reg_v(n * (i - 1) + j) + } + if (i < n - 1 && j < n) + reg_v(n * i + j) := io.out_b(n * i + j) + + // horizontal + if (j==0) { + io.out_a(n * i) := io.vec_a(i) + } else { + io.out_a(n * i + j) := reg_h((n - 1) * i + (j - 1)) + } + if (i < n && j < n - 1) + reg_h((n - 1) * i + j) := io.out_a(n * i + j) + } + } + } \ No newline at end of file diff --git a/src/main/scala/npu/npu.scala b/src/main/scala/npu/npu.scala deleted file mode 100644 index 2fba8a3..0000000 --- a/src/main/scala/npu/npu.scala +++ /dev/null @@ -1,36 +0,0 @@ -package npu - -import chisel3._ -import java.nio.file.{Paths, Files} -import java.nio.charset.StandardCharsets -import circt.stage.ChiselStage -import ncore.pe.PE - -class NPU extends Module { - - val nbits: Int = 8 - val io = IO(new Bundle { - val in_a = Input(UInt(nbits.W)) - val in_b = Input(UInt(nbits.W)) - val accum = Input(Bool()) - val out = Output(UInt((nbits*2).W)) - }) - - val pe = Module(new PE(8)) - - // get value when ready - pe.io.in_a := io.in_a - pe.io.in_b := io.in_b - pe.io.accum := io.accum - io.out := pe.io.out -} - -object Main extends App { - // These lines generate the Verilog output - - val hdl = ChiselStage.emitSystemVerilog( - new NPU(), - firtoolOpts = Array("-disable-all-randomization", "-strip-debug-info") - ) - Files.write(Paths.get("npu.v"), hdl.getBytes(StandardCharsets.UTF_8)) -} diff --git a/src/main/scala/ncore/pe/procElem.scala b/src/main/scala/pe/procElem.scala similarity index 86% rename from src/main/scala/ncore/pe/procElem.scala rename to src/main/scala/pe/procElem.scala index bf88bcc..98e909d 100644 --- a/src/main/scala/ncore/pe/procElem.scala +++ b/src/main/scala/pe/procElem.scala @@ -1,8 +1,9 @@ // See README.md for license details. -package ncore.pe +package pe import chisel3._ +import isa.backend._ /** * processing element unit in npu design. @@ -11,7 +12,7 @@ import chisel3._ class PE(val nbits: Int = 8) extends Module { val io = IO( new Bundle { - val accum = Input(Bool()) + val ctrl = Input(new NCoreCUBundle()) val in_a = Input(UInt(nbits.W)) val in_b = Input(UInt(nbits.W)) // The register bandwith is optimized for large transformer @@ -22,7 +23,7 @@ class PE(val nbits: Int = 8) extends Module { val res = RegInit(0.U((nbits*2 + 12).W)) - when (io.accum) { + when (io.ctrl.accum) { res := res + (io.in_a * io.in_b) } .otherwise { res := (io.in_a * io.in_b) diff --git a/src/main/scala/ncore/tcm/tightCpldMem.scala b/src/main/scala/sram/SRAM.scala similarity index 78% rename from src/main/scala/ncore/tcm/tightCpldMem.scala rename to src/main/scala/sram/SRAM.scala index b036255..530c681 100644 --- a/src/main/scala/ncore/tcm/tightCpldMem.scala +++ b/src/main/scala/sram/SRAM.scala @@ -1,11 +1,11 @@ // See README.md for license details. -package ncore.tcm +package sram import chisel3._ import chisel3.util._ -class TCMCell(val nbits: Int = 8) extends Module { +class SRAMCell(val nbits: Int = 8) extends Module { val io = IO( new Bundle { val d_in = Input(UInt(nbits.W)) @@ -22,7 +22,7 @@ class TCMCell(val nbits: Int = 8) extends Module { } } -class TCMBlock(val n: Int = 8, +class SRAMBlock(val n: Int = 8, val size: Int = 4096, val rd_ch_num: Int = 2, val nbits: Int = 8 @@ -36,7 +36,7 @@ class TCMBlock(val n: Int = 8, val en_wr = Input(Bool()) } ) - val cells_io = VecInit(Seq.fill(size) {Module(new TCMCell(nbits)).io}) + val cells_io = VecInit(Seq.fill(size) {Module(new SRAMCell(nbits)).io}) for (i <- 0 until size) { cells_io(i).en_wr := false.B.asTypeOf(cells_io(i).en_wr) @@ -59,7 +59,7 @@ class TCMBlock(val n: Int = 8, } -class DetachableTCM( +class SRAM( val n: Int = 8, val nblocks: Int = 4, val size: Int = 4096, @@ -73,18 +73,18 @@ class DetachableTCM( val en_wr = Input(Bool()) }) - val tcm_blocks_io = VecInit(Seq.fill(nblocks) { - Module(new TCMBlock(n, size, rd_ch_num, 8)).io}) + val sram_blocks_io = VecInit(Seq.fill(nblocks) { + Module(new SRAMBlock(n, size, rd_ch_num, 8)).io}) for (i <- 0 until nblocks) { - tcm_blocks_io(i).en_wr := io.en_wr + sram_blocks_io(i).en_wr := io.en_wr for (j <- 0 until n) { for (k <- 0 until rd_ch_num) { - tcm_blocks_io(i).r_addr(k)(j) := io.r_addr(k)(j) + sram_blocks_io(i).r_addr(k)(j) := io.r_addr(k)(j) } - tcm_blocks_io(i).w_addr(j) := io.w_addr(j) - tcm_blocks_io(i).d_in(j) := io.d_in(j)(i) - io.d_out(j)(i) := tcm_blocks_io(i).d_out(j) + sram_blocks_io(i).w_addr(j) := io.w_addr(j) + sram_blocks_io(i).d_in(j) := io.d_in(j)(i) + io.d_out(j)(i) := sram_blocks_io(i).d_out(j) } } diff --git a/src/main/scala/top/top.scala b/src/main/scala/top/top.scala new file mode 100644 index 0000000..40f93e7 --- /dev/null +++ b/src/main/scala/top/top.scala @@ -0,0 +1,18 @@ +package top + +import chisel3._ +import java.nio.file.{Paths, Files} +import java.nio.charset.StandardCharsets +import circt.stage.ChiselStage +import ncore._ + + +object Main extends App { + // These lines generate the Verilog output + + val hdl = ChiselStage.emitSystemVerilog( + new NeuralCore(), + firtoolOpts = Array("-disable-all-randomization", "-strip-debug-info") + ) + Files.write(Paths.get("top.v"), hdl.getBytes(StandardCharsets.UTF_8)) +} diff --git a/src/main/scala/vcore/mmu/memMngUnit.scala b/src/main/scala/vcore/mmu/memMngUnit.scala new file mode 100644 index 0000000..a53d2ae --- /dev/null +++ b/src/main/scala/vcore/mmu/memMngUnit.scala @@ -0,0 +1,84 @@ +// // See README.md for license details. + +// package ncore.mmu + +// import chisel3._ +// import chisel3.util._ +// import isa.backend._ +// import ncore._ + + +// class OffsetGenerator(val n: Int = 8) extends Module { +// val io = IO(new Bundle { +// val keep = Input(Vec(n, Bool())) +// val out = Output(Vec(n, UInt(log2Ceil(n * n).W))) +// }) +// val init_value = Seq.tabulate(n)(i => (n * i).U(log2Ceil(n * n).W)) +// val regs = RegInit(VecInit(init_value)) + +// for (i <- 0 until n){ +// when (io.keep(i)) { +// regs(i) := init_value(i) +// }.otherwise { +// regs(i) := (regs(i) + 1.U) % (n * n).U +// } +// io.out(i) := regs(i) +// } +// } + +// /** +// * This is the neural core design +// */ +// class MemoryManageUnit( +// val n: Int = 8, +// val size: Int = 4096 +// ) extends Module { +// val io = IO(new Bundle { +// val ctrl = Input(Vec(n * n, new MMUCtrlBundle(size))) +// val op_a = Output(Vec(n * n, UInt(log2Ceil(size).W))) +// val op_b = Output(Vec(n * n, UInt(log2Ceil(size).W))) +// val res = Output(Vec(n * n, UInt(log2Ceil(size).W))) +// }) + +// val offsetgen_a = new OffsetGenerator(n) +// val offsetgen_b = new OffsetGenerator(n) + +// // Create 2d register for horizontal & vertical +// val reg_h = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(log2Ceil(size).W)))) +// val reg_r = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(log2Ceil(size).W)))) +// val reg_v = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(log2Ceil(size).W)))) + +// for (i <- 0 until n){ +// for (j <- 0 until n) { +// offsetgen_a.io.keep(i) := io.ctrl(n * i).offset_keep +// offsetgen_b.io.keep(i) := io.ctrl(j).offset_kee +// // ==== INPUT ==== +// // vertical +// if (i==0) { +// when (io.ctrl(n * i + j).h_only) { +// io.op_b(j) := io.ctrl(n * i + j).in_addr + offsetgen_b.io.out(j) +// } .otherwise { +// io.op_b(j) := io.ctrl(n * i + j).in_addr + offsetgen_b.io.out(j) +// } +// } else { +// io.op_b(0)(n * i + j) := reg_v(n * (i - 1) + j) +// } +// if (i < n - 1 && j < n) +// reg_v(n * i + j) := io.op_b(n * i + j) + +// // horizontal & result +// if (j==0) { +// io.op_a(n * i) := io.ctrl(n * i + j).in_addr + offsetgen_a.io.out(i) +// io.res(n * i) := io.ctrl(n * i + j).out_addr + offsetgen_a.io.out(i) +// } else { +// io.op_a(n * i + j) := reg_h((n - 1) * i + (j - 1)) +// io.res(n * i + j) := reg_r((n - 1) * i + (j - 1)) +// } +// if (i < n && j < n - 1) { +// reg_h((n - 1) * i + j) := io.op_a(n * i + j) +// reg_r((n - 1) * i + j) := io.res(n * i + j) +// } +// } +// } + +// } \ No newline at end of file diff --git a/src/test/scala/ncore/CoreSpec.scala b/src/test/scala/ncore/CoreSpec.scala index c87341c..432a25f 100644 --- a/src/test/scala/ncore/CoreSpec.scala +++ b/src/test/scala/ncore/CoreSpec.scala @@ -11,8 +11,8 @@ import chisel3.experimental.BundleLiterals._ class CoreSpec extends AnyFlatSpec with ChiselScalatestTester { - "NeuralCoreforTest" should "do a normal matrix multiplication" in { - test(new NeuralCoreforTest(4, 8)) { dut => + "NeuralCore" should "do a normal matrix multiplication" in { + test(new NeuralCore(4, 8)) { dut => val print_helper = new testUtil.PrintHelper() val _n = dut.n val rand = new Random @@ -78,9 +78,9 @@ class CoreSpec extends AnyFlatSpec with ChiselScalatestTester { // The rest of the control signal will hand over // to a dedicated systolic-ish control bus if (i_tick < _n && i_tick >= 0) - dut.io.ctrl.poke(0x1) + dut.io.ctrl.accum.poke(0x1) else - dut.io.ctrl.poke(0x0) + dut.io.ctrl.accum.poke(0x0) // ideally, the array will give _n (diagnal) results per tick dut.clock.step() diff --git a/src/test/scala/ncore/cu/CUSpec.scala b/src/test/scala/ncore/cu/CUSpec.scala index b0863d4..920c846 100644 --- a/src/test/scala/ncore/cu/CUSpec.scala +++ b/src/test/scala/ncore/cu/CUSpec.scala @@ -19,19 +19,19 @@ class CUSpec extends AnyFlatSpec with ChiselScalatestTester { var history = new Array[Int](2 * _n - 1) var prod = 0 for (n <- 0 until 16) { - val _cbus_in = rand.between(0, 255) + val _cbus_in = rand.between(0, 2) history +:= _cbus_in - dut.io.cbus_in.poke(_cbus_in) + dut.io.cbus_in.accum.poke(_cbus_in) dut.clock.step() history = history.slice(0, 2 * _n - 1) println("Input tick @ " + n + ": " + _cbus_in) for(i: Int <- 0 until _n){ for(j:Int <- 0 until _n) { - dut.io.cbus_out(_n * i + j).expect(history(i + j)) + dut.io.cbus_out(_n * i + j).accum.expect(history(i + j)) } } println("Control tick @ " + n + " : ") - print_helper.printMatrixChisel(dut.io.cbus_out, _n) + // print_helper.printMatrixChisel(dut.io.cbus_out, _n) } } } diff --git a/src/test/scala/ncore/mmu/MMUSpec.scala b/src/test/scala/ncore/mmu/MMUSpec.scala index 537328f..4c71a10 100644 --- a/src/test/scala/ncore/mmu/MMUSpec.scala +++ b/src/test/scala/ncore/mmu/MMUSpec.scala @@ -1,50 +1,50 @@ -//// See README.md for license details. +// //// See README.md for license details. -package ncore.mmu +// package ncore.mmu -import testUtil._ -import scala.util.Random -import chisel3._ -import chiseltest._ -import org.scalatest.flatspec.AnyFlatSpec -import chisel3.experimental.BundleLiterals._ +// import testUtil._ +// import scala.util.Random +// import chisel3._ +// import chiseltest._ +// import org.scalatest.flatspec.AnyFlatSpec +// import chisel3.experimental.BundleLiterals._ -class MMUSpec extends AnyFlatSpec with ChiselScalatestTester { +// class MMUSpec extends AnyFlatSpec with ChiselScalatestTester { - "OffsetGenerator" should "provide correct offset" in { - test(new OffsetGenerator(4)) { dut => - val print_helper = new testUtil.PrintHelper() - val _n = dut.n - val _array = List(List(false, false, false, false), - List(true, false, false, false), - List(true, true, false, false), - List(true, true, true, false), - List(false, true, true, true), - List(true, false, true, true), - List(false, true, false, true), - List(false, false, true, false), - List(false, false, false, true), - ) - val _expected = List(List(0, 4, 8, 12), - List(1, 4, 8, 12), - List(2, 5, 8, 12), - List(3, 6, 9, 12), - List(0, 7, 10, 13), - List(1, 4, 11, 14), - List(0, 5, 8, 15), - List(0, 4, 9, 12), - List(0, 4, 8, 13), - ) - for (i <- 0 until _array.length) { - for (j <- 0 until _n) { - dut.io.inc(j).poke(_array(i%_array.length)(j)) - } - dut.clock.step() - for (j <- 0 until _n) { - dut.io.out(j).expect(_expected(i)(j)) - } - print_helper.printVectorChisel(dut.io.out, _n) - } - } - } -} \ No newline at end of file +// "OffsetGenerator" should "provide correct offset" in { +// test(new OffsetGenerator(4)) { dut => +// val print_helper = new testUtil.PrintHelper() +// val _n = dut.n +// val _array = List(List(false, false, false, false), +// List(true, false, false, false), +// List(true, true, false, false), +// List(true, true, true, false), +// List(false, true, true, true), +// List(true, false, true, true), +// List(false, true, false, true), +// List(false, false, true, false), +// List(false, false, false, true), +// ) +// val _expected = List(List(0, 4, 8, 12), +// List(1, 4, 8, 12), +// List(2, 5, 8, 12), +// List(3, 6, 9, 12), +// List(0, 7, 10, 13), +// List(1, 4, 11, 14), +// List(0, 5, 8, 15), +// List(0, 4, 9, 12), +// List(0, 4, 8, 13), +// ) +// for (i <- 0 until _array.length) { +// for (j <- 0 until _n) { +// dut.io.keep(j).poke(!_array(i%_array.length)(j)) +// } +// dut.clock.step() +// for (j <- 0 until _n) { +// dut.io.out(j).expect(_expected(i)(j)) +// } +// print_helper.printVectorChisel(dut.io.out, _n) +// } +// } +// } +// } \ No newline at end of file diff --git a/src/test/scala/ncore/pe/PESpec.scala b/src/test/scala/pe/PESpec.scala similarity index 93% rename from src/test/scala/ncore/pe/PESpec.scala rename to src/test/scala/pe/PESpec.scala index b08d114..eba2038 100644 --- a/src/test/scala/ncore/pe/PESpec.scala +++ b/src/test/scala/pe/PESpec.scala @@ -1,6 +1,6 @@ // See README.md for license details. -package ncore.pe +package pe import scala.util.Random import chisel3._ @@ -20,7 +20,7 @@ class PESpec extends AnyFlatSpec with ChiselScalatestTester { val _left_in_ = rand.between(0, 255) dut.io.in_a.poke(_top_in_) dut.io.in_b.poke(_left_in_) - dut.io.accum.poke(true) + dut.io.ctrl.accum.poke(true) dut.clock.step() prod = prod + _top_in_ * _left_in_ dut.io.out.expect(prod) @@ -32,7 +32,7 @@ class PESpec extends AnyFlatSpec with ChiselScalatestTester { var _left_in_ = rand.between(1, 255) dut.io.in_a.poke(_top_in_) dut.io.in_b.poke(_left_in_) - dut.io.accum.poke(false) + dut.io.ctrl.accum.poke(false) dut.clock.step() prod = prod + _top_in_ * _left_in_ dut.io.out.expect(prod) @@ -42,7 +42,7 @@ class PESpec extends AnyFlatSpec with ChiselScalatestTester { _left_in_ = rand.between(1, 255) dut.io.in_a.poke(_top_in_) dut.io.in_b.poke(_left_in_) - dut.io.accum.poke(true) + dut.io.ctrl.accum.poke(true) dut.clock.step() prod = prod + _top_in_ * _left_in_ dut.io.out.expect(prod) diff --git a/src/test/scala/ncore/tcm/TCMSpec.scala b/src/test/scala/sram/SRAMSpec.scala similarity index 90% rename from src/test/scala/ncore/tcm/TCMSpec.scala rename to src/test/scala/sram/SRAMSpec.scala index 6477973..2d8464b 100644 --- a/src/test/scala/ncore/tcm/TCMSpec.scala +++ b/src/test/scala/sram/SRAMSpec.scala @@ -1,6 +1,6 @@ // See README.md for license details. -package ncore.tcm +package sram import scala.util.Random import chisel3._ @@ -10,10 +10,10 @@ import org.scalatest.flatspec.AnyFlatSpec import chisel3.experimental.BundleLiterals._ -class TCMSpec extends AnyFlatSpec with ChiselScalatestTester { +class SRAMSpec extends AnyFlatSpec with ChiselScalatestTester { - "TCM Cells" should "write on signal" in { - test(new TCMCell(8)) { dut => + "SRAM Cells" should "write on signal" in { + test(new SRAMCell(8)) { dut => val rand = new Random var _prev = 0 for (i <- 0 until 10) { @@ -29,8 +29,8 @@ class TCMSpec extends AnyFlatSpec with ChiselScalatestTester { } } - "TCM Block" should "write on signal and read anytime" in { - test(new TCMBlock(3, 192, 1)) { dut => + "SRAM Block" should "write on signal and read anytime" in { + test(new SRAMBlock(3, 192, 1)) { dut => val _n = dut.n val _cells = dut.size val rand = new Random @@ -58,8 +58,8 @@ class TCMSpec extends AnyFlatSpec with ChiselScalatestTester { } } - "TCM Block" should "read anytime" in { - test(new TCMBlock(2, 64, 1)) { dut => + "SRAM Block" should "read anytime" in { + test(new SRAMBlock(2, 64, 1)) { dut => val _n = dut.n val _cells = dut.size val rand = new Random @@ -96,8 +96,8 @@ class TCMSpec extends AnyFlatSpec with ChiselScalatestTester { } } - "TCM Block" should "read anytime on different channels" in { - test(new TCMBlock(2, 64, 2)) { dut => + "SRAM Block" should "read anytime on different channels" in { + test(new SRAMBlock(2, 64, 2)) { dut => val _n = dut.n val _cells = dut.size val _rd_ch_num = dut.rd_ch_num