diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index e1865e1..2fb383c 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -5,8 +5,9 @@ on: - main - releases/** pull_request: - types: - - opened + branches: + - main + - 'releases/**' jobs: Lint: diff --git a/src/main/scala/isa/backend/controlMicroCode.scala b/src/main/scala/isa/backend/controlMicroCode.scala new file mode 100644 index 0000000..d517852 --- /dev/null +++ b/src/main/scala/isa/backend/controlMicroCode.scala @@ -0,0 +1,9 @@ +// See README.md for license details. + +package isa.backend +import chisel3._ +import chisel3.util._ + +class NCoreCUBundle (val size: Int = 4096) extends Bundle { + val accum = Bool() +} \ No newline at end of file diff --git a/src/main/scala/isa/backend/memMicroCode.scala b/src/main/scala/isa/backend/memMicroCode.scala new file mode 100644 index 0000000..f1a1a9f --- /dev/null +++ b/src/main/scala/isa/backend/memMicroCode.scala @@ -0,0 +1,28 @@ +// See README.md for license details. + +package isa.backend +import chisel3._ +import chisel3.util._ + +object MemLayout extends ChiselEnum { + val bit8 = Value(0x0.U) + val bit16 = Value(0x1.U) + val bit32 = Value(0x2.U) +} + +object MemChannel extends ChiselEnum { + val ch0 = Value(0x0.U) + // 16/32 bits will have no ch1 + val ch1 = Value(0x1.U) + // 32 bits will have no ch2 + val ch2 = Value(0x2.U) + // 16/32 bits will have no ch3 + val ch3 = Value(0x3.U) +} + +class MMUCtrlBundle (val n: Int = 8, val size: Int = 4096) extends Bundle { + val offset_keep = Bool() + val h_only = Bool() + val in_addr = Vec(n * n, UInt(log2Ceil(size).W)) + val out_addr = Vec(n * n, UInt(log2Ceil(size).W)) +} \ No newline at end of file diff --git a/src/main/scala/isa/instSetArch.scala b/src/main/scala/isa/instSetArch.scala index f4b5359..84fd512 100644 --- a/src/main/scala/isa/instSetArch.scala +++ b/src/main/scala/isa/instSetArch.scala @@ -8,4 +8,12 @@ object NeuralISA extends ChiselEnum { val st = Value(0x2.U(4.W)) val mma = Value(0x3.U(4.W)) val ip = Value (0x4.U(4.W)) -} \ No newline at end of file +} + +object DType extends ChiselEnum { + val uint = Value(0x0.U) + val int = Value(0x1.U) + val fp = Value(0x2.U) + // no bfp32c0 + val bfp = Value(0x3.U) +} diff --git a/src/main/scala/isa/memMicroCode.scala b/src/main/scala/isa/memMicroCode.scala deleted file mode 100644 index a47e1e2..0000000 --- a/src/main/scala/isa/memMicroCode.scala +++ /dev/null @@ -1,44 +0,0 @@ -// See README.md for license details. - -package isa -import chisel3._ -import chisel3.util._ - - -object OffsetPattern extends ChiselEnum { - val not_def = Value(0x0.U) - val sca_0d = Value(0x1.U) - val vec_1d = Value(0x2.U) - val mat_2d = Value(0x3.U) -} - -object AddressMode extends ChiselEnum { - val immd = Value(0x0.U) - val addr = Value(0x1.U) - val addr_immd = Value(0x2.U) -} - - -object MemLayout extends ChiselEnum { - val bit8 = Value(0x0.U) - val bit16 = Value(0x1.U) - val bit32 = Value(0x2.U) -} - -object DType extends ChiselEnum { - val uint = Value(0x0.U) - val int = Value(0x1.U) - val fp = Value(0x2.U) - // no bfp32c0 - val bfp = Value(0x3.U) -} - -object MemChannel extends ChiselEnum { - val ch0 = Value(0x1.U) - // 16/32 bits will have no ch1 - val ch1 = Value(0x2.U) - // 32 bits will have no ch2 - val ch2 = Value(0x4.U) - // 16/32 bits will have no ch3 - val ch3 = Value(0x8.U) -} \ No newline at end of file diff --git a/src/main/scala/ncore/cu/controlUnit.scala b/src/main/scala/ncore/cu/controlUnit.scala index d71189f..f5d8fe4 100644 --- a/src/main/scala/ncore/cu/controlUnit.scala +++ b/src/main/scala/ncore/cu/controlUnit.scala @@ -2,11 +2,12 @@ package ncore.cu import chisel3._ +import isa.backend._ /** * Control unit also uses systolic array to pass instructions */ -class ControlUnit(val n: Int = 8, val ctrl_width: Int = 8) extends Module { +class ControlUnitforTest(val n: Int = 8, val ctrl_width: Int = 8) extends Module { val io = IO(new Bundle { val cbus_in = Input(UInt(ctrl_width.W)) val cbus_out = Output(Vec(n * n, UInt(ctrl_width.W))) @@ -14,6 +15,30 @@ class ControlUnit(val n: Int = 8, val ctrl_width: Int = 8) extends Module { // Assign each element with diagnal control signal val reg = RegInit(VecInit(Seq.fill(2*n-1)(0.U(ctrl_width.W)))) + // 1D systolic array for control + reg(0) := io.cbus_in + for(i<- 1 until 2*n-1){ + reg(i) := reg(i-1) + } + // Boardcast to all elements in the array + for(i <- 0 until n){ + for(j <- 0 until n){ + io.cbus_out(n*i+j) := reg(i+j) + } + } +} + +/** + * Control unit also uses systolic array to pass instructions + */ +class ControlUnit(val n: Int = 8, val sram_size: Int = 4096) extends Module { + val io = IO(new Bundle { + val cbus_in = Input(new NCoreCUBundle(sram_size)) + val cbus_out = Output(Vec(n * n, new NCoreCUBundle(sram_size))) + }) + // Assign each element with diagnal control signal + val reg = RegInit(VecInit(Seq.fill(2*n-1)(0.U.asTypeOf(new NCoreCUBundle(sram_size))))) + // 1D systolic array for control reg(0) := io.cbus_in for(i<- 1 until 2*n-1){ diff --git a/src/main/scala/ncore/neuralCore.scala b/src/main/scala/ncore/neuralCore.scala index 7ec0ddb..381a3a4 100644 --- a/src/main/scala/ncore/neuralCore.scala +++ b/src/main/scala/ncore/neuralCore.scala @@ -1,63 +1,41 @@ // See README.md for license details package ncore +import isa.backend._ +import pe._ import chisel3._ + /** * This is the neural core design */ - class NeuralCoreforTest(val n: Int = 8, val nbits: Int = 8, val ctrl_width: Int = 8) extends Module { + class NeuralCore(val n: Int = 8, val nbits: Int = 8, val sram_size: Int = 4096) extends Module { val io = IO(new Bundle { val vec_a = Input(Vec(n, UInt(nbits.W))) // vector `a` is the left input val vec_b = Input(Vec(n, UInt(nbits.W))) // vector `b` is the top input - val ctrl = Input(UInt(ctrl_width.W)) + val ctrl = Input(new NCoreCUBundle()) val out = Output(Vec(n * n, UInt((2 * nbits + 12).W))) }) // Create n x n pe blocks val pe_io = VecInit(Seq.fill(n * n) {Module(new pe.PE(nbits)).io}) - // Create 2d register for horizontal & vertical - val pe_reg_h = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W)))) - val pe_reg_v = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W)))) // we use systolic array to pipeline the instructions // this will avoid bubble and inst complexity // while simplifying design with higher efficiency - val ctrl_array = Module(new cu.ControlUnit(n, ctrl_width)) + val ctrl_array = Module(new cu.ControlUnit(n, sram_size)) ctrl_array.io.cbus_in := io.ctrl + val sarray = Module(new sa.SystolicArray2D(n, nbits)) + sarray.io.vec_a := io.vec_a + sarray.io.vec_b := io.vec_b + for (i <- 0 until n){ for (j <- 0 until n) { - // ==== OUTPUT ==== - // pe array's output mapped to the matrix position + pe_io(n * i + j).in_a := sarray.io.out_a(n * i + j) + pe_io(n * i + j).in_b := sarray.io.out_b(n * i + j) + pe_io(n * i + j).ctrl := ctrl_array.io.cbus_out(n * i + j) io.out(n * i + j) := pe_io(n * i + j).out - - // ==== INPUT ==== - // vertical - if (i==0) { - pe_io(j).in_b := io.vec_b(j) - } else { - pe_io(n * i + j).in_b := pe_reg_v(n * (i - 1) + j) - } - if (i < n - 1 && j < n) - pe_reg_v(n * i + j) := pe_io(n * i + j).in_b - - // horizontal - if (j==0) { - pe_io(n * i).in_a := io.vec_a(i) - } else { - pe_io(n * i + j).in_a := pe_reg_h((n - 1) * i + (j - 1)) - } - if (i < n && j < n - 1) - pe_reg_h((n - 1) * i + j) := pe_io(n * i + j).in_a - - // ==== CONTROL ==== - // Currently we only have one bit control - // which is `ACCUM` - // TODO: - // Add ALU control to pe elements - val ctrl = ctrl_array.io.cbus_out(n * i + j).asBools - pe_io(n * i + j).accum := ctrl(0) } } } \ No newline at end of file diff --git a/src/main/scala/ncore/sa/systolicArray.scala b/src/main/scala/ncore/sa/systolicArray.scala new file mode 100644 index 0000000..cd1213d --- /dev/null +++ b/src/main/scala/ncore/sa/systolicArray.scala @@ -0,0 +1,46 @@ +// See README.md for license details +package ncore.sa + +import chisel3._ + + +/** + * This is the neural core design + */ + class SystolicArray2D(val n: Int = 8, val nbits: Int = 8) extends Module { + val io = IO(new Bundle { + val vec_a = Input(Vec(n, UInt(nbits.W))) // vector `a` is the left input + val vec_b = Input(Vec(n, UInt(nbits.W))) // vector `b` is the top input + val out_a = Output(Vec(n * n, UInt(nbits.W))) + val out_b = Output(Vec(n * n, UInt(nbits.W))) + + }) + + // Create 2d register for horizontal & vertical + val reg_h = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W)))) + val reg_v = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W)))) + + for (i <- 0 until n){ + for (j <- 0 until n) { + + // ==== INPUT ==== + // vertical + if (i==0) { + io.out_b(j) := io.vec_b(j) + } else { + io.out_b(n * i + j) := reg_v(n * (i - 1) + j) + } + if (i < n - 1 && j < n) + reg_v(n * i + j) := io.out_b(n * i + j) + + // horizontal + if (j==0) { + io.out_a(n * i) := io.vec_a(i) + } else { + io.out_a(n * i + j) := reg_h((n - 1) * i + (j - 1)) + } + if (i < n && j < n - 1) + reg_h((n - 1) * i + j) := io.out_a(n * i + j) + } + } + } \ No newline at end of file diff --git a/src/main/scala/ncore/tcm/tightCpldMem.scala b/src/main/scala/ncore/tcm/tightCpldMem.scala deleted file mode 100644 index f8fea8a..0000000 --- a/src/main/scala/ncore/tcm/tightCpldMem.scala +++ /dev/null @@ -1,93 +0,0 @@ -// See README.md for license details. - -package ncore.tcm - -import chisel3._ -import chisel3.util._ -import isa._ - -class TCMCell(val nbits: Int = 8) extends Module { - val io = IO( - new Bundle { - val d_in = Input(UInt(nbits.W)) - val d_out = Output(UInt(nbits.W)) - val en_wr = Input(Bool()) - } - ) - - val reg = RegInit(0.U(nbits.W)) - io.d_out := reg - - when (io.en_wr) { - reg := io.d_in - } -} - -class TCMBlock(val n: Int = 8, - val size: Int = 4096, - val r_addr_width: Int = 12, - val w_addr_width: Int = 12, - val nbits: Int = 8 -) extends Module { - val io = IO( - new Bundle { - val d_in = Input(Vec(n * n, UInt(nbits.W))) - val d_out = Output(Vec(n * n, UInt(nbits.W))) - val r_addr = Input(Vec(n * n, UInt(r_addr_width.W))) - val w_addr = Input(Vec(n * n, UInt(w_addr_width.W))) - val en_wr = Input(Bool()) - } - ) - val cells_io = VecInit(Seq.fill(size) {Module(new TCMCell(nbits)).io}) - - for (i <- 0 until size) { - cells_io(i).en_wr := false.B.asTypeOf(cells_io(i).en_wr) - // Need to initialize all wires just in case of not selected. - cells_io(i).d_in := 0.U.asTypeOf(cells_io(i).d_in) - } - - //TODO: add range check - //TODO: add read & write conflict check - - for (i <- 0 until n * n) { - io.d_out(i) := cells_io(io.r_addr(i)).d_out - when (io.en_wr) { - cells_io(io.w_addr(i)).en_wr := io.en_wr - cells_io(io.w_addr(i)).d_in := io.d_in(i) - } - } -} - - -class DetachableTCM( - val n: Int = 8, - val size: Int = 4096, - val r_addr_width: Int = 12, - val w_addr_width: Int = 12, - val mlayout_width: Int = 6, -) extends Module { - val io = IO(new Bundle { - val d_in = Input(Vec(n * n, UInt(32.W))) - val d_out = Output(Vec(n * n, UInt(32.W))) - // read address will have channel selection for last 2 bits - val r_addr = Input(Vec(n * n, UInt((r_addr_width + 2).W))) - // write address will have channel selection for last 2 bits - val w_addr = Input(Vec(n * n, UInt((w_addr_width + 2).W))) - val mem_ch = Input(MemChannel()) - val mem_lo = Input(MemLayout()) - val en_wr = Input(Bool()) - }) - - switch (io.mem_lo) { - is (MemLayout.bit8) { - - } - is (MemLayout.bit16) { - - } - is (MemLayout.bit32) { - - } - } - -} \ No newline at end of file diff --git a/src/main/scala/npu/npu.scala b/src/main/scala/npu/npu.scala deleted file mode 100644 index 2fba8a3..0000000 --- a/src/main/scala/npu/npu.scala +++ /dev/null @@ -1,36 +0,0 @@ -package npu - -import chisel3._ -import java.nio.file.{Paths, Files} -import java.nio.charset.StandardCharsets -import circt.stage.ChiselStage -import ncore.pe.PE - -class NPU extends Module { - - val nbits: Int = 8 - val io = IO(new Bundle { - val in_a = Input(UInt(nbits.W)) - val in_b = Input(UInt(nbits.W)) - val accum = Input(Bool()) - val out = Output(UInt((nbits*2).W)) - }) - - val pe = Module(new PE(8)) - - // get value when ready - pe.io.in_a := io.in_a - pe.io.in_b := io.in_b - pe.io.accum := io.accum - io.out := pe.io.out -} - -object Main extends App { - // These lines generate the Verilog output - - val hdl = ChiselStage.emitSystemVerilog( - new NPU(), - firtoolOpts = Array("-disable-all-randomization", "-strip-debug-info") - ) - Files.write(Paths.get("npu.v"), hdl.getBytes(StandardCharsets.UTF_8)) -} diff --git a/src/main/scala/ncore/pe/procElem.scala b/src/main/scala/pe/procElem.scala similarity index 86% rename from src/main/scala/ncore/pe/procElem.scala rename to src/main/scala/pe/procElem.scala index bf88bcc..98e909d 100644 --- a/src/main/scala/ncore/pe/procElem.scala +++ b/src/main/scala/pe/procElem.scala @@ -1,8 +1,9 @@ // See README.md for license details. -package ncore.pe +package pe import chisel3._ +import isa.backend._ /** * processing element unit in npu design. @@ -11,7 +12,7 @@ import chisel3._ class PE(val nbits: Int = 8) extends Module { val io = IO( new Bundle { - val accum = Input(Bool()) + val ctrl = Input(new NCoreCUBundle()) val in_a = Input(UInt(nbits.W)) val in_b = Input(UInt(nbits.W)) // The register bandwith is optimized for large transformer @@ -22,7 +23,7 @@ class PE(val nbits: Int = 8) extends Module { val res = RegInit(0.U((nbits*2 + 12).W)) - when (io.accum) { + when (io.ctrl.accum) { res := res + (io.in_a * io.in_b) } .otherwise { res := (io.in_a * io.in_b) diff --git a/src/main/scala/sram/SRAM.scala b/src/main/scala/sram/SRAM.scala new file mode 100644 index 0000000..530c681 --- /dev/null +++ b/src/main/scala/sram/SRAM.scala @@ -0,0 +1,91 @@ +// See README.md for license details. + +package sram + +import chisel3._ +import chisel3.util._ + +class SRAMCell(val nbits: Int = 8) extends Module { + val io = IO( + new Bundle { + val d_in = Input(UInt(nbits.W)) + val d_out = Output(UInt(nbits.W)) + val en_wr = Input(Bool()) + } + ) + + val reg = RegInit(0.U(nbits.W)) + io.d_out := reg + + when (io.en_wr) { + reg := io.d_in + } +} + +class SRAMBlock(val n: Int = 8, + val size: Int = 4096, + val rd_ch_num: Int = 2, + val nbits: Int = 8 +) extends Module { + val io = IO( + new Bundle { + val d_in = Input(Vec(n * n, UInt(nbits.W))) + val d_out = Output(Vec(rd_ch_num, Vec(n * n, UInt(nbits.W)))) + val r_addr = Input(Vec(rd_ch_num, Vec(n * n, UInt(log2Ceil(size).W)))) + val w_addr = Input(Vec(n * n, UInt(log2Ceil(size).W))) + val en_wr = Input(Bool()) + } + ) + val cells_io = VecInit(Seq.fill(size) {Module(new SRAMCell(nbits)).io}) + + for (i <- 0 until size) { + cells_io(i).en_wr := false.B.asTypeOf(cells_io(i).en_wr) + // Need to initialize all wires just in case of not selected. + cells_io(i).d_in := 0.U.asTypeOf(cells_io(i).d_in) + } + + //TODO: add range check + //TODO: add read & write conflict check + + for (i <- 0 until n * n) { + for (k <- 0 until rd_ch_num) { + io.d_out(k)(i) := cells_io(io.r_addr(k)(i)).d_out + } + when (io.en_wr) { + cells_io(io.w_addr(i)).en_wr := io.en_wr + cells_io(io.w_addr(i)).d_in := io.d_in(i) + } + } +} + + +class SRAM( + val n: Int = 8, + val nblocks: Int = 4, + val size: Int = 4096, + val rd_ch_num: Int = 2, +) extends Module { + val io = IO(new Bundle { + val d_in = Input(Vec(n * n, Vec(nblocks, UInt(8.W)))) + val d_out = Output(Vec(rd_ch_num, Vec(n * n, Vec(nblocks, UInt(8.W))))) + val r_addr = Input(Vec(rd_ch_num, Vec(n * n, UInt(log2Ceil(size).W)))) + val w_addr = Input(Vec(n * n, UInt(log2Ceil(size).W))) + val en_wr = Input(Bool()) + }) + + val sram_blocks_io = VecInit(Seq.fill(nblocks) { + Module(new SRAMBlock(n, size, rd_ch_num, 8)).io}) + + for (i <- 0 until nblocks) { + sram_blocks_io(i).en_wr := io.en_wr + for (j <- 0 until n) { + for (k <- 0 until rd_ch_num) { + sram_blocks_io(i).r_addr(k)(j) := io.r_addr(k)(j) + } + sram_blocks_io(i).w_addr(j) := io.w_addr(j) + sram_blocks_io(i).d_in(j) := io.d_in(j)(i) + io.d_out(j)(i) := sram_blocks_io(i).d_out(j) + } + } + +} \ No newline at end of file diff --git a/src/main/scala/top/top.scala b/src/main/scala/top/top.scala new file mode 100644 index 0000000..40f93e7 --- /dev/null +++ b/src/main/scala/top/top.scala @@ -0,0 +1,18 @@ +package top + +import chisel3._ +import java.nio.file.{Paths, Files} +import java.nio.charset.StandardCharsets +import circt.stage.ChiselStage +import ncore._ + + +object Main extends App { + // These lines generate the Verilog output + + val hdl = ChiselStage.emitSystemVerilog( + new NeuralCore(), + firtoolOpts = Array("-disable-all-randomization", "-strip-debug-info") + ) + Files.write(Paths.get("top.v"), hdl.getBytes(StandardCharsets.UTF_8)) +} diff --git a/src/main/scala/vcore/mmu/memMngUnit.scala b/src/main/scala/vcore/mmu/memMngUnit.scala new file mode 100644 index 0000000..a53d2ae --- /dev/null +++ b/src/main/scala/vcore/mmu/memMngUnit.scala @@ -0,0 +1,84 @@ +// // See README.md for license details. + +// package ncore.mmu + +// import chisel3._ +// import chisel3.util._ +// import isa.backend._ +// import ncore._ + + +// class OffsetGenerator(val n: Int = 8) extends Module { +// val io = IO(new Bundle { +// val keep = Input(Vec(n, Bool())) +// val out = Output(Vec(n, UInt(log2Ceil(n * n).W))) +// }) +// val init_value = Seq.tabulate(n)(i => (n * i).U(log2Ceil(n * n).W)) +// val regs = RegInit(VecInit(init_value)) + +// for (i <- 0 until n){ +// when (io.keep(i)) { +// regs(i) := init_value(i) +// }.otherwise { +// regs(i) := (regs(i) + 1.U) % (n * n).U +// } +// io.out(i) := regs(i) +// } +// } + +// /** +// * This is the neural core design +// */ +// class MemoryManageUnit( +// val n: Int = 8, +// val size: Int = 4096 +// ) extends Module { +// val io = IO(new Bundle { +// val ctrl = Input(Vec(n * n, new MMUCtrlBundle(size))) +// val op_a = Output(Vec(n * n, UInt(log2Ceil(size).W))) +// val op_b = Output(Vec(n * n, UInt(log2Ceil(size).W))) +// val res = Output(Vec(n * n, UInt(log2Ceil(size).W))) +// }) + +// val offsetgen_a = new OffsetGenerator(n) +// val offsetgen_b = new OffsetGenerator(n) + +// // Create 2d register for horizontal & vertical +// val reg_h = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(log2Ceil(size).W)))) +// val reg_r = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(log2Ceil(size).W)))) +// val reg_v = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(log2Ceil(size).W)))) + +// for (i <- 0 until n){ +// for (j <- 0 until n) { +// offsetgen_a.io.keep(i) := io.ctrl(n * i).offset_keep +// offsetgen_b.io.keep(i) := io.ctrl(j).offset_kee +// // ==== INPUT ==== +// // vertical +// if (i==0) { +// when (io.ctrl(n * i + j).h_only) { +// io.op_b(j) := io.ctrl(n * i + j).in_addr + offsetgen_b.io.out(j) +// } .otherwise { +// io.op_b(j) := io.ctrl(n * i + j).in_addr + offsetgen_b.io.out(j) +// } +// } else { +// io.op_b(0)(n * i + j) := reg_v(n * (i - 1) + j) +// } +// if (i < n - 1 && j < n) +// reg_v(n * i + j) := io.op_b(n * i + j) + +// // horizontal & result +// if (j==0) { +// io.op_a(n * i) := io.ctrl(n * i + j).in_addr + offsetgen_a.io.out(i) +// io.res(n * i) := io.ctrl(n * i + j).out_addr + offsetgen_a.io.out(i) +// } else { +// io.op_a(n * i + j) := reg_h((n - 1) * i + (j - 1)) +// io.res(n * i + j) := reg_r((n - 1) * i + (j - 1)) +// } +// if (i < n && j < n - 1) { +// reg_h((n - 1) * i + j) := io.op_a(n * i + j) +// reg_r((n - 1) * i + j) := io.res(n * i + j) +// } +// } +// } + +// } \ No newline at end of file diff --git a/src/test/scala/ncore/CoreSpec.scala b/src/test/scala/ncore/CoreSpec.scala index c87341c..432a25f 100644 --- a/src/test/scala/ncore/CoreSpec.scala +++ b/src/test/scala/ncore/CoreSpec.scala @@ -11,8 +11,8 @@ import chisel3.experimental.BundleLiterals._ class CoreSpec extends AnyFlatSpec with ChiselScalatestTester { - "NeuralCoreforTest" should "do a normal matrix multiplication" in { - test(new NeuralCoreforTest(4, 8)) { dut => + "NeuralCore" should "do a normal matrix multiplication" in { + test(new NeuralCore(4, 8)) { dut => val print_helper = new testUtil.PrintHelper() val _n = dut.n val rand = new Random @@ -78,9 +78,9 @@ class CoreSpec extends AnyFlatSpec with ChiselScalatestTester { // The rest of the control signal will hand over // to a dedicated systolic-ish control bus if (i_tick < _n && i_tick >= 0) - dut.io.ctrl.poke(0x1) + dut.io.ctrl.accum.poke(0x1) else - dut.io.ctrl.poke(0x0) + dut.io.ctrl.accum.poke(0x0) // ideally, the array will give _n (diagnal) results per tick dut.clock.step() diff --git a/src/test/scala/ncore/cu/CUSpec.scala b/src/test/scala/ncore/cu/CUSpec.scala index b0863d4..920c846 100644 --- a/src/test/scala/ncore/cu/CUSpec.scala +++ b/src/test/scala/ncore/cu/CUSpec.scala @@ -19,19 +19,19 @@ class CUSpec extends AnyFlatSpec with ChiselScalatestTester { var history = new Array[Int](2 * _n - 1) var prod = 0 for (n <- 0 until 16) { - val _cbus_in = rand.between(0, 255) + val _cbus_in = rand.between(0, 2) history +:= _cbus_in - dut.io.cbus_in.poke(_cbus_in) + dut.io.cbus_in.accum.poke(_cbus_in) dut.clock.step() history = history.slice(0, 2 * _n - 1) println("Input tick @ " + n + ": " + _cbus_in) for(i: Int <- 0 until _n){ for(j:Int <- 0 until _n) { - dut.io.cbus_out(_n * i + j).expect(history(i + j)) + dut.io.cbus_out(_n * i + j).accum.expect(history(i + j)) } } println("Control tick @ " + n + " : ") - print_helper.printMatrixChisel(dut.io.cbus_out, _n) + // print_helper.printMatrixChisel(dut.io.cbus_out, _n) } } } diff --git a/src/test/scala/ncore/mmu/MMUSpec.scala b/src/test/scala/ncore/mmu/MMUSpec.scala new file mode 100644 index 0000000..4c71a10 --- /dev/null +++ b/src/test/scala/ncore/mmu/MMUSpec.scala @@ -0,0 +1,50 @@ +// //// See README.md for license details. + +// package ncore.mmu + +// import testUtil._ +// import scala.util.Random +// import chisel3._ +// import chiseltest._ +// import org.scalatest.flatspec.AnyFlatSpec +// import chisel3.experimental.BundleLiterals._ + +// class MMUSpec extends AnyFlatSpec with ChiselScalatestTester { + +// "OffsetGenerator" should "provide correct offset" in { +// test(new OffsetGenerator(4)) { dut => +// val print_helper = new testUtil.PrintHelper() +// val _n = dut.n +// val _array = List(List(false, false, false, false), +// List(true, false, false, false), +// List(true, true, false, false), +// List(true, true, true, false), +// List(false, true, true, true), +// List(true, false, true, true), +// List(false, true, false, true), +// List(false, false, true, false), +// List(false, false, false, true), +// ) +// val _expected = List(List(0, 4, 8, 12), +// List(1, 4, 8, 12), +// List(2, 5, 8, 12), +// List(3, 6, 9, 12), +// List(0, 7, 10, 13), +// List(1, 4, 11, 14), +// List(0, 5, 8, 15), +// List(0, 4, 9, 12), +// List(0, 4, 8, 13), +// ) +// for (i <- 0 until _array.length) { +// for (j <- 0 until _n) { +// dut.io.keep(j).poke(!_array(i%_array.length)(j)) +// } +// dut.clock.step() +// for (j <- 0 until _n) { +// dut.io.out(j).expect(_expected(i)(j)) +// } +// print_helper.printVectorChisel(dut.io.out, _n) +// } +// } +// } +// } \ No newline at end of file diff --git a/src/test/scala/ncore/pe/PESpec.scala b/src/test/scala/pe/PESpec.scala similarity index 93% rename from src/test/scala/ncore/pe/PESpec.scala rename to src/test/scala/pe/PESpec.scala index b08d114..eba2038 100644 --- a/src/test/scala/ncore/pe/PESpec.scala +++ b/src/test/scala/pe/PESpec.scala @@ -1,6 +1,6 @@ // See README.md for license details. -package ncore.pe +package pe import scala.util.Random import chisel3._ @@ -20,7 +20,7 @@ class PESpec extends AnyFlatSpec with ChiselScalatestTester { val _left_in_ = rand.between(0, 255) dut.io.in_a.poke(_top_in_) dut.io.in_b.poke(_left_in_) - dut.io.accum.poke(true) + dut.io.ctrl.accum.poke(true) dut.clock.step() prod = prod + _top_in_ * _left_in_ dut.io.out.expect(prod) @@ -32,7 +32,7 @@ class PESpec extends AnyFlatSpec with ChiselScalatestTester { var _left_in_ = rand.between(1, 255) dut.io.in_a.poke(_top_in_) dut.io.in_b.poke(_left_in_) - dut.io.accum.poke(false) + dut.io.ctrl.accum.poke(false) dut.clock.step() prod = prod + _top_in_ * _left_in_ dut.io.out.expect(prod) @@ -42,7 +42,7 @@ class PESpec extends AnyFlatSpec with ChiselScalatestTester { _left_in_ = rand.between(1, 255) dut.io.in_a.poke(_top_in_) dut.io.in_b.poke(_left_in_) - dut.io.accum.poke(true) + dut.io.ctrl.accum.poke(true) dut.clock.step() prod = prod + _top_in_ * _left_in_ dut.io.out.expect(prod) diff --git a/src/test/scala/ncore/tcm/TCMSpec.scala b/src/test/scala/sram/SRAMSpec.scala similarity index 51% rename from src/test/scala/ncore/tcm/TCMSpec.scala rename to src/test/scala/sram/SRAMSpec.scala index 01bc2b5..2d8464b 100644 --- a/src/test/scala/ncore/tcm/TCMSpec.scala +++ b/src/test/scala/sram/SRAMSpec.scala @@ -1,6 +1,6 @@ // See README.md for license details. -package ncore.tcm +package sram import scala.util.Random import chisel3._ @@ -10,10 +10,10 @@ import org.scalatest.flatspec.AnyFlatSpec import chisel3.experimental.BundleLiterals._ -class TCMSpec extends AnyFlatSpec with ChiselScalatestTester { +class SRAMSpec extends AnyFlatSpec with ChiselScalatestTester { - "TCM Cells" should "write on signal" in { - test(new TCMCell(8)) { dut => + "SRAM Cells" should "write on signal" in { + test(new SRAMCell(8)) { dut => val rand = new Random var _prev = 0 for (i <- 0 until 10) { @@ -29,8 +29,8 @@ class TCMSpec extends AnyFlatSpec with ChiselScalatestTester { } } - "TCM Block" should "write on signal and read anytime" in { - test(new TCMBlock(3, 192)) { dut => + "SRAM Block" should "write on signal and read anytime" in { + test(new SRAMBlock(3, 192, 1)) { dut => val _n = dut.n val _cells = dut.size val rand = new Random @@ -46,20 +46,20 @@ class TCMSpec extends AnyFlatSpec with ChiselScalatestTester { dut.io.en_wr.poke(true) dut.clock.step() for (i <- 0 until _n * _n) { - dut.io.r_addr(i).poke(_in_addr(i)) + dut.io.r_addr(0)(i).poke(_in_addr(i)) } for (i <- 0 until _n * _n){ - dut.io.d_out(i).expect(_in_data(i)) + dut.io.d_out(0)(i).expect(_in_data(i)) } println("Result tick @ " + _i + ": ") print_helper.printMatrix(_in_data, _n) - print_helper.printMatrixChisel(dut.io.d_out, _n) + print_helper.printMatrixChisel(dut.io.d_out(0), _n) } } } - "TCM Block" should "read anytime" in { - test(new TCMBlock(2, 64)) { dut => + "SRAM Block" should "read anytime" in { + test(new SRAMBlock(2, 64, 1)) { dut => val _n = dut.n val _cells = dut.size val rand = new Random @@ -81,16 +81,60 @@ class TCMSpec extends AnyFlatSpec with ChiselScalatestTester { val _r_addr = rand.shuffle((0 until _cells).toList).take(_n * _n) val _expected = new Array[Int](_n * _n) for (i <- 0 until _n * _n) { - dut.io.r_addr(i).poke(_r_addr(i)) + dut.io.r_addr(0)(i).poke(_r_addr(i)) } for (i <- 0 until _n * _n) { _expected(i) = _data(_r_addr(i)) } println("Result tick @ " + _i + ": ") print_helper.printMatrix(_expected, _n) - print_helper.printMatrixChisel(dut.io.d_out, _n) + print_helper.printMatrixChisel(dut.io.d_out(0), _n) for (i <- 0 until _n * _n){ - dut.io.d_out(i).expect(_data(_r_addr(i))) + dut.io.d_out(0)(i).expect(_data(_r_addr(i))) + } + } + } + } + + "SRAM Block" should "read anytime on different channels" in { + test(new SRAMBlock(2, 64, 2)) { dut => + val _n = dut.n + val _cells = dut.size + val _rd_ch_num = dut.rd_ch_num + val rand = new Random + val print_helper = new testUtil.PrintHelper() + val _data = new Array[Int](_cells) + for (_i <- 0 until 10) { + val _in_data = new Array[Int](_rd_ch_num * _n * _n) + val _in_addr = rand.shuffle((0 until _cells).toList).take(_rd_ch_num * _n * _n) + for (k <- 0 until _rd_ch_num){ + for (i <- 0 until _n * _n) { + val _ind = k * _n * _n + i + _in_data(_ind) = rand.between(0, 255) + dut.io.d_in(i).poke(_in_data(_ind)) + dut.io.w_addr(i).poke(_in_addr(_ind)) + _data(_in_addr(_ind)) = _in_data(_ind) + } + dut.io.en_wr.poke(true) + dut.clock.step() + } + } + for(_i <- 0 until 10){ + val _r_addr = rand.shuffle((0 until _cells).toList).take(_rd_ch_num * _n * _n) + val _expected = new Array[Int](_rd_ch_num * _n * _n) + for (k <- 0 until _rd_ch_num){ + for (i <- 0 until _n * _n) { + val _ind = k * _n * _n + i + dut.io.r_addr(k)(i).poke(_r_addr(_ind)) + _expected(_ind) = _data(_r_addr(_ind)) + } + } + println("Result tick @ " + _i + ": ") + for (k <- 0 until _rd_ch_num){ + for (i <- 0 until _n * _n){ + val _ind = k * _n * _n + i + dut.io.d_out(k)(i).expect(_data(_r_addr(_ind))) + } } } } diff --git a/src/test/scala/utils/printHelper.scala b/src/test/scala/utils/printHelper.scala index c520bda..cf71ece 100644 --- a/src/test/scala/utils/printHelper.scala +++ b/src/test/scala/utils/printHelper.scala @@ -28,4 +28,12 @@ class PrintHelper(){ } println("]") } + + def printVectorChisel(vec: chisel3.Vec[chisel3.UInt], n: Int): Unit = { + var _row = "" + for (i <- 0 until n) { + _row += vec(i).peekInt().toString() + ", " + } + println("[" + _row + "]") + } } \ No newline at end of file