Skip to content

Commit

Permalink
clean code
Browse files Browse the repository at this point in the history
  • Loading branch information
mpskex committed Apr 6, 2024
1 parent 9b12a92 commit e3c9217
Show file tree
Hide file tree
Showing 16 changed files with 287 additions and 275 deletions.
9 changes: 9 additions & 0 deletions src/main/scala/isa/backend/controlMicroCode.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// See README.md for license details.

package isa.backend
import chisel3._
import chisel3.util._

/**
 * Control word delivered to every processing element of the neural core.
 *
 * @param size carried as a public constructor value; no field of this bundle
 *             depends on it (callers in this project pass the SRAM size)
 */
class NCoreCUBundle(val size: Int = 4096) extends Bundle {
  // When asserted, the PE adds the new product to its running result;
  // when deasserted, the running result is overwritten by the new product.
  val accum = Bool()
}
7 changes: 7 additions & 0 deletions src/main/scala/isa/backend/memMicroCode.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,10 @@ object MemChannel extends ChiselEnum {
// 16/32 bits will have no ch3
val ch3 = Value(0x3.U)
}

/**
 * Control bundle carrying two flags and per-element input/output addresses.
 *
 * @param n    side length of the array; `n * n` addresses are carried in each direction
 * @param size number of addressable entries; every address is `log2Ceil(size)` bits wide
 */
class MMUCtrlBundle(val n: Int = 8, val size: Int = 4096) extends Bundle {
  // Plain Scala Int (not hardware), shared by both address vectors.
  private val addrBits = log2Ceil(size)

  // NOTE(review): the exact meaning of these two flags is not visible from this
  // file; confirm against the unit that consumes the bundle.
  val offset_keep = Bool()
  val h_only = Bool()

  // One address lane per array element, for the input and output side respectively.
  val in_addr = Vec(n * n, UInt(addrBits.W))
  val out_addr = Vec(n * n, UInt(addrBits.W))
}
27 changes: 26 additions & 1 deletion src/main/scala/ncore/cu/controlUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,43 @@
package ncore.cu

import chisel3._
import isa.backend._

/**
* Control unit also uses systolic array to pass instructions
*/
class ControlUnit(val n: Int = 8, val ctrl_width: Int = 8) extends Module {
class ControlUnitforTest(val n: Int = 8, val ctrl_width: Int = 8) extends Module {
  val io = IO(new Bundle {
    val cbus_in = Input(UInt(ctrl_width.W))
    val cbus_out = Output(Vec(n * n, UInt(ctrl_width.W)))
  })

  // One pipeline stage per anti-diagonal of the n x n grid (2n - 1 in total),
  // all reset to zero.
  private val stages = 2 * n - 1
  val reg = RegInit(VecInit(Seq.fill(stages)(0.U(ctrl_width.W))))

  // 1D shift chain: the incoming control word enters stage 0 and moves one
  // stage further on every clock.
  reg(0) := io.cbus_in
  (1 until stages).foreach { k => reg(k) := reg(k - 1) }

  // Broadcast: element (row, col) observes the stage of its anti-diagonal,
  // so elements with equal row + col see the same control word.
  for {
    row <- 0 until n
    col <- 0 until n
  } io.cbus_out(n * row + col) := reg(row + col)
}

/**
* Control unit also uses systolic array to pass instructions
*/
class ControlUnit(val n: Int = 8, val sram_size: Int = 4096) extends Module {
val io = IO(new Bundle {
val cbus_in = Input(new NCoreCUBundle(sram_size))
val cbus_out = Output(Vec(n * n, new NCoreCUBundle(sram_size)))
})
// Assign each element its diagonal control signal
val reg = RegInit(VecInit(Seq.fill(2*n-1)(0.U.asTypeOf(new NCoreCUBundle(sram_size)))))

// 1D systolic array for control
reg(0) := io.cbus_in
for(i<- 1 until 2*n-1){
Expand Down
120 changes: 0 additions & 120 deletions src/main/scala/ncore/mmu/memMngUnit.scala

This file was deleted.

48 changes: 13 additions & 35 deletions src/main/scala/ncore/neuralCore.scala
Original file line number Diff line number Diff line change
@@ -1,63 +1,41 @@
// See README.md for license details
package ncore
import isa.backend._
import pe._

import chisel3._


/**
* This is the neural core design
*/
class NeuralCoreforTest(val n: Int = 8, val nbits: Int = 8, val ctrl_width: Int = 8) extends Module {
class NeuralCore(val n: Int = 8, val nbits: Int = 8, val sram_size: Int = 4096) extends Module {
val io = IO(new Bundle {
val vec_a = Input(Vec(n, UInt(nbits.W))) // vector `a` is the left input
val vec_b = Input(Vec(n, UInt(nbits.W))) // vector `b` is the top input
val ctrl = Input(UInt(ctrl_width.W))
val ctrl = Input(new NCoreCUBundle())
val out = Output(Vec(n * n, UInt((2 * nbits + 12).W)))
})

// Create n x n pe blocks
val pe_io = VecInit(Seq.fill(n * n) {Module(new pe.PE(nbits)).io})
// Create 2d register for horizontal & vertical
val pe_reg_h = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W))))
val pe_reg_v = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W))))

// we use systolic array to pipeline the instructions
// this will avoid bubble and inst complexity
// while simplifying design with higher efficiency
val ctrl_array = Module(new cu.ControlUnit(n, ctrl_width))
val ctrl_array = Module(new cu.ControlUnit(n, sram_size))
ctrl_array.io.cbus_in := io.ctrl

val sarray = Module(new sa.SystolicArray2D(n, nbits))
sarray.io.vec_a := io.vec_a
sarray.io.vec_b := io.vec_b

for (i <- 0 until n){
for (j <- 0 until n) {
// ==== OUTPUT ====
// pe array's output mapped to the matrix position
pe_io(n * i + j).in_a := sarray.io.out_a(n * i + j)
pe_io(n * i + j).in_b := sarray.io.out_b(n * i + j)
pe_io(n * i + j).ctrl := ctrl_array.io.cbus_out(n * i + j)
io.out(n * i + j) := pe_io(n * i + j).out

// ==== INPUT ====
// vertical
if (i==0) {
pe_io(j).in_b := io.vec_b(j)
} else {
pe_io(n * i + j).in_b := pe_reg_v(n * (i - 1) + j)
}
if (i < n - 1 && j < n)
pe_reg_v(n * i + j) := pe_io(n * i + j).in_b

// horizontal
if (j==0) {
pe_io(n * i).in_a := io.vec_a(i)
} else {
pe_io(n * i + j).in_a := pe_reg_h((n - 1) * i + (j - 1))
}
if (i < n && j < n - 1)
pe_reg_h((n - 1) * i + j) := pe_io(n * i + j).in_a

// ==== CONTROL ====
// Currently we only have one bit control
// which is `ACCUM`
// TODO:
// Add ALU control to pe elements
val ctrl = ctrl_array.io.cbus_out(n * i + j).asBools
pe_io(n * i + j).accum := ctrl(0)
}
}
}
46 changes: 46 additions & 0 deletions src/main/scala/ncore/sa/systolicArray.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// See README.md for license details
package ncore.sa

import chisel3._


/**
* 2D systolic data network: forwards `vec_a` along rows and `vec_b` down columns, one register per hop
*/
class SystolicArray2D(val n: Int = 8, val nbits: Int = 8) extends Module {
  val io = IO(new Bundle {
    val vec_a = Input(Vec(n, UInt(nbits.W)))       // vector `a` enters from the left
    val vec_b = Input(Vec(n, UInt(nbits.W)))       // vector `b` enters from the top
    val out_a = Output(Vec(n * n, UInt(nbits.W)))  // per-element view of `a`, row-major
    val out_b = Output(Vec(n * n, UInt(nbits.W)))  // per-element view of `b`, row-major
  })

  // Hop registers, reset to zero: n rows of (n - 1) horizontal hops and
  // (n - 1) rows of n vertical hops.
  val reg_h = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W))))
  val reg_v = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W))))

  for {
    row <- 0 until n
    col <- 0 until n
  } {
    // Row-major position of this element in the flattened outputs.
    val cell = n * row + col

    // ---- vertical path: `b` travels down the column ----
    // The top row is fed directly; every other row reads the register written
    // by the element above it (index cell - n == n * (row - 1) + col).
    io.out_b(cell) := (if (row == 0) io.vec_b(col) else reg_v(cell - n))
    // Every row except the last latches its value for the row below.
    if (row < n - 1) reg_v(cell) := io.out_b(cell)

    // ---- horizontal path: `a` travels along the row ----
    // Each row owns (n - 1) horizontal registers, hence the (n - 1) stride.
    val hSlot = (n - 1) * row + col
    // The left column is fed directly; every other column reads the register
    // written by its left neighbour.
    io.out_a(cell) := (if (col == 0) io.vec_a(row) else reg_h(hSlot - 1))
    // Every column except the last latches its value for the column to the right.
    if (col < n - 1) reg_h(hSlot) := io.out_a(cell)
  }
}
36 changes: 0 additions & 36 deletions src/main/scala/npu/npu.scala

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
// See README.md for license details.

package ncore.pe
package pe

import chisel3._
import isa.backend._

/**
* processing element unit in npu design.
Expand All @@ -11,7 +12,7 @@ import chisel3._
class PE(val nbits: Int = 8) extends Module {
val io = IO(
new Bundle {
val accum = Input(Bool())
val ctrl = Input(new NCoreCUBundle())
val in_a = Input(UInt(nbits.W))
val in_b = Input(UInt(nbits.W))
// The register bandwidth is optimized for large transformer
Expand All @@ -22,7 +23,7 @@ class PE(val nbits: Int = 8) extends Module {

val res = RegInit(0.U((nbits*2 + 12).W))

when (io.accum) {
when (io.ctrl.accum) {
res := res + (io.in_a * io.in_b)
} .otherwise {
res := (io.in_a * io.in_b)
Expand Down
Loading

0 comments on commit e3c9217

Please sign in to comment.