Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

narch proposal #1

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ on:
- main
- releases/**
pull_request:
types:
- opened
branches:
- main
- 'releases/**'

jobs:
Lint:
Expand Down
9 changes: 9 additions & 0 deletions src/main/scala/isa/backend/controlMicroCode.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// See README.md for license details.

package isa.backend
import chisel3._
import chisel3.util._

/**
 * Control word carried on the neural-core control bus.
 *
 * @param size constructor parameter retained as part of the public signature;
 *             no field declared in this bundle depends on it
 */
class NCoreCUBundle(val size: Int = 4096) extends Bundle {
  // Single-bit control flag.
  // NOTE(review): presumably selects accumulation in the PE — confirm against the PE definition.
  val accum: Bool = Bool()
}
28 changes: 28 additions & 0 deletions src/main/scala/isa/backend/memMicroCode.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// See README.md for license details.

package isa.backend
import chisel3._
import chisel3.util._

/**
 * Memory layout selector with three encodings (0, 1, 2).
 *
 * NOTE(review): the names suggest 8-, 16- and 32-bit element layouts — confirm
 * against the memory unit that consumes this enum.
 */
object MemLayout extends ChiselEnum {
  val bit8  = Value(0.U)
  val bit16 = Value(1.U)
  val bit32 = Value(2.U)
}

/**
 * Memory channel selector with four encodings (0 through 3).
 *
 * Per the notes on each value, only `ch0` is present in every layout.
 */
object MemChannel extends ChiselEnum {
  val ch0 = Value(0.U)
  // ch1 does not exist for the 16-bit and 32-bit layouts
  val ch1 = Value(1.U)
  // ch2 does not exist for the 32-bit layout
  val ch2 = Value(2.U)
  // ch3 does not exist for the 16-bit and 32-bit layouts
  val ch3 = Value(3.U)
}

/**
 * Control bundle for the memory unit: two flags plus one input address and
 * one output address per element of an `n` x `n` grid.
 *
 * @param n    grid dimension; each address vector holds `n * n` entries
 * @param size number of addressable entries; every address is
 *             `log2Ceil(size)` bits wide
 */
class MMUCtrlBundle(val n: Int = 8, val size: Int = 4096) extends Bundle {
  // Elaboration-time constants (plain Ints, so they are not bundle fields).
  private val addrBits: Int = log2Ceil(size)
  private val lanes: Int = n * n

  // NOTE(review): the meaning of the two flags is not visible here — confirm with the consumer.
  val offset_keep = Bool()
  val h_only = Bool()

  val in_addr = Vec(lanes, UInt(addrBits.W))
  val out_addr = Vec(lanes, UInt(addrBits.W))
}
10 changes: 9 additions & 1 deletion src/main/scala/isa/instSetArch.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,12 @@ object NeuralISA extends ChiselEnum {
val st = Value(0x2.U(4.W))
val mma = Value(0x3.U(4.W))
val ip = Value (0x4.U(4.W))
}
}

/**
 * Data type selector with four encodings (0 through 3).
 *
 * NOTE(review): names suggest unsigned integer, signed integer, floating point
 * and a "bfp" format — confirm the exact numeric formats with the ISA document.
 */
object DType extends ChiselEnum {
  val uint = Value(0.U)
  val int  = Value(1.U)
  val fp   = Value(2.U)
  // Original author's note: no bfp32c0
  val bfp  = Value(3.U)
}
44 changes: 0 additions & 44 deletions src/main/scala/isa/memMicroCode.scala

This file was deleted.

27 changes: 26 additions & 1 deletion src/main/scala/ncore/cu/controlUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,43 @@
package ncore.cu

import chisel3._
import isa.backend._

/**
* Control unit also uses systolic array to pass instructions
*/
class ControlUnit(val n: Int = 8, val ctrl_width: Int = 8) extends Module {
/**
 * Test variant of the control unit that carries a raw `UInt` control word.
 *
 * The control word is shifted through a chain of `2 * n - 1` registers, one
 * stage per clock. Grid position (row, col) observes stage `row + col`, so all
 * positions on the same anti-diagonal see the same control word.
 *
 * @param n          grid dimension; `cbus_out` has `n * n` entries
 * @param ctrl_width bit width of the control word
 */
class ControlUnitforTest(val n: Int = 8, val ctrl_width: Int = 8) extends Module {
  val io = IO(new Bundle {
    val cbus_in  = Input(UInt(ctrl_width.W))
    val cbus_out = Output(Vec(n * n, UInt(ctrl_width.W)))
  })

  // Number of shift stages: one per anti-diagonal of the n x n grid.
  private val stages: Int = 2 * n - 1

  // Shift register holding the control word for each diagonal; resets to zero.
  val reg = RegInit(VecInit(Seq.fill(stages)(0.U(ctrl_width.W))))

  // 1D systolic chain: stage 0 samples the input, every later stage samples its predecessor.
  reg(0) := io.cbus_in
  (1 until stages).foreach { s =>
    reg(s) := reg(s - 1)
  }

  // Broadcast each stage to every grid position on its diagonal.
  for (row <- 0 until n; col <- 0 until n) {
    io.cbus_out(n * row + col) := reg(row + col)
  }
}

/**
* Control unit also uses systolic array to pass instructions
*/
class ControlUnit(val n: Int = 8, val sram_size: Int = 4096) extends Module {
val io = IO(new Bundle {
val cbus_in = Input(new NCoreCUBundle(sram_size))
val cbus_out = Output(Vec(n * n, new NCoreCUBundle(sram_size)))
})
// Assign each element the control signal of its diagonal
val reg = RegInit(VecInit(Seq.fill(2*n-1)(0.U.asTypeOf(new NCoreCUBundle(sram_size)))))

// 1D systolic array for control
reg(0) := io.cbus_in
for(i<- 1 until 2*n-1){
Expand Down
48 changes: 13 additions & 35 deletions src/main/scala/ncore/neuralCore.scala
Original file line number Diff line number Diff line change
@@ -1,63 +1,41 @@
// See README.md for license details
package ncore
import isa.backend._
import pe._

import chisel3._


/**
* This is the neural core design
*/
class NeuralCoreforTest(val n: Int = 8, val nbits: Int = 8, val ctrl_width: Int = 8) extends Module {
class NeuralCore(val n: Int = 8, val nbits: Int = 8, val sram_size: Int = 4096) extends Module {
val io = IO(new Bundle {
val vec_a = Input(Vec(n, UInt(nbits.W))) // vector `a` is the left input
val vec_b = Input(Vec(n, UInt(nbits.W))) // vector `b` is the top input
val ctrl = Input(UInt(ctrl_width.W))
val ctrl = Input(new NCoreCUBundle())
val out = Output(Vec(n * n, UInt((2 * nbits + 12).W)))
})

// Create n x n pe blocks
val pe_io = VecInit(Seq.fill(n * n) {Module(new pe.PE(nbits)).io})
// Create 2d register for horizontal & vertical
val pe_reg_h = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W))))
val pe_reg_v = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W))))

// We use a systolic array to pipeline the instructions.
// This avoids bubbles and instruction complexity,
// while simplifying the design and improving efficiency.
val ctrl_array = Module(new cu.ControlUnit(n, ctrl_width))
val ctrl_array = Module(new cu.ControlUnit(n, sram_size))
ctrl_array.io.cbus_in := io.ctrl

val sarray = Module(new sa.SystolicArray2D(n, nbits))
sarray.io.vec_a := io.vec_a
sarray.io.vec_b := io.vec_b

for (i <- 0 until n){
for (j <- 0 until n) {
// ==== OUTPUT ====
// pe array's output mapped to the matrix position
pe_io(n * i + j).in_a := sarray.io.out_a(n * i + j)
pe_io(n * i + j).in_b := sarray.io.out_b(n * i + j)
pe_io(n * i + j).ctrl := ctrl_array.io.cbus_out(n * i + j)
io.out(n * i + j) := pe_io(n * i + j).out

// ==== INPUT ====
// vertical
if (i==0) {
pe_io(j).in_b := io.vec_b(j)
} else {
pe_io(n * i + j).in_b := pe_reg_v(n * (i - 1) + j)
}
if (i < n - 1 && j < n)
pe_reg_v(n * i + j) := pe_io(n * i + j).in_b

// horizontal
if (j==0) {
pe_io(n * i).in_a := io.vec_a(i)
} else {
pe_io(n * i + j).in_a := pe_reg_h((n - 1) * i + (j - 1))
}
if (i < n && j < n - 1)
pe_reg_h((n - 1) * i + j) := pe_io(n * i + j).in_a

// ==== CONTROL ====
// Currently we only have one bit control
// which is `ACCUM`
// TODO:
// Add ALU control to pe elements
val ctrl = ctrl_array.io.cbus_out(n * i + j).asBools
pe_io(n * i + j).accum := ctrl(0)
}
}
}
46 changes: 46 additions & 0 deletions src/main/scala/ncore/sa/systolicArray.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// See README.md for license details
package ncore.sa

import chisel3._


/**
 * Data-movement fabric of an `n` x `n` systolic array.
 *
 * `vec_a` enters at column 0 of every row and advances one column per clock;
 * `vec_b` enters at row 0 of every column and advances one row per clock.
 * The value visible at grid position (row, col) is exposed at flat index
 * `n * row + col` of `out_a` (horizontal stream) and `out_b` (vertical stream).
 * All pipeline registers reset to zero.
 *
 * @param n     grid dimension
 * @param nbits bit width of each data element
 */
class SystolicArray2D(val n: Int = 8, val nbits: Int = 8) extends Module {
  val io = IO(new Bundle {
    val vec_a = Input(Vec(n, UInt(nbits.W)))      // left-side input, one element per row
    val vec_b = Input(Vec(n, UInt(nbits.W)))      // top-side input, one element per column
    val out_a = Output(Vec(n * n, UInt(nbits.W))) // horizontal stream at every grid position
    val out_b = Output(Vec(n * n, UInt(nbits.W))) // vertical stream at every grid position
  })

  // Pipeline registers between neighbouring positions:
  //   reg_h: n rows x (n - 1) columns, indexed (n - 1) * row + col
  //   reg_v: (n - 1) rows x n columns, indexed n * row + col
  val reg_h = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W))))
  val reg_v = RegInit(VecInit(Seq.fill((n - 1) * n)(0.U(nbits.W))))

  for (row <- 0 until n; col <- 0 until n) {
    val cell = n * row + col

    // ---- vertical stream ----
    // Top row takes the external input; every other row takes the register
    // written by the position directly above (flat index cell - n).
    io.out_b(cell) := (if (row == 0) io.vec_b(col) else reg_v(cell - n))
    // Every row except the last forwards its value downwards.
    if (row != n - 1) {
      reg_v(cell) := io.out_b(cell)
    }

    // ---- horizontal stream ----
    val hslot = (n - 1) * row + col
    // Leftmost column takes the external input; every other column takes the
    // register written by the position directly to its left (slot hslot - 1).
    io.out_a(cell) := (if (col == 0) io.vec_a(row) else reg_h(hslot - 1))
    // Every column except the last forwards its value to the right.
    if (col != n - 1) {
      reg_h(hslot) := io.out_a(cell)
    }
  }
}
93 changes: 0 additions & 93 deletions src/main/scala/ncore/tcm/tightCpldMem.scala

This file was deleted.

Loading