From 9e480e07ad9f1602fb2022a5cc142bf298e0cd6e Mon Sep 17 00:00:00 2001 From: Martijn Bastiaan Date: Mon, 4 Mar 2024 17:00:09 +0100 Subject: [PATCH 1/2] Handle inputs and outputs in correct order Eliminates the need for prepending signals with dummy elements Fixes #1 --- clash-vexriscv-sim/clash-vexriscv-sim.cabal | 2 + clash-vexriscv-sim/src/Utils/Cpu.hs | 11 - clash-vexriscv/src/VexRiscv.hs | 229 ++++++++++++-------- clash-vexriscv/src/VexRiscv/FFI.hsc | 80 ++++--- clash-vexriscv/src/ffi/impl.cpp | 154 +++++++++---- clash-vexriscv/src/ffi/interface.h | 39 +--- 6 files changed, 303 insertions(+), 212 deletions(-) diff --git a/clash-vexriscv-sim/clash-vexriscv-sim.cabal b/clash-vexriscv-sim/clash-vexriscv-sim.cabal index ee6504f..fe75768 100644 --- a/clash-vexriscv-sim/clash-vexriscv-sim.cabal +++ b/clash-vexriscv-sim/clash-vexriscv-sim.cabal @@ -120,6 +120,8 @@ test-suite unittests default-language: Haskell2010 hs-source-dirs: tests type: exitcode-stdio-1.0 + -- TODO: enable parallel tests: + -- ghc-options: -threaded -rtsopts -with-rtsopts=-N ghc-options: -threaded main-is: tests.hs build-depends: diff --git a/clash-vexriscv-sim/src/Utils/Cpu.hs b/clash-vexriscv-sim/src/Utils/Cpu.hs index e887d6c..325492c 100644 --- a/clash-vexriscv-sim/src/Utils/Cpu.hs +++ b/clash-vexriscv-sim/src/Utils/Cpu.hs @@ -18,17 +18,6 @@ import GHC.Stack (HasCallStack) import Utils.ProgramLoad (Memory) import Utils.Interconnect (interconnectTwo) -emptyInput :: Input -emptyInput = - Input - { timerInterrupt = low, - externalInterrupt = low, - softwareInterrupt = low, - iBusWbS2M = (emptyWishboneS2M @(BitVector 32)) {readData = 0}, - dBusWbS2M = (emptyWishboneS2M @(BitVector 32)) {readData = 0} - } - - {- Address space diff --git a/clash-vexriscv/src/VexRiscv.hs b/clash-vexriscv/src/VexRiscv.hs index 24a7154..16c2963 100644 --- a/clash-vexriscv/src/VexRiscv.hs +++ b/clash-vexriscv/src/VexRiscv.hs @@ -1,4 +1,4 @@ --- SPDX-FileCopyrightText: 2022-2023 Google LLC +-- SPDX-FileCopyrightText: 
2022-2024 Google LLC -- -- SPDX-License-Identifier: Apache-2.0 @@ -16,17 +16,23 @@ import Clash.Prelude import Clash.Annotations.Primitive import Clash.Signal.Internal +import Data.Bifunctor (first) import Data.String.Interpolate (__i) +import Data.Word (Word64) +import Foreign (Ptr) import Foreign.Marshal (alloca) import Foreign.Storable import GHC.IO (unsafePerformIO) import GHC.Stack (HasCallStack) import Language.Haskell.TH.Syntax import Protocols.Wishbone + +import VexRiscv.ClockTicks import VexRiscv.FFI import VexRiscv.TH import VexRiscv.VecToTuple +import qualified VexRiscv.FFI as FFI data Input = Input { timerInterrupt :: "TIMER_INTERRUPT" ::: Bit @@ -43,48 +49,6 @@ data Output = Output } deriving (Generic, NFDataX, ShowX, Eq, BitPack) -inputToFFI :: Bool -> Input -> INPUT -inputToFFI reset Input {..} = - INPUT - { reset = boolToBit reset - , timerInterrupt - , externalInterrupt - , softwareInterrupt - , iBusWishbone_ACK = boolToBit $ acknowledge iBusWbS2M - , iBusWishbone_DAT_MISO = unpack $ readData iBusWbS2M - , iBusWishbone_ERR = boolToBit $ err iBusWbS2M - , dBusWishbone_ACK = boolToBit $ acknowledge dBusWbS2M - , dBusWishbone_DAT_MISO = unpack $ readData dBusWbS2M - , dBusWishbone_ERR = boolToBit $ err dBusWbS2M - } - -outputFromFFI :: OUTPUT -> Output -outputFromFFI OUTPUT {..} = - Output - { iBusWbM2S = - (emptyWishboneM2S @30 @(BitVector 32)) - { busCycle = bitToBool iBusWishbone_CYC, - strobe = bitToBool iBusWishbone_STB, - writeEnable = bitToBool iBusWishbone_WE, - addr = truncateB $ pack iBusWishbone_ADR, - writeData = pack iBusWishbone_DAT_MOSI, - busSelect = resize $ pack iBusWishbone_SEL, - cycleTypeIdentifier = unpack $ resize $ pack iBusWishbone_CTI, - burstTypeExtension = unpack $ resize $ pack iBusWishbone_BTE - }, - dBusWbM2S = - (emptyWishboneM2S @30 @(BitVector 32)) - { busCycle = bitToBool dBusWishbone_CYC, - strobe = bitToBool dBusWishbone_STB, - writeEnable = bitToBool dBusWishbone_WE, - addr = truncateB $ pack dBusWishbone_ADR, - 
writeData = pack dBusWishbone_DAT_MOSI, - busSelect = resize $ pack dBusWishbone_SEL, - cycleTypeIdentifier = unpack $ resize $ pack dBusWishbone_CTI, - burstTypeExtension = unpack $ resize $ pack dBusWishbone_BTE - } - } - -- When passing S2M values from Haskell to VexRiscv over the FFI, undefined -- bits/values cause errors when forcing their evaluation to something that can -- be passed through the FFI. @@ -127,9 +91,7 @@ vexRiscv input = where (unbundle -> (timerInterrupt, externalInterrupt, softwareInterrupt, iBusS2M, dBusS2M)) - -- A hack that enables us to both generate synthesizable HDL and simulate vexRisc in Haskell/Clash - = (<$> if clashSimulation then unpack 0 :- input else input) - $ \(Input a b c d e) -> (a, b, c, d, e) + = (\(Input a b c d e) -> (a, b, c, d, e)) <$> input (unbundle -> (iBus_DAT_MISO, iBus_ACK, iBus_ERR)) = (\(WishboneS2M a b c _ _) -> (a, b, c)) @@ -222,7 +184,7 @@ vexRiscv# , Signal dom (BitVector 3) -- ^ dBus_CTI , Signal dom (BitVector 2) -- ^ dBus_BTE ) -vexRiscv# !_sourcePath !_clk rst0 +vexRiscv# !_sourcePath clk rst0 timerInterrupt externalInterrupt softwareInterrupt @@ -236,48 +198,98 @@ vexRiscv# !_sourcePath !_clk rst0 = let - iBusS2M = WishboneS2M <$> iBus_DAT_MISO <*> iBus_ACK <*> iBus_ERR <*> pure False <*> pure False - dBusS2M = WishboneS2M <$> dBus_DAT_MISO <*> dBus_ACK <*> dBus_ERR <*> pure False <*> pure False - - input = Input <$> timerInterrupt <*> externalInterrupt <*> softwareInterrupt <*> iBusS2M <*> dBusS2M - - output = unsafePerformIO $ do - (step, _) <- vexCPU - pure $ go step (unsafeFromReset rst0) input - - (unbundle -> (iBusM2S, dBusM2S)) = (<$> output) $ \(Output iBus dBus) -> (iBus, dBus) - - (unbundle -> (iBus_ADR, iBus_DAT_MOSI, iBus_SEL, iBus_CYC, iBus_STB, iBus_WE, iBus_CTI, iBus_BTE)) = - (<$> iBusM2S) $ \(WishboneM2S a b c _ e f g h i) -> (a, b, c, e, f, g, h, i) - - (unbundle -> (dBus_ADR, dBus_DAT_MOSI, dBus_SEL, dBus_CYC, dBus_STB, dBus_WE, dBus_CTI, dBus_BTE)) = - (<$> dBusM2S) $ \(WishboneM2S 
a b c _ e f g h i) -> (a, b, c, e, f, g, h, i) + (v, initStage1, initStage2, stepRising, stepFalling, _shutDown) = unsafePerformIO vexCPU + + nonCombInput = NON_COMB_INPUT + <$> (boolToBit <$> unsafeToActiveHigh rst0) + <*> timerInterrupt + <*> externalInterrupt + <*> softwareInterrupt + + combInput = COMB_INPUT + <$> (boolToBit <$> iBus_ACK) + <*> (unpack <$> iBus_DAT_MISO) + <*> (boolToBit <$> iBus_ERR) + <*> (boolToBit <$> dBus_ACK) + <*> (unpack <$> dBus_DAT_MISO) + <*> (boolToBit <$> dBus_ERR) + + wordCast = fromInteger . toInteger + + simInitThenCycles :: + Signal dom NON_COMB_INPUT -> + Signal dom COMB_INPUT -> + Signal dom OUTPUT + simInitThenCycles (cnc :- cncs) ~(cc :- ccs) = + let + -- Note: we don't need @ticks@ for the initialization stages, because this + -- first cycle of a 'Signal' is meant to model what happens _before_ a + -- clock edge. + out0 = unsafePerformIO (initStage1 v cnc) + stage2Out = unsafePerformIO (initStage2 v cc) + ticks = first wordCast <$> singleClockEdgesRelative clk + out1 = simCycles ticks cncs ccs + in + out0 :- (out0 `seq` (stage2Out `seq` out1)) + + simCycles :: + [(Word64, ActiveEdge)] -> + Signal dom NON_COMB_INPUT -> + Signal dom COMB_INPUT -> + Signal dom OUTPUT + simCycles ((fsSinceLastEvent, Rising) : ts) (cnc :- cncs) ccs = + let + out0 = unsafePerformIO (stepRising v fsSinceLastEvent cnc) + out1 = simCycles ts cncs ccs + in + out0 :- (out0 `seq` out1) + + simCycles ((fsSinceLastEvent, Falling) : ts) cncs (cc :- ccs) = + let !() = unsafePerformIO (stepFalling v fsSinceLastEvent cc) + in simCycles ts cncs ccs + + simCycles [] _ _ = error "Empty ticks: should never happen" + + output = simInitThenCycles nonCombInput combInput + + iBus_CYC = FFI.iBusWishbone_CYC <$> output + iBus_STB = FFI.iBusWishbone_STB <$> output + iBus_WE = FFI.iBusWishbone_WE <$> output + iBus_ADR = FFI.iBusWishbone_ADR <$> output + iBus_DAT_MOSI = FFI.iBusWishbone_DAT_MOSI <$> output + iBus_SEL = FFI.iBusWishbone_SEL <$> output + iBus_CTI = 
FFI.iBusWishbone_CTI <$> output + iBus_BTE = FFI.iBusWishbone_BTE <$> output + + dBus_CYC = FFI.dBusWishbone_CYC <$> output + dBus_STB = FFI.dBusWishbone_STB <$> output + dBus_WE = FFI.dBusWishbone_WE <$> output + dBus_ADR = FFI.dBusWishbone_ADR <$> output + dBus_DAT_MOSI = FFI.dBusWishbone_DAT_MOSI <$> output + dBus_SEL = FFI.dBusWishbone_SEL <$> output + dBus_CTI = FFI.dBusWishbone_CTI <$> output + dBus_BTE = FFI.dBusWishbone_BTE <$> output in ( -- iBus - iBus_CYC - , iBus_STB - , iBus_WE - , iBus_ADR - , iBus_DAT_MOSI - , iBus_SEL - , pack <$> iBus_CTI - , pack <$> iBus_BTE + bitToBool <$> iBus_CYC + , bitToBool <$> iBus_STB + , bitToBool <$> iBus_WE + , truncateB . pack <$> iBus_ADR + , pack <$> iBus_DAT_MOSI + , truncateB . pack <$> iBus_SEL + , truncateB . pack <$> iBus_CTI + , truncateB . pack <$> iBus_BTE -- dBus - , dBus_CYC - , dBus_STB - , dBus_WE - , dBus_ADR - , dBus_DAT_MOSI - , dBus_SEL - , pack <$> dBus_CTI - , pack <$> dBus_BTE + , bitToBool <$> dBus_CYC + , bitToBool <$> dBus_STB + , bitToBool <$> dBus_WE + , truncateB . pack <$> dBus_ADR + , pack <$> dBus_DAT_MOSI + , truncateB . pack <$> dBus_SEL + , truncateB . pack <$> dBus_CTI + , truncateB . 
pack <$> dBus_BTE ) - where - {-# NOINLINE go #-} - go step (rst :- rsts) (input :- inputs) = unsafePerformIO $ do - out <- step rst input - pure $ out :- go step rsts inputs {-# NOINLINE vexRiscv# #-} {-# ANN vexRiscv# ( let @@ -401,16 +413,49 @@ vexRiscv# !_sourcePath !_clk rst0 |] ) #-} + -- | Return a function that performs an execution step and a function to free -- the internal CPU state -vexCPU :: IO (Bool -> Input -> IO Output, IO ()) +vexCPU :: IO + ( Ptr VexRiscv + , Ptr VexRiscv -> NON_COMB_INPUT -> IO OUTPUT -- initStage1 + , Ptr VexRiscv -> COMB_INPUT -> IO () -- initStage2 + , Ptr VexRiscv -> Word64 -> NON_COMB_INPUT -> IO OUTPUT -- rising + , Ptr VexRiscv -> Word64 -> COMB_INPUT -> IO () -- falling + , Ptr VexRiscv -> IO () + ) vexCPU = do v <- vexrInit + let - step reset input = alloca $ \inputFFI -> alloca $ \outputFFI -> do - poke inputFFI (inputToFFI reset input) - vexrStep v inputFFI outputFFI - outVal <- peek outputFFI - pure $ outputFromFFI outVal - shutDown = vexrShutdown v - pure (step, shutDown) + {-# NOINLINE initStage1 #-} + initStage1 vPtr nonCombInput = + alloca $ \nonCombInputFFI -> alloca $ \outputFFI -> do + poke nonCombInputFFI nonCombInput + vexrInitStage1 vPtr nonCombInputFFI outputFFI + output <- peek outputFFI + pure output + + {-# NOINLINE initStage2 #-} + initStage2 vPtr combInput = + alloca $ \combInputFFI -> do + poke combInputFFI combInput + vexrInitStage2 vPtr combInputFFI + + {-# NOINLINE stepRising #-} + stepRising vPtr fsSinceLastEvent nonCombInput = + alloca $ \nonCombInputFFI -> alloca $ \outputFFI -> do + poke nonCombInputFFI nonCombInput + vexrStepRisingEdge vPtr fsSinceLastEvent nonCombInputFFI outputFFI + output <- peek outputFFI + pure output + + {-# NOINLINE stepFalling #-} + stepFalling vPtr fsSinceLastEvent combInput = + alloca $ \combInputFFI -> do + poke combInputFFI combInput + vexrStepFallingEdge vPtr fsSinceLastEvent combInputFFI + + shutDown = vexrShutdown + + pure (v, initStage1, initStage2, stepRising, 
stepFalling, shutDown) diff --git a/clash-vexriscv/src/VexRiscv/FFI.hsc b/clash-vexriscv/src/VexRiscv/FFI.hsc index 47b154b..9fd456c 100644 --- a/clash-vexriscv/src/VexRiscv/FFI.hsc +++ b/clash-vexriscv/src/VexRiscv/FFI.hsc @@ -1,4 +1,4 @@ --- SPDX-FileCopyrightText: 2022 Google LLC +-- SPDX-FileCopyrightText: 2022-2024 Google LLC -- -- SPDX-License-Identifier: Apache-2.0 @@ -19,19 +19,27 @@ import Data.Word data VexRiscv foreign import ccall unsafe "vexr_init" vexrInit :: IO (Ptr VexRiscv) - foreign import ccall unsafe "vexr_shutdown" vexrShutdown :: Ptr VexRiscv -> IO () -foreign import ccall unsafe "vexr_step" vexrStep :: Ptr VexRiscv -> Ptr INPUT -> Ptr OUTPUT -> IO () +foreign import ccall unsafe "vexr_init_stage1" vexrInitStage1 :: Ptr VexRiscv -> Ptr NON_COMB_INPUT -> Ptr OUTPUT -> IO () +foreign import ccall unsafe "vexr_init_stage2" vexrInitStage2 :: Ptr VexRiscv -> Ptr COMB_INPUT -> IO () +foreign import ccall unsafe "vexr_step_rising_edge" vexrStepRisingEdge :: Ptr VexRiscv -> Word64 -> Ptr NON_COMB_INPUT -> Ptr OUTPUT -> IO () +foreign import ccall unsafe "vexr_step_falling_edge" vexrStepFallingEdge :: Ptr VexRiscv -> Word64 -> Ptr COMB_INPUT -> IO () -data INPUT = INPUT +-- | CPU input that cannot combinatorially depend on the CPU output +data NON_COMB_INPUT = NON_COMB_INPUT { reset :: Bit , timerInterrupt :: Bit , externalInterrupt :: Bit , softwareInterrupt :: Bit - , iBusWishbone_ACK :: Bit + } + +-- | CPU input that can combinatorially depend on the CPU output +data COMB_INPUT = COMB_INPUT + { iBusWishbone_ACK :: Bit , iBusWishbone_DAT_MISO :: Word32 , iBusWishbone_ERR :: Bit + , dBusWishbone_ACK :: Bit , dBusWishbone_DAT_MISO :: Word32 , dBusWishbone_ERR :: Bit @@ -47,6 +55,7 @@ data OUTPUT = OUTPUT , iBusWishbone_SEL :: Word8 , iBusWishbone_CTI :: Word8 , iBusWishbone_BTE :: Word8 + , dBusWishbone_CYC :: Bit , dBusWishbone_STB :: Bit , dBusWishbone_WE :: Bit @@ -64,34 +73,45 @@ instance Storable Bit where peek = fmap boolToBit . peek . 
castPtr poke ptr = poke (castPtr ptr) . bitToBool -instance Storable INPUT where - alignment _ = #alignment INPUT - sizeOf _ = #size INPUT +instance Storable NON_COMB_INPUT where + alignment _ = #alignment NON_COMB_INPUT + sizeOf _ = #size NON_COMB_INPUT + {-# INLINE peek #-} + peek ptr = const NON_COMB_INPUT <$> pure () + <*> (#peek NON_COMB_INPUT, reset) ptr + <*> (#peek NON_COMB_INPUT, timerInterrupt) ptr + <*> (#peek NON_COMB_INPUT, externalInterrupt) ptr + <*> (#peek NON_COMB_INPUT, softwareInterrupt) ptr + + {-# INLINE poke #-} + poke ptr this = do + (#poke NON_COMB_INPUT, reset) ptr (reset this) + (#poke NON_COMB_INPUT, timerInterrupt) ptr (timerInterrupt this) + (#poke NON_COMB_INPUT, externalInterrupt) ptr (externalInterrupt this) + (#poke NON_COMB_INPUT, softwareInterrupt) ptr (softwareInterrupt this) + return () + +instance Storable COMB_INPUT where + alignment _ = #alignment COMB_INPUT + sizeOf _ = #size COMB_INPUT {-# INLINE peek #-} - peek ptr = const INPUT <$> pure () - <*> (#peek INPUT, reset) ptr - <*> (#peek INPUT, timerInterrupt) ptr - <*> (#peek INPUT, externalInterrupt) ptr - <*> (#peek INPUT, softwareInterrupt) ptr - <*> (#peek INPUT, iBusWishbone_ACK) ptr - <*> (#peek INPUT, iBusWishbone_DAT_MISO) ptr - <*> (#peek INPUT, iBusWishbone_ERR) ptr - <*> (#peek INPUT, dBusWishbone_ACK) ptr - <*> (#peek INPUT, dBusWishbone_DAT_MISO) ptr - <*> (#peek INPUT, dBusWishbone_ERR) ptr + peek ptr = const COMB_INPUT <$> pure () + <*> (#peek COMB_INPUT, iBusWishbone_ACK) ptr + <*> (#peek COMB_INPUT, iBusWishbone_DAT_MISO) ptr + <*> (#peek COMB_INPUT, iBusWishbone_ERR) ptr + <*> (#peek COMB_INPUT, dBusWishbone_ACK) ptr + <*> (#peek COMB_INPUT, dBusWishbone_DAT_MISO) ptr + <*> (#peek COMB_INPUT, dBusWishbone_ERR) ptr {-# INLINE poke #-} poke ptr this = do - (#poke INPUT, reset) ptr (reset this) - (#poke INPUT, timerInterrupt) ptr (timerInterrupt this) - (#poke INPUT, externalInterrupt) ptr (externalInterrupt this) - (#poke INPUT, softwareInterrupt) ptr 
(softwareInterrupt this) - (#poke INPUT, iBusWishbone_ACK) ptr (iBusWishbone_ACK this) - (#poke INPUT, iBusWishbone_DAT_MISO) ptr (iBusWishbone_DAT_MISO this) - (#poke INPUT, iBusWishbone_ERR) ptr (iBusWishbone_ERR this) - (#poke INPUT, dBusWishbone_ACK) ptr (dBusWishbone_ACK this) - (#poke INPUT, dBusWishbone_DAT_MISO) ptr (dBusWishbone_DAT_MISO this) - (#poke INPUT, dBusWishbone_ERR) ptr (dBusWishbone_ERR this) + (#poke COMB_INPUT, iBusWishbone_ACK) ptr (iBusWishbone_ACK this) + (#poke COMB_INPUT, iBusWishbone_DAT_MISO) ptr (iBusWishbone_DAT_MISO this) + (#poke COMB_INPUT, iBusWishbone_ERR) ptr (iBusWishbone_ERR this) + + (#poke COMB_INPUT, dBusWishbone_ACK) ptr (dBusWishbone_ACK this) + (#poke COMB_INPUT, dBusWishbone_DAT_MISO) ptr (dBusWishbone_DAT_MISO this) + (#poke COMB_INPUT, dBusWishbone_ERR) ptr (dBusWishbone_ERR this) return () instance Storable OUTPUT where @@ -107,6 +127,7 @@ instance Storable OUTPUT where <*> (#peek OUTPUT, iBusWishbone_SEL) ptr <*> (#peek OUTPUT, iBusWishbone_CTI) ptr <*> (#peek OUTPUT, iBusWishbone_BTE) ptr + <*> (#peek OUTPUT, dBusWishbone_CYC) ptr <*> (#peek OUTPUT, dBusWishbone_STB) ptr <*> (#peek OUTPUT, dBusWishbone_WE) ptr @@ -126,6 +147,7 @@ instance Storable OUTPUT where (#poke OUTPUT, iBusWishbone_SEL) ptr (iBusWishbone_SEL this) (#poke OUTPUT, iBusWishbone_CTI) ptr (iBusWishbone_CTI this) (#poke OUTPUT, iBusWishbone_BTE) ptr (iBusWishbone_BTE this) + (#poke OUTPUT, dBusWishbone_CYC) ptr (dBusWishbone_CYC this) (#poke OUTPUT, dBusWishbone_STB) ptr (dBusWishbone_STB this) (#poke OUTPUT, dBusWishbone_WE) ptr (dBusWishbone_WE this) diff --git a/clash-vexriscv/src/ffi/impl.cpp b/clash-vexriscv/src/ffi/impl.cpp index a7bbba0..230e2c6 100644 --- a/clash-vexriscv/src/ffi/impl.cpp +++ b/clash-vexriscv/src/ffi/impl.cpp @@ -7,57 +7,123 @@ #include "interface.h" extern "C" { - VVexRiscv* vexr_init(); - void vexr_shutdown(VVexRiscv *top); - void vexr_step(VVexRiscv *top, const INPUT *input, OUTPUT *output); + VVexRiscv* vexr_init(); + 
void vexr_shutdown(VVexRiscv *top); + + void vexr_init_stage1(VVexRiscv *top, const NON_COMB_INPUT *input, OUTPUT *output); + void vexr_init_stage2(VVexRiscv *top, const COMB_INPUT *input); + void vexr_step_rising_edge(VVexRiscv *top, uint64_t time_add, const NON_COMB_INPUT *input, OUTPUT *output); + void vexr_step_falling_edge(VVexRiscv *top, uint64_t time_add, const COMB_INPUT *input); } +static VerilatedContext* contextp = 0; VVexRiscv* vexr_init() { - return new VVexRiscv(); + contextp = new VerilatedContext; + VVexRiscv *v = new VVexRiscv(contextp); + Verilated::traceEverOn(true); + v->clk = false; + return v; +} + +// Set all inputs that cannot combinationally depend on outputs. I.e., all inputs +// except the Wishbone buses. +void set_non_comb_inputs(VVexRiscv *top, const NON_COMB_INPUT *input) +{ + top->reset = input->reset; + top->timerInterrupt = input->timerInterrupt; + top->externalInterrupt = input->externalInterrupt; + top->softwareInterrupt = input->softwareInterrupt; +} + +// Set all inputs that can combinationally depend on outputs. I.e., the Wishbone +// buses. 
+void set_comb_inputs(VVexRiscv *top, const COMB_INPUT *input) +{ + top->iBusWishbone_ACK = input->iBusWishbone_ACK; + top->iBusWishbone_DAT_MISO = input->iBusWishbone_DAT_MISO; + top->iBusWishbone_ERR = input->iBusWishbone_ERR; + top->dBusWishbone_ACK = input->dBusWishbone_ACK; + top->dBusWishbone_DAT_MISO = input->dBusWishbone_DAT_MISO; + top->dBusWishbone_ERR = input->dBusWishbone_ERR; +} + +// Copy all outputs of the simulated CPU into the OUTPUT struct +void set_ouputs(VVexRiscv *top, OUTPUT *output) +{ + output->iBusWishbone_CYC = top->iBusWishbone_CYC; + output->iBusWishbone_STB = top->iBusWishbone_STB; + output->iBusWishbone_WE = top->iBusWishbone_WE; + output->iBusWishbone_ADR = top->iBusWishbone_ADR; + output->iBusWishbone_DAT_MOSI = top->iBusWishbone_DAT_MOSI; + output->iBusWishbone_SEL = top->iBusWishbone_SEL; + output->iBusWishbone_CTI = top->iBusWishbone_CTI; + output->iBusWishbone_BTE = top->iBusWishbone_BTE; + output->dBusWishbone_CYC = top->dBusWishbone_CYC; + output->dBusWishbone_STB = top->dBusWishbone_STB; + output->dBusWishbone_WE = top->dBusWishbone_WE; + output->dBusWishbone_ADR = top->dBusWishbone_ADR; + output->dBusWishbone_DAT_MOSI = top->dBusWishbone_DAT_MOSI; + output->dBusWishbone_SEL = top->dBusWishbone_SEL; + output->dBusWishbone_CTI = top->dBusWishbone_CTI; + output->dBusWishbone_BTE = top->dBusWishbone_BTE; +} + +void vexr_init_stage1(VVexRiscv *top, const NON_COMB_INPUT *input, OUTPUT *output) +{ + // Set all inputs that cannot combinationally depend on outputs. I.e., all inputs + // except the Wishbone buses. + set_non_comb_inputs(top, input); + + // Combinatorially respond to the inputs + top->eval(); + set_ouputs(top, output); + + // Advance time by 50 nanoseconds. This is an arbitrary value. Ideally, we would + // do something similar to Clash's template tag "~LONGESTPERIOD". 
+ contextp->timeInc(50000); +} + +void vexr_init_stage2(VVexRiscv *top, const COMB_INPUT *input) +{ + set_comb_inputs(top, input); } void vexr_shutdown(VVexRiscv *top) { - delete top; -} - -void vexr_step(VVexRiscv *top, const INPUT *input, OUTPUT *output) -{ - // set inputs - top->reset = input->reset; - top->timerInterrupt = input->timerInterrupt; - top->externalInterrupt = input->externalInterrupt; - top->softwareInterrupt = input->softwareInterrupt; - top->iBusWishbone_ACK = input->iBusWishbone_ACK; - top->iBusWishbone_DAT_MISO = input->iBusWishbone_DAT_MISO; - top->iBusWishbone_ERR = input->iBusWishbone_ERR; - top->dBusWishbone_ACK = input->dBusWishbone_ACK; - top->dBusWishbone_DAT_MISO = input->dBusWishbone_DAT_MISO; - top->dBusWishbone_ERR = input->dBusWishbone_ERR; - - // run one cycle of the simulation - top->clk = true; - top->eval(); - top->clk = false; - top->eval(); - - // update outputs - output->iBusWishbone_CYC = top->iBusWishbone_CYC; - output->iBusWishbone_STB = top->iBusWishbone_STB; - output->iBusWishbone_WE = top->iBusWishbone_WE; - output->iBusWishbone_ADR = top->iBusWishbone_ADR; - output->iBusWishbone_DAT_MOSI = top->iBusWishbone_DAT_MOSI; - output->iBusWishbone_SEL = top->iBusWishbone_SEL; - output->iBusWishbone_CTI = top->iBusWishbone_CTI; - output->iBusWishbone_BTE = top->iBusWishbone_BTE; - output->dBusWishbone_CYC = top->dBusWishbone_CYC; - output->dBusWishbone_STB = top->dBusWishbone_STB; - output->dBusWishbone_WE = top->dBusWishbone_WE; - output->dBusWishbone_ADR = top->dBusWishbone_ADR; - output->dBusWishbone_DAT_MOSI = top->dBusWishbone_DAT_MOSI; - output->dBusWishbone_SEL = top->dBusWishbone_SEL; - output->dBusWishbone_CTI = top->dBusWishbone_CTI; - output->dBusWishbone_BTE = top->dBusWishbone_BTE; + delete top; + delete contextp; + contextp = 0; +} + + +void vexr_step_rising_edge(VVexRiscv *top, uint64_t time_add, const NON_COMB_INPUT *input, OUTPUT *output) +{ + // Advance time since last event. 
Note that this is 0 for the first call to + // this function. To get a sensible waveform, vexr_init_stage1 has already advanced + // time. + contextp->timeInc(time_add); // XXX: time_add is in femtoseconds, timeinc expects picoseconds + + // Update the inputs that cannot combinationally depend on outputs + set_non_comb_inputs(top, input); + + top->clk = true; + top->eval(); + + // Set all outputs + set_ouputs(top, output); +} + +void vexr_step_falling_edge(VVexRiscv *top, uint64_t time_add, const COMB_INPUT *input) +{ + // advance time since last event + contextp->timeInc(time_add); // time_add is in femtoseconds, timeinc expects picoseconds + + // Update inputs + top->clk = false; + set_comb_inputs(top, input); + + // Evaluate the simulation + top->eval(); } diff --git a/clash-vexriscv/src/ffi/interface.h b/clash-vexriscv/src/ffi/interface.h index 312c68a..e82759a 100644 --- a/clash-vexriscv/src/ffi/interface.h +++ b/clash-vexriscv/src/ffi/interface.h @@ -14,7 +14,9 @@ typedef struct { bit timerInterrupt; bit externalInterrupt; bit softwareInterrupt; +} NON_COMB_INPUT; +typedef struct { bit iBusWishbone_ACK; uint32_t iBusWishbone_DAT_MISO; bit iBusWishbone_ERR; @@ -22,7 +24,7 @@ typedef struct { bit dBusWishbone_ACK; uint32_t dBusWishbone_DAT_MISO; bit dBusWishbone_ERR; -} INPUT; +} COMB_INPUT; typedef struct { bit iBusWishbone_CYC; @@ -44,39 +46,4 @@ typedef struct { uint8_t dBusWishbone_BTE; } OUTPUT; - #endif - -/* - input reset - input timerInterrupt, - input externalInterrupt, - input softwareInterrupt, - - input iBusWishbone_ACK, - input [31:0] iBusWishbone_DAT_MISO, - input iBusWishbone_ERR, - - input dBusWishbone_ACK, - input [31:0] dBusWishbone_DAT_MISO, - input dBusWishbone_ERR, - - - output iBusWishbone_CYC, - output iBusWishbone_STB, - output iBusWishbone_WE, - output [29:0] iBusWishbone_ADR, - output [31:0] iBusWishbone_DAT_MOSI, - output [3:0] iBusWishbone_SEL, - output [2:0] iBusWishbone_CTI, - output [1:0] iBusWishbone_BTE, - - output dBusWishbone_CYC, - output dBusWishbone_STB, - output dBusWishbone_WE, - 
output [29:0] dBusWishbone_ADR, - output [31:0] dBusWishbone_DAT_MOSI, - output reg [3:0] dBusWishbone_SEL, - output [2:0] dBusWishbone_CTI, - output [1:0] dBusWishbone_BTE, -*/ From 968b3dda4f1a5fad957700b8828fe1dd515d8ed4 Mon Sep 17 00:00:00 2001 From: Martijn Bastiaan Date: Tue, 5 Mar 2024 09:01:58 +0100 Subject: [PATCH 2/2] Perform packing/unpacking of arguments and results in HDL Running three experiments with the old and the new code shows that avoiding packing/unpacking in Haskell yields a ~50% performance increase. Figures are taken by observing `clash-vexriscv-sim:unittests`'s debug times. | Old 1 | Old 2 | Old 3 | Old min | New 1 | New 2 | New 3 | New min | New faster | |-------|-------|-------|---------|-------|-------|-------|---------|------------| | 0.25 | 0.31 | 0.3 | 0.25 | 0.28 | 0.24 | 0.26 | 0.24 | 4.2% | | 0.65 | 0.69 | 0.74 | 0.65 | 0.51 | 0.48 | 0.5 | 0.48 | 35.4% | | 0.44 | 0.48 | 0.53 | 0.44 | 0.36 | 0.31 | 0.34 | 0.31 | 41.9% | | 1.84 | 1.72 | 1.7 | 1.7 | 1.12 | 1.08 | 1.12 | 1.08 | 57.4% | | 1.57 | 1.58 | 1.58 | 1.57 | 1.03 | 1.06 | 1.01 | 1.01 | 55.4% | | 0.43 | 0.4 | 0.4 | 0.4 | 0.33 | 0.33 | 0.32 | 0.32 | 25.0% | | 4.62 | 4.18 | 4.5 | 4.18 | 2.69 | 2.73 | 2.73 | 2.69 | 55.4% | | 0.17 | 0.18 | 0.24 | 0.17 | 0.18 | 0.24 | 0.23 | 0.18 | -5.6% | | 2.26 | 2.57 | 2.43 | 2.26 | 1.35 | 1.45 | 1.42 | 1.35 | 67.4% | --- clash-vexriscv-sim/src/Utils/Cpu.hs | 4 +- clash-vexriscv/clash-vexriscv.cabal | 12 + clash-vexriscv/src/VexRiscv.hs | 446 ++++++------------------ clash-vexriscv/src/VexRiscv/BlackBox.hs | 172 +++++++++ 4 files changed, 291 insertions(+), 343 deletions(-) create mode 100644 clash-vexriscv/src/VexRiscv/BlackBox.hs diff --git a/clash-vexriscv-sim/src/Utils/Cpu.hs b/clash-vexriscv-sim/src/Utils/Cpu.hs index 325492c..71da334 100644 --- a/clash-vexriscv-sim/src/Utils/Cpu.hs +++ b/clash-vexriscv-sim/src/Utils/Cpu.hs @@ -61,8 +61,8 @@ cpu bootIMem bootDMem = (output, writes, iS2M, dS2M) { timerInterrupt = low, externalInterrupt = low, 
softwareInterrupt = low, - iBusWbS2M = makeDefined iBus, - dBusWbS2M = makeDefined dBus + iBusWbS2M = iBus, + dBusWbS2M = dBus } ) <$> iS2M diff --git a/clash-vexriscv/clash-vexriscv.cabal b/clash-vexriscv/clash-vexriscv.cabal index d761071..3c01433 100644 --- a/clash-vexriscv/clash-vexriscv.cabal +++ b/clash-vexriscv/clash-vexriscv.cabal @@ -102,24 +102,36 @@ library default-language: Haskell2010 exposed-modules: VexRiscv + VexRiscv.BlackBox VexRiscv.ClockTicks VexRiscv.FFI VexRiscv.TH VexRiscv.VecToTuple + -- See https://github.com/clash-lang/clash-compiler/pull/2511 + if impl(ghc >= 9.4) + CPP-Options: -DCLASH_OPAQUE=OPAQUE + else + CPP-Options: -DCLASH_OPAQUE=NOINLINE + build-depends: base, bytestring >= 0.10 && < 0.13, + clash-lib, clash-prelude, clash-protocols, containers, directory >= 1.3 && < 1.4, filepath, Glob, + infinite-list, + mtl, + pretty-show, process >= 1.6 && < 1.8, string-interpolate, tagged, template-haskell, + text, extra-libraries: VexRiscvFFI, stdc++ include-dirs: src/ diff --git a/clash-vexriscv/src/VexRiscv.hs b/clash-vexriscv/src/VexRiscv.hs index 16c2963..ed7ba37 100644 --- a/clash-vexriscv/src/VexRiscv.hs +++ b/clash-vexriscv/src/VexRiscv.hs @@ -2,11 +2,12 @@ -- -- SPDX-License-Identifier: Apache-2.0 +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} {-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE MagicHash #-} {-# LANGUAGE TemplateHaskellQuotes #-} -{-# LANGUAGE QuasiQuotes #-} {-# OPTIONS_GHC -fconstraint-solver-iterations=10 #-} @@ -27,10 +28,10 @@ import GHC.Stack (HasCallStack) import Language.Haskell.TH.Syntax import Protocols.Wishbone +import VexRiscv.BlackBox (vexRiscvBBF) import VexRiscv.ClockTicks import VexRiscv.FFI import VexRiscv.TH -import VexRiscv.VecToTuple import qualified VexRiscv.FFI as FFI @@ -43,19 +44,67 @@ data Input = Input } deriving (Generic, NFDataX, ShowX, Eq, BitPack) +inputToNonCombInput :: Bool -> Input -> NON_COMB_INPUT +inputToNonCombInput reset 
Input{..} = NON_COMB_INPUT + { FFI.reset = boolToBit reset + , FFI.timerInterrupt = timerInterrupt + , FFI.externalInterrupt = externalInterrupt + , FFI.softwareInterrupt = softwareInterrupt + } + +inputToCombInput :: Input -> COMB_INPUT +inputToCombInput Input{iBusWbS2M, dBusWbS2M} = makeDefined $ COMB_INPUT + { FFI.iBusWishbone_ACK = boolToBit (acknowledge iBusWbS2M) + , FFI.iBusWishbone_DAT_MISO = unpack (readData iBusWbS2M) + , FFI.iBusWishbone_ERR = boolToBit (err iBusWbS2M) + + , FFI.dBusWishbone_ACK = boolToBit (acknowledge dBusWbS2M) + , FFI.dBusWishbone_DAT_MISO = unpack (readData dBusWbS2M) + , FFI.dBusWishbone_ERR = boolToBit (err dBusWbS2M) + } + data Output = Output { iBusWbM2S :: "IBUS_OUT_" ::: WishboneM2S 30 4 (BitVector 32) , dBusWbM2S :: "DBUS_OUT_" ::: WishboneM2S 30 4 (BitVector 32) } deriving (Generic, NFDataX, ShowX, Eq, BitPack) +outputToOutput :: OUTPUT -> Output +outputToOutput OUTPUT{..} = Output + { iBusWbM2S = WishboneM2S + { addr = truncateB (pack iBusWishbone_ADR) + , writeData = pack (iBusWishbone_DAT_MOSI) + , busSelect = unpack (truncateB (pack iBusWishbone_SEL)) + , lock = False + , busCycle = bitToBool iBusWishbone_CYC + , strobe = bitToBool iBusWishbone_STB + , writeEnable = bitToBool iBusWishbone_WE + , cycleTypeIdentifier = unpack (truncateB (pack iBusWishbone_CTI)) + , burstTypeExtension = unpack (truncateB (pack iBusWishbone_BTE)) + } + , dBusWbM2S = WishboneM2S + { addr = truncateB (pack dBusWishbone_ADR) + , writeData = pack (dBusWishbone_DAT_MOSI) + , busSelect = unpack (truncateB (pack dBusWishbone_SEL)) + , lock = False + , busCycle = bitToBool dBusWishbone_CYC + , strobe = bitToBool dBusWishbone_STB + , writeEnable = bitToBool dBusWishbone_WE + , cycleTypeIdentifier = unpack (truncateB (pack dBusWishbone_CTI)) + , burstTypeExtension = unpack (truncateB (pack dBusWishbone_BTE)) + } + } + -- When passing S2M values from Haskell to VexRiscv over the FFI, undefined -- bits/values cause errors when forcing their evaluation 
to something that can -- be passed through the FFI. -- -- This function makes sure the Wishbone S2M values are free from undefined bits. -makeDefined :: WishboneS2M (BitVector 32) -> WishboneS2M (BitVector 32) -makeDefined wb = wb {readData = defaultX 0 (readData wb)} +makeDefined :: COMB_INPUT -> COMB_INPUT +makeDefined ci@COMB_INPUT{iBusWishbone_DAT_MISO, dBusWishbone_DAT_MISO} = ci + { FFI.iBusWishbone_DAT_MISO = defaultX 0 iBusWishbone_DAT_MISO + , FFI.dBusWishbone_DAT_MISO = defaultX 0 dBusWishbone_DAT_MISO + } defaultX :: (NFDataX a) => a -> a -> a defaultX dflt val @@ -63,356 +112,71 @@ defaultX dflt val | otherwise = val vexRiscv :: (HasCallStack, HiddenClockResetEnable dom) => Signal dom Input -> Signal dom Output -vexRiscv input = - Output <$> - (WishboneM2S - <$> iBus_ADR - <*> iBus_DAT_MOSI - <*> iBus_SEL - <*> pure False - <*> iBus_CYC - <*> iBus_STB - <*> iBus_WE - <*> (unpack <$> iBus_CTI) - <*> (unpack <$> iBus_BTE) - ) - <*> - (WishboneM2S - <$> dBus_ADR - <*> dBus_DAT_MOSI - <*> dBus_SEL - <*> pure False - <*> dBus_CYC - <*> dBus_STB - <*> dBus_WE - <*> (unpack <$> dBus_CTI) - <*> (unpack <$> dBus_BTE) - ) - +vexRiscv = vexRiscv# sourcePath hasClock hasReset where - (unbundle -> (timerInterrupt, externalInterrupt, softwareInterrupt, iBusS2M, dBusS2M)) - = (\(Input a b c d e) -> (a, b, c, d, e)) <$> input - - (unbundle -> (iBus_DAT_MISO, iBus_ACK, iBus_ERR)) - = (\(WishboneS2M a b c _ _) -> (a, b, c)) - -- A hack that enables us to both generate synthesizable HDL and simulate vexRisc in Haskell/Clash - . (if clashSimulation then makeDefined else id) - <$> iBusS2M - - (unbundle -> (dBus_DAT_MISO, dBus_ACK, dBus_ERR)) - = (\(WishboneS2M a b c _ _) -> (a, b, c)) - -- A hack that enables us to both generate synthesizable HDL and simulate vexRisc in Haskell/Clash - . 
(if clashSimulation then makeDefined else id) - <$> dBusS2M - sourcePath = $(do - cpuSrcPath <- runIO $ getPackageRelFilePath "example-cpu/VexRiscv.v" - pure $ LitE $ StringL cpuSrcPath - ) - - ( iBus_CYC - , iBus_STB - , iBus_WE - , iBus_ADR - , iBus_DAT_MOSI - , iBus_SEL - , iBus_CTI - , iBus_BTE - , dBus_CYC - , dBus_STB - , dBus_WE - , dBus_ADR - , dBus_DAT_MOSI - , dBus_SEL - , dBus_CTI - , dBus_BTE - ) = vexRiscv# sourcePath hasClock hasReset - timerInterrupt - externalInterrupt - softwareInterrupt - - iBus_ACK - iBus_ERR - iBus_DAT_MISO - - dBus_ACK - dBus_ERR - dBus_DAT_MISO - - - - + cpuSrcPath <- runIO $ getPackageRelFilePath "example-cpu/VexRiscv.v" + pure $ LitE $ StringL cpuSrcPath) vexRiscv# :: KnownDomain dom => String -> Clock dom -> Reset dom - -- input signals - -> Signal dom Bit -- ^ timerInterrupt - -> Signal dom Bit -- ^ externalInterrupt - -> Signal dom Bit -- ^ softwareInterrupt - -- iBusWbS2M - -> Signal dom Bool -- ^ iBus_ACK - -> Signal dom Bool -- ^ iBus_ERR - -> Signal dom (BitVector 32) -- ^ iBus_DAT_MISO - -- dBusWbS2M - -> Signal dom Bool -- ^ dBus_ACK - -> Signal dom Bool -- ^ dBus_ERR - -> Signal dom (BitVector 32) -- ^ dBus_DAT_MISO - - -- output signals - -> - ( - -- iBus M2S - Signal dom Bool -- ^ iBus_CYC - , Signal dom Bool -- ^ iBus_STB - , Signal dom Bool -- ^ iBus_WE - , Signal dom (BitVector 30) -- ^ iBus_ADR - , Signal dom (BitVector 32) -- ^ iBus_DAT_MOSI - , Signal dom (BitVector 4) -- ^ iBus_SEL - , Signal dom (BitVector 3) -- ^ iBus_CTI - , Signal dom (BitVector 2) -- ^ iBus_BTE - - -- dBus M2S - , Signal dom Bool -- ^ dBus_CYC - , Signal dom Bool -- ^ dBus_STB - , Signal dom Bool -- ^ dBus_WE - , Signal dom (BitVector 30) -- ^ dBus_ADR - , Signal dom (BitVector 32) -- ^ dBus_DAT_MOSI - , Signal dom (BitVector 4) -- ^ dBus_SEL - , Signal dom (BitVector 3) -- ^ dBus_CTI - , Signal dom (BitVector 2) -- ^ dBus_BTE - ) -vexRiscv# !_sourcePath clk rst0 - timerInterrupt - externalInterrupt - softwareInterrupt - iBus_ACK - 
iBus_ERR - iBus_DAT_MISO - - dBus_ACK - dBus_ERR - dBus_DAT_MISO - - = + -> Signal dom Input + -> Signal dom Output +vexRiscv# !_sourcePath clk rst input = + fmap outputToOutput $ + simInitThenCycles + (inputToNonCombInput <$> unsafeToActiveHigh rst <*> input) + (inputToCombInput <$> input) + where + (v, initStage1, initStage2, stepRising, stepFalling, _shutDown) = unsafePerformIO vexCPU + + simInitThenCycles :: + Signal dom NON_COMB_INPUT -> + Signal dom COMB_INPUT -> + Signal dom OUTPUT + simInitThenCycles (cnc :- cncs) ~(cc :- ccs) = let - (v, initStage1, initStage2, stepRising, stepFalling, _shutDown) = unsafePerformIO vexCPU - - nonCombInput = NON_COMB_INPUT - <$> (boolToBit <$> unsafeToActiveHigh rst0) - <*> timerInterrupt - <*> externalInterrupt - <*> softwareInterrupt - - combInput = COMB_INPUT - <$> (boolToBit <$> iBus_ACK) - <*> (unpack <$> iBus_DAT_MISO) - <*> (boolToBit <$> iBus_ERR) - <*> (boolToBit <$> dBus_ACK) - <*> (unpack <$> dBus_DAT_MISO) - <*> (boolToBit <$> dBus_ERR) - - wordCast = fromInteger . toInteger - - simInitThenCycles :: - Signal dom NON_COMB_INPUT -> - Signal dom COMB_INPUT -> - Signal dom OUTPUT - simInitThenCycles (cnc :- cncs) ~(cc :- ccs) = - let - -- Note: we don't need @ticks@ for the initialization stages, because this - -- first cycle of a 'Signal' is meant to model what happens _before_ a - -- clock edge. 
- out0 = unsafePerformIO (initStage1 v cnc) - stage2Out = unsafePerformIO (initStage2 v cc) - ticks = first wordCast <$> singleClockEdgesRelative clk - out1 = simCycles ticks cncs ccs - in - out0 :- (out0 `seq` (stage2Out `seq` out1)) - - simCycles :: - [(Word64, ActiveEdge)] -> - Signal dom NON_COMB_INPUT -> - Signal dom COMB_INPUT -> - Signal dom OUTPUT - simCycles ((fsSinceLastEvent, Rising) : ts) (cnc :- cncs) ccs = - let - out0 = unsafePerformIO (stepRising v fsSinceLastEvent cnc) - out1 = simCycles ts cncs ccs - in - out0 :- (out0 `seq` out1) - - simCycles ((fsSinceLastEvent, Falling) : ts) cncs (cc :- ccs) = - let !() = unsafePerformIO (stepFalling v fsSinceLastEvent cc) - in simCycles ts cncs ccs - - simCycles [] _ _ = error "Empty ticks: should never happen" - - output = simInitThenCycles nonCombInput combInput - - iBus_CYC = FFI.iBusWishbone_CYC <$> output - iBus_STB = FFI.iBusWishbone_STB <$> output - iBus_WE = FFI.iBusWishbone_WE <$> output - iBus_ADR = FFI.iBusWishbone_ADR <$> output - iBus_DAT_MOSI = FFI.iBusWishbone_DAT_MOSI <$> output - iBus_SEL = FFI.iBusWishbone_SEL <$> output - iBus_CTI = FFI.iBusWishbone_CTI <$> output - iBus_BTE = FFI.iBusWishbone_BTE <$> output - - dBus_CYC = FFI.dBusWishbone_CYC <$> output - dBus_STB = FFI.dBusWishbone_STB <$> output - dBus_WE = FFI.dBusWishbone_WE <$> output - dBus_ADR = FFI.dBusWishbone_ADR <$> output - dBus_DAT_MOSI = FFI.dBusWishbone_DAT_MOSI <$> output - dBus_SEL = FFI.dBusWishbone_SEL <$> output - dBus_CTI = FFI.dBusWishbone_CTI <$> output - dBus_BTE = FFI.dBusWishbone_BTE <$> output + -- Note: we don't need @ticks@ for the initialization stages, because this + -- first cycle of a 'Signal' is meant to model what happens _before_ a + -- clock edge. + out0 = unsafePerformIO (initStage1 v cnc) + stage2Out = unsafePerformIO (initStage2 v cc) + ticks = first (fromInteger . 
toInteger) <$> singleClockEdgesRelative clk + out1 = simCycles ticks cncs ccs in - ( -- iBus - bitToBool <$> iBus_CYC - , bitToBool <$> iBus_STB - , bitToBool <$> iBus_WE - , truncateB . pack <$> iBus_ADR - , pack <$> iBus_DAT_MOSI - , truncateB . pack <$> iBus_SEL - , truncateB . pack <$> iBus_CTI - , truncateB . pack <$> iBus_BTE - - -- dBus - , bitToBool <$> dBus_CYC - , bitToBool <$> dBus_STB - , bitToBool <$> dBus_WE - , truncateB . pack <$> dBus_ADR - , pack <$> dBus_DAT_MOSI - , truncateB . pack <$> dBus_SEL - , truncateB . pack <$> dBus_CTI - , truncateB . pack <$> dBus_BTE - ) -{-# NOINLINE vexRiscv# #-} -{-# ANN vexRiscv# ( + out0 :- (out0 `seq` (stage2Out `seq` out1)) + + simCycles :: + [(Word64, ActiveEdge)] -> + Signal dom NON_COMB_INPUT -> + Signal dom COMB_INPUT -> + Signal dom OUTPUT + simCycles ((fsSinceLastEvent, Rising) : ts) (cnc :- cncs) ccs = let - primName = 'vexRiscv# - ( _ - , srcPath - , clk - , rst - , timerInterrupt - , externalInterrupt - , softwareInterrupt - , iBus_ACK - , iBus_ERR - , iBus_DAT_MISO - , dBus_ACK - , dBus_ERR - , dBus_DAT_MISO - ) = vecToTuple (indicesI @13) - - ( iBus_CYC - , iBus_STB - , iBus_WE - , iBus_ADR - , iBus_DAT_MOSI - , iBus_SEL - , iBus_CTI - , iBus_BTE - , dBus_CYC - , dBus_STB - , dBus_WE - , dBus_ADR - , dBus_DAT_MOSI - , dBus_SEL - , dBus_CTI - , dBus_BTE - ) = vecToTuple $ (\x -> extend @_ @16 @13 x + 1) <$> indicesI @16 - - cpu = extend @_ @_ @1 dBus_BTE + 1 + out0 = unsafePerformIO (stepRising v fsSinceLastEvent cnc) + out1 = simCycles ts cncs ccs in - InlineYamlPrimitive [Verilog] [__i| - BlackBox: - name: #{primName} - kind: Declaration - template: |- - // vexRiscv begin - - ~DEVNULL[~FILE[~LIT[#{srcPath}]]] - - wire ~GENSYM[iBus_CYC][#{iBus_CYC}]; - wire ~GENSYM[iBus_STB][#{iBus_STB}]; - wire ~GENSYM[iBus_WE][#{iBus_WE}]; - wire [29:0] ~GENSYM[iBus_ADR][#{iBus_ADR}]; - wire [31:0] ~GENSYM[iBus_DAT_MOSI][#{iBus_DAT_MOSI}]; - wire [3:0] ~GENSYM[iBus_SEL][#{iBus_SEL}]; - wire [2:0] 
~GENSYM[iBus_CTI][#{iBus_CTI}]; - wire [1:0] ~GENSYM[iBus_BTE][#{iBus_BTE}]; - - wire ~GENSYM[dBus_CYC][#{dBus_CYC}]; - wire ~GENSYM[dBus_STB][#{dBus_STB}]; - wire ~GENSYM[dBus_WE][#{dBus_WE}]; - wire [29:0] ~GENSYM[dBus_ADR][#{dBus_ADR}]; - wire [31:0] ~GENSYM[dBus_DAT_MOSI][#{dBus_DAT_MOSI}]; - wire [3:0] ~GENSYM[dBus_SEL][#{dBus_SEL}]; - wire [2:0] ~GENSYM[dBus_CTI][#{dBus_CTI}]; - wire [1:0] ~GENSYM[dBus_BTE][#{dBus_BTE}]; - - VexRiscv ~GENSYM[cpu][#{cpu}] ( - .timerInterrupt ( ~ARG[#{timerInterrupt}] ), - .externalInterrupt ( ~ARG[#{externalInterrupt}] ), - .softwareInterrupt ( ~ARG[#{softwareInterrupt}] ), - - .iBusWishbone_CYC ( ~SYM[#{iBus_CYC}] ), - .iBusWishbone_STB ( ~SYM[#{iBus_STB}] ), - .iBusWishbone_ACK ( ~ARG[#{iBus_ACK}] ), - .iBusWishbone_WE ( ~SYM[#{iBus_WE}] ), - .iBusWishbone_ADR ( ~SYM[#{iBus_ADR}] ), - .iBusWishbone_DAT_MISO ( ~ARG[#{iBus_DAT_MISO}] ), - .iBusWishbone_DAT_MOSI ( ~SYM[#{iBus_DAT_MOSI}] ), - .iBusWishbone_SEL ( ~SYM[#{iBus_SEL}] ), - .iBusWishbone_ERR ( ~ARG[#{iBus_ERR}] ), - .iBusWishbone_CTI ( ~SYM[#{iBus_CTI}] ), - .iBusWishbone_BTE ( ~SYM[#{iBus_BTE}] ), - - .dBusWishbone_CYC ( ~SYM[#{dBus_CYC}] ), - .dBusWishbone_STB ( ~SYM[#{dBus_STB}] ), - .dBusWishbone_ACK ( ~ARG[#{dBus_ACK}] ), - .dBusWishbone_WE ( ~SYM[#{dBus_WE}] ), - .dBusWishbone_ADR ( ~SYM[#{dBus_ADR}] ), - .dBusWishbone_DAT_MISO ( ~ARG[#{dBus_DAT_MISO}] ), - .dBusWishbone_DAT_MOSI ( ~SYM[#{dBus_DAT_MOSI}] ), - .dBusWishbone_SEL ( ~SYM[#{dBus_SEL}] ), - .dBusWishbone_ERR ( ~ARG[#{dBus_ERR}] ), - .dBusWishbone_CTI ( ~SYM[#{dBus_CTI}] ), - .dBusWishbone_BTE ( ~SYM[#{dBus_BTE}] ), - - .clk ( ~ARG[#{clk}] ), - .reset ( ~ARG[#{rst}] ) - ); - - assign ~RESULT = { - ~SYM[#{iBus_CYC}], - ~SYM[#{iBus_STB}], - ~SYM[#{iBus_WE}], - ~SYM[#{iBus_ADR}], - ~SYM[#{iBus_DAT_MOSI}], - ~SYM[#{iBus_SEL}], - ~SYM[#{iBus_CTI}], - ~SYM[#{iBus_BTE}], - ~SYM[#{dBus_CYC}], - ~SYM[#{dBus_STB}], - ~SYM[#{dBus_WE}], - ~SYM[#{dBus_ADR}], - ~SYM[#{dBus_DAT_MOSI}], - ~SYM[#{dBus_SEL}], - 
~SYM[#{dBus_CTI}], - ~SYM[#{dBus_BTE}] - }; - - // vexRiscv end - - |] ) #-} + out0 :- (out0 `seq` out1) + simCycles ((fsSinceLastEvent, Falling) : ts) cncs (cc :- ccs) = + let !() = unsafePerformIO (stepFalling v fsSinceLastEvent cc) + in simCycles ts cncs ccs + simCycles [] _ _ = error "Empty ticks: should never happen" +{-# CLASH_OPAQUE vexRiscv# #-} +{-# ANN vexRiscv# hasBlackBox #-} +{-# ANN vexRiscv# ( + let primName = 'vexRiscv# + tfName = 'vexRiscvBBF + in InlineYamlPrimitive [Verilog] [__i| + BlackBoxHaskell: + name: #{primName} + templateFunction: #{tfName} + workInfo: Always + |]) #-} -- | Return a function that performs an execution step and a function to free -- the internal CPU state
diff --git a/clash-vexriscv/src/VexRiscv/BlackBox.hs b/clash-vexriscv/src/VexRiscv/BlackBox.hs
new file mode 100644
index 0000000..29e5454
--- /dev/null
+++ b/clash-vexriscv/src/VexRiscv/BlackBox.hs
@@ -0,0 +1,192 @@
+-- SPDX-FileCopyrightText: 2024 Google LLC
+--
+-- SPDX-License-Identifier: Apache-2.0
+
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE PostfixOperators #-}
+{-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE ViewPatterns #-}
+
+{-# OPTIONS_HADDOCK hide #-}
+
+module VexRiscv.BlackBox where
+
+import Prelude
+
+import Control.Monad.State (State)
+import Data.List.Infinite (Infinite(..), (...))
+import Data.Text (Text)
+import Data.Text.Prettyprint.Doc.Extra (Doc)
+import Text.Show.Pretty (ppShow)
+import GHC.Stack (HasCallStack)
+
+import Clash.Backend (Backend)
+import Clash.Netlist.Types (TemplateFunction(..), BlackBoxContext)
+
+import qualified Clash.Netlist.BlackBox.Types as N
+import qualified Clash.Netlist.Id as Id
+import qualified Clash.Netlist.Types as N
+import qualified Clash.Primitives.DSL as DSL
+
+-- | Convert a list of exactly five elements into a 5-tuple; calls 'error' for
+-- a list of any other length.
+listToTup5 :: [a] -> (a, a, a, a, a)
+listToTup5 [a, b, c, d, e] = (a, b, c, d, e)
+listToTup5 _ = error "listToTup5: list must have 5 elements"
+
+-- | Black box function referenced by the @BlackBoxHaskell@ annotation on
+-- @vexRiscv#@. It ignores all of its arguments and always returns a
+-- declaration-kind ('N.TDecl') black box rendered by 'vexRiscvTF', without
+-- any include files.
+vexRiscvBBF :: HasCallStack => N.BlackBoxFunction
+vexRiscvBBF _isD _primName _args _resTys = pure $ Right (bbMeta, bb)
+ where
+  bbMeta = N.emptyBlackBoxMeta
+    { N.bbKind = N.TDecl
+    , N.bbIncludes = []
+    -- TODO:
+    -- [ ( ("VexRiscv", "v")
+    --   , BBFunction (show 'vexRiscvVerilogTF) 0 (vexRiscvVerilogTF path))
+    -- ]
+    }
+
+  bb :: N.BlackBox
+  bb = N.BBFunction (show 'vexRiscvTF) 0 vexRiscvTF
+
+-- | Template function for @vexRiscv#@. Argument positions count from zero:
+-- the @KnownDomain@ constraint comes first and is not marked as used; source
+-- path, clock, reset and input are. Every context is accepted as valid.
+vexRiscvTF :: TemplateFunction
+vexRiscvTF =
+  let _knownDomain :< srcPath :< clk :< rst :< inp :< _ = (0...)
+  in TemplateFunction [srcPath, clk, rst, inp] (const True) vexRiscvTF#
+
+-- | Render the instantiation of the @VexRiscv@ component.
+--
+-- Takes the input product and both Wishbone S2M products apart, declares one
+-- signal per Wishbone M2S output port, instantiates @VexRiscv@ with named
+-- port mappings, and packs the declared signals into the result product. The
+-- fourth field of each M2S product (matched as @_lockTy@) is tied to 'False'.
+--
+-- Calls 'error' with a pretty-printed dump of the context when the inputs or
+-- the result type do not have the expected shape.
+vexRiscvTF# :: Backend backend => BlackBoxContext -> State backend Doc
+vexRiscvTF# bbCtx
+  -- One input per argument of @vexRiscv#@, the source path included, although
+  -- this template never references it.
+  | [_knownDomain, _srcPath, clk, rst, inp] <- map fst (DSL.tInputs bbCtx)
+  , [outputTy@(N.Product _ _ [iWishboneM2Sty, dWishboneM2Sty])] <- map snd (N.bbResults bbCtx)
+  , N.Product _ _ [adrTy, datMosiTy, selTy, _lockTy, cycTy, stbTy, weTy, ctiTy, bteTy] <- iWishboneM2Sty
+  = do
+    let
+      compName :: Text
+      compName = "VexRiscv"
+
+    instName <- Id.make (compName <> "_inst")
+    DSL.declarationReturn bbCtx (compName <> "_block") $ do
+      ( timerInterrupt
+        , externalInterrupt
+        , softwareInterrupt
+        , iBusWbS2M
+        , dBusWbS2M
+        ) <- listToTup5 <$> DSL.deconstructProduct inp ["timerInt", "extInt", "softInt", "iBusWbS2M", "dBusWbS2M"]
+
+      ( iBusWishbone_DAT_MISO
+        , iBusWishbone_ACK
+        , iBusWishbone_ERR
+        , _iBusWishbone_STL
+        , _iBusWishbone_RTY
+        ) <- listToTup5 <$> DSL.deconstructProduct iBusWbS2M ["i_rdata", "i_ack", "i_err", "i_stall", "i_retry"]
+
+      ( dBusWishbone_DAT_MISO
+        , dBusWishbone_ACK
+        , dBusWishbone_ERR
+        , _dBusWishbone_STL
+        , _dBusWishbone_RTY
+        ) <- listToTup5 <$> DSL.deconstructProduct dBusWbS2M ["d_rdata", "d_ack", "d_err", "d_stall", "d_retry"]
+
+      iBusWishbone_CYC <- DSL.declare "i_cyc" cycTy
+      iBusWishbone_STB <- DSL.declare "i_stb" stbTy
+      iBusWishbone_WE <- DSL.declare "i_we" weTy
+      iBusWishbone_ADR <- DSL.declare "i_adr" adrTy
+      iBusWishbone_DAT_MOSI <- DSL.declare "i_dat_mosi" datMosiTy
+      iBusWishbone_SEL <- DSL.declare "i_sel" selTy
+      iBusWishbone_CTI <- DSL.declare "i_cti" ctiTy
+      iBusWishbone_BTE <- DSL.declare "i_bte" bteTy
+
+      dBusWishbone_CYC <- DSL.declare "d_cyc" cycTy
+      dBusWishbone_STB <- DSL.declare "d_stb" stbTy
+      dBusWishbone_WE <- DSL.declare "d_we" weTy
+      dBusWishbone_ADR <- DSL.declare "d_adr" adrTy
+      dBusWishbone_DAT_MOSI <- DSL.declare "d_dat_mosi" datMosiTy
+      dBusWishbone_SEL <- DSL.declare "d_sel" selTy
+      dBusWishbone_CTI <- DSL.declare "d_cti" ctiTy
+      dBusWishbone_BTE <- DSL.declare "d_bte" bteTy
+
+      let
+        generics = []
+
+        inps :: [(Text, DSL.TExpr)]
+        inps =
+          [ ("clk", clk)
+          , ("reset", rst)
+          , ("timerInterrupt", timerInterrupt)
+          , ("externalInterrupt", externalInterrupt)
+          , ("softwareInterrupt", softwareInterrupt)
+          , ("iBusWishbone_DAT_MISO", iBusWishbone_DAT_MISO)
+          , ("iBusWishbone_ACK", iBusWishbone_ACK)
+          , ("iBusWishbone_ERR", iBusWishbone_ERR)
+          , ("dBusWishbone_DAT_MISO", dBusWishbone_DAT_MISO)
+          , ("dBusWishbone_ACK", dBusWishbone_ACK)
+          , ("dBusWishbone_ERR", dBusWishbone_ERR)
+          ]
+
+        outs :: [(Text, DSL.TExpr)]
+        outs =
+          [ ("iBusWishbone_CYC", iBusWishbone_CYC)
+          , ("iBusWishbone_STB", iBusWishbone_STB)
+          , ("iBusWishbone_WE", iBusWishbone_WE)
+          , ("iBusWishbone_ADR", iBusWishbone_ADR)
+          , ("iBusWishbone_DAT_MOSI", iBusWishbone_DAT_MOSI)
+          , ("iBusWishbone_SEL", iBusWishbone_SEL)
+          , ("iBusWishbone_CTI", iBusWishbone_CTI)
+          , ("iBusWishbone_BTE", iBusWishbone_BTE)
+          , ("dBusWishbone_CYC", dBusWishbone_CYC)
+          , ("dBusWishbone_STB", dBusWishbone_STB)
+          , ("dBusWishbone_WE", dBusWishbone_WE)
+          , ("dBusWishbone_ADR", dBusWishbone_ADR)
+          , ("dBusWishbone_DAT_MOSI", dBusWishbone_DAT_MOSI)
+          , ("dBusWishbone_SEL", dBusWishbone_SEL)
+          , ("dBusWishbone_CTI", dBusWishbone_CTI)
+          , ("dBusWishbone_BTE", dBusWishbone_BTE)
+          ]
+
+      DSL.instDecl N.Empty (Id.unsafeMake compName) instName generics inps outs
+
+      pure [DSL.constructProduct outputTy
+        [ DSL.constructProduct iWishboneM2Sty
+          [ iBusWishbone_ADR
+          , iBusWishbone_DAT_MOSI
+          , iBusWishbone_SEL
+          , DSL.litTExpr (DSL.B False)
+          , iBusWishbone_CYC
+          , iBusWishbone_STB
+          , iBusWishbone_WE
+          , iBusWishbone_CTI
+          , iBusWishbone_BTE
+          ]
+        , DSL.constructProduct dWishboneM2Sty
+          [ dBusWishbone_ADR
+          , dBusWishbone_DAT_MOSI
+          , dBusWishbone_SEL
+          , DSL.litTExpr (DSL.B False)
+          , dBusWishbone_CYC
+          , dBusWishbone_STB
+          , dBusWishbone_WE
+          , dBusWishbone_CTI
+          , dBusWishbone_BTE
+          ]
+        ]]
+
+vexRiscvTF# bbCtx = error (ppShow bbCtx)