From d4b00a1342cc17ec6bcd1208575d33a459bfd19f Mon Sep 17 00:00:00 2001 From: Matthew Naylor Date: Thu, 19 Aug 2021 10:45:41 +0100 Subject: [PATCH] Initial import --- Haskell/Blarney/VendorIP/PipelinedDivider.hs | 36 + .../Blarney/VendorIP/StreamClockCrosser.hs | 126 ++++ .../AvalonStreamClockCrosser.ip | 670 ++++++++++++++++++ IntelFPGA/AvalonStreamClockCrosser/README.md | 3 + .../post-ipgenerate.sh | 19 + IntelFPGA/PipelinedDivider/PipelinedDivider.v | 34 + Sim/AvalonStreamClockCrosser.v | 22 + Sim/PipelinedDivider.v | 41 ++ 8 files changed, 951 insertions(+) create mode 100644 Haskell/Blarney/VendorIP/PipelinedDivider.hs create mode 100644 Haskell/Blarney/VendorIP/StreamClockCrosser.hs create mode 100644 IntelFPGA/AvalonStreamClockCrosser/AvalonStreamClockCrosser.ip create mode 100644 IntelFPGA/AvalonStreamClockCrosser/README.md create mode 100755 IntelFPGA/AvalonStreamClockCrosser/post-ipgenerate.sh create mode 100644 IntelFPGA/PipelinedDivider/PipelinedDivider.v create mode 100644 Sim/AvalonStreamClockCrosser.v create mode 100644 Sim/PipelinedDivider.v diff --git a/Haskell/Blarney/VendorIP/PipelinedDivider.hs b/Haskell/Blarney/VendorIP/PipelinedDivider.hs new file mode 100644 index 0000000..ad87a29 --- /dev/null +++ b/Haskell/Blarney/VendorIP/PipelinedDivider.hs @@ -0,0 +1,36 @@ +module Blarney.VendorIP.PipelinedDivider where + +-- Blarney imports +import Blarney +import Blarney.Core.BV + +-- | Intel pipelined divider megafunction +pipelinedDivider :: forall n. 
KnownNat n => + Int + -- ^ Pipeline depth / latency + -> Bit n + -- ^ Numerator + -> Bit n + -- ^ Denominator + -> (Bit n, Bit n) + -- ^ Quotient, remainder +pipelinedDivider latency num denom = (FromBV quot, FromBV rem) + where + width = valueOf @n + + custom = + Custom { + customName = "PipelinedDivider" + , customInputs = [("numer", width), ("denom", width)] + , customOutputs = [("quotient", width), ("remain", width)] + , customParams = [ "LATENCY" :-> show latency + , "DATA_WIDTH" :-> show width + ] + , customIsClocked = True + , customResetable = False + , customNetlist = Nothing + } + + [quot, rem] = + makePrim custom [toBV num, toBV denom] + [Just "quotient", Just "remain"] diff --git a/Haskell/Blarney/VendorIP/StreamClockCrosser.hs b/Haskell/Blarney/VendorIP/StreamClockCrosser.hs new file mode 100644 index 0000000..9dab99b --- /dev/null +++ b/Haskell/Blarney/VendorIP/StreamClockCrosser.hs @@ -0,0 +1,126 @@ +module Blarney.VendorIP.StreamClockCrosser (makeStreamClockCrosser) where + +-- Blarney imports +import Blarney +import Blarney.Stream +import Blarney.Core.BV + +-- | Avalon stream clock crosser inputs +data AvalonStreamCrossIns w = + AvalonStreamCrossIns { + crossClkIn :: Bit 1 + -- ^ Input stream clock + , crossRstIn :: Bit 1 + -- ^ Input stream reset + , crossClkOut :: Bit 1 + -- ^ Output stream clock + , crossRstOut :: Bit 1 + -- ^ Output stream reset + , crossValidIn :: Bit 1 + -- ^ Input stream valid signal + , crossDataIn :: Bit w + -- ^ Input stream data signal + , crossReadyOut :: Bit 1 + -- ^ Output stream ready signal + } + +-- | Avalon stream clock crosser outputs +data AvalonStreamCrossOuts w = + AvalonStreamCrossOuts { + crossReadyIn :: Bit 1 + -- ^ Input stream ready signal + , crossValidOut :: Bit 1 + -- ^ Output stream valid signal + , crossDataOut :: Bit w + -- ^ Output stream data signal + } + +-- | Avalon stream clock crossing primitive +avalonStreamCross :: + Int + -> AvalonStreamCrossIns w + -> AvalonStreamCrossOuts w
+avalonStreamCross width ins = + AvalonStreamCrossOuts { + crossReadyIn = FromBV in_ready + , crossValidOut = FromBV out_valid + , crossDataOut = FromBV out_data + } + where + custom = + Custom { + customName = "AvalonStreamClockCrosser" + , customInputs = + [ ("in_clk", 1) + , ("in_reset", 1) + , ("out_clk", 1) + , ("out_reset", 1) + , ("in_valid", 1) + , ("in_data", width) + , ("out_ready", 1) + ] + , customOutputs = + [ ("in_ready", 1) + , ("out_valid", 1) + , ("out_data", width) + ] + , customParams = + [ "DATA_WIDTH" :-> show width + ] + , customIsClocked = False + , customResetable = False + , customNetlist = Nothing + } + + [in_ready, out_valid, out_data] = + makePrim custom + [ ins.crossClkIn.toBV + , ins.crossRstIn.toBV + , ins.crossClkOut.toBV + , ins.crossRstOut.toBV + , ins.crossValidIn.toBV + , ins.crossDataIn.toBV + , ins.crossReadyOut.toBV + ] + [Just "in_ready", Just "out_valid", Just "out_data"] + +-- | Stream clock crosser +makeStreamClockCrosser :: forall a. Bits a => + (Clock, Reset) + -- ^ Input stream clock & reset + -> (Clock, Reset) + -- ^ Output stream clock & reset + -> Stream a + -- ^ Input stream + -> Module (Stream a) + -- ^ Output stream +makeStreamClockCrosser + (Clock clkIn, Reset rstIn) + (Clock clkOut, Reset rstOut) + streamIn = do + + consumeWire :: Wire (Bit 1) <- makeWire false + + let width = sizeOf (undefined :: a) + + let outStream = avalonStreamCross width + AvalonStreamCrossIns { + crossClkIn = clkIn + , crossRstIn = rstIn + , crossClkOut = clkOut + , crossRstOut = rstOut + , crossValidIn = streamIn.canPeek + , crossDataIn = streamIn.peek.pack + , crossReadyOut = consumeWire.val + } + + always do + when (streamIn.canPeek .&&. 
outStream.crossReadyIn) do + streamIn.consume + + return + Source { + consume = do consumeWire <== true + , canPeek = outStream.crossValidOut + , peek = outStream.crossDataOut.unpack + } diff --git a/IntelFPGA/AvalonStreamClockCrosser/AvalonStreamClockCrosser.ip b/IntelFPGA/AvalonStreamClockCrosser/AvalonStreamClockCrosser.ip new file mode 100644 index 0000000..3ea46d7 --- /dev/null +++ b/IntelFPGA/AvalonStreamClockCrosser/AvalonStreamClockCrosser.ip @@ -0,0 +1,670 @@ + + + Intel Corporation + AvalonStreamClockCrosser + st_handshake_clock_crosser_0 + 19.1 + + + in_clk + + + + + + + + clk + + + in_clk + + + + + + + + + clockRate + Clock rate + 0 + + + externallyDriven + Externally driven + false + + + ptfSchematicName + PTF schematic name + + + + + + in_clk_reset + + + + + + + + reset + + + in_reset + + + + + + + + + associatedClock + Associated clock + in_clk + + + synchronousEdges + Synchronous edges + DEASSERT + + + + + out_clk + + + + + + + + clk + + + out_clk + + + + + + + + + clockRate + Clock rate + 0 + + + externallyDriven + Externally driven + false + + + ptfSchematicName + PTF schematic name + + + + + + out_clk_reset + + + + + + + + reset + + + out_reset + + + + + + + + + associatedClock + Associated clock + out_clk + + + synchronousEdges + Synchronous edges + DEASSERT + + + + + in + + + + + + + + ready + + + in_ready + + + + + valid + + + in_valid + + + + + data + + + in_data + + + + + + + + + associatedClock + associatedClock + in_clk + + + associatedReset + associatedReset + in_clk_reset + + + beatsPerCycle + Beats Per Cycle + 1 + + + dataBitsPerSymbol + Data bits per symbol + 1 + + + emptyWithinPacket + emptyWithinPacket + false + + + errorDescriptor + Error descriptor + + + + firstSymbolInHighOrderBits + First Symbol In High-Order Bits + true + + + highOrderSymbolAtMSB + highOrderSymbolAtMSB + false + + + maxChannel + Maximum channel + 0 + + + packetDescription + Packet description + + + + prSafe + Partial Reconfiguration Safe + false + + + 
readyAllowance + Ready allowance + 0 + + + readyLatency + Ready latency + 0 + + + symbolsPerBeat + Symbols per beat + 1 + + + + + out + + + + + + + + ready + + + out_ready + + + + + valid + + + out_valid + + + + + data + + + out_data + + + + + + + + + associatedClock + associatedClock + out_clk + + + associatedReset + associatedReset + out_clk_reset + + + beatsPerCycle + Beats Per Cycle + 1 + + + dataBitsPerSymbol + Data bits per symbol + 1 + + + emptyWithinPacket + emptyWithinPacket + false + + + errorDescriptor + Error descriptor + + + + firstSymbolInHighOrderBits + First Symbol In High-Order Bits + true + + + highOrderSymbolAtMSB + highOrderSymbolAtMSB + false + + + maxChannel + Maximum channel + 0 + + + packetDescription + Packet description + + + + prSafe + Partial Reconfiguration Safe + false + + + readyAllowance + Ready allowance + 0 + + + readyLatency + Ready latency + 0 + + + symbolsPerBeat + Symbols per beat + 1 + + + + + + + + QUARTUS_SYNTH + :quartus.altera.com: + QUARTUS_SYNTH + + + + + QUARTUS_SYNTH + altera_avalon_st_handshake_clock_crosser + + QUARTUS_SYNTH + + + + + + in_clk + + in + + + STD_LOGIC + QUARTUS_SYNTH + + + + + + in_reset + + in + + + STD_LOGIC + QUARTUS_SYNTH + + + + + + out_clk + + in + + + STD_LOGIC + QUARTUS_SYNTH + + + + + + out_reset + + in + + + STD_LOGIC + QUARTUS_SYNTH + + + + + + in_ready + + out + + + STD_LOGIC + QUARTUS_SYNTH + + + + + + in_valid + + in + + + STD_LOGIC + QUARTUS_SYNTH + + + + + + in_data + + in + + + 0 + 7 + + + + + STD_LOGIC_VECTOR + QUARTUS_SYNTH + + + + + + out_ready + + in + + + STD_LOGIC + QUARTUS_SYNTH + + + + + + out_valid + + out + + + STD_LOGIC + QUARTUS_SYNTH + + + + + + out_data + + out + + + 0 + 7 + + + + + STD_LOGIC_VECTOR + QUARTUS_SYNTH + + + + + + + + + Intel Corporation + AvalonStreamClockCrosser + altera_avalon_st_handshake_clock_crosser + 19.1 + + + + + DATA_WIDTH + Data width + 8 + + + BITS_PER_SYMBOL + Bits per symbol + 1 + + + USE_PACKETS + Use packets + 0 + + + USE_CHANNEL + Use 
channel + 0 + + + CHANNEL_WIDTH + Channel width + 1 + + + MAX_CHANNEL + Maximum channel value + 0 + + + USE_ERROR + Use error + 0 + + + ERROR_WIDTH + Error width + 1 + + + VALID_SYNC_DEPTH + Valid synchronizer depth + 2 + + + READY_SYNC_DEPTH + Ready synchronizer depth + 2 + + + USE_OUTPUT_PIPELINE + Use output pipeline + 1 + + + SYNC_RESET + Use synchronous resets + 0 + + + + + + + device + Device + 1SX280HU2F50E1VG + + + deviceFamily + Device family + Stratix 10 + + + deviceSpeedGrade + Device Speed Grade + 1 + + + generationId + Generation Id + 0 + + + bonusData + bonusData + bonusData +{ + element st_handshake_clock_crosser_0 + { + datum _sortIndex + { + value = "0"; + type = "int"; + } + } +} + + + + hideFromIPCatalog + Hide from IP Catalog + true + + + lockedInterfaceDefinition + lockedInterfaceDefinition + + + + systemInfos + systemInfos + <systemInfosDefinition> + <connPtSystemInfos/> +</systemInfosDefinition> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + false + false + + diff --git a/IntelFPGA/AvalonStreamClockCrosser/README.md b/IntelFPGA/AvalonStreamClockCrosser/README.md new file mode 100644 index 0000000..38f1c04 --- /dev/null +++ b/IntelFPGA/AvalonStreamClockCrosser/README.md @@ -0,0 +1,3 @@ +Run `post-ipgenerate.sh` after `quartus_ipgenerate` to work around an +issue in Intel's code gen (`sed` is used to ensure the `DATA_WIDTH` +parameter is actually used to define the Avalon stream width). diff --git a/IntelFPGA/AvalonStreamClockCrosser/post-ipgenerate.sh b/IntelFPGA/AvalonStreamClockCrosser/post-ipgenerate.sh new file mode 100755 index 0000000..740ba5c --- /dev/null +++ b/IntelFPGA/AvalonStreamClockCrosser/post-ipgenerate.sh @@ -0,0 +1,19 @@ +#! /usr/bin/env bash + +# This script should be run after quartus_ipgenerate to work around +# issues in Intel's code gen.
+
+IPDIR=$(dirname "$0")  # directory containing this script
+
+# Due to a bug in Intel's code gen for the clock crossing IP,
+# we use sed to ensure the DATA_WIDTH parameter is actually used
+# to define the Avalon stream width
+
+AVL_ST_CC="$IPDIR/AvalonStreamClockCrosser/synth/AvalonStreamClockCrosser.v"
+if [ ! -f "$AVL_ST_CC" ]; then
+  echo "ERROR: not found: $AVL_ST_CC" >&2
+  echo "This script should be run after quartus_ipgenerate" >&2
+  exit 1
+fi
+sed -i 's/\[7:0\] in_data/[DATA_WIDTH-1:0] in_data/g' "$AVL_ST_CC"
+sed -i 's/\[7:0\] out_data/[DATA_WIDTH-1:0] out_data/g' "$AVL_ST_CC"
diff --git a/IntelFPGA/PipelinedDivider/PipelinedDivider.v b/IntelFPGA/PipelinedDivider/PipelinedDivider.v
new file mode 100644
index 0000000..ae4d44a
--- /dev/null
+++ b/IntelFPGA/PipelinedDivider/PipelinedDivider.v
@@ -0,0 +1,34 @@
+module PipelinedDivider (
+  clock,
+  denom,
+  numer,
+  quotient,
+  remain);
+
+  parameter DATA_WIDTH;
+  parameter LATENCY;
+
+  input clock;
+  input [DATA_WIDTH-1:0] denom;
+  input [DATA_WIDTH-1:0] numer;
+  output [DATA_WIDTH-1:0] quotient;
+  output [DATA_WIDTH-1:0] remain;
+
+  lpm_divide LPM_DIVIDE_component (
+    .clock(clock),
+    .denom(denom),
+    .numer(numer),
+    .quotient(quotient),
+    .remain(remain),
+    .aclr(1'b0),
+    .clken(1'b1));
+  defparam
+    LPM_DIVIDE_component.lpm_drepresentation = "SIGNED",
+    LPM_DIVIDE_component.lpm_hint = "LPM_REMAINDERPOSITIVE=FALSE",
+    LPM_DIVIDE_component.lpm_nrepresentation = "SIGNED",
+    LPM_DIVIDE_component.lpm_pipeline = LATENCY,
+    LPM_DIVIDE_component.lpm_type = "LPM_DIVIDE",
+    LPM_DIVIDE_component.lpm_widthd = DATA_WIDTH,
+    LPM_DIVIDE_component.lpm_widthn = DATA_WIDTH;
+
+endmodule
diff --git a/Sim/AvalonStreamClockCrosser.v b/Sim/AvalonStreamClockCrosser.v
new file mode 100644
index 0000000..28a9009
--- /dev/null
+++ b/Sim/AvalonStreamClockCrosser.v
@@ -0,0 +1,22 @@
+// Simulation wrapper for clock crossing with Avalon streams
+// TODO: avoid assumption that clocks are the same in simulation!
+module AvalonStreamClockCrosser #(
+    parameter DATA_WIDTH = 8
+  ) (
+    input wire in_clk,
+    input wire in_reset,
+    input wire out_clk,
+    input wire out_reset,
+    output wire in_ready,
+    input wire in_valid,
+    input wire [DATA_WIDTH-1:0] in_data,
+    input wire out_ready,
+    output wire out_valid,
+    output wire [DATA_WIDTH-1:0] out_data
+  );
+
+  assign out_data = in_data;
+  assign out_valid = in_valid;
+  assign in_ready = out_ready;
+
+endmodule
diff --git a/Sim/PipelinedDivider.v b/Sim/PipelinedDivider.v
new file mode 100644
index 0000000..8a68484
--- /dev/null
+++ b/Sim/PipelinedDivider.v
@@ -0,0 +1,41 @@
+// Simulation wrapper for pipelined divider
+module PipelinedDivider #(
+    parameter DATA_WIDTH = 33,  // Operand/result width; default matches the formerly fixed [32:0] ports
+    parameter LATENCY = 1       // Pipeline depth in cycles; must be >= 1 (stage LATENCY-1 is written)
+  ) (
+    input wire [DATA_WIDTH-1:0] numer,
+    input wire [DATA_WIDTH-1:0] denom,
+    input wire clock,
+    output wire [DATA_WIDTH-1:0] quotient,
+    output wire [DATA_WIDTH-1:0] remain
+  );
+  reg [DATA_WIDTH-1:0] quotient_q[LATENCY-1:0];  // Stage LATENCY-1 is newest; stage 0 drives the outputs
+  reg [DATA_WIDTH-1:0] remain_q[LATENCY-1:0];
+
+  generate
+    genvar i;
+    for (i = 0; i < LATENCY-1; i=i+1) begin
+      always @(posedge clock) begin
+        quotient_q[i] <= quotient_q[i+1];
+        remain_q[i] <= remain_q[i+1];
+      end
+    end
+  endgenerate
+
+  always @(posedge clock) begin
+    if (denom == 0) begin  // Division by zero: quotient all ones, remainder = numerator
+      quotient_q[LATENCY-1] <= -1;
+      remain_q[LATENCY-1] <= numer;
+    end else if (numer == -(2**(DATA_WIDTH-2)) && denom == -1) begin  // NOTE(review): tests -2^(DATA_WIDTH-2), not the minimum DATA_WIDTH-bit value; confirm intended
+      quotient_q[LATENCY-1] <= numer;
+      remain_q[LATENCY-1] <= 0;
+    end else begin
+      quotient_q[LATENCY-1] <= $signed(numer) / $signed(denom);
+      remain_q[LATENCY-1] <= $signed(numer) % $signed(denom);
+    end
+  end
+
+  assign quotient = quotient_q[0];
+  assign remain = remain_q[0];
+
+endmodule