@@ -30,13 +30,15 @@ import tensil.tools.compiler.MemoryAddressHelper
30
30
import tensil .{InstructionLayout }
31
31
32
32
import tensil .axi
33
- import tensil .tcu .LocalDataFlowControl
33
+ import tensil .tcu .{ LocalDataFlowControl , TCUOptions }
34
34
import tensil .tcu .instruction .{
35
35
Opcode ,
36
36
Instruction ,
37
37
DataMoveFlags ,
38
38
DataMoveArgs ,
39
- DataMoveKind
39
+ DataMoveKind ,
40
+ ConfigureArgs ,
41
+ Configure
40
42
}
41
43
import tensil .mem .MemKind
42
44
import tensil .tools .{Util , ResNet }
@@ -497,6 +499,87 @@ class AXIWrapperTCUSpec extends FunUnitSpec {
497
499
}
498
500
}
499
501
502
/**
  * Registers a `Slow` test case that checks the samples emitted on
  * `m.sample` while a program of NoOps is executed.
  *
  * The test configures the sample interval with a `Configure` instruction,
  * resets the program counter to 0 with a second one, and then enqueues
  * `programSize` NoOp instructions. A concurrent thread reads
  * `3 * programSize / interval` samples and checks the program counter and
  * the `last` flag of each one.
  *
  * @param programSize number of NoOp instructions enqueued; also the upper
  *                    bound on the expected sampled program counter
  * @param interval    value written to `Configure.sampleInterval`; must be
  *                    non-zero because it is used as a divisor
  * @param blockSize   passed as `TCUOptions.sampleBlockSize`; `last` is
  *                    expected on every `blockSize`-th sample
  */
def sample(
    programSize: Int,
    interval: Int,
    blockSize: Int
) =
  it(
    s"should sample with size=$programSize, interval=$interval, blockSize=$blockSize",
    Slow
  ) {
    test(
      new AXIWrapperTCU(
        gen,
        layout,
        AXIWrapperTCUOptions(
          inner = TCUOptions(sampleBlockSize = blockSize),
          dramAxiConfig = axiConfig
        )
      )
    ).withAnnotations(Seq(VerilatorBackendAnnotation)) { m =>
      m.setClocks()
      m.clock.setTimeout(Int.MaxValue)

      // Shadows the suite-level `layout` used to construct the module above.
      // NOTE(review): presumably consumed implicitly by `Instruction(...)`
      // below - confirm.
      implicit val layout: InstructionLayout =
        m.setInstructionParameters()

      // Sampling is observed for three times as many cycles as there are
      // instructions in the program.
      val cycleCount = 3 * programSize

      // Background threads servicing both DRAM interfaces; never joined.
      fork {
        dram0.listen(m.clock, m.dram0)
      }
      fork {
        dram1.listen(m.clock, m.dram1)
      }

      // Consumer: checks every expected sample in order.
      val t0 = fork {
        m.sample.ready.poke(true.B)

        // Expected (programCounter, last) pairs. The first sample is
        // special-cased; later ones are capped at `programSize`.
        // NOTE(review): the cap assumes the program counter stops advancing
        // once all enqueued instructions have executed - confirm against
        // the sampler implementation.
        val samples = (0 until cycleCount / interval).map {
          case 0 => (1, false)
          case x =>
            (
              Math.min(x * interval - 1, programSize),
              (x + 1) % blockSize == 0
            )
        }

        for ((programCounter, last) <- samples) {
          m.sample.waitForValid()
          m.sample.bits.bits.programCounter.expect(programCounter.U)
          m.sample.bits.last.expect(last.B)

          m.clock.step()
        }
      }

      // Producer: configures sampling, resets the program counter, then
      // feeds the NoOp program.
      val t1 = fork {
        m.instruction.enqueue(
          Instruction(
            Opcode.Configure,
            ConfigureArgs(Configure.sampleInterval, interval)
          )
        )

        m.instruction.enqueue(
          Instruction(
            Opcode.Configure,
            ConfigureArgs(Configure.programCounter, 0)
          )
        )

        for (_ <- 0 until programSize) {
          m.instruction.enqueue(Instruction(Opcode.NoOp))
        }
      }

      t0.join()
      t1.join()
    }
  }
582
+
500
583
val dataMoveSizes =
501
584
(1 to 7 )
502
585
.map(Math .pow(2 , _).toInt)
@@ -505,14 +588,16 @@ class AXIWrapperTCUSpec extends FunUnitSpec {
505
588
.distinct
506
589
.filter(_ <= arch.accumulatorDepth)
507
590
508
- val tests = Seq (
509
- () => xor4(batchSize = 1 ),
510
- () => xor4(batchSize = 2 ),
511
- () => xor4(batchSize = 4 ),
512
- () => resnet(batchSize = 1 , inputSize = 1 ),
513
- () => resnet(batchSize = 10 , inputSize = 10 ),
514
- ) ++ dataMoveSizes.map(size => () => dataMove(size, 4 , true )) ++
515
- dataMoveSizes.map(size => () => dataMove(size, 4 , false ))
591
// Test-case thunks in registration order: the sampling case, the xor4 and
// resnet cases, then one data-move case per entry of `dataMoveSizes` with
// the third argument `true`, followed by the same sizes with `false`.
val tests = {
  val fixedCases = Seq(
    () => sample(programSize = 1000, interval = 10, blockSize = 16),
    () => xor4(batchSize = 1),
    () => xor4(batchSize = 2),
    () => xor4(batchSize = 4),
    () => resnet(batchSize = 1, inputSize = 1),
    () => resnet(batchSize = 10, inputSize = 10)
  )

  // Outer loop over the flag keeps all `true` cases ahead of all `false`
  // cases, matching the order of two concatenated maps.
  val dataMoveCases =
    for {
      flag <- Seq(true, false)
      size <- dataMoveSizes
    } yield (() => dataMove(size, 4, flag))

  fixedCases ++ dataMoveCases
}
516
601
517
602
for (t <- tests) {
518
603
if (randomizeDrams) {
0 commit comments