Diffstat (limited to 'VexRiscv/src/main/scala/vexriscv/demo/smp')
-rw-r--r-- | VexRiscv/src/main/scala/vexriscv/demo/smp/Misc.scala                      | 289
-rw-r--r-- | VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala        | 748
-rw-r--r-- | VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala   | 322
-rw-r--r-- | VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala | 409
4 files changed, 1768 insertions, 0 deletions
diff --git a/VexRiscv/src/main/scala/vexriscv/demo/smp/Misc.scala b/VexRiscv/src/main/scala/vexriscv/demo/smp/Misc.scala new file mode 100644 index 0000000..58bad63 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/smp/Misc.scala @@ -0,0 +1,289 @@ +package vexriscv.demo.smp + + +import spinal.core._ +import spinal.core.fiber._ +import spinal.lib.bus.bmb._ +import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig, WishboneSlaveFactory} +import spinal.lib.com.jtag.Jtag +import spinal.lib._ +import spinal.lib.bus.bmb.sim.{BmbMemoryMultiPort, BmbMemoryTester} +import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} +import spinal.lib.eda.bench.Bench +import spinal.lib.generator._ +import spinal.lib.misc.Clint +import spinal.lib.sim.{SimData, SparseMemory, StreamDriver, StreamMonitor, StreamReadyRandomizer} +import vexriscv.{VexRiscv, VexRiscvConfig} +import vexriscv.plugin.{CsrPlugin, DBusCachedPlugin, DebugPlugin, IBusCachedPlugin} + +import scala.collection.mutable +import scala.util.Random + +case class LiteDramNativeParameter(addressWidth : Int, dataWidth : Int) + +case class LiteDramNativeCmd(p : LiteDramNativeParameter) extends Bundle{ + val we = Bool() + val addr = UInt(p.addressWidth bits) +} + +case class LiteDramNativeWData(p : LiteDramNativeParameter) extends Bundle{ + val data = Bits(p.dataWidth bits) + val we = Bits(p.dataWidth/8 bits) +} + +case class LiteDramNativeRData(p : LiteDramNativeParameter) extends Bundle{ + val data = Bits(p.dataWidth bits) +} + + +case class LiteDramNative(p : LiteDramNativeParameter) extends Bundle with IMasterSlave { + val cmd = Stream(LiteDramNativeCmd(p)) + val wdata = Stream(LiteDramNativeWData(p)) + val rdata = Stream(LiteDramNativeRData(p)) + override def asMaster(): Unit = { + master(cmd, wdata) + slave(rdata) + } + + def fromBmb(bmb : Bmb, wdataFifoSize : Int, rdataFifoSize : Int) = { + val bridge = BmbToLiteDram( + bmbParameter = bmb.p, + liteDramParameter = this.p, + wdataFifoSize = wdataFifoSize, + rdataFifoSize = rdataFifoSize + ) + bridge.io.input << bmb + bridge.io.output <> this + bridge + } + + def simSlave(ram : SparseMemory,cd : ClockDomain, bmb : Bmb = null): Unit ={ + import spinal.core.sim._ + def bus = this + case class Cmd(address : Long, we : Boolean) + case class WData(data : BigInt, we : Long) + val cmdQueue = mutable.Queue[Cmd]() + val wdataQueue = mutable.Queue[WData]() + val rdataQueue = mutable.Queue[BigInt]() + + + case class Ref(address : Long, data : BigInt, we : Long, time : Long) + val ref = mutable.Queue[Ref]() + if(bmb != null) StreamMonitor(bmb.cmd, cd){p => + if(bmb.cmd.opcode.toInt == 1) ref.enqueue(Ref(p.fragment.address.toLong, p.fragment.data.toBigInt, p.fragment.mask.toLong, simTime())) + } + + var writeCmdCounter, writeDataCounter = 0 + StreamReadyRandomizer(bus.cmd, cd).factor = 0.5f + StreamMonitor(bus.cmd, cd) { t => + cmdQueue.enqueue(Cmd(t.addr.toLong * (p.dataWidth/8) , t.we.toBoolean)) + if(t.we.toBoolean) writeCmdCounter += 1 + } + + StreamReadyRandomizer(bus.wdata, cd).factor = 0.5f + StreamMonitor(bus.wdata, cd) { p => + writeDataCounter += 1 + // if(p.data.toBigInt == BigInt("00000002000000020000000200000002",16)){ + // println("ASD") + // } + wdataQueue.enqueue(WData(p.data.toBigInt, p.we.toLong)) + } + + // new SimStreamAssert(cmd,cd) + // new SimStreamAssert(wdata,cd) + // new SimStreamAssert(rdata,cd) + + cd.onSamplings{ + if(writeDataCounter-writeCmdCounter > 2){ + println("miaou") + } + if(cmdQueue.nonEmpty && Random.nextFloat() < 0.5){ + val cmd = cmdQueue.head + 
if(cmd.we){ + if(wdataQueue.nonEmpty){ + // if(cmd.address == 0xc02ae850l) { + // println(s"! $writeCmdCounter $writeDataCounter") + // } + cmdQueue.dequeue() + val wdata = wdataQueue.dequeue() + val raw = wdata.data.toByteArray + val left = wdata.data.toByteArray.size-1 + if(bmb != null){ + assert(ref.nonEmpty) + assert((ref.head.address & 0xFFFFFFF0l) == cmd.address) + assert(ref.head.data == wdata.data) + assert(ref.head.we == wdata.we) + ref.dequeue() + } + // if(cmd.address == 0xc02ae850l) { + // println(s"$cmd $wdata ${simTime()}") + // } + for(i <- 0 until p.dataWidth/8){ + + + if(((wdata.we >> i) & 1) != 0) { + // if(cmd.address == 0xc02ae850l) { + // println(s"W $i ${ if (left - i >= 0) raw(left - i) else 0}") + // } + ram.write(cmd.address + i, if (left - i >= 0) raw(left - i) else 0) + } + } + } + } else { + cmdQueue.dequeue() + val value = new Array[Byte](p.dataWidth/8+1) + val left = value.size-1 + for(i <- 0 until p.dataWidth/8) { + value(left-i) = ram.read(cmd.address+i) + } + rdataQueue.enqueue(BigInt(value)) + } + } + } + + StreamDriver(bus.rdata, cd){ p => + if(rdataQueue.isEmpty){ + false + } else { + p.data #= rdataQueue.dequeue() + true + } + } + } +} + + + +case class BmbToLiteDram(bmbParameter : BmbParameter, + liteDramParameter : LiteDramNativeParameter, + wdataFifoSize : Int, + rdataFifoSize : Int) extends Component{ + val io = new Bundle { + val input = slave(Bmb(bmbParameter)) + val output = master(LiteDramNative(liteDramParameter)) + } + + val resized = io.input.resize(liteDramParameter.dataWidth) + val unburstified = resized.unburstify() + case class Context() extends Bundle { + val context = Bits(unburstified.p.access.contextWidth bits) + val source = UInt(unburstified.p.access.sourceWidth bits) + val isWrite = Bool() + } + + assert(isPow2(rdataFifoSize)) + val pendingRead = Reg(UInt(log2Up(rdataFifoSize) + 1 bits)) init(0) + + val halt = Bool() + val (cmdFork, dataFork) = StreamFork2(unburstified.cmd.haltWhen(halt)) + val outputCmd = Stream(LiteDramNativeCmd(liteDramParameter)) + outputCmd.arbitrationFrom(cmdFork.haltWhen(pendingRead.msb)) + outputCmd.addr := (cmdFork.address >> log2Up(liteDramParameter.dataWidth/8)).resized + outputCmd.we := cmdFork.isWrite + + io.output.cmd <-< outputCmd + + if(bmbParameter.access.canWrite) { + val wData = Stream(LiteDramNativeWData(liteDramParameter)) + wData.arbitrationFrom(dataFork.throwWhen(dataFork.isRead)) + wData.data := dataFork.data + wData.we := dataFork.mask + io.output.wdata << wData.queueLowLatency(wdataFifoSize, latency = 1) + } else { + dataFork.ready := True + io.output.wdata.valid := False + io.output.wdata.data.assignDontCare() + io.output.wdata.we.assignDontCare() + } + + val cmdContext = Stream(Context()) + cmdContext.valid := unburstified.cmd.fire + cmdContext.context := unburstified.cmd.context + cmdContext.source := unburstified.cmd.source + cmdContext.isWrite := unburstified.cmd.isWrite + halt := !cmdContext.ready + + val rspContext = cmdContext.queue(rdataFifoSize) + val rdataFifo = io.output.rdata.queueLowLatency(rdataFifoSize, latency = 1) + val writeTocken = CounterUpDown( + stateCount = rdataFifoSize*2, + incWhen = io.output.wdata.fire, + decWhen = rspContext.fire && rspContext.isWrite + ) + val canRspWrite = writeTocken =/= 0 + val canRspRead = CombInit(rdataFifo.valid) + + rdataFifo.ready := unburstified.rsp.fire && !rspContext.isWrite + rspContext.ready := unburstified.rsp.fire + unburstified.rsp.valid := rspContext.valid && (rspContext.isWrite ? 
canRspWrite | canRspRead) + unburstified.rsp.setSuccess() + unburstified.rsp.last := True + unburstified.rsp.source := rspContext.source + unburstified.rsp.context := rspContext.context + unburstified.rsp.data := rdataFifo.data + + + pendingRead := pendingRead + U(outputCmd.fire && !outputCmd.we) - U(rdataFifo.fire) +} + +object BmbToLiteDramTester extends App{ + import spinal.core.sim._ + SimConfig.withWave.compile(BmbToLiteDram( + bmbParameter = BmbParameter( + addressWidth = 20, + dataWidth = 32, + lengthWidth = 6, + sourceWidth = 4, + contextWidth = 16 + ), + liteDramParameter = LiteDramNativeParameter( + addressWidth = 20, + dataWidth = 128 + ), + wdataFifoSize = 16, + rdataFifoSize = 16 + )).doSimUntilVoid(seed = 42){dut => + val tester = new BmbMemoryTester(dut.io.input, dut.clockDomain, rspCounterTarget = 3000) + dut.io.output.simSlave(tester.memory.memory, dut.clockDomain) + } +} + +case class BmbToLiteDramGenerator(mapping : AddressMapping)(implicit interconnect : BmbInterconnectGenerator) extends Area{ + val liteDramParameter = Handle[LiteDramNativeParameter] + val bmb = Handle(logic.io.input) + val dram = Handle(logic.io.output.toIo) + + val accessSource = Handle[BmbAccessCapabilities] + val accessRequirements = Handle[BmbAccessParameter] + interconnect.addSlave( + accessSource = accessSource, + accessCapabilities = accessSource, + accessRequirements = accessRequirements, + bus = bmb, + mapping = mapping + ) + val logic = Handle(BmbToLiteDram( + bmbParameter = accessRequirements.toBmbParameter(), + liteDramParameter = liteDramParameter, + wdataFifoSize = 32, + rdataFifoSize = 32 + )) +} + +case class BmbToWishboneGenerator(mapping : AddressMapping)(implicit interconnect : BmbInterconnectGenerator) extends Area{ + val bmb = Handle(logic.io.input) + val wishbone = Handle(logic.io.output) + + val accessSource = Handle[BmbAccessCapabilities] + val accessRequirements = Handle[BmbAccessParameter] + interconnect.addSlave( + accessSource = accessSource, + accessCapabilities = accessSource, + accessRequirements = accessRequirements, + bus = bmb, + mapping = mapping + ) + val logic = Handle(BmbToWishbone( + p = accessRequirements.toBmbParameter() + )) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala new file mode 100644 index 0000000..ec2aa50 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -0,0 +1,748 @@ +package vexriscv.demo.smp + +import spinal.core +import spinal.core._ +import spinal.core.sim.{onSimEnd, simSuccess} +import spinal.lib._ +import spinal.lib.bus.bmb.sim.BmbMemoryAgent +import spinal.lib.bus.bmb._ +import spinal.lib.bus.misc.{DefaultMapping, SizeMapping} +import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig, WishboneToBmb, WishboneToBmbGenerator} +import spinal.lib.com.jtag.{Jtag, JtagInstructionDebuggerGenerator, JtagTapInstructionCtrl} +import spinal.lib.com.jtag.sim.JtagTcp +import spinal.lib.com.jtag.xilinx.Bscane2BmbMasterGenerator +import spinal.lib.generator._ +import spinal.core.fiber._ +import spinal.idslplugin.PostInitCallback +import spinal.lib.misc.plic.PlicMapping +import spinal.lib.system.debugger.SystemDebuggerConfig +import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionCache, InstructionCacheConfig} +import vexriscv.plugin._ +import vexriscv.{Riscv, VexRiscv, VexRiscvBmbGenerator, VexRiscvConfig, plugin} + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer 
+import spinal.lib.generator._ +import vexriscv.ip.fpu.FpuParameter + +case class VexRiscvSmpClusterParameter(cpuConfigs : Seq[VexRiscvConfig], + jtagHeaderIgnoreWidth : Int, + withExclusiveAndInvalidation : Boolean, + forcePeripheralWidth : Boolean = true, + outOfOrderDecoder : Boolean = true, + fpu : Boolean = false) + +class VexRiscvSmpClusterBase(p : VexRiscvSmpClusterParameter) extends Area with PostInitCallback{ + val cpuCount = p.cpuConfigs.size + + val debugCd = ClockDomainResetGenerator() + debugCd.holdDuration.load(4095) + debugCd.makeExternal() + + val systemCd = ClockDomainResetGenerator() + systemCd.holdDuration.load(63) + systemCd.setInput(debugCd) + + + val ctx = systemCd.outputClockDomain.push() + override def postInitCallback(): VexRiscvSmpClusterBase.this.type = { + ctx.restore() + this + } + + implicit val interconnect = BmbInterconnectGenerator() + + val debugBridge = debugCd.outputClockDomain on JtagInstructionDebuggerGenerator(p.jtagHeaderIgnoreWidth) + debugBridge.jtagClockDomain.load(ClockDomain.external("jtag", withReset = false)) + + val debugPort = Handle(debugBridge.logic.jtagBridge.io.ctrl.toIo) + + val dBusCoherent = BmbBridgeGenerator() + val dBusNonCoherent = BmbBridgeGenerator() + + val smp = p.withExclusiveAndInvalidation generate new Area{ + val exclusiveMonitor = BmbExclusiveMonitorGenerator() + interconnect.addConnection(dBusCoherent.bmb, exclusiveMonitor.input) + + val invalidationMonitor = BmbInvalidateMonitorGenerator() + interconnect.addConnection(exclusiveMonitor.output, invalidationMonitor.input) + interconnect.addConnection(invalidationMonitor.output, dBusNonCoherent.bmb) + if(p.outOfOrderDecoder) interconnect.masters(invalidationMonitor.output).withOutOfOrderDecoder() + } + + val noSmp = !p.withExclusiveAndInvalidation generate new Area{ + interconnect.addConnection(dBusCoherent.bmb, dBusNonCoherent.bmb) + } + + val cores = for(cpuId <- 0 until cpuCount) yield new Area{ + val cpu = VexRiscvBmbGenerator() + cpu.config.load(p.cpuConfigs(cpuId)) + interconnect.addConnection( + cpu.dBus -> List(dBusCoherent.bmb) + ) + cpu.enableDebugBmb( + debugCd = debugCd.outputClockDomain, + resetCd = systemCd, + mapping = SizeMapping(cpuId*0x1000, 0x1000) + ) + interconnect.addConnection(debugBridge.bmb, cpu.debugBmb) + } +} + + +class VexRiscvSmpClusterWithPeripherals(p : VexRiscvSmpClusterParameter) extends VexRiscvSmpClusterBase(p) { + val peripheralBridge = BmbToWishboneGenerator(DefaultMapping) + val peripheral = Handle(peripheralBridge.logic.io.output.toIo) + if(p.forcePeripheralWidth) interconnect.slaves(peripheralBridge.bmb).forceAccessSourceDataWidth(32) + + val plic = BmbPlicGenerator()(interconnect = null) + plic.priorityWidth.load(2) + plic.mapping.load(PlicMapping.sifive) + + val plicWishboneBridge = new Generator{ + dependencies += plic.ctrl + + plic.accessRequirements.load(BmbAccessParameter( + addressWidth = 22, + dataWidth = 32 + ).addSources(1, BmbSourceParameter( + contextWidth = 0, + lengthWidth = 2, + alignment = BmbParameter.BurstAlignement.LENGTH + ))) + + val logic = add task new Area{ + val bridge = WishboneToBmb(WishboneConfig(20, 32)) + bridge.io.output >> plic.ctrl + } + } + val plicWishbone = plicWishboneBridge.produceIo(plicWishboneBridge.logic.bridge.io.input) + + val clint = BmbClintGenerator(0)(interconnect = null) + val clintWishboneBridge = new Generator{ + dependencies += clint.ctrl + + clint.accessRequirements.load(BmbAccessParameter( + addressWidth = 16, + dataWidth = 32 + ).addSources(1, BmbSourceParameter( + contextWidth = 
0, + lengthWidth = 2, + alignment = BmbParameter.BurstAlignement.LENGTH + ))) + + val logic = add task new Area{ + val bridge = WishboneToBmb(WishboneConfig(14, 32)) + bridge.io.output >> clint.ctrl + } + } + val clintWishbone = clintWishboneBridge.produceIo(clintWishboneBridge.logic.bridge.io.input) + + val interrupts = in Bits(32 bits) + for(i <- 1 to 31) yield plic.addInterrupt(interrupts(i), i) + + for ((core, cpuId) <- cores.zipWithIndex) { + core.cpu.setTimerInterrupt(clint.timerInterrupt(cpuId)) + core.cpu.setSoftwareInterrupt(clint.softwareInterrupt(cpuId)) + plic.priorityWidth.load(2) + plic.mapping.load(PlicMapping.sifive) + plic.addTarget(core.cpu.externalInterrupt) + plic.addTarget(core.cpu.externalSupervisorInterrupt) + List(clint.logic, core.cpu.logic).produce { + for (plugin <- core.cpu.config.plugins) plugin match { + case plugin: CsrPlugin if plugin.utime != null => plugin.utime := clint.logic.io.time + case _ => + } + } + } + + clint.cpuCount.load(cpuCount) +} + + +object VexRiscvSmpClusterGen { + def vexRiscvConfig(hartId : Int, + ioRange : UInt => Bool = (x => x(31 downto 28) === 0xF), + resetVector : Long = 0x80000000l, + iBusWidth : Int = 128, + dBusWidth : Int = 64, + loadStoreWidth : Int = 32, + coherency : Boolean = true, + atomic : Boolean = true, + iCacheSize : Int = 8192, + dCacheSize : Int = 8192, + iCacheWays : Int = 2, + dCacheWays : Int = 2, + iBusRelax : Boolean = false, + injectorStage : Boolean = false, + earlyBranch : Boolean = false, + earlyShifterInjection : Boolean = true, + dBusCmdMasterPipe : Boolean = false, + withMmu : Boolean = true, + withSupervisor : Boolean = true, + withFloat : Boolean = false, + withDouble : Boolean = false, + externalFpu : Boolean = true, + simHalt : Boolean = false, + decoderIsolationBench : Boolean = false, + decoderStupid : Boolean = false, + regfileRead : RegFileReadKind = plugin.ASYNC, + rvc : Boolean = false, + iTlbSize : Int = 4, + dTlbSize : Int = 4, + prediction : BranchPrediction = vexriscv.plugin.NONE, + withDataCache : Boolean = true, + withInstructionCache : Boolean = true, + forceMisa : Boolean = false, + forceMscratch : Boolean = false + ) = { + assert(iCacheSize/iCacheWays <= 4096, "Instruction cache ways can't be bigger than 4096 bytes") + assert(dCacheSize/dCacheWays <= 4096, "Data cache ways can't be bigger than 4096 bytes") + assert(!(withDouble && !withFloat)) + + val csrConfig = if(withSupervisor){ + CsrPluginConfig.openSbi(mhartid = hartId, misa = Riscv.misaToInt(s"ima${if(withFloat) "f" else ""}${if(withDouble) "d" else ""}s")).copy(utimeAccess = CsrAccess.READ_ONLY) + } else { + CsrPluginConfig( + catchIllegalAccess = true, + mvendorid = null, + marchid = null, + mimpid = null, + mhartid = hartId, + misaExtensionsInit = Riscv.misaToInt(s"ima${if(withFloat) "f" else ""}${if(withDouble) "d" else ""}s"), + misaAccess = if(forceMisa) CsrAccess.WRITE_ONLY else CsrAccess.NONE, + mtvecAccess = CsrAccess.READ_WRITE, + mtvecInit = null, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = forceMscratch, + mcauseAccess = CsrAccess.READ_ONLY, + mbadaddrAccess = CsrAccess.READ_ONLY, + mcycleAccess = CsrAccess.NONE, + minstretAccess = CsrAccess.NONE, + ecallGen = true, + ebreakGen = true, + wfiGenAsWait = false, + wfiGenAsNop = true, + ucycleAccess = CsrAccess.NONE + ) + } + val config = VexRiscvConfig( + plugins = List( + if(withMmu)new MmuPlugin( + ioRange = ioRange + )else new StaticMemoryTranslatorPlugin( + ioRange = ioRange + ), + //Uncomment the whole IBusCachedPlugin and comment IBusSimplePlugin if you 
want cached iBus config + if(withInstructionCache) new IBusCachedPlugin( + resetVector = resetVector, + compressedGen = rvc, + prediction = prediction, + historyRamSizeLog2 = 9, + relaxPredictorAddress = true, + injectorStage = injectorStage, + relaxedPcCalculation = iBusRelax, + config = InstructionCacheConfig( + cacheSize = iCacheSize, + bytePerLine = 64, + wayCount = iCacheWays, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = iBusWidth, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = false, + twoCycleCache = true, + reducedBankWidth = true + ), + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = iTlbSize, + latency = 1, + earlyRequireMmuLockup = true, + earlyCacheHits = true + ) + ) else new IBusSimplePlugin( + resetVector = resetVector, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = rvc, + busLatencyMin = 2, + vecRspBuffer = true + ), + if(withDataCache) new DBusCachedPlugin( + dBusCmdMasterPipe = dBusCmdMasterPipe || dBusWidth == 32, + dBusCmdSlavePipe = true, + dBusRspSlavePipe = true, + relaxedMemoryTranslationRegister = true, + config = new DataCacheConfig( + cacheSize = dCacheSize, + bytePerLine = 64, + wayCount = dCacheWays, + addressWidth = 32, + cpuDataWidth = loadStoreWidth, + memDataWidth = dBusWidth, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true, + withLrSc = atomic, + withAmo = atomic, + withExclusive = coherency, + withInvalidate = coherency, + withWriteAggregation = dBusWidth > 32 + ), + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = dTlbSize, + latency = 1, + earlyRequireMmuLockup = true, + earlyCacheHits = true + ) + ) else new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false, + earlyInjection = false + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true, + decoderIsolationBench = decoderIsolationBench, + stupidDecoder = decoderStupid + ), + new RegFilePlugin( + regFileReadyKind = regfileRead, + zeroBoot = false, + x0Init = true + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false + ), + new FullBarrelShifterPlugin(earlyInjection = earlyShifterInjection), + // new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new MulPlugin, + new MulDivIterativePlugin( + genMul = false, + genDiv = true, + mulUnrollFactor = 32, + divUnrollFactor = 1 + ), + new CsrPlugin(csrConfig), + new BranchPlugin( + earlyBranch = earlyBranch, + catchAddressMisaligned = true, + fenceiGenAsAJump = false + ), + new YamlPlugin(s"cpu$hartId.yaml") + ) + ) + + if(withFloat) config.plugins += new FpuPlugin( + externalFpu = externalFpu, + simHalt = simHalt, + p = FpuParameter(withDouble = withDouble) + ) + config + } + + +// def vexRiscvCluster(cpuCount : Int, resetVector : Long = 0x80000000l) = VexRiscvSmpCluster( +// debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), +// p = VexRiscvSmpClusterParameter( +// cpuConfigs = List.tabulate(cpuCount) { +// vexRiscvConfig(_, resetVector = resetVector) +// } +// ) +// ) +// def main(args: Array[String]): Unit = { +// SpinalVerilog { +// vexRiscvCluster(4) +// } +// } +} +// +// +// +//object VexRiscvSmpClusterTestInfrastructure{ +// val REPORT_OFFSET = 0xF8000000 +// val 
REPORT_THREAD_ID = 0x00 +// val REPORT_THREAD_COUNT = 0x04 +// val REPORT_END = 0x08 +// val REPORT_BARRIER_START = 0x0C +// val REPORT_BARRIER_END = 0x10 +// val REPORT_CONSISTENCY_VALUES = 0x14 +// +// val PUTC = 0x00 +// val GETC = 0x04 +// val CLINT_ADDR = 0x10000 +// val CLINT_IPI_ADDR = CLINT_ADDR+0x0000 +// val CLINT_CMP_ADDR = CLINT_ADDR+0x4000 +// val CLINT_TIME_ADDR = CLINT_ADDR+0xBFF8 +// +// def ram(dut : VexRiscvSmpCluster, withStall : Boolean) = { +// import spinal.core.sim._ +// val cpuCount = dut.cpus.size +// val ram = new BmbMemoryAgent(0x100000000l){ +// case class Report(hart : Int, code : Int, data : Int){ +// override def toString: String = { +// f"CPU:$hart%2d ${code}%3x -> $data%3d" +// } +// } +// val reports = ArrayBuffer.fill(cpuCount)(ArrayBuffer[Report]()) +// +// +// val writeTable = mutable.HashMap[Int, Int => Unit]() +// val readTable = mutable.HashMap[Int, () => Int]() +// def onWrite(address : Int)(body : Int => Unit) = writeTable(address) = body +// def onRead(address : Int)(body : => Int) = readTable(address) = () => body +// +// var writeData = 0 +// var readData = 0 +// var reportWatchdog = 0 +// val cpuEnd = Array.fill(cpuCount)(false) +// val barriers = mutable.HashMap[Int, Int]() +// var consistancyCounter = 0 +// var consistancyLast = 0 +// var consistancyA = 0 +// var consistancyB = 0 +// var consistancyAB = 0 +// var consistancyNone = 0 +// +// onSimEnd{ +// for((list, hart) <- reports.zipWithIndex){ +// println(f"\n\n**** CPU $hart%2d ****") +// for((report, reportId) <- list.zipWithIndex){ +// println(f" $reportId%3d : ${report.code}%3x -> ${report.data}%3d") +// } +// } +// +// println(s"consistancy NONE:$consistancyNone A:$consistancyA B:$consistancyB AB:$consistancyAB") +// } +// +// override def setByte(address: Long, value: Byte): Unit = { +// if((address & 0xF0000000l) != 0xF0000000l) return super.setByte(address, value) +// val byteId = address & 3 +// val mask = 0xFF << (byteId*8) +// writeData = (writeData & ~mask) | ((value.toInt << (byteId*8)) & mask) +// if(byteId != 3) return +// val offset = (address & ~0xF0000000l)-3 +// // println(s"W[0x${offset.toHexString}] = $writeData @${simTime()}") +// offset match { +// case _ if offset >= 0x8000000 && offset < 0x9000000 => { +// val report = Report( +// hart = ((offset & 0xFF0000) >> 16).toInt, +// code = (offset & 0x00FFFF).toInt, +// data = writeData +// ) +//// println(report) +// reports(report.hart) += report +// reportWatchdog += 1 +// import report._ +// code match { +// case REPORT_THREAD_ID => assert(data == hart) +// case REPORT_THREAD_COUNT => assert(data == cpuCount) +// case REPORT_END => assert(data == 0); assert(cpuEnd(hart) == false); cpuEnd(hart) = true; if(!cpuEnd.exists(_ == false)) simSuccess() +// case REPORT_BARRIER_START => { +// val counter = barriers.getOrElse(data, 0) +// assert(counter < cpuCount) +// barriers(data) = counter + 1 +// } +// case REPORT_BARRIER_END => { +// val counter = barriers.getOrElse(data, 0) +// assert(counter == cpuCount) +// } +// case REPORT_CONSISTENCY_VALUES => consistancyCounter match { +// case 0 => { +// consistancyCounter = 1 +// consistancyLast = data +// } +// case 1 => { +// consistancyCounter = 0 +// (data, consistancyLast) match { +// case (666, 0) => consistancyA += 1 +// case (0, 666) => consistancyB += 1 +// case (666, 666) => consistancyAB += 1 +// case (0,0) => consistancyNone += 1; simFailure("Consistancy issue :(") +// } +// } +// } +// } +// } +// case _ => writeTable.get(offset.toInt) match { +// case Some(x) => 
x(writeData) +// case _ => simFailure(f"\n\nWrite at ${address-3}%8x with $writeData%8x") +// } +// } +// } +// +// override def getByte(address: Long): Byte = { +// if((address & 0xF0000000l) != 0xF0000000l) return super.getByte(address) +// val byteId = address & 3 +// val offset = (address & ~0xF0000000l) +// if(byteId == 0) readData = readTable.get(offset.toInt) match { +// case Some(x) => x() +// case _ => simFailure(f"\n\nRead at $address%8x") +// } +// (readData >> (byteId*8)).toByte +// } +// +// val clint = new { +// val cmp = Array.fill(cpuCount)(0l) +// var time = 0l +// periodicaly(100){ +// time += 10 +// var timerInterrupts = 0l +// for(i <- 0 until cpuCount){ +// if(cmp(i) < time) timerInterrupts |= 1l << i +// } +// dut.io.timerInterrupts #= timerInterrupts +// } +// +//// delayed(200*1000000){ +//// dut.io.softwareInterrupts #= 0xE +//// enableSimWave() +//// println("force IPI") +//// } +// } +// +// onWrite(PUTC)(data => print(data.toChar)) +// onRead(GETC)( if(System.in.available() != 0) System.in.read() else -1) +// +// dut.io.softwareInterrupts #= 0 +// dut.io.timerInterrupts #= 0 +// dut.io.externalInterrupts #= 0 +// dut.io.externalSupervisorInterrupts #= 0 +// onRead(CLINT_TIME_ADDR)(clint.time.toInt) +// onRead(CLINT_TIME_ADDR+4)((clint.time >> 32).toInt) +// for(hartId <- 0 until cpuCount){ +// onWrite(CLINT_IPI_ADDR + hartId*4) {data => +// val mask = 1l << hartId +// val value = (dut.io.softwareInterrupts.toLong & ~mask) | (if(data == 1) mask else 0) +// dut.io.softwareInterrupts #= value +// } +//// onRead(CLINT_CMP_ADDR + hartId*8)(clint.cmp(hartId).toInt) +//// onRead(CLINT_CMP_ADDR + hartId*8+4)((clint.cmp(hartId) >> 32).toInt) +// onWrite(CLINT_CMP_ADDR + hartId*8){data => clint.cmp(hartId) = (clint.cmp(hartId) & 0xFFFFFFFF00000000l) | data} +// onWrite(CLINT_CMP_ADDR + hartId*8+4){data => clint.cmp(hartId) = (clint.cmp(hartId) & 0x00000000FFFFFFFFl) | (data.toLong << 32)} +// } +// +// +// +// } +// dut.io.iMems.foreach(ram.addPort(_,0,dut.clockDomain,true, withStall)) +// ram.addPort(dut.io.dMem,0,dut.clockDomain,true, withStall) +// ram +// } +// def init(dut : VexRiscvSmpCluster): Unit ={ +// import spinal.core.sim._ +// dut.clockDomain.forkStimulus(10) +// dut.debugClockDomain.forkStimulus(10) +// dut.io.debugBus.cmd.valid #= false +// } +//} +// +//object VexRiscvSmpClusterTest extends App{ +// import spinal.core.sim._ +// +// val simConfig = SimConfig +// simConfig.withWave +// simConfig.allOptimisation +// simConfig.addSimulatorFlag("--threads 1") +// +// val cpuCount = 4 +// val withStall = true +// +// simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => +// disableSimWave() +// SimTimeout(100000000l*10*cpuCount) +// dut.clockDomain.forkSimSpeedPrinter(1.0) +// VexRiscvSmpClusterTestInfrastructure.init(dut) +// val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall) +// ram.memory.loadBin(0x80000000l, "src/test/cpp/raw/smp/build/smp.bin") +// periodicaly(20000*10){ +// assert(ram.reportWatchdog != 0) +// ram.reportWatchdog = 0 +// } +// } +//} +// +//// echo "echo 10000 | dhrystone >> log" > test +//// time sh test & +//// top -b -n 1 +// +//// TODO +//// MultiChannelFifo.toStream arbitration +//// BmbDecoderOutOfOrder arbitration +//// DataCache to bmb invalidation that are more than single line +//object VexRiscvSmpClusterOpenSbi extends App{ +// import spinal.core.sim._ +// +// val simConfig = SimConfig +// simConfig.withWave +// simConfig.allOptimisation +// 
simConfig.addSimulatorFlag("--threads 1") +// +// val cpuCount = 2 +// val withStall = false +// +// def gen = { +// val dut = VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount, resetVector = 0x80000000l) +// dut.cpus.foreach{cpu => +// cpu.core.children.foreach{ +// case cache : InstructionCache => cache.io.cpu.decode.simPublic() +// case _ => +// } +// } +// dut +// } +// +// simConfig.workspaceName("rawr_4c").compile(gen).doSimUntilVoid(seed = 42){dut => +//// dut.clockDomain.forkSimSpeedPrinter(1.0) +// VexRiscvSmpClusterTestInfrastructure.init(dut) +// val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall) +//// ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_payload.bin") +// +//// ram.memory.loadBin(0x40F00000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/fw_jump.bin") +//// ram.memory.loadBin(0x40000000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/Image") +//// ram.memory.loadBin(0x40EF0000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/dtb") +//// ram.memory.loadBin(0x41000000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/rootfs.cpio") +// +// ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") +// ram.memory.loadBin(0xC0000000l, "../buildroot/output/images/Image") +// ram.memory.loadBin(0xC1000000l, "../buildroot/output/images/dtb") +// ram.memory.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") +// +// import spinal.core.sim._ +// var iMemReadBytes, dMemReadBytes, dMemWriteBytes, iMemSequencial,iMemRequests, iMemPrefetchHit = 0l +// var reportTimer = 0 +// var reportCycle = 0 +// val iMemFetchDelta = mutable.HashMap[Long, Long]() +// var iMemFetchDeltaSorted : Seq[(Long, Long)] = null +// var dMemWrites, dMemWritesCached = 0l +// val dMemWriteCacheCtx = List(4,8,16,32,64).map(bytes => new { +// var counter = 0l +// var address = 0l +// val mask = ~((1 << log2Up(bytes))-1) +// }) +// +// import java.io._ +// val csv = new PrintWriter(new File("bench.csv" )) +// val iMemCtx = Array.tabulate(cpuCount)(i => new { +// var sequencialPrediction = 0l +// val cache = dut.cpus(i).core.children.find(_.isInstanceOf[InstructionCache]).head.asInstanceOf[InstructionCache].io.cpu.decode +// var lastAddress = 0l +// }) +// dut.clockDomain.onSamplings{ +// dut.io.time #= simTime()/10 +// +// +// for(i <- 0 until cpuCount; iMem = dut.io.iMems(i); ctx = iMemCtx(i)){ +//// if(iMem.cmd.valid.toBoolean && iMem.cmd.ready.toBoolean){ +//// val length = iMem.cmd.length.toInt + 1 +//// val address = iMem.cmd.address.toLong +//// iMemReadBytes += length +//// iMemRequests += 1 +//// } +// if(ctx.cache.isValid.toBoolean && !ctx.cache.mmuRefilling.toBoolean && !ctx.cache.mmuException.toBoolean){ +// val address = ctx.cache.physicalAddress.toLong +// val length = ctx.cache.p.bytePerLine.toLong +// val mask = ~(length-1) +// if(ctx.cache.cacheMiss.toBoolean) { +// iMemReadBytes += length +// if ((address & mask) == (ctx.sequencialPrediction & mask)) { +// iMemSequencial += 1 +// } +// } +// if(!ctx.cache.isStuck.toBoolean) { +// ctx.sequencialPrediction = address + length +// } +// } +// +// if(iMem.cmd.valid.toBoolean && iMem.cmd.ready.toBoolean){ +// val address = iMem.cmd.address.toLong +// iMemRequests += 1 +// if(iMemCtx(i).lastAddress + ctx.cache.p.bytePerLine == address){ +// iMemPrefetchHit += 1 +// } +// val delta = address-iMemCtx(i).lastAddress +// iMemFetchDelta(delta) = iMemFetchDelta.getOrElse(delta, 0l) + 1l +// if(iMemRequests % 1000 
== 999) iMemFetchDeltaSorted = iMemFetchDelta.toSeq.sortBy(_._1) +// iMemCtx(i).lastAddress = address +// } +// } +// if(dut.io.dMem.cmd.valid.toBoolean && dut.io.dMem.cmd.ready.toBoolean){ +// if(dut.io.dMem.cmd.opcode.toInt == Bmb.Cmd.Opcode.WRITE){ +// dMemWriteBytes += dut.io.dMem.cmd.length.toInt+1 +// val address = dut.io.dMem.cmd.address.toLong +// dMemWrites += 1 +// for(ctx <- dMemWriteCacheCtx){ +// if((address & ctx.mask) == (ctx.address & ctx.mask)){ +// ctx.counter += 1 +// } else { +// ctx.address = address +// } +// } +// }else { +// dMemReadBytes += dut.io.dMem.cmd.length.toInt+1 +// for(ctx <- dMemWriteCacheCtx) ctx.address = -1 +// } +// } +// reportTimer = reportTimer + 1 +// reportCycle = reportCycle + 1 +// if(reportTimer == 400000){ +// reportTimer = 0 +//// println(f"\n** c=${reportCycle} ir=${iMemReadBytes*1e-6}%5.2f dr=${dMemReadBytes*1e-6}%5.2f dw=${dMemWriteBytes*1e-6}%5.2f **\n") +// +// +// csv.write(s"$reportCycle,$iMemReadBytes,$dMemReadBytes,$dMemWriteBytes,$iMemRequests,$iMemSequencial,$dMemWrites,${dMemWriteCacheCtx.map(_.counter).mkString(",")},$iMemPrefetchHit\n") +// csv.flush() +// reportCycle = 0 +// iMemReadBytes = 0 +// dMemReadBytes = 0 +// dMemWriteBytes = 0 +// iMemRequests = 0 +// iMemSequencial = 0 +// dMemWrites = 0 +// iMemPrefetchHit = 0 +// for(ctx <- dMemWriteCacheCtx) ctx.counter = 0 +// } +// } +// +// +//// fork{ +//// disableSimWave() +//// val atMs = 3790 +//// val durationMs = 5 +//// sleep(atMs*1000000) +//// enableSimWave() +//// println("** enableSimWave **") +//// sleep(durationMs*1000000) +//// println("** disableSimWave **") +//// while(true) { +//// disableSimWave() +//// sleep(100000 * 10) +//// enableSimWave() +//// sleep( 100 * 10) +//// } +////// simSuccess() +//// } +// +// fork{ +// while(true) { +// disableSimWave() +// sleep(100000 * 10) +// enableSimWave() +// sleep( 100 * 10) +// } +// } +// } +//} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala new file mode 100644 index 0000000..4cd4917 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -0,0 +1,322 @@ +package vexriscv.demo.smp + +import spinal.core._ +import spinal.core.fiber._ +import spinal.lib.bus.bmb._ +import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} +import spinal.lib.bus.wishbone.{WishboneConfig, WishboneToBmbGenerator} +import spinal.lib.generator.GeneratorComponent +import spinal.lib.sim.SparseMemory +import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig +import vexriscv.ip.fpu.{FpuCore, FpuParameter} +import vexriscv.plugin.{AesPlugin, DBusCachedPlugin, FpuPlugin} + + +case class VexRiscvLitexSmpClusterParameter( cluster : VexRiscvSmpClusterParameter, + liteDram : LiteDramNativeParameter, + liteDramMapping : AddressMapping, + coherentDma : Boolean, + wishboneMemory : Boolean, + cpuPerFpu : Int) + + +class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter) extends VexRiscvSmpClusterWithPeripherals(p.cluster) { + val iArbiter = BmbBridgeGenerator() + val iBridge = !p.wishboneMemory generate BmbToLiteDramGenerator(p.liteDramMapping) + val dBridge = !p.wishboneMemory generate BmbToLiteDramGenerator(p.liteDramMapping) + + for(core <- cores) interconnect.addConnection(core.cpu.iBus -> List(iArbiter.bmb)) + !p.wishboneMemory generate interconnect.addConnection( + iArbiter.bmb -> List(iBridge.bmb), + dBusNonCoherent.bmb -> List(dBridge.bmb) + ) + 
interconnect.addConnection( + iArbiter.bmb -> List(peripheralBridge.bmb), + dBusNonCoherent.bmb -> List(peripheralBridge.bmb) + ) + + val fpuGroups = (cores.reverse.grouped(p.cpuPerFpu)).toList.reverse + val fpu = p.cluster.fpu generate { for(group <- fpuGroups) yield new Area{ + val extraStage = group.size > 2 + + val logic = Handle{ + new FpuCore( + portCount = group.size, + p = FpuParameter( + withDouble = true, + asyncRegFile = false, + schedulerM2sPipe = extraStage + ) + ) + } + + val connect = Handle{ + for(i <- 0 until group.size; + vex = group(i).cpu.logic.cpu; + port = logic.io.port(i)) { + val plugin = vex.service(classOf[FpuPlugin]) + plugin.port.cmd.pipelined(m2s = false, s2m = false) >> port.cmd + plugin.port.commit.pipelined(m2s = extraStage, s2m = false) >> port.commit + plugin.port.completion := port.completion.m2sPipe() + plugin.port.rsp << port.rsp + } + } + }} + + if(p.cluster.withExclusiveAndInvalidation) interconnect.masters(dBusNonCoherent.bmb).withOutOfOrderDecoder() + + if(!p.wishboneMemory) { + dBridge.liteDramParameter.load(p.liteDram) + iBridge.liteDramParameter.load(p.liteDram) + } + + // Coherent DMA interface + val dma = p.coherentDma generate new Area { + val bridge = WishboneToBmbGenerator() + val wishbone = Handle(bridge.logic.io.input.toIo) + val dataWidth = p.cluster.cpuConfigs.head.find(classOf[DBusCachedPlugin]).get.config.memDataWidth + bridge.config.load(WishboneConfig( + addressWidth = 32 - log2Up(dataWidth / 8), + dataWidth = dataWidth, + useSTALL = true, + selWidth = dataWidth/8 + )) + interconnect.addConnection(bridge.bmb, dBusCoherent.bmb) + } + + // Interconnect pipelining (FMax) + for(core <- cores) { + interconnect.setPipelining(core.cpu.dBus)(cmdValid = true, cmdReady = true, rspValid = true, invValid = true, ackValid = true, syncValid = true) + interconnect.setPipelining(core.cpu.iBus)(cmdHalfRate = true, rspValid = true) + interconnect.setPipelining(iArbiter.bmb)(cmdHalfRate = true, rspValid = true) + } + interconnect.setPipelining(dBusCoherent.bmb)(cmdValid = true, cmdReady = true) + interconnect.setPipelining(dBusNonCoherent.bmb)(cmdValid = true, cmdReady = true, rspValid = true) + interconnect.setPipelining(peripheralBridge.bmb)(cmdHalfRate = !p.wishboneMemory, cmdValid = p.wishboneMemory, cmdReady = p.wishboneMemory, rspValid = true) + if(!p.wishboneMemory) { + interconnect.setPipelining(iBridge.bmb)(cmdHalfRate = true) + interconnect.setPipelining(dBridge.bmb)(cmdReady = true) + } +} + + +object VexRiscvLitexSmpClusterCmdGen extends App { + var cpuCount = 1 + var iBusWidth = 64 + var dBusWidth = 64 + var iCacheSize = 8192 + var dCacheSize = 8192 + var iCacheWays = 2 + var dCacheWays = 2 + var liteDramWidth = 128 + var coherentDma = false + var wishboneMemory = false + var outOfOrderDecoder = true + var aesInstruction = false + var fpu = false + var cpuPerFpu = 4 + var rvc = false + var netlistDirectory = "." 
+ var netlistName = "VexRiscvLitexSmpCluster" + var iTlbSize = 4 + var dTlbSize = 4 + assert(new scopt.OptionParser[Unit]("VexRiscvLitexSmpClusterCmdGen") { + help("help").text("prints this usage text") + opt[Unit]("coherent-dma") action { (v, c) => coherentDma = true } + opt[String]("cpu-count") action { (v, c) => cpuCount = v.toInt } + opt[String]("ibus-width") action { (v, c) => iBusWidth = v.toInt } + opt[String]("dbus-width") action { (v, c) => dBusWidth = v.toInt } + opt[String]("icache-size") action { (v, c) => iCacheSize = v.toInt } + opt[String]("dcache-size") action { (v, c) => dCacheSize = v.toInt } + opt[String]("icache-ways") action { (v, c) => iCacheWays = v.toInt } + opt[String]("dcache-ways") action { (v, c) => dCacheWays = v.toInt } + opt[String]("litedram-width") action { (v, c) => liteDramWidth = v.toInt } + opt[String]("netlist-directory") action { (v, c) => netlistDirectory = v } + opt[String]("netlist-name") action { (v, c) => netlistName = v } + opt[String]("aes-instruction") action { (v, c) => aesInstruction = v.toBoolean } + opt[String]("out-of-order-decoder") action { (v, c) => outOfOrderDecoder = v.toBoolean } + opt[String]("wishbone-memory" ) action { (v, c) => wishboneMemory = v.toBoolean } + opt[String]("fpu" ) action { (v, c) => fpu = v.toBoolean } + opt[String]("cpu-per-fpu") action { (v, c) => cpuPerFpu = v.toInt } + opt[String]("rvc") action { (v, c) => rvc = v.toBoolean } + opt[String]("itlb-size") action { (v, c) => iTlbSize = v.toInt } + opt[String]("dtlb-size") action { (v, c) => dTlbSize = v.toInt } + }.parse(args)) + + val coherency = coherentDma || cpuCount > 1 + def parameter = VexRiscvLitexSmpClusterParameter( + cluster = VexRiscvSmpClusterParameter( + cpuConfigs = List.tabulate(cpuCount) { hartId => { + val c = vexRiscvConfig( + hartId = hartId, + ioRange = address => address.msb, + resetVector = 0, + iBusWidth = iBusWidth, + dBusWidth = dBusWidth, + iCacheSize = iCacheSize, + dCacheSize = dCacheSize, + iCacheWays = iCacheWays, + dCacheWays = dCacheWays, + coherency = coherency, + iBusRelax = true, + earlyBranch = true, + withFloat = fpu, + withDouble = fpu, + externalFpu = fpu, + loadStoreWidth = if(fpu) 64 else 32, + rvc = rvc, + injectorStage = rvc, + iTlbSize = iTlbSize, + dTlbSize = dTlbSize + ) + if(aesInstruction) c.add(new AesPlugin) + c + }}, + withExclusiveAndInvalidation = coherency, + forcePeripheralWidth = !wishboneMemory, + outOfOrderDecoder = outOfOrderDecoder, + fpu = fpu, + jtagHeaderIgnoreWidth = 0 + ), + liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = liteDramWidth), + liteDramMapping = SizeMapping(0x40000000l, 0x40000000l), + coherentDma = coherentDma, + wishboneMemory = wishboneMemory, + cpuPerFpu = cpuPerFpu + ) + + def dutGen = { + val toplevel = new Component { + val body = new VexRiscvLitexSmpCluster( + p = parameter + ) + body.setName("") + } + toplevel + } + + val genConfig = SpinalConfig(targetDirectory = netlistDirectory, inlineRom = true).addStandardMemBlackboxing(blackboxByteEnables) + genConfig.generateVerilog(dutGen.setDefinitionName(netlistName)) + +} + + +//object VexRiscvLitexSmpClusterGen extends App { +// for(cpuCount <- List(1,2,4,8)) { +// def parameter = VexRiscvLitexSmpClusterParameter( +// cluster = VexRiscvSmpClusterParameter( +// cpuConfigs = List.tabulate(cpuCount) { hartId => +// vexRiscvConfig( +// hartId = hartId, +// ioRange = address => address.msb, +// resetVector = 0 +// ) +// }, +// withExclusiveAndInvalidation = true +// ), +// liteDram = LiteDramNativeParameter(addressWidth 
= 32, dataWidth = 128), +// liteDramMapping = SizeMapping(0x40000000l, 0x40000000l), +// coherentDma = false +// ) +// +// def dutGen = { +// val toplevel = new VexRiscvLitexSmpCluster( +// p = parameter +// ).toComponent() +// toplevel +// } +// +// val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) +// // genConfig.generateVerilog(Bench.compressIo(dutGen)) +// genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpCluster_${cpuCount}c")) +// } +//} + +////addAttribute("""mark_debug = "true"""") +object VexRiscvLitexSmpClusterOpenSbi extends App{ + import spinal.core.sim._ + + val simConfig = SimConfig + simConfig.withWave + simConfig.allOptimisation + + val cpuCount = 2 + + def parameter = VexRiscvLitexSmpClusterParameter( + cluster = VexRiscvSmpClusterParameter( + cpuConfigs = List.tabulate(cpuCount) { hartId => + vexRiscvConfig( + hartId = hartId, + ioRange = address => address(31 downto 28) === 0xF, + resetVector = 0x80000000l + ) + }, + withExclusiveAndInvalidation = true, + jtagHeaderIgnoreWidth = 0 + ), + liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), + liteDramMapping = SizeMapping(0x80000000l, 0x70000000l), + coherentDma = false, + wishboneMemory = false, + cpuPerFpu = 4 + ) + + def dutGen = { + import GeneratorComponent.toGenerator + val top = new Component { + val body = new VexRiscvLitexSmpCluster( + p = parameter + ) + } + top.rework{ + top.body.clintWishbone.setAsDirectionLess.allowDirectionLessIo + top.body.peripheral.setAsDirectionLess.allowDirectionLessIo.simPublic() + + val hit = (top.body.peripheral.ADR <<2 >= 0xF0010000l && top.body.peripheral.ADR<<2 < 0xF0020000l) + top.body.clintWishbone.CYC := top.body.peripheral.CYC && hit + top.body.clintWishbone.STB := top.body.peripheral.STB + top.body.clintWishbone.WE := top.body.peripheral.WE + top.body.clintWishbone.ADR := top.body.peripheral.ADR.resized + top.body.clintWishbone.DAT_MOSI := top.body.peripheral.DAT_MOSI + top.body.peripheral.DAT_MISO := top.body.clintWishbone.DAT_MISO + top.body.peripheral.ACK := top.body.peripheral.CYC && (!hit || top.body.clintWishbone.ACK) + top.body.peripheral.ERR := False + } + top + } + + simConfig.compile(dutGen).doSimUntilVoid(seed = 42){dut => + dut.body.debugCd.inputClockDomain.get.forkStimulus(10) + + val ram = SparseMemory() + ram.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") + ram.loadBin(0xC0000000l, "../buildroot/output/images/Image") + ram.loadBin(0xC1000000l, "../buildroot/output/images/dtb") + ram.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") + + + dut.body.iBridge.dram.simSlave(ram, dut.body.debugCd.inputClockDomain) + dut.body.dBridge.dram.simSlave(ram, dut.body.debugCd.inputClockDomain/*, dut.body.dMemBridge.unburstified*/) + + dut.body.interrupts #= 0 + + dut.body.debugCd.inputClockDomain.get.onFallingEdges{ + if(dut.body.peripheral.CYC.toBoolean){ + (dut.body.peripheral.ADR.toLong << 2) match { + case 0xF0000000l => print(dut.body.peripheral.DAT_MOSI.toLong.toChar) + case 0xF0000004l => dut.body.peripheral.DAT_MISO #= (if(System.in.available() != 0) System.in.read() else 0xFFFFFFFFl) + case _ => + } + } + } + + fork{ + while(true) { + disableSimWave() + sleep(100000 * 10) + enableSimWave() + sleep( 100 * 10) + } + } + } +}
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala b/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala new file mode 100644 index 0000000..e662dfe --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala @@ -0,0 +1,409 @@ +package vexriscv.demo.smp + +import spinal.core._ +import spinal.lib.bus.bmb._ +import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} +import spinal.lib.bus.wishbone.{WishboneConfig, WishboneToBmbGenerator} +import spinal.lib.sim.SparseMemory +import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig + +//case class VexRiscvLitexSmpMpClusterParameter( cluster : VexRiscvSmpClusterParameter, +// liteDram : LiteDramNativeParameter, +// liteDramMapping : AddressMapping) +// +//class VexRiscvLitexSmpMpCluster(p : VexRiscvLitexSmpMpClusterParameter) extends VexRiscvSmpClusterWithPeripherals(p.cluster) { +// val iArbiter = BmbBridgeGenerator() +// val iBridge = BmbToLiteDramGenerator(p.liteDramMapping) +// val dBridge = BmbToLiteDramGenerator(p.liteDramMapping) +// +// for(core <- cores) interconnect.addConnection(core.cpu.iBus -> List(iArbiter.bmb)) +// interconnect.addConnection( +// iArbiter.bmb -> List(iBridge.bmb, peripheralBridge.bmb), +// invalidationMonitor.output -> List(dBridge.bmb, peripheralBridge.bmb) +// ) +// interconnect.masters(invalidationMonitor.output).withOutOfOrderDecoder() +// +// dBridge.liteDramParameter.load(p.liteDram) +// iBridge.liteDramParameter.load(p.liteDram) +// +// // Interconnect pipelining (FMax) +// for(core <- cores) { +// interconnect.setPipelining(core.cpu.dBus)(cmdValid = true, cmdReady = true, rspValid = true) +// interconnect.setPipelining(core.cpu.iBus)(cmdHalfRate = true, rspValid = true) +// interconnect.setPipelining(iArbiter.bmb)(cmdHalfRate = true, rspValid = true) +// } +// interconnect.setPipelining(invalidationMonitor.output)(cmdValid = true, cmdReady = true, rspValid = true) +// interconnect.setPipelining(peripheralBridge.bmb)(cmdHalfRate = true, rspValid = true) +//} +// +// +//object VexRiscvLitexSmpMpClusterGen extends App { +// for(cpuCount <- List(1,2,4,8)) { +// def parameter = VexRiscvLitexSmpMpClusterParameter( +// cluster = VexRiscvSmpClusterParameter( +// cpuConfigs = List.tabulate(cpuCount) { hartId => +// vexRiscvConfig( +// hartId = hartId, +// ioRange = address => address.msb, +// resetVector = 0 +// ) +// } +// ), +// liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), +// liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) +// ) +// +// def dutGen = { +// val toplevel = new VexRiscvLitexSmpMpCluster( +// p = parameter +// ).toComponent() +// toplevel +// } +// +// val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) +// // genConfig.generateVerilog(Bench.compressIo(dutGen)) +// genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpMpCluster_${cpuCount}c")) +// } +//} + + + +// +////addAttribute("""mark_debug = "true"""") +//class VexRiscvLitexSmpMpCluster(val p : VexRiscvLitexSmpMpClusterParameter, +// val debugClockDomain : ClockDomain, +// val jtagClockDomain : ClockDomain) extends Component{ +// +// val peripheralWishboneConfig = WishboneConfig( +// addressWidth = 30, +// dataWidth = 32, +// selWidth = 4, +// useERR = true, +// useBTE = true, +// useCTI = true +// ) +// +// val cpuCount = p.cluster.cpuConfigs.size +// +// val io = new Bundle { +// val dMem = Vec(master(LiteDramNative(p.liteDram)), 
cpuCount) +// val iMem = Vec(master(LiteDramNative(p.liteDram)), cpuCount) +// val peripheral = master(Wishbone(peripheralWishboneConfig)) +// val clint = slave(Wishbone(Clint.getWisboneConfig())) +// val plic = slave(Wishbone(WishboneConfig(addressWidth = 20, dataWidth = 32))) +// val interrupts = in Bits(32 bits) +// val jtagInstruction = slave(JtagTapInstructionCtrl()) +// val debugReset = out Bool() +// } +// val clint = Clint(cpuCount) +// clint.driveFrom(WishboneSlaveFactory(io.clint)) +// +// val cluster = VexRiscvSmpCluster(p.cluster, debugClockDomain) +// cluster.io.debugReset <> io.debugReset +// cluster.io.timerInterrupts <> B(clint.harts.map(_.timerInterrupt)) +// cluster.io.softwareInterrupts <> B(clint.harts.map(_.softwareInterrupt)) +// cluster.io.time := clint.time +// +// val debug = debugClockDomain on new Area{ +// val jtagConfig = SystemDebuggerConfig() +// +// val jtagBridge = new JtagBridgeNoTap(jtagConfig, jtagClockDomain) +// jtagBridge.io.ctrl << io.jtagInstruction +// +// val debugger = new SystemDebugger(jtagConfig) +// debugger.io.remote <> jtagBridge.io.remote +// +// cluster.io.debugBus << debugger.io.mem.toBmb() +// +//// io.jtagInstruction.allowDirectionLessIo.setAsDirectionLess +//// val bridge = Bscane2BmbMaster(1) +//// cluster.io.debugBus << bridge.io.bmb +// +// +//// val bscane2 = BSCANE2(usedId) +//// val jtagClockDomain = ClockDomain(bscane2.TCK) +//// +//// val jtagBridge = new JtagBridgeNoTap(jtagConfig, jtagClockDomain) +//// jtagBridge.io.ctrl << bscane2.toJtagTapInstructionCtrl() +//// +//// val debugger = new SystemDebugger(jtagConfig) +//// debugger.io.remote <> jtagBridge.io.remote +//// +//// io.bmb << debugger.io.mem.toBmb() +// } +// +// val dBusDecoder = BmbDecoderOutOfOrder( +// p = cluster.io.dMem.p, +// mappings = Seq(DefaultMapping, p.liteDramMapping), +// capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p), +// pendingRspTransactionMax = 32 +// ) +//// val dBusDecoder = BmbDecoderOut( +//// p = cluster.io.dMem.p, +//// mappings = Seq(DefaultMapping, p.liteDramMapping), +//// capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p), +//// pendingMax = 31 +//// ) +// dBusDecoder.io.input << cluster.io.dMem.pipelined(cmdValid = true, cmdReady = true, rspValid = true) +// +// +// val perIBus = for(id <- 0 until cpuCount) yield new Area{ +// val decoder = BmbDecoder( +// p = cluster.io.iMems(id).p, +// mappings = Seq(DefaultMapping, p.liteDramMapping), +// capabilities = Seq(cluster.io.iMems(id).p,cluster.io.iMems(id).p), +// pendingMax = 15 +// ) +// +// decoder.io.input << cluster.io.iMems(id) +// io.iMem(id).fromBmb(decoder.io.outputs(1).pipelined(cmdHalfRate = true), wdataFifoSize = 0, rdataFifoSize = 32) +// val toPeripheral = decoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) +// } +// +// val dBusDecoderToPeripheral = dBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) +// +// val peripheralAccessLength = Math.max(perIBus(0).toPeripheral.p.lengthWidth, dBusDecoder.io.outputs(0).p.lengthWidth) +// val peripheralArbiter = BmbArbiter( +// p = dBusDecoder.io.outputs(0).p.copy( +// sourceWidth = List(perIBus(0).toPeripheral, dBusDecoderToPeripheral).map(_.p.sourceWidth).max + log2Up(cpuCount + 1), +// contextWidth = List(perIBus(0).toPeripheral, dBusDecoderToPeripheral).map(_.p.contextWidth).max, +// lengthWidth = peripheralAccessLength, +// dataWidth = 32 +// ), +// portCount = cpuCount+1, +// lowerFirstPriority = true +// ) +// +// for(id <- 0 
until cpuCount){ +// peripheralArbiter.io.inputs(id) << perIBus(id).toPeripheral +// } +// peripheralArbiter.io.inputs(cpuCount) << dBusDecoderToPeripheral +// +// val peripheralWishbone = peripheralArbiter.io.output.pipelined(cmdValid = true).toWishbone() +// io.peripheral << peripheralWishbone +// +// +// val dBusDemux = BmbSourceDecoder(dBusDecoder.io.outputs(1).p) +// dBusDemux.io.input << dBusDecoder.io.outputs(1).pipelined(cmdValid = true, cmdReady = true,rspValid = true) +// val dMemBridge = for(id <- 0 until cpuCount) yield { +// io.dMem(id).fromBmb(dBusDemux.io.outputs(id), wdataFifoSize = 32, rdataFifoSize = 32) +// } +// +// +// val plic = new Area{ +// val priorityWidth = 2 +// +// val gateways = for(i <- 1 until 32) yield PlicGatewayActiveHigh( +// source = io.interrupts(i), +// id = i, +// priorityWidth = priorityWidth +// ) +// +// val bus = WishboneSlaveFactory(io.plic) +// +// val targets = for(i <- 0 until cpuCount) yield new Area{ +// val machine = PlicTarget( +// gateways = gateways, +// priorityWidth = priorityWidth +// ) +// val supervisor = PlicTarget( +// gateways = gateways, +// priorityWidth = priorityWidth +// ) +// +// cluster.io.externalInterrupts(i) := machine.iep +// cluster.io.externalSupervisorInterrupts(i) := supervisor.iep +// } +// +// val bridge = PlicMapper(bus, PlicMapping.sifive)( +// gateways = gateways, +// targets = targets.flatMap(t => List(t.machine, t.supervisor)) +// ) +// } +//// +//// io.dMem.foreach(_.cmd.valid.addAttribute("""mark_debug = "true"""")) +//// io.dMem.foreach(_.cmd.ready.addAttribute("""mark_debug = "true"""")) +//// io.iMem.foreach(_.cmd.valid.addAttribute("""mark_debug = "true"""")) +//// io.iMem.foreach(_.cmd.ready.addAttribute("""mark_debug = "true"""")) +//// +//// cluster.io.dMem.cmd.valid.addAttribute("""mark_debug = "true"""") +//// cluster.io.dMem.cmd.ready.addAttribute("""mark_debug = "true"""") +//// cluster.io.dMem.rsp.valid.addAttribute("""mark_debug = "true"""") +//// cluster.io.dMem.rsp.ready.addAttribute("""mark_debug = "true"""") +//} +// +//object VexRiscvLitexSmpMpClusterGen extends App { +// for(cpuCount <- List(1,2,4,8)) { +// def parameter = VexRiscvLitexSmpMpClusterParameter( +// cluster = VexRiscvSmpClusterParameter( +// cpuConfigs = List.tabulate(cpuCount) { hartId => +// vexRiscvConfig( +// hartId = hartId, +// ioRange = address => address.msb, +// resetVector = 0 +// ) +// } +// ), +// liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), +// liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) +// ) +// +// def dutGen = { +// val toplevel = new VexRiscvLitexSmpMpCluster( +// p = parameter, +// debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), +// jtagClockDomain = ClockDomain.external("jtag", withReset = false) +// ) +// toplevel +// } +// +// val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) +// // genConfig.generateVerilog(Bench.compressIo(dutGen)) +// genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpMpCluster_${cpuCount}c")) +// } +// +//} +// +// +//object VexRiscvLitexSmpMpClusterOpenSbi extends App{ +// import spinal.core.sim._ +// +// val simConfig = SimConfig +// simConfig.withWave +// simConfig.withFstWave +// simConfig.allOptimisation +// +// val cpuCount = 2 +// +// def parameter = VexRiscvLitexSmpMpClusterParameter( +// cluster = VexRiscvSmpClusterParameter( +// cpuConfigs = List.tabulate(cpuCount) { hartId => +// vexRiscvConfig( +// hartId = hartId, +// ioRange = address => address(31 
downto 28) === 0xF, +// resetVector = 0x80000000l +// ) +// } +// ), +// liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), +// liteDramMapping = SizeMapping(0x80000000l, 0x70000000l) +// ) +// +// def dutGen = { +// val top = new VexRiscvLitexSmpMpCluster( +// p = parameter, +// debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), +// jtagClockDomain = ClockDomain.external("jtag", withReset = false) +// ){ +// io.jtagInstruction.allowDirectionLessIo.setAsDirectionLess +// val jtag = slave(Jtag()) +// jtagClockDomain.readClockWire.setAsDirectionLess() := jtag.tck +// val jtagLogic = jtagClockDomain on new Area{ +// val tap = new JtagTap(jtag, 4) +// val idcodeArea = tap.idcode(B"x10001FFF")(1) +// val wrapper = tap.map(io.jtagInstruction, instructionId = 2) +// } +// } +// top.rework{ +// top.io.clint.setAsDirectionLess.allowDirectionLessIo +// top.io.peripheral.setAsDirectionLess.allowDirectionLessIo.simPublic() +// +// val hit = (top.io.peripheral.ADR <<2 >= 0xF0010000l && top.io.peripheral.ADR<<2 < 0xF0020000l) +// top.io.clint.CYC := top.io.peripheral.CYC && hit +// top.io.clint.STB := top.io.peripheral.STB +// top.io.clint.WE := top.io.peripheral.WE +// top.io.clint.ADR := top.io.peripheral.ADR.resized +// top.io.clint.DAT_MOSI := top.io.peripheral.DAT_MOSI +// top.io.peripheral.DAT_MISO := top.io.clint.DAT_MISO +// top.io.peripheral.ACK := top.io.peripheral.CYC && (!hit || top.io.clint.ACK) +// top.io.peripheral.ERR := False +// +//// top.dMemBridge.unburstified.cmd.simPublic() +// } +// top +// } +// simConfig.compile(dutGen).doSimUntilVoid(seed = 42){dut => +// dut.clockDomain.forkStimulus(10) +// fork { +// dut.debugClockDomain.resetSim #= false +// sleep (0) +// dut.debugClockDomain.resetSim #= true +// sleep (10) +// dut.debugClockDomain.resetSim #= false +// } +// +// JtagTcp(dut.jtag, 10*20) +// +// val ram = SparseMemory() +// ram.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") +// ram.loadBin(0xC0000000l, "../buildroot/output/images/Image") +// ram.loadBin(0xC1000000l, "../buildroot/output/images/dtb") +// ram.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") +// +// for(id <- 0 until cpuCount) { +// dut.io.iMem(id).simSlave(ram, dut.clockDomain) +// dut.io.dMem(id).simSlave(ram, dut.clockDomain) +// } +// +// dut.io.interrupts #= 0 +// +// +//// val stdin = mutable.Queue[Byte]() +//// def stdInPush(str : String) = stdin ++= str.toCharArray.map(_.toByte) +//// fork{ +//// sleep(4000*1000000l) +//// stdInPush("root\n") +//// sleep(1000*1000000l) +//// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +//// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +//// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +//// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +//// sleep(500*1000000l) +//// while(true){ +//// sleep(500*1000000l) +//// stdInPush("uptime\n") +//// printf("\n** uptime **") +//// } +//// } +// dut.clockDomain.onFallingEdges { +// if (dut.io.peripheral.CYC.toBoolean) { +// (dut.io.peripheral.ADR.toLong << 2) match { +// case 0xF0000000l => print(dut.io.peripheral.DAT_MOSI.toLong.toChar) +// case 0xF0000004l => dut.io.peripheral.DAT_MISO #= (if (System.in.available() != 0) System.in.read() else 0xFFFFFFFFl) +// case _ => +// // case 0xF0000004l => { +// // val c = if(stdin.nonEmpty) { +// // stdin.dequeue().toInt & 0xFF +// // } else { +// // 0xFFFFFFFFl +// // } +// // dut.io.peripheral.DAT_MISO #= c +// // } +// // case _ => +// // } +// 
// println(f"${dut.io.peripheral.ADR.toLong}%x") +// } +// } +// } +// +// fork{ +// val at = 0 +// val duration = 1000 +// while(simTime() < at*1000000l) { +// disableSimWave() +// sleep(100000 * 10) +// enableSimWave() +// sleep( 200 * 10) +// } +// println("\n\n********************") +// sleep(duration*1000000l) +// println("********************\n\n") +// while(true) { +// disableSimWave() +// sleep(100000 * 10) +// enableSimWave() +// sleep( 400 * 10) +// } +// } +// } +//}
\ No newline at end of file |
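
Note: below is a minimal, self-contained sketch of how the `LiteDramNative.fromBmb` helper added in `Misc.scala` can be used on its own. It mirrors the bus parameters of `BmbToLiteDramTester` and the 32/32 FIFO depths used by `BmbToLiteDramGenerator`; `ExampleBmbToDram` and `ExampleBmbToDramVerilog` are hypothetical names that are not part of this patch, and the widths are only illustrative.

```scala
import spinal.core._
import spinal.lib._
import spinal.lib.bus.bmb.{Bmb, BmbParameter}
import vexriscv.demo.smp.{LiteDramNative, LiteDramNativeParameter}

// Hypothetical wrapper (not in the patch): expose a BMB slave and a LiteDRAM
// native master, and let LiteDramNative.fromBmb() instantiate a BmbToLiteDram
// bridge in between, as BmbToLiteDramGenerator does for the LiteX cluster.
case class ExampleBmbToDram() extends Component {
  val io = new Bundle {
    val bmb = slave(Bmb(BmbParameter(
      addressWidth = 32, // illustrative values, mirroring BmbToLiteDramTester
      dataWidth    = 32,
      lengthWidth  = 6,
      sourceWidth  = 4,
      contextWidth = 16
    )))
    val dram = master(LiteDramNative(LiteDramNativeParameter(addressWidth = 32, dataWidth = 128)))
  }
  // Same write/read FIFO depths as BmbToLiteDramGenerator (32/32).
  io.dram.fromBmb(io.bmb, wdataFifoSize = 32, rdataFifoSize = 32)
}

// Elaborate the sketch to Verilog.
object ExampleBmbToDramVerilog extends App {
  SpinalVerilog(ExampleBmbToDram())
}
```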

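
Note: the LiteX-facing netlist added by this patch is normally produced through the scopt options of `VexRiscvLitexSmpClusterCmdGen`. The sketch below drives that App directly from Scala; `GenerateExampleCluster` is a hypothetical name, the flag values (2 cores, 64-bit L1 buses, 128-bit LiteDRAM port) are arbitrary examples, and any option not passed keeps the defaults declared in the App.

```scala
// Hypothetical driver object, not part of the patch: shows how the scopt flags
// declared in VexRiscvLitexSmpClusterCmdGen map onto a concrete netlist.
object GenerateExampleCluster extends App {
  vexriscv.demo.smp.VexRiscvLitexSmpClusterCmdGen.main(Array(
    "--cpu-count",         "2",
    "--ibus-width",        "64",
    "--dbus-width",        "64",
    "--litedram-width",    "128",
    "--coherent-dma",
    "--netlist-name",      "VexRiscvLitexSmpCluster_2c",
    "--netlist-directory", "."
  ))
}
```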