Diffstat (limited to 'VexRiscv/src/main/scala/vexriscv/ip/DataCache.scala')
-rw-r--r--   VexRiscv/src/main/scala/vexriscv/ip/DataCache.scala | 1184
1 file changed, 1184 insertions, 0 deletions
diff --git a/VexRiscv/src/main/scala/vexriscv/ip/DataCache.scala b/VexRiscv/src/main/scala/vexriscv/ip/DataCache.scala
new file mode 100644
index 0000000..2b70400
--- /dev/null
+++ b/VexRiscv/src/main/scala/vexriscv/ip/DataCache.scala
@@ -0,0 +1,1184 @@
+package vexriscv.ip
+
+import vexriscv._
+import spinal.core._
+import spinal.lib._
+import spinal.lib.bus.amba4.axi.{Axi4Config, Axi4Shared}
+import spinal.lib.bus.avalon.{AvalonMM, AvalonMMConfig}
+import spinal.lib.bus.bmb.{Bmb, BmbAccessParameter, BmbCmd, BmbInvalidationParameter, BmbParameter, BmbSourceParameter}
+import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig}
+import spinal.lib.bus.simple._
+import vexriscv.plugin.DBusSimpleBus
+
+
+case class DataCacheConfig(cacheSize : Int,
+                           bytePerLine : Int,
+                           wayCount : Int,
+                           addressWidth : Int,
+                           cpuDataWidth : Int,
+                           var rfDataWidth : Int = -1, //-1 means cpuDataWidth
+                           memDataWidth : Int,
+                           catchAccessError : Boolean,
+                           catchIllegal : Boolean,
+                           catchUnaligned : Boolean,
+                           earlyWaysHits : Boolean = true,
+                           earlyDataMux : Boolean = false,
+                           tagSizeShift : Int = 0, //Used to force inferring ram
+                           withLrSc : Boolean = false,
+                           withAmo : Boolean = false,
+                           withExclusive : Boolean = false,
+                           withInvalidate : Boolean = false,
+                           pendingMax : Int = 64,
+                           directTlbHit : Boolean = false,
+                           mergeExecuteMemory : Boolean = false,
+                           asyncTagMemory : Boolean = false,
+                           withWriteAggregation : Boolean = false){
+
+  if(rfDataWidth == -1) rfDataWidth = cpuDataWidth
+  assert(!(mergeExecuteMemory && (earlyDataMux || earlyWaysHits)))
+  assert(!(earlyDataMux && !earlyWaysHits))
+  assert(isPow2(pendingMax))
+  assert(rfDataWidth <= memDataWidth)
+
+  def lineCount = cacheSize/bytePerLine/wayCount
+  def sizeMax = log2Up(bytePerLine)
+  def sizeWidth = log2Up(sizeMax + 1)
+  val aggregationWidth = if(withWriteAggregation) log2Up(memDataBytes+1) else 0
+  def withWriteResponse = withExclusive
+  def burstSize = bytePerLine*8/memDataWidth
+  val burstLength = bytePerLine/(cpuDataWidth/8)
+  def catchSomething = catchUnaligned || catchIllegal || catchAccessError
+  def withInternalAmo = withAmo && !withExclusive
+  def withInternalLrSc = withLrSc && !withExclusive
+  def withExternalLrSc = withLrSc && withExclusive
+  def withExternalAmo = withAmo && withExclusive
+  def cpuDataBytes = cpuDataWidth/8
+  def rfDataBytes = rfDataWidth/8
+  def memDataBytes = memDataWidth/8
+  def getAxi4SharedConfig() = Axi4Config(
+    addressWidth = addressWidth,
+    dataWidth = memDataWidth,
+    useId = false,
+    useRegion = false,
+    useBurst = false,
+    useLock = false,
+    useQos = false
+  )
+
+
+  def getAvalonConfig() = AvalonMMConfig.bursted(
+    addressWidth = addressWidth,
+    dataWidth = memDataWidth,
+    burstCountWidth = log2Up(burstSize + 1)).copy(
+    useByteEnable = true,
+    constantBurstBehavior = true,
+    burstOnBurstBoundariesOnly = true,
+    useResponse = true,
+    maximumPendingReadTransactions = 2
+  )
+
+  def getWishboneConfig() = WishboneConfig(
+    addressWidth = 32-log2Up(memDataWidth/8),
+    dataWidth = memDataWidth,
+    selWidth = memDataBytes,
+    useSTALL = false,
+    useLOCK = false,
+    useERR = true,
+    useRTY = false,
+    tgaWidth = 0,
+    tgcWidth = 0,
+    tgdWidth = 0,
+    useBTE = true,
+    useCTI = true
+  )
+
+  def getBmbParameter() = BmbParameter(
+    BmbAccessParameter(
+      addressWidth = 32,
+      dataWidth = memDataWidth
+    ).addSources(1, BmbSourceParameter(
+      lengthWidth = log2Up(this.bytePerLine),
+      contextWidth = (if(!withWriteResponse) 1 else 0) + aggregationWidth,
+      alignment = BmbParameter.BurstAlignement.LENGTH,
+      canExclusive = withExclusive,
+      withCachedRead = true,
+      canInvalidate = withInvalidate,
+      canSync = withInvalidate
+    )),
+    BmbInvalidationParameter(
+      invalidateLength = log2Up(this.bytePerLine),
+      invalidateAlignment = BmbParameter.BurstAlignement.LENGTH
+    )
+  )
+}
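
For orientation, a minimal instantiation of this configuration could look as follows (parameter values are illustrative, not taken from this commit; all other parameters keep their defaults):

    val config = DataCacheConfig(
      cacheSize        = 4096, // 4 KiB total => lineCount = 4096/32/1 = 128
      bytePerLine      = 32,
      wayCount         = 1,    // direct mapped
      addressWidth     = 32,
      cpuDataWidth     = 32,
      memDataWidth     = 32,
      catchAccessError = true,
      catchIllegal     = true,
      catchUnaligned   = true
    )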
+
+object DataCacheCpuExecute{
+  implicit def implArgs(that : DataCacheCpuExecute) = that.args
+}
+
+case class DataCacheCpuExecute(p : DataCacheConfig) extends Bundle with IMasterSlave{
+  val isValid = Bool
+  val address = UInt(p.addressWidth bit)
+  val haltIt = Bool
+  val args = DataCacheCpuExecuteArgs(p)
+  val refilling = Bool
+
+  override def asMaster(): Unit = {
+    out(isValid, args, address)
+    in(haltIt, refilling)
+  }
+}
+
+case class DataCacheCpuExecuteArgs(p : DataCacheConfig) extends Bundle{
+  val wr = Bool
+  val size = UInt(log2Up(log2Up(p.cpuDataBytes)+1) bits)
+  val isLrsc = p.withLrSc generate Bool()
+  val isAmo = p.withAmo generate Bool()
+  val amoCtrl = p.withAmo generate new Bundle {
+    val swap = Bool()
+    val alu = Bits(3 bits)
+  }
+
+  val totalyConsistent = Bool() //Only for AMO/LRSC
+}
+
+case class DataCacheCpuMemory(p : DataCacheConfig, mmu : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{
+  val isValid = Bool
+  val isStuck = Bool
+  val isWrite = Bool
+  val address = UInt(p.addressWidth bit)
+  val mmuRsp = MemoryTranslatorRsp(mmu)
+
+  override def asMaster(): Unit = {
+    out(isValid, isStuck, address)
+    in(isWrite)
+    out(mmuRsp)
+  }
+}
+
+
+case class FenceFlags() extends Bundle {
+  val SW,SR,SO,SI,PW,PR,PO,PI = Bool()
+  val FM = Bits(4 bits)
+
+  def SL = SR || SI
+  def SS = SW || SO
+  def PL = PR || PI
+  def PS = PW || PO
+  def forceAll(): Unit ={
+    List(SW,SR,SO,SI,PW,PR,PO,PI).foreach(_ := True)
+  }
+  def clearAll(): Unit ={
+    List(SW,SR,SO,SI,PW,PR,PO,PI).foreach(_ := False)
+  }
+}
+
+case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMasterSlave{
+  val isValid = Bool()
+  val isStuck = Bool()
+  val isFiring = Bool()
+  val isUser = Bool()
+  val haltIt = Bool()
+  val isWrite = Bool()
+  val storeData = Bits(p.cpuDataWidth bit)
+  val data = Bits(p.cpuDataWidth bit)
+  val address = UInt(p.addressWidth bit)
+  val mmuException, unalignedAccess, accessError = Bool()
+  val keepMemRspData = Bool() //Used by external AMO to avoid having an internal buffer
+  val fence = FenceFlags()
+  val exclusiveOk = Bool()
+
+  override def asMaster(): Unit = {
+    out(isValid, isStuck, isUser, address, fence, storeData, isFiring)
+    in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite, keepMemRspData, exclusiveOk)
+  }
+}
+
+case class DataCacheFlush(lineCount : Int) extends Bundle{
+  val singleLine = Bool()
+  val lineId = UInt(log2Up(lineCount) bits)
+}
+
+case class DataCacheCpuBus(p : DataCacheConfig, mmu : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{
+  val execute = DataCacheCpuExecute(p)
+  val memory = DataCacheCpuMemory(p, mmu)
+  val writeBack = DataCacheCpuWriteBack(p)
+
+  val redo = Bool()
+  val flush = Stream(DataCacheFlush(p.lineCount))
+
+  override def asMaster(): Unit = {
+    master(execute)
+    master(memory)
+    master(writeBack)
+    master(flush)
+    in(redo)
+  }
+}
+
+
+case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{
+  val wr = Bool
+  val uncached = Bool
+  val address = UInt(p.addressWidth bit)
+  val data = Bits(p.cpuDataWidth bits)
+  val mask = Bits(p.cpuDataWidth/8 bits)
+  val size = UInt(p.sizeWidth bits) //... 1 => 2 bytes ... 2 => 4 bytes ...
+  val exclusive = p.withExclusive generate Bool()
+  val last = Bool
+
+//  def beatCountMinusOne = size.muxListDc((0 to p.sizeMax).map(i => i -> U((1 << i)/p.memDataBytes)))
+//  def beatCount = size.muxListDc((0 to p.sizeMax).map(i => i -> U((1 << i)/p.memDataBytes-1)))
+
+  //Utilities which make quite a few assumptions about the bus utilisation
+  def byteCountMinusOne = size.muxListDc((0 to p.sizeMax).map(i => i -> U((1 << i)-1, log2Up(p.bytePerLine) bits)))
+  def beatCountMinusOne = (size === log2Up(p.bytePerLine)) ? U(p.burstSize-1) | U(0)
+  def beatCount = (size === log2Up(p.bytePerLine)) ? U(p.burstSize) | U(1)
+  def isBurst = size === log2Up(p.bytePerLine)
+}
+case class DataCacheMemRsp(p : DataCacheConfig) extends Bundle{
+  val aggregated = UInt(p.aggregationWidth bits)
+  val last = Bool()
+  val data = Bits(p.memDataWidth bit)
+  val error = Bool
+  val exclusive = p.withExclusive generate Bool()
+}
+case class DataCacheInv(p : DataCacheConfig) extends Bundle{
+  val enable = Bool()
+  val address = UInt(p.addressWidth bit)
+}
+case class DataCacheAck(p : DataCacheConfig) extends Bundle{
+  val hit = Bool()
+}
+
+case class DataCacheSync(p : DataCacheConfig) extends Bundle{
+  val aggregated = UInt(p.aggregationWidth bits)
+}
+
+case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave{
+  val cmd = Stream (DataCacheMemCmd(p))
+  val rsp = Flow (DataCacheMemRsp(p))
+
+  val inv = p.withInvalidate generate Stream(Fragment(DataCacheInv(p)))
+  val ack = p.withInvalidate generate Stream(Fragment(DataCacheAck(p)))
+  val sync = p.withInvalidate generate Stream(DataCacheSync(p))
+
+  override def asMaster(): Unit = {
+    master(cmd)
+    slave(rsp)
+
+    if(p.withInvalidate) {
+      slave(inv)
+      master(ack)
+      slave(sync)
+    }
+  }
+
+  def toAxi4Shared(stageCmd : Boolean = false, pendingWritesMax : Int = 7): Axi4Shared = {
+    val axi = Axi4Shared(p.getAxi4SharedConfig()).setName("dbus_axi")
+
+    val cmdPreFork = if (stageCmd) cmd.stage.stage().s2mPipe() else cmd
+
+    val pendingWrites = CounterUpDown(
+      stateCount = pendingWritesMax + 1,
+      incWhen = cmdPreFork.fire && cmdPreFork.wr,
+      decWhen = axi.writeRsp.fire
+    )
+
+    val hazard = (pendingWrites =/= 0 && !cmdPreFork.wr) || pendingWrites === pendingWritesMax
+    val (cmdFork, dataFork) = StreamFork2(cmdPreFork.haltWhen(hazard))
+    val cmdStage = cmdFork.throwWhen(RegNextWhen(!cmdFork.last,cmdFork.fire).init(False))
+    val dataStage = dataFork.throwWhen(!dataFork.wr)
+
+    axi.sharedCmd.arbitrationFrom(cmdStage)
+    axi.sharedCmd.write := cmdStage.wr
+    axi.sharedCmd.prot := "010"
+    axi.sharedCmd.cache := "1111"
+    axi.sharedCmd.size := log2Up(p.memDataBytes)
+    axi.sharedCmd.addr := cmdStage.address
+    axi.sharedCmd.len := cmdStage.beatCountMinusOne.resized
+
+    axi.writeData.arbitrationFrom(dataStage)
+    axi.writeData.data := dataStage.data
+    axi.writeData.strb := dataStage.mask
+    axi.writeData.last := dataStage.last
+
+    rsp.valid := axi.r.valid
+    rsp.error := !axi.r.isOKAY()
+    rsp.data := axi.r.data
+
+    axi.r.ready := True
+    axi.b.ready := True
+
+    axi
+  }
+
+
+  def toAvalon(): AvalonMM = {
+    val avalonConfig = p.getAvalonConfig()
+    val mm = AvalonMM(avalonConfig)
+    mm.read := cmd.valid && !cmd.wr
+    mm.write := cmd.valid && cmd.wr
+    mm.address := cmd.address(cmd.address.high downto log2Up(p.memDataWidth/8)) @@ U(0,log2Up(p.memDataWidth/8) bits)
+    mm.burstCount := cmd.beatCount
+    mm.byteEnable := cmd.mask
+    mm.writeData := cmd.data
+
+    cmd.ready := mm.waitRequestn
+    rsp.valid := mm.readDataValid
+    rsp.data := mm.readData
+    rsp.error := mm.response =/= AvalonMM.Response.OKAY
+
+    mm
+  }
+
+  def toWishbone(): Wishbone = {
+    val wishboneConfig = p.getWishboneConfig()
+    val bus = Wishbone(wishboneConfig)
+    val counter = Reg(UInt(log2Up(p.burstSize) bits)) init(0)
+    val addressShift = log2Up(p.memDataWidth/8)
+
+    val cmdBridge = Stream (DataCacheMemCmd(p))
+    val isBurst = cmdBridge.isBurst
+    cmdBridge.valid := cmd.valid
+    cmdBridge.address := (isBurst ? (cmd.address(31 downto widthOf(counter) + addressShift) @@ counter @@ U(0, addressShift bits)) | (cmd.address(31 downto addressShift) @@ U(0, addressShift bits)))
+    cmdBridge.wr := cmd.wr
+    cmdBridge.mask := cmd.mask
+    cmdBridge.data := cmd.data
+    cmdBridge.size := cmd.size
+    cmdBridge.last := !isBurst || counter === p.burstSize-1
+    cmd.ready := cmdBridge.ready && (cmdBridge.wr || cmdBridge.last)
+
+
+    when(cmdBridge.fire){
+      counter := counter + 1
+      when(cmdBridge.last){
+        counter := 0
+      }
+    }
+
+
+    bus.ADR := cmdBridge.address >> addressShift
+    bus.CTI := Mux(isBurst, cmdBridge.last ? B"111" | B"010", B"000")
+    bus.BTE := B"00"
+    bus.SEL := cmdBridge.wr ? cmdBridge.mask | B((1 << p.memDataBytes)-1)
+    bus.WE := cmdBridge.wr
+    bus.DAT_MOSI := cmdBridge.data
+
+    cmdBridge.ready := cmdBridge.valid && bus.ACK
+    bus.CYC := cmdBridge.valid
+    bus.STB := cmdBridge.valid
+
+    rsp.valid := RegNext(cmdBridge.valid && !bus.WE && bus.ACK) init(False)
+    rsp.data := RegNext(bus.DAT_MISO)
+    rsp.error := False //TODO
+    bus
+  }
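
To read the registered-feedback signaling driven above, the CTI encoding reduces to this table (derived from the Mux on bus.CTI; added here for clarity):

    isBurst  cmdBridge.last  CTI
    False    True            000   classic single cycle
    True     False           010   incrementing burst
    True     True            111   end of burst

BTE is tied to 00 (linear increment), and SEL falls back to all ones on reads, since the byte mask only qualifies writes.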
+
+
+  def toPipelinedMemoryBus(): PipelinedMemoryBus = {
+    val bus = PipelinedMemoryBus(32,32)
+
+    val counter = Reg(UInt(log2Up(p.burstSize) bits)) init(0)
+    when(bus.cmd.fire){ counter := counter + 1 }
+    when(cmd.fire && cmd.last){ counter := 0 }
+
+    bus.cmd.valid := cmd.valid
+    bus.cmd.address := (cmd.address(31 downto 2) | counter.resized) @@ U"00"
+    bus.cmd.write := cmd.wr
+    bus.cmd.mask := cmd.mask
+    bus.cmd.data := cmd.data
+    cmd.ready := bus.cmd.ready && (cmd.wr || counter === p.burstSize-1)
+    rsp.valid := bus.rsp.valid
+    rsp.data := bus.rsp.payload.data
+    rsp.error := False
+    bus
+  }
+
+
+  def toBmb(syncPendingMax : Int = 32,
+            timeoutCycles : Int = 16) : Bmb = new Area{
+    setCompositeName(DataCacheMemBus.this, "Bridge", true)
+    val pipelinedMemoryBusConfig = p.getBmbParameter()
+    val bus = Bmb(pipelinedMemoryBusConfig).setCompositeName(this,"toBmb", true)
+
+    case class Context() extends Bundle{
+      val isWrite = !p.withWriteResponse generate Bool()
+      val rspCount = (p.aggregationWidth != 0) generate UInt(p.aggregationWidth bits)
+    }
+
+
+    def sizeToLength(size : UInt) = size.muxListDc((0 to log2Up(p.cpuDataBytes)).map(i => U(i) -> U((1 << i)-1, log2Up(p.cpuDataBytes) bits)))
+
+    val withoutWriteBuffer = if(p.aggregationWidth == 0) new Area {
+      val busCmdContext = Context()
+
+      bus.cmd.valid := cmd.valid
+      bus.cmd.last := cmd.last
+      bus.cmd.opcode := (cmd.wr ? B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ))
+      bus.cmd.address := cmd.address.resized
+      bus.cmd.data := cmd.data
+      bus.cmd.length := cmd.byteCountMinusOne
+      bus.cmd.mask := cmd.mask
+      if (p.withExclusive) bus.cmd.exclusive := cmd.exclusive
+      if (!p.withWriteResponse) busCmdContext.isWrite := cmd.wr
+      bus.cmd.context := B(busCmdContext)
+
+      cmd.ready := bus.cmd.ready
+      if(p.withInvalidate) sync.arbitrationFrom(bus.sync)
+    }
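
The direct (non-aggregating) path above maps one cache command to one BMB command, with bus.cmd.length carrying byteCountMinusOne. Worked numbers, assuming bytePerLine = 32 and a 32-bit CPU word (illustrative, not mandated by the code):

    size = log2Up(32) = 5  ->  length = (1 << 5) - 1 = 31   (full-line refill)
    size = 2               ->  length = (1 << 2) - 1 = 3    (one 32-bit store)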
+
+    val withWriteBuffer = if(p.aggregationWidth != 0) new Area {
+      val buffer = new Area {
+        val stream = cmd.toEvent().m2sPipe()
+        val address = Reg(UInt(p.addressWidth bits))
+        val length = Reg(UInt(pipelinedMemoryBusConfig.access.lengthWidth bits))
+        val write = Reg(Bool)
+        val exclusive = Reg(Bool)
+        val data = Reg(Bits(p.memDataWidth bits))
+        val mask = Reg(Bits(p.memDataWidth/8 bits)) init(0)
+      }
+
+      val aggregationRange = log2Up(p.memDataWidth/8)-1 downto log2Up(p.cpuDataWidth/8)
+      val tagRange = p.addressWidth-1 downto aggregationRange.high+1
+      val aggregationEnabled = Reg(Bool)
+      val aggregationCounter = Reg(UInt(p.aggregationWidth bits)) init(0)
+      val aggregationCounterFull = aggregationCounter === aggregationCounter.maxValue
+      val timer = Reg(UInt(log2Up(timeoutCycles)+1 bits)) init(0)
+      val timerFull = timer.msb
+      val hit = cmd.address(tagRange) === buffer.address(tagRange)
+      val cmdExclusive = if(p.withExclusive) cmd.exclusive else False
+      val canAggregate = cmd.valid && cmd.wr && !cmd.uncached && !cmdExclusive && !timerFull && !aggregationCounterFull && (!buffer.stream.valid || aggregationEnabled && hit)
+      val doFlush = cmd.valid && !canAggregate || timerFull || aggregationCounterFull || !aggregationEnabled
+//      val canAggregate = False
+//      val doFlush = True
+      val busCmdContext = Context()
+      val halt = False
+
+      when(cmd.fire){
+        aggregationCounter := aggregationCounter + 1
+      }
+      when(buffer.stream.valid && !timerFull){
+        timer := timer + 1
+      }
+      when(bus.cmd.fire || !buffer.stream.valid){
+        buffer.mask := 0
+        aggregationCounter := 0
+        timer := 0
+      }
+
+      buffer.stream.ready := (bus.cmd.ready && doFlush || canAggregate) && !halt
+      bus.cmd.valid := buffer.stream.valid && doFlush && !halt
+      bus.cmd.last := True
+      bus.cmd.opcode := (buffer.write ? B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ))
+      bus.cmd.address := buffer.address
+      bus.cmd.length := buffer.length
+      bus.cmd.data := buffer.data
+      bus.cmd.mask := buffer.mask
+
+      if (p.withExclusive) bus.cmd.exclusive := buffer.exclusive
+      bus.cmd.context.removeAssignments() := B(busCmdContext)
+      if (!p.withWriteResponse) busCmdContext.isWrite := bus.cmd.isWrite
+      busCmdContext.rspCount := aggregationCounter
+
+      val aggregationSel = cmd.address(aggregationRange)
+      when(cmd.fire){
+        val dIn = cmd.data.subdivideIn(8 bits)
+        val dReg = buffer.data.subdivideIn(8 bits)
+        for(byteId <- 0 until p.memDataBytes){
+          when(aggregationSel === byteId / p.cpuDataBytes && cmd.mask(byteId % p.cpuDataBytes)){
+            dReg.write(byteId, dIn(byteId % p.cpuDataBytes))
+            buffer.mask(byteId) := True
+          }
+        }
+      }
+
+      when(cmd.fire){
+        buffer.write := cmd.wr
+        buffer.address := cmd.address.resized
+        buffer.length := cmd.byteCountMinusOne
+        if (p.withExclusive) buffer.exclusive := cmd.exclusive
+
+        when(cmd.wr && !cmd.uncached && !cmdExclusive){
+          aggregationEnabled := True
+          buffer.address(aggregationRange.high downto 0) := 0
+          buffer.length := p.memDataBytes-1
+        } otherwise {
+          aggregationEnabled := False
+        }
+      }
+
+
+      val rspCtx = bus.rsp.context.as(Context())
+      rsp.aggregated := rspCtx.rspCount
+
+      val syncLogic = p.withInvalidate generate new Area{
+        val cmdCtx = Stream(UInt(p.aggregationWidth bits))
+        cmdCtx.valid := bus.cmd.fire && bus.cmd.isWrite
+        cmdCtx.payload := aggregationCounter
+        halt setWhen(!cmdCtx.ready)
+
+        val syncCtx = cmdCtx.queue(syncPendingMax).s2mPipe().m2sPipe() //Assume latency of sync is at least 3 cycles
+        syncCtx.ready := bus.sync.fire
+
+        sync.arbitrationFrom(bus.sync)
+        sync.aggregated := syncCtx.payload
+      }
+    }
+
+
+    rsp.valid := bus.rsp.valid
+    if(!p.withWriteResponse) rsp.valid clearWhen(bus.rsp.context(0))
+    rsp.data := bus.rsp.data
+    rsp.error := bus.rsp.isError
+    rsp.last := bus.rsp.last
+    if(p.withExclusive) rsp.exclusive := bus.rsp.exclusive
+    bus.rsp.ready := True
+
+    val invalidateLogic = p.withInvalidate generate new Area{
+      val beatCountMinusOne = bus.inv.transferBeatCountMinusOne(p.bytePerLine)
+      val counter = Reg(UInt(widthOf(beatCountMinusOne) bits)) init(0)
+
+      inv.valid := bus.inv.valid
+      inv.address := bus.inv.address + (counter << log2Up(p.bytePerLine))
+      inv.enable := bus.inv.all
+      inv.last := counter === beatCountMinusOne
+      bus.inv.ready := inv.last && inv.ready
+
+      if(widthOf(counter) != 0) when(inv.fire){
+        counter := counter + 1
+        when(inv.last){
+          counter := 0
+        }
+      }
+
+      bus.ack.arbitrationFrom(ack.throwWhen(!ack.last))
+    }
+  }.bus
+
+}
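
The write buffer above merges consecutive CPU-wide stores into one memDataWidth-wide BMB write. A sketch of the ranges involved, assuming cpuDataWidth = 32 and memDataWidth = 128 (illustrative numbers):

    aggregationRange = 3 downto 2    // which 32-bit word inside the 128-bit beat
    tagRange         = 31 downto 4   // stores matching here may merge into the beat
    aggregationWidth = log2Up(16+1) = 5   // merge count, reported back in rsp.aggregated

Up to four consecutive word stores hitting the same beat become a single bus write with a per-byte mask; the merge count travels through the BMB context so responses and syncs retire the right number of pending writes.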
+
+object DataCacheExternalAmoStates extends SpinalEnum{
+  val LR_CMD, LR_RSP, SC_CMD, SC_RSP = newElement();
+}
+
+//If external amo, mem rsp should stay
+class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParameter) extends Component{
+  import p._
+
+  val io = new Bundle{
+    val cpu = slave(DataCacheCpuBus(p, mmuParameter))
+    val mem = master(DataCacheMemBus(p))
+  }
+
+  val haltCpu = False
+  val lineWidth = bytePerLine*8
+  val lineCount = cacheSize/bytePerLine
+  val wordWidth = cpuDataWidth
+  val wordWidthLog2 = log2Up(wordWidth)
+  val wordPerLine = lineWidth/wordWidth
+  val bytePerWord = wordWidth/8
+  val wayLineCount = lineCount/wayCount
+  val wayLineLog2 = log2Up(wayLineCount)
+  val wayWordCount = wayLineCount * wordPerLine
+  val memWordPerLine = lineWidth/memDataWidth
+  val memTransactionPerLine = p.bytePerLine / (p.memDataWidth/8)
+  val bytePerMemWord = memDataWidth/8
+  val wayMemWordCount = wayLineCount * memWordPerLine
+
+  val tagRange = addressWidth-1 downto log2Up(wayLineCount*bytePerLine)
+  val lineRange = tagRange.low-1 downto log2Up(bytePerLine)
+  val cpuWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord)
+  val memWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerMemWord)
+  val hitRange = tagRange.high downto lineRange.low
+  val memWordToCpuWordRange = log2Up(bytePerMemWord)-1 downto log2Up(bytePerWord)
+  val cpuWordToRfWordRange = log2Up(bytePerWord)-1 downto log2Up(p.rfDataBytes)
+
+
+  class LineInfo() extends Bundle{
+    val valid, error = Bool()
+    val address = UInt(tagRange.length bit)
+  }
+
+  val tagsReadCmd = Flow(UInt(log2Up(wayLineCount) bits))
+  val tagsInvReadCmd = withInvalidate generate Flow(UInt(log2Up(wayLineCount) bits))
+  val tagsWriteCmd = Flow(new Bundle{
+    val way = Bits(wayCount bits)
+    val address = UInt(log2Up(wayLineCount) bits)
+    val data = new LineInfo()
+  })
+
+  val tagsWriteLastCmd = RegNext(tagsWriteCmd)
+
+  val dataReadCmd = Flow(UInt(log2Up(wayMemWordCount) bits))
+  val dataWriteCmd = Flow(new Bundle{
+    val way = Bits(wayCount bits)
+    val address = UInt(log2Up(wayMemWordCount) bits)
+    val data = Bits(memDataWidth bits)
+    val mask = Bits(memDataWidth/8 bits)
+  })
+
+
+  val ways = for(i <- 0 until wayCount) yield new Area{
+    val tags = Mem(new LineInfo(), wayLineCount)
+    val data = Mem(Bits(memDataWidth bit), wayMemWordCount)
+
+    //Reads
+    val tagsReadRsp = asyncTagMemory match {
+      case false => tags.readSync(tagsReadCmd.payload, tagsReadCmd.valid && !io.cpu.memory.isStuck)
+      case true => tags.readAsync(RegNextWhen(tagsReadCmd.payload, io.cpu.execute.isValid && !io.cpu.memory.isStuck))
+    }
+    val dataReadRspMem = data.readSync(dataReadCmd.payload, dataReadCmd.valid && !io.cpu.memory.isStuck)
+    val dataReadRspSel = if(mergeExecuteMemory) io.cpu.writeBack.address else io.cpu.memory.address
+    val dataReadRsp = dataReadRspMem.subdivideIn(cpuDataWidth bits).read(dataReadRspSel(memWordToCpuWordRange))
+
+    val tagsInvReadRsp = withInvalidate generate(asyncTagMemory match {
+      case false => tags.readSync(tagsInvReadCmd.payload, tagsInvReadCmd.valid)
+      case true => tags.readAsync(RegNextWhen(tagsInvReadCmd.payload, tagsInvReadCmd.valid))
+    })
+
+    //Writes
+    when(tagsWriteCmd.valid && tagsWriteCmd.way(i)){
+      tags.write(tagsWriteCmd.address, tagsWriteCmd.data)
+    }
+    when(dataWriteCmd.valid && dataWriteCmd.way(i)){
+      data.write(
+        address = dataWriteCmd.address,
+        data = dataWriteCmd.data,
+        mask = dataWriteCmd.mask
+      )
+    }
+  }
+
+
+  tagsReadCmd.valid := False
+  tagsReadCmd.payload.assignDontCare()
+  dataReadCmd.valid := False
+  dataReadCmd.payload.assignDontCare()
+  tagsWriteCmd.valid := False
+  tagsWriteCmd.payload.assignDontCare()
+  dataWriteCmd.valid := False
+  dataWriteCmd.payload.assignDontCare()
+
+  when(io.cpu.execute.isValid && !io.cpu.memory.isStuck){
+    tagsReadCmd.valid := True
+    dataReadCmd.valid := True
+    tagsReadCmd.payload := io.cpu.execute.address(lineRange)
+    dataReadCmd.payload := io.cpu.execute.address(lineRange.high downto memWordRange.low)
+  }
+
+  def collisionProcess(readAddress : UInt, readMask : Bits): Bits ={
+    val ret = Bits(wayCount bits)
+    val readAddressAligned = (readAddress >> log2Up(memDataWidth/cpuDataWidth))
+    val dataWriteMaskAligned = dataWriteCmd.mask.subdivideIn(memDataWidth/cpuDataWidth slices).read(readAddress(log2Up(memDataWidth/cpuDataWidth)-1 downto 0))
+    for(i <- 0 until wayCount){
+      ret(i) := dataWriteCmd.valid && dataWriteCmd.way(i) && dataWriteCmd.address === readAddressAligned && (readMask & dataWriteMaskAligned) =/= 0
+    }
+    ret
+  }
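
As a sanity check on the geometry constants defined at the top of DataCache, take an illustrative 4 KiB, 2-way configuration with 32-byte lines and 32-bit CPU/memory words (example numbers, not values mandated by the code):

    lineCount             = 4096/32   = 128
    wayLineCount          = 128/2     = 64 lines per way
    wordPerLine           = 256/32    = 8
    memTransactionPerLine = 32/(32/8) = 8 beats per refill
    tagRange              = 31 downto 11, lineRange = 10 downto 5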
+
+
+  io.cpu.execute.haltIt := False
+
+  val rspSync = True
+  val rspLast = True
+  val memCmdSent = RegInit(False) setWhen (io.mem.cmd.fire) clearWhen (!io.cpu.writeBack.isStuck)
+  val pending = withExclusive generate new Area{
+    val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0)
+    val counterNext = counter + U(io.mem.cmd.fire && io.mem.cmd.last) - ((io.mem.rsp.valid && io.mem.rsp.last) ? (io.mem.rsp.aggregated +^ 1) | 0)
+    counter := counterNext
+
+    val done = RegNext(counterNext === 0)
+    val full = RegNext(counter.msb) //Has margin
+    val last = RegNext(counterNext === 1) //Equivalent to counter === 1 but pipelined
+
+    if(!withInvalidate) {
+      io.cpu.execute.haltIt setWhen(full)
+    }
+
+    rspSync clearWhen (!last || !memCmdSent)
+    rspLast clearWhen (!last)
+  }
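
Spelled out, the pending counter above tracks outstanding bus transactions (annotation, not part of the source):

    counterNext = counter
                + (cmd fired with last   ? 1              : 0)   // one per issued transaction
                - (rsp arrived with last ? aggregated + 1 : 0)   // retire it plus merged writes

Each response retires the transaction itself plus however many writes the BMB write buffer merged into it, which is why the aggregated count is carried back in the response.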
+
+  val sync = withInvalidate generate new Area{
+    io.mem.sync.ready := True
+    val syncCount = io.mem.sync.aggregated +^ 1
+    val syncContext = new Area{
+      val history = Mem(Bool, pendingMax)
+      val wPtr, rPtr = Reg(UInt(log2Up(pendingMax)+1 bits)) init(0)
+      when(io.mem.cmd.fire && io.mem.cmd.wr){
+        history.write(wPtr.resized, io.mem.cmd.uncached)
+        wPtr := wPtr + 1
+      }
+
+      when(io.mem.sync.fire){
+        rPtr := rPtr + syncCount
+      }
+      val uncached = history.readAsync(rPtr.resized)
+      val full = RegNext(wPtr - rPtr >= pendingMax-1)
+      io.cpu.execute.haltIt setWhen(full)
+    }
+
+    def pending(inc : Bool, dec : Bool) = new Area {
+      val pendingSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0)
+      val pendingSyncNext = pendingSync + U(io.mem.cmd.fire && io.mem.cmd.wr && inc) - ((io.mem.sync.fire && dec) ? syncCount | 0)
+      pendingSync := pendingSyncNext
+    }
+
+    val writeCached = pending(inc = !io.mem.cmd.uncached, dec = !syncContext.uncached)
+    val writeUncached = pending(inc = io.mem.cmd.uncached, dec = syncContext.uncached)
+
+    def track(load : Bool, uncached : Boolean) = new Area {
+      val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0)
+      counter := counter - ((io.mem.sync.fire && counter =/= 0 && (if(uncached) syncContext.uncached else !syncContext.uncached)) ? syncCount | 0)
+      when(load){ counter := (if(uncached) writeUncached.pendingSyncNext else writeCached.pendingSyncNext) }
+
+      val busy = counter =/= 0
+    }
+
+    val w2w = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SW, uncached = false)
+    val w2r = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SR, uncached = false)
+    val w2i = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SI, uncached = false)
+    val w2o = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SO, uncached = false)
+    val o2w = track(load = io.cpu.writeBack.fence.PO && io.cpu.writeBack.fence.SW, uncached = true)
+    val o2r = track(load = io.cpu.writeBack.fence.PO && io.cpu.writeBack.fence.SR, uncached = true)
+    //Assume o2i and o2o are ordered by the interconnect
+
+    val notTotalyConsistent = w2w.busy || w2r.busy || w2i.busy || w2o.busy || o2w.busy || o2r.busy
+  }
+
+
+
+  val stage0 = new Area{
+//    val mask = io.cpu.execute.size.mux (
+//      U(0)    -> B"0001",
+//      U(1)    -> B"0011",
+//      default -> B"1111"
+//    ) |<< io.cpu.execute.address(1 downto 0)
+
+    val mask = io.cpu.execute.size.muxListDc((0 to log2Up(p.cpuDataBytes)).map(i => U(i) -> B((1 << (1 << i)) -1, p.cpuDataBytes bits))) |<< io.cpu.execute.address(log2Up(p.cpuDataBytes)-1 downto 0)
+
+
+    val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto cpuWordRange.low), mask)
+    val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled
+
+    val isAmo = if(withAmo) io.cpu.execute.isAmo else False
+  }
+
+  val stageA = new Area{
+    def stagePipe[T <: Data](that : T) = if(mergeExecuteMemory) CombInit(that) else RegNextWhen(that, !io.cpu.memory.isStuck)
+    val request = stagePipe(io.cpu.execute.args)
+    val mask = stagePipe(stage0.mask)
+    io.cpu.memory.isWrite := request.wr
+
+    val isAmo = if(withAmo) request.isAmo else False
+    val isLrsc = if(withAmo) request.isLrsc else False
+    val consistancyCheck = (withInvalidate || withWriteResponse) generate new Area {
+      val hazard = False
+      val w = sync.w2w.busy || sync.o2w.busy
+      val r = stagePipe(sync.w2r.busy || sync.o2r.busy) || sync.w2r.busy || sync.o2r.busy // As it uses the cache, need to check against the execute stage status too
+      val o = CombInit(sync.w2o.busy)
+      val i = CombInit(sync.w2i.busy)
+
+      val s = io.cpu.memory.mmuRsp.isIoAccess ? o | w
+      val l = io.cpu.memory.mmuRsp.isIoAccess ? i | r
+
+      when(isAmo ? (s || l) | (request.wr ? s | l)){
+        hazard := True
+      }
+      when(request.totalyConsistent && (sync.notTotalyConsistent || io.cpu.writeBack.isValid && io.cpu.writeBack.isWrite)){
+        hazard := True
+      }
+    }
+
+    val wayHits = earlyWaysHits generate Bits(wayCount bits)
+    val indirectTlbHitGen = (earlyWaysHits && !directTlbHit) generate new Area {
+      wayHits := B(ways.map(way => (io.cpu.memory.mmuRsp.physicalAddress(tagRange) === way.tagsReadRsp.address && way.tagsReadRsp.valid)))
+    }
+    val directTlbHitGen = (earlyWaysHits && directTlbHit) generate new Area {
+      val wayTlbHits = for (way <- ways) yield for (tlb <- io.cpu.memory.mmuRsp.ways) yield {
+        way.tagsReadRsp.address === tlb.physical(tagRange) && tlb.sel
+      }
+      val translatedHits = B(wayTlbHits.map(_.orR))
+      val bypassHits = B(ways.map(_.tagsReadRsp.address === io.cpu.memory.address(tagRange)))
+      wayHits := (io.cpu.memory.mmuRsp.bypassTranslation ? bypassHits | translatedHits) & B(ways.map(_.tagsReadRsp.valid))
+    }
+
+    val dataMux = earlyDataMux generate MuxOH(wayHits, ways.map(_.dataReadRsp))
+    val wayInvalidate = stagePipe(stage0.wayInvalidate)
+    val dataColisions = if(mergeExecuteMemory){
+      stagePipe(stage0.dataColisions)
+    } else {
+      //Assume the write-back stage will never unstall a memory access while the memory stage is stalled
+      stagePipe(stage0.dataColisions) | collisionProcess(io.cpu.memory.address(lineRange.high downto cpuWordRange.low), mask)
+    }
+  }
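
A worked example of the stage0 byte mask for a 32-bit CPU word (cpuDataBytes = 4, illustrative): size = 0/1/2 first selects B"0001"/B"0011"/B"1111", which is then shifted left by address(1 downto 0). A halfword store at byte offset 2 therefore yields

    mask = B"0011" |<< 2 = B"1100"

matching the commented-out legacy mux kept above the muxListDc form.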
+
+  val stageB = new Area {
+    def stagePipe[T <: Data](that : T) = RegNextWhen(that, !io.cpu.writeBack.isStuck)
+    def ramPipe[T <: Data](that : T) = if(mergeExecuteMemory) CombInit(that) else RegNextWhen(that, !io.cpu.writeBack.isStuck)
+    val request = RegNextWhen(stageA.request, !io.cpu.writeBack.isStuck)
+    val mmuRspFreeze = False
+    val mmuRsp = RegNextWhen(io.cpu.memory.mmuRsp, !io.cpu.writeBack.isStuck && !mmuRspFreeze)
+    val tagsReadRsp = ways.map(w => ramPipe(w.tagsReadRsp))
+    val dataReadRsp = !earlyDataMux generate ways.map(w => ramPipe(w.dataReadRsp))
+    val wayInvalidate = stagePipe(stageA.wayInvalidate)
+    val consistancyHazard = if(stageA.consistancyCheck != null) stagePipe(stageA.consistancyCheck.hazard) else False
+    val dataColisions = stagePipe(stageA.dataColisions)
+//    val unaligned = if(!catchUnaligned) False else stagePipe((stageA.request.size === 2 && io.cpu.memory.address(1 downto 0) =/= 0) || (stageA.request.size === 1 && io.cpu.memory.address(0 downto 0) =/= 0))
+    val unaligned = if(!catchUnaligned) False else stagePipe((1 to log2Up(p.cpuDataBytes)).map(i => stageA.request.size === i && io.cpu.memory.address(i-1 downto 0) =/= 0).orR)
+    val waysHitsBeforeInvalidate = if(earlyWaysHits) stagePipe(B(stageA.wayHits)) else B(tagsReadRsp.map(tag => mmuRsp.physicalAddress(tagRange) === tag.address && tag.valid).asBits())
+    val waysHits = waysHitsBeforeInvalidate & ~wayInvalidate
+    val waysHit = waysHits.orR
+    val dataMux = if(earlyDataMux) stagePipe(stageA.dataMux) else MuxOH(waysHits, dataReadRsp)
+    val mask = stagePipe(stageA.mask)
+
+    //Loader interface
+    val loaderValid = False
+
+    val ioMemRspMuxed = io.mem.rsp.data.subdivideIn(cpuDataWidth bits).read(io.cpu.writeBack.address(memWordToCpuWordRange))
+
+    io.cpu.writeBack.haltIt := True
+
+    //Evict the cache after the reset logic
+    val flusher = new Area {
+      val waitDone = RegInit(False) clearWhen(io.cpu.flush.ready)
+      val hold = False
+      val counter = Reg(UInt(lineRange.size + 1 bits)) init(0)
+      when(!counter.msb) {
+        tagsWriteCmd.valid := True
+        tagsWriteCmd.address := counter.resized
+        tagsWriteCmd.way.setAll()
+        tagsWriteCmd.data.valid := False
+        io.cpu.execute.haltIt := True
+        when(!hold) {
+          counter := counter + 1
+          when(io.cpu.flush.singleLine){
+            counter.msb := True
+          }
+        }
+      }
+
+      io.cpu.flush.ready := waitDone && counter.msb
+
+      val start = RegInit(True) //Used to relax timings
+      start := !waitDone && !start && io.cpu.flush.valid && !io.cpu.execute.isValid && !io.cpu.memory.isValid && !io.cpu.writeBack.isValid && !io.cpu.redo
+
+      when(start){
+        waitDone := True
+        counter := 0
+        when(io.cpu.flush.singleLine){
+          counter := U"0" @@ io.cpu.flush.lineId
+        }
+      }
+    }
+
+    val lrSc = withInternalLrSc generate new Area{
+      val reserved = RegInit(False)
+      when(io.cpu.writeBack.isValid && io.cpu.writeBack.isFiring){
+        reserved setWhen(request.isLrsc)
+        reserved clearWhen(request.wr)
+      }
+    }
+
+    val isAmo = if(withAmo) request.isAmo else False
+    val isAmoCached = if(withInternalAmo) isAmo else False
+    val isExternalLsrc = if(withExternalLrSc) request.isLrsc else False
+    val isExternalAmo = if(withExternalAmo) request.isAmo else False
+
+    val requestDataBypass = CombInit(io.cpu.writeBack.storeData)
+    import DataCacheExternalAmoStates._
+    val amo = withAmo generate new Area{
+      def rf = io.cpu.writeBack.storeData(p.rfDataWidth-1 downto 0)
+      def memLarger = if(withInternalAmo) dataMux else ioMemRspMuxed
+      def mem = memLarger.subdivideIn(rfDataWidth bits).read(io.cpu.writeBack.address(cpuWordToRfWordRange))
+      val compare = request.amoCtrl.alu.msb
+      val unsigned = request.amoCtrl.alu(2 downto 1) === B"11"
+      val addSub = (rf.asSInt + Mux(compare, ~mem, mem).asSInt + Mux(compare, S(1), S(0))).asBits
+      val less = Mux(rf.msb === mem.msb, addSub.msb, Mux(unsigned, mem.msb, rf.msb))
+      val selectRf = request.amoCtrl.swap ? True | (request.amoCtrl.alu.lsb ^ less)
+
+      val result = (request.amoCtrl.alu | (request.amoCtrl.swap ## B"00")).mux(
+        B"000"  -> addSub,
+        B"001"  -> (rf ^ mem),
+        B"010"  -> (rf | mem),
+        B"011"  -> (rf & mem),
+        default -> (selectRf ? rf | mem)
+      )
+//      val resultRegValid = RegNext(True) clearWhen(!io.cpu.writeBack.isStuck)
+//      val resultReg = RegNext(result)
+      val resultReg = Reg(Bits(32 bits))
+
+      val internal = withInternalAmo generate new Area{
+        val resultRegValid = RegNext(io.cpu.writeBack.isStuck)
+        resultReg := result
+      }
+      val external = !withInternalAmo generate new Area{
+        val state = RegInit(LR_CMD)
+      }
+    }
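
One way to read the comparison trick in the AMO ALU above (annotation, not part of the source): when compare is set, addSub computes rf - mem as rf + ~mem + 1, and less is then derived as

    rf.msb == mem.msb  ->  sign bit of (rf - mem)
    otherwise          ->  unsigned: the operand with its msb set is the larger
                           signed:   the operand with its msb set is the smaller

so a single adder serves ADD as well as signed/unsigned MIN/MAX, via selectRf = alu.lsb ^ less (or unconditionally rf for swap).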
+
+
+    val cpuWriteToCache = False
+    when(cpuWriteToCache){
+      dataWriteCmd.valid setWhen(request.wr && waysHit)
+      dataWriteCmd.address := mmuRsp.physicalAddress(lineRange.high downto memWordRange.low)
+      dataWriteCmd.data.subdivideIn(cpuDataWidth bits).foreach(_ := requestDataBypass)
+      dataWriteCmd.mask := 0
+      dataWriteCmd.mask.subdivideIn(cpuDataWidth/8 bits).write(io.cpu.writeBack.address(memWordToCpuWordRange), mask)
+      dataWriteCmd.way := waysHits
+    }
+
+    val badPermissions = (!mmuRsp.allowWrite && request.wr) || (!mmuRsp.allowRead && (!request.wr || isAmo))
+    val loadStoreFault = io.cpu.writeBack.isValid && (mmuRsp.exception || badPermissions)
+
+    io.cpu.redo := False
+    io.cpu.writeBack.accessError := False
+    io.cpu.writeBack.mmuException := loadStoreFault && (if(catchIllegal) mmuRsp.isPaging else False)
+    io.cpu.writeBack.unalignedAccess := io.cpu.writeBack.isValid && unaligned
+    io.cpu.writeBack.isWrite := request.wr
+
+
+    io.mem.cmd.valid := False
+    io.mem.cmd.address := mmuRsp.physicalAddress
+    io.mem.cmd.last := True
+    io.mem.cmd.wr := request.wr
+    io.mem.cmd.mask := mask
+    io.mem.cmd.data := requestDataBypass
+    io.mem.cmd.uncached := mmuRsp.isIoAccess
+    io.mem.cmd.size := request.size.resized
+    if(withExternalLrSc) io.mem.cmd.exclusive := request.isLrsc || isAmo
+
+
+    val bypassCache = mmuRsp.isIoAccess || isExternalLsrc || isExternalAmo
+
+    io.cpu.writeBack.keepMemRspData := False
+    when(io.cpu.writeBack.isValid) {
+      when(isExternalAmo){
+        if(withExternalAmo) switch(amo.external.state){
+          is(LR_CMD){
+            io.mem.cmd.valid := True
+            io.mem.cmd.wr := False
+            when(io.mem.cmd.ready) {
+              amo.external.state := LR_RSP
+            }
+          }
+          is(LR_RSP){
+            when(io.mem.rsp.valid && pending.last) {
+              amo.external.state := SC_CMD
+              amo.resultReg := amo.result
+            }
+          }
+          is(SC_CMD){
+            io.mem.cmd.valid := True
+            when(io.mem.cmd.ready) {
+              amo.external.state := SC_RSP
+            }
+          }
+          is(SC_RSP){
+            io.cpu.writeBack.keepMemRspData := True
+            when(io.mem.rsp.valid) {
+              amo.external.state := LR_CMD
+              when(io.mem.rsp.exclusive){ //Success
+                cpuWriteToCache := True
+                io.cpu.writeBack.haltIt := False
+              }
+            }
+          }
+        }
+      } elsewhen(mmuRsp.isIoAccess || isExternalLsrc) {
+        val waitResponse = !request.wr
+        if(withExternalLrSc) waitResponse setWhen(request.isLrsc)
+
+        io.cpu.writeBack.haltIt.clearWhen(waitResponse ? (io.mem.rsp.valid && rspSync) | io.mem.cmd.ready)
+
+        io.mem.cmd.valid := !memCmdSent
+
+        if(withInternalLrSc) when(request.isLrsc && !lrSc.reserved){
+          io.mem.cmd.valid := False
+          io.cpu.writeBack.haltIt := False
+        }
+      } otherwise {
+        when(waysHit || request.wr && !isAmoCached) { //Do not require a cache refill ?
+          cpuWriteToCache := True
+
+          //Write through
+          io.mem.cmd.valid setWhen(request.wr)
+          io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready)
+
+          if(withInternalAmo) when(isAmo){
+            when(!amo.internal.resultRegValid) {
+              io.mem.cmd.valid := False
+              dataWriteCmd.valid := False
+              io.cpu.writeBack.haltIt := True
+            }
+          }
+
+          //On write to read dataColisions
+          when((!request.wr || isAmoCached) && (dataColisions & waysHits) =/= 0){
+            io.cpu.redo := True
+            if(withAmo) io.mem.cmd.valid := False
+          }
+
+          if(withInternalLrSc) when(request.isLrsc && !lrSc.reserved){
+            io.mem.cmd.valid := False
+            dataWriteCmd.valid := False
+            io.cpu.writeBack.haltIt := False
+          }
+        } otherwise { //Do refill
+          //Emit cmd
+          io.mem.cmd.valid setWhen(!memCmdSent)
+          io.mem.cmd.wr := False
+          io.mem.cmd.address(0, lineRange.low bits) := 0
+          io.mem.cmd.size := log2Up(p.bytePerLine)
+
+          loaderValid setWhen(io.mem.cmd.ready)
+        }
+      }
+    }
+
+    when(bypassCache){
+      io.cpu.writeBack.data := ioMemRspMuxed
+      def isLast = if(pending != null) pending.last else True
+      if(catchAccessError) io.cpu.writeBack.accessError := !request.wr && isLast && io.mem.rsp.valid && io.mem.rsp.error
+    } otherwise {
+      io.cpu.writeBack.data := dataMux
+      if(catchAccessError) io.cpu.writeBack.accessError := (waysHits & B(tagsReadRsp.map(_.error))) =/= 0 || (loadStoreFault && !mmuRsp.isPaging)
+    }
+
+    if(withLrSc) {
+      val success = if(withInternalLrSc) lrSc.reserved else io.mem.rsp.exclusive
+      io.cpu.writeBack.exclusiveOk := success
+      when(request.isLrsc && request.wr){
+//        io.cpu.writeBack.data := B(!success).resized
+        if(withExternalLrSc) when(io.cpu.writeBack.isValid && io.mem.rsp.valid && rspSync && success && waysHit){
+          cpuWriteToCache := True
+        }
+      }
+    }
+    if(withAmo) when(request.isAmo){
+      requestDataBypass.subdivideIn(p.rfDataWidth bits).foreach(_ := amo.resultReg)
+    }
+
+    //remove side effects on exceptions
+    when(consistancyHazard || mmuRsp.refilling || io.cpu.writeBack.accessError || io.cpu.writeBack.mmuException || io.cpu.writeBack.unalignedAccess){
+      io.mem.cmd.valid := False
+      tagsWriteCmd.valid := False
+      dataWriteCmd.valid := False
+      loaderValid := False
+      io.cpu.writeBack.haltIt := False
+      if(withInternalLrSc) lrSc.reserved := lrSc.reserved
+      if(withExternalAmo) amo.external.state := LR_CMD
+    }
+    io.cpu.redo setWhen(io.cpu.writeBack.isValid && (mmuRsp.refilling || consistancyHazard))
+
+    assert(!(io.cpu.writeBack.isValid && !io.cpu.writeBack.haltIt && io.cpu.writeBack.isStuck), "writeBack stuck by another plugin is not allowed", ERROR)
+  }
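
A condensed view of the writeBack decision tree above (summary added for readability; simplified, ignoring the LR/SC and hazard overrides):

    external AMO                  -> LR/SC state machine on the memory bus
    I/O access or external LR/SC  -> bypass the cache, wait for the bus response
    way hit, or plain write       -> write-through: update the hit line, emit the bus write
    read/AMO miss                 -> issue a full-line burst refill, hand over to the loader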
+
+  val loader = new Area{
+    val valid = RegInit(False) setWhen(stageB.loaderValid)
+    val baseAddress = stageB.mmuRsp.physicalAddress
+
+    val counter = Counter(memTransactionPerLine)
+    val waysAllocator = Reg(Bits(wayCount bits)) init(1)
+    val error = RegInit(False)
+    val kill = False
+    val killReg = RegInit(False) setWhen(kill)
+
+    when(valid && io.mem.rsp.valid && rspLast){
+      dataWriteCmd.valid := True
+      dataWriteCmd.address := baseAddress(lineRange) @@ counter
+      dataWriteCmd.data := io.mem.rsp.data
+      dataWriteCmd.mask.setAll()
+      dataWriteCmd.way := waysAllocator
+      error := error | io.mem.rsp.error
+      counter.increment()
+    }
+
+    val done = CombInit(counter.willOverflow)
+    if(withInvalidate) done setWhen(valid && pending.counter === 0) //Used to resolve an invalidate write request arriving at the same time
+
+    when(done){
+      valid := False
+
+      //Update tags
+      tagsWriteCmd.valid := True
+      tagsWriteCmd.address := baseAddress(lineRange)
+      tagsWriteCmd.data.valid := !(kill || killReg)
+      tagsWriteCmd.data.address := baseAddress(tagRange)
+      tagsWriteCmd.data.error := error || (io.mem.rsp.valid && io.mem.rsp.error)
+      tagsWriteCmd.way := waysAllocator
+
+      error := False
+      killReg := False
+    }
+
+    when(!valid){
+      waysAllocator := (waysAllocator ## waysAllocator.msb).resized
+    }
+
+    io.cpu.redo setWhen(valid.rise())
+    io.cpu.execute.refilling := valid
+
+    stageB.mmuRspFreeze setWhen(stageB.loaderValid || valid)
+  }
+
+  val invalidate = withInvalidate generate new Area{
+    val s0 = new Area{
+      val input = io.mem.inv
+      tagsInvReadCmd.valid := input.fire
+      tagsInvReadCmd.payload := input.address(lineRange)
+
+      val loaderTagHit = input.address(tagRange) === loader.baseAddress(tagRange)
+      val loaderLineHit = input.address(lineRange) === loader.baseAddress(lineRange)
+      when(input.valid && input.enable && loader.valid && loaderLineHit && loaderTagHit){
+        loader.kill := True
+      }
+    }
+    val s1 = new Area{
+      val input = s0.input.stage()
+      val loaderValid = RegNextWhen(loader.valid, s0.input.ready)
+      val loaderWay = RegNextWhen(loader.waysAllocator, s0.input.ready)
+      val loaderTagHit = RegNextWhen(s0.loaderTagHit, s0.input.ready)
+      val loaderLineHit = RegNextWhen(s0.loaderLineHit, s0.input.ready)
+      val invalidations = Bits(wayCount bits)
+
+      var wayHits = B(ways.map(way => (input.address(tagRange) === way.tagsInvReadRsp.address && way.tagsInvReadRsp.valid))) & ~invalidations
+
+      //Handle invalidate read during loader write hazard
+      when(loaderValid && loaderLineHit && !loaderTagHit){
+        wayHits \= wayHits & ~loaderWay
+      }
+    }
+    val s2 = new Area{
+      val input = s1.input.stage()
+      val wayHits = RegNextWhen(s1.wayHits, s1.input.ready)
+      val wayHit = wayHits.orR
+
+      when(input.valid && input.enable) {
+        //Manage invalidate write during cpu read hazard
+        when(input.address(lineRange) === io.cpu.execute.address(lineRange)) {
+          stage0.wayInvalidate := wayHits
+        }
+
+        //Invalidate cache tag
+        when(wayHit) {
+          tagsWriteCmd.valid := True
+          stageB.flusher.hold := True
+          tagsWriteCmd.address := input.address(lineRange)
+          tagsWriteCmd.data.valid := False
+          tagsWriteCmd.way := wayHits
+          loader.done := False //Hold loader tags write
+        }
+      }
+      io.mem.ack.arbitrationFrom(input)
+      io.mem.ack.hit := wayHit
+      io.mem.ack.last := input.last
+
+      //Manage invalidation read during write hazard
+      s1.invalidations := RegNextWhen((input.valid && input.enable && input.address(lineRange) === s0.input.address(lineRange)) ? wayHits | 0, s0.input.ready)
+    }
+  }
+}
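
Finally, a sketch of how the component might be instantiated; config is the earlier illustrative configuration and mmuBusParameter stands in for the MemoryTranslatorBusParameter supplied by the MMU plugin (both names are hypothetical here):

    // 'config' and 'mmuBusParameter' are placeholders, not defined in this file
    val cache = new DataCache(config, mmuBusParameter)
    // CPU side: cache.io.cpu is driven by the pipeline's execute/memory/writeBack stages
    val dBus = cache.io.mem.toAxi4Shared()   // or toAvalon() / toWishbone() / toBmb()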