Diffstat (limited to 'VexRiscv/src/main/scala/vexriscv')
90 files changed, 21020 insertions, 0 deletions
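For orientation before the per-file diffs: the sketch below shows how the plugin classes introduced by this patch are typically assembled into a CPU and elaborated to Verilog. It is an illustrative sketch only, not part of the diff; the object name GenSmallestSketch is hypothetical, and the plugin list and parameters are borrowed from the FormalSimple demo added further down, with the formal/exception plugins removed and all exception catching disabled so that no CSR plugin is required.

package vexriscv.demo

import spinal.core._
import vexriscv.plugin._
import vexriscv.{VexRiscv, VexRiscvConfig, plugin}

//Illustrative sketch (not part of this patch): elaborate a minimal VexRiscv from a plugin list
object GenSmallestSketch extends App {
  SpinalConfig().generateVerilog(new VexRiscv(VexRiscvConfig(
    plugins = List(
      new IBusSimplePlugin(
        resetVector = 0x00000000l,
        cmdForkOnSecondStage = false,
        cmdForkPersistence = false,
        prediction = NONE,            //no branch prediction
        catchAccessFault = false,
        compressedGen = false
      ),
      new DBusSimplePlugin(
        catchAddressMisaligned = false,
        catchAccessFault = false
      ),
      new DecoderSimplePlugin(
        catchIllegalInstruction = false
      ),
      new RegFilePlugin(
        regFileReadyKind = plugin.SYNC,
        zeroBoot = false
      ),
      new IntAluPlugin,
      new SrcPlugin(
        separatedAddSub = false,
        executeInsertion = false
      ),
      new FullBarrelShifterPlugin,
      new HazardSimplePlugin(
        bypassExecute = false,
        bypassMemory = false,
        bypassWriteBack = false,
        bypassWriteBackBuffer = false,
        pessimisticUseSrc = false,
        pessimisticWriteRegFile = false,
        pessimisticAddressMatch = false
      ),
      new BranchPlugin(
        earlyBranch = false,
        catchAddressMisaligned = false
      ),
      new YamlPlugin("cpu0.yaml")
    )
  )))
}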
diff --git a/VexRiscv/src/main/scala/vexriscv/Pipeline.scala b/VexRiscv/src/main/scala/vexriscv/Pipeline.scala new file mode 100644 index 0000000..e9d93c2 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/Pipeline.scala @@ -0,0 +1,162 @@ +package vexriscv + +import vexriscv.plugin._ +import spinal.core._ +import spinal.lib._ + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + +trait PipelineThing[T] + +trait Pipeline { + type T <: Pipeline + val plugins = ArrayBuffer[Plugin[T]]() + var stages = ArrayBuffer[Stage]() + var unremovableStages = mutable.Set[Stage]() + val things = mutable.LinkedHashMap[PipelineThing[_], Any]() +// val services = ArrayBuffer[Any]() + + def stageBefore(stage : Stage) = stages(indexOf(stage)-1) + + def indexOf(stage : Stage) = stages.indexOf(stage) + + def service[T](clazz : Class[T]) = { + val filtered = plugins.filter(o => clazz.isAssignableFrom(o.getClass)) + assert(filtered.length == 1, s"??? ${clazz.getName}") + filtered.head.asInstanceOf[T] + } + + def serviceExist[T](clazz : Class[T]) = { + val filtered = plugins.filter(o => clazz.isAssignableFrom(o.getClass)) + filtered.length != 0 + } + + def serviceElse[T](clazz : Class[T], default : => T) : T = { + if(!serviceExist(clazz)) return default + val filtered = plugins.filter(o => clazz.isAssignableFrom(o.getClass)) + assert(filtered.length == 1) + filtered.head.asInstanceOf[T] + } + + def update[T](that : PipelineThing[T], value : T) : Unit = things(that) = value + def apply[T](that : PipelineThing[T]) : T = things(that).asInstanceOf[T] + + def build(): Unit ={ + plugins.foreach(_.pipeline = this.asInstanceOf[T]) + plugins.foreach(_.setup(this.asInstanceOf[T])) + + plugins.foreach{ p => + p.parentScope = Component.current.dslBody //Put the given plugin as a child of the current component + p.reflectNames() + } + + //Build plugins + plugins.foreach(_.build(this.asInstanceOf[T])) + + + + //Interconnect stages + class KeyInfo{ + var insertStageId = Int.MaxValue + var lastInputStageId = Int.MinValue + var lastOutputStageId = Int.MinValue + + def addInputStageIndex(stageId : Int): Unit = { + require(stageId >= insertStageId) + lastInputStageId = Math.max(lastInputStageId,stageId) + lastOutputStageId = Math.max(lastOutputStageId,stageId-1) + } + + + def addOutputStageIndex(stageId : Int): Unit = { + require(stageId >= insertStageId) + lastInputStageId = Math.max(lastInputStageId,stageId) + lastOutputStageId = Math.max(lastOutputStageId,stageId) + } + + def setInsertStageId(stageId : Int) = insertStageId = stageId + } + + val inputOutputKeys = mutable.LinkedHashMap[Stageable[Data],KeyInfo]() + val insertedStageable = mutable.Set[Stageable[Data]]() + for(stageIndex <- 0 until stages.length; stage = stages(stageIndex)){ + stage.inserts.keysIterator.foreach(signal => inputOutputKeys.getOrElseUpdate(signal,new KeyInfo).setInsertStageId(stageIndex)) + stage.inserts.keysIterator.foreach(insertedStageable += _) + } + + val missingInserts = mutable.Set[Stageable[Data]]() + for(stageIndex <- 0 until stages.length; stage = stages(stageIndex)){ + stage.inputs.keysIterator.foreach(key => if(!insertedStageable.contains(key)) missingInserts += key) + stage.outputs.keysIterator.foreach(key => if(!insertedStageable.contains(key)) missingInserts += key) + } + + if(missingInserts.nonEmpty){ + throw new Exception("Missing inserts : " + missingInserts.map(_.getName()).mkString(", ")) + } + + for(stageIndex <- 0 until stages.length; stage = stages(stageIndex)){ + stage.inputs.keysIterator.foreach(key => 
inputOutputKeys.getOrElseUpdate(key,new KeyInfo).addInputStageIndex(stageIndex)) + stage.outputs.keysIterator.foreach(key => inputOutputKeys.getOrElseUpdate(key,new KeyInfo).addOutputStageIndex(stageIndex)) + } + + for((key,info) <- inputOutputKeys) { + //Interconnect inputs -> outputs + for (stageIndex <- info.insertStageId to info.lastOutputStageId; + stage = stages(stageIndex)) { + stage.output(key) + val outputDefault = stage.outputsDefault.getOrElse(key, null) + if (outputDefault != null) { + outputDefault := stage.input(key) + } + } + + //Interconnect outputs -> inputs + for (stageIndex <- info.insertStageId to info.lastInputStageId) { + val stage = stages(stageIndex) + stage.input(key) + val inputDefault = stage.inputsDefault.getOrElse(key, null) + if (inputDefault != null) { + if (stageIndex == info.insertStageId) { + inputDefault := stage.inserts(key) + } else { + val stageBefore = stages(stageIndex - 1) + inputDefault := RegNextWhen(stageBefore.output(key), stage.dontSample.getOrElse(key, Nil).foldLeft(!stage.arbitration.isStuck)(_ && !_)).setName(s"${stageBefore.getName()}_to_${stage.getName()}_${key.getName()}") + } + } + } + } + + //Arbitration + for(stageIndex <- 0 until stages.length; stage = stages(stageIndex)) { + stage.arbitration.isFlushed := stages.drop(stageIndex+1).map(_.arbitration.flushNext).orR || stages.drop(stageIndex).map(_.arbitration.flushIt).orR + if(!unremovableStages.contains(stage)) + stage.arbitration.removeIt setWhen stage.arbitration.isFlushed + else + assert(stage.arbitration.removeIt === False,"removeIt should never be asserted on this stage") + + } + + for(stageIndex <- 0 until stages.length; stage = stages(stageIndex)){ + stage.arbitration.isStuckByOthers := stage.arbitration.haltByOther || stages.takeRight(stages.length - stageIndex - 1).map(s => s.arbitration.isStuck/* && !s.arbitration.removeIt*/).foldLeft(False)(_ || _) + stage.arbitration.isStuck := stage.arbitration.haltItself || stage.arbitration.isStuckByOthers + stage.arbitration.isMoving := !stage.arbitration.isStuck && !stage.arbitration.removeIt + stage.arbitration.isFiring := stage.arbitration.isValid && !stage.arbitration.isStuck && !stage.arbitration.removeIt + } + + for(stageIndex <- 1 until stages.length){ + val stageBefore = stages(stageIndex - 1) + val stage = stages(stageIndex) + stage.arbitration.isValid.setAsReg() init(False) + when(!stage.arbitration.isStuck || stage.arbitration.removeIt) { + stage.arbitration.isValid := False + } + when(!stageBefore.arbitration.isStuck && !stageBefore.arbitration.removeIt) { + stage.arbitration.isValid := stageBefore.arbitration.isValid + } + } + } + + + Component.current.addPrePopTask(() => build()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/Riscv.scala b/VexRiscv/src/main/scala/vexriscv/Riscv.scala new file mode 100644 index 0000000..9e45e7a --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/Riscv.scala @@ -0,0 +1,242 @@ +package vexriscv + +import spinal.core._ + + +object Riscv{ + def misaToInt(values : String) = values.toLowerCase.map(e => 1 << (e-'a')).reduce(_ | _) + + def funct7Range = 31 downto 25 + def rdRange = 11 downto 7 + def funct3Range = 14 downto 12 + def rs1Range = 19 downto 15 + def rs2Range = 24 downto 20 + def rs3Range = 31 downto 27 + def csrRange = 31 downto 20 + + case class IMM(instruction : Bits) extends Area{ + // immediates + def i = instruction(31 downto 20) + def h = instruction(31 downto 24) + def s = instruction(31 downto 25) ## instruction(11 downto 7) + def b = instruction(31) ## instruction(7) ## 
instruction(30 downto 25) ## instruction(11 downto 8) + def u = instruction(31 downto 12) ## U"x000" + def j = instruction(31) ## instruction(19 downto 12) ## instruction(20) ## instruction(30 downto 21) + def z = instruction(19 downto 15) + + // sign-extend immediates + def i_sext = B((19 downto 0) -> i(11)) ## i + def h_sext = B((23 downto 0) -> h(7)) ## h + def s_sext = B((19 downto 0) -> s(11)) ## s + def b_sext = B((18 downto 0) -> b(11)) ## b ## False + def j_sext = B((10 downto 0) -> j(19)) ## j ## False + } + + + def ADD = M"0000000----------000-----0110011" + def SUB = M"0100000----------000-----0110011" + def SLL = M"0000000----------001-----0110011" + def SLT = M"0000000----------010-----0110011" + def SLTU = M"0000000----------011-----0110011" + def XOR = M"0000000----------100-----0110011" + def SRL = M"0000000----------101-----0110011" + def SRA = M"0100000----------101-----0110011" + def OR = M"0000000----------110-----0110011" + def AND = M"0000000----------111-----0110011" + + def ADDI = M"-----------------000-----0010011" + def SLLI = M"000000-----------001-----0010011" + def SLTI = M"-----------------010-----0010011" + def SLTIU = M"-----------------011-----0010011" + def XORI = M"-----------------100-----0010011" + def SRLI = M"000000-----------101-----0010011" + def SRAI = M"010000-----------101-----0010011" + def ORI = M"-----------------110-----0010011" + def ANDI = M"-----------------111-----0010011" + + def LB = M"-----------------000-----0000011" + def LH = M"-----------------001-----0000011" + def LW = M"-----------------010-----0000011" + def LBU = M"-----------------100-----0000011" + def LHU = M"-----------------101-----0000011" + def LWU = M"-----------------110-----0000011" + def SB = M"-----------------000-----0100011" + def SH = M"-----------------001-----0100011" + def SW = M"-----------------010-----0100011" + + def LR = M"00010--00000-----010-----0101111" + def SC = M"00011------------010-----0101111" + + def AMOSWAP = M"00001------------010-----0101111" + def AMOADD = M"00000------------010-----0101111" + def AMOXOR = M"00100------------010-----0101111" + def AMOAND = M"01100------------010-----0101111" + def AMOOR = M"01000------------010-----0101111" + def AMOMIN = M"10000------------010-----0101111" + def AMOMAX = M"10100------------010-----0101111" + def AMOMINU = M"11000------------010-----0101111" + def AMOMAXU = M"11100------------010-----0101111" + + def BEQ (rvc : Boolean) = if(rvc) M"-----------------000-----1100011" else M"-----------------000---0-1100011" + def BNE (rvc : Boolean) = if(rvc) M"-----------------001-----1100011" else M"-----------------001---0-1100011" + def BLT (rvc : Boolean) = if(rvc) M"-----------------100-----1100011" else M"-----------------100---0-1100011" + def BGE (rvc : Boolean) = if(rvc) M"-----------------101-----1100011" else M"-----------------101---0-1100011" + def BLTU(rvc : Boolean) = if(rvc) M"-----------------110-----1100011" else M"-----------------110---0-1100011" + def BGEU(rvc : Boolean) = if(rvc) M"-----------------111-----1100011" else M"-----------------111---0-1100011" + def JALR = M"-----------------000-----1100111" + def JAL(rvc : Boolean) = if(rvc) M"-------------------------1101111" else M"----------0--------------1101111" + def LUI = M"-------------------------0110111" + def AUIPC = M"-------------------------0010111" + + def MULX = M"0000001----------0-------0110011" + def DIVX = M"0000001----------1-------0110011" + + def MUL = M"0000001----------000-----0110011" + def MULH = 
M"0000001----------001-----0110011" + def MULHSU = M"0000001----------010-----0110011" + def MULHU = M"0000001----------011-----0110011" + + + def DIV = M"0000001----------100-----0110011" + def DIVU = M"0000001----------101-----0110011" + def REM = M"0000001----------110-----0110011" + def REMU = M"0000001----------111-----0110011" + + + + def CSRRW = M"-----------------001-----1110011" + def CSRRS = M"-----------------010-----1110011" + def CSRRC = M"-----------------011-----1110011" + def CSRRWI = M"-----------------101-----1110011" + def CSRRSI = M"-----------------110-----1110011" + def CSRRCI = M"-----------------111-----1110011" + + def ECALL = M"00000000000000000000000001110011" + def EBREAK = M"00000000000100000000000001110011" + def FENCEI = M"00000000000000000001000000001111" + def MRET = M"00110000001000000000000001110011" + def SRET = M"00010000001000000000000001110011" + def WFI = M"00010000010100000000000001110011" + + def FENCE = M"-----------------000-----0001111" + def FENCE_I = M"-----------------001-----0001111" + def SFENCE_VMA = M"0001001----------000000001110011" + + def FMV_W_X = M"111100000000-----000-----1010011" + def FADD_S = M"0000000------------------1010011" + def FSUB_S = M"0000100------------------1010011" + def FMUL_S = M"0001000------------------1010011" + def FDIV_S = M"0001100------------------1010011" + def FSGNJ_S = M"0010000----------000-----1010011" + def FSGNJN_S = M"0010000----------001-----1010011" + def FSGNJX_S = M"0010000----------010-----1010011" + def FMIN_S = M"0010100----------000-----1010011" + def FMAX_S = M"0010100----------001-----1010011" + def FSQRT_S = M"010110000000-------------1010011" + def FCVT_S_W = M"110100000000-------------1010011" + def FCVT_S_WU = M"110100000001-------------1010011" + def FCVT_S_L = M"110100000010-------------1010011" + def FCVT_S_LU = M"110100000011-------------1010011" + def FCVT_W_S = M"110000000000-------------1010011" + def FCVT_WU_S = M"110000000001-------------1010011" + def FCVT_L_S = M"110000000010-------------1010011" + def FCVT_LU_S = M"110000000011-------------1010011" + def FCLASS_S = M"111000000000-----001-----1010011" + def FMADD_S = M"-----00------------------1000011" + def FMSUB_S = M"-----00------------------1000111" + def FNMSUB_S = M"-----00------------------1001011" + def FNMADD_S = M"-----00------------------1001111" + + def FLE_S = M"1010000----------000-----1010011" + def FLT_S = M"1010000----------001-----1010011" + def FEQ_S = M"1010000----------010-----1010011" + def FADD_D = M"0000001------------------1010011" + def FSUB_D = M"0000101------------------1010011" + def FMUL_D = M"0001001------------------1010011" + def FDIV_D = M"0001101------------------1010011" + def FSGNJ_D = M"0010001----------000-----1010011" + def FSGNJN_D = M"0010001----------001-----1010011" + def FSGNJX_D = M"0010001----------010-----1010011" + def FMIN_D = M"0010101----------000-----1010011" + def FMAX_D = M"0010101----------001-----1010011" + def FSQRT_D = M"010110100000-------------1010011" + def FMV_X_W = M"111000000000-----000-----1010011" + def FCVT_W_D = M"110000100000-------------1010011" + def FCVT_WU_D = M"110000100001-------------1010011" + def FCVT_L_D = M"110000100010-------------1010011" + def FCVT_LU_D = M"110000100011-------------1010011" + def FMV_X_D = M"111000100000-----000-----1010011" + def FCLASS_D = M"111000100000-----001-----1010011" + def FCVT_D_W = M"110100100000-------------1010011" + def FCVT_D_WU = M"110100100001-------------1010011" + def FCVT_D_L = 
M"110100100010-------------1010011" + def FCVT_D_LU = M"110100100011-------------1010011" + def FMV_D_X = M"111100100000-----000-----1010011" + def FMADD_D = M"-----01------------------1000011" + def FMSUB_D = M"-----01------------------1000111" + def FNMSUB_D = M"-----01------------------1001011" + def FNMADD_D = M"-----01------------------1001111" + def FLE_D = M"1010001----------000-----1010011" + def FLT_D = M"1010001----------001-----1010011" + def FEQ_D = M"1010001----------010-----1010011" + + def FCVT_S_D = M"010000000001-------------1010011" + def FCVT_D_S = M"010000100000-------------1010011" + + def FLW = M"-----------------010-----0000111" + def FLD = M"-----------------011-----0000111" + def FSW = M"-----------------010-----0100111" + def FSD = M"-----------------011-----0100111" + + + + object CSR{ + def MVENDORID = 0xF11 // MRO Vendor ID. + def MARCHID = 0xF12 // MRO Architecture ID. + def MIMPID = 0xF13 // MRO Implementation ID. + def MHARTID = 0xF14 // MRO Hardware thread ID.Machine Trap Setup + def MSTATUS = 0x300 // MRW Machine status register. + def MISA = 0x301 // MRW ISA and extensions + def MEDELEG = 0x302 // MRW Machine exception delegation register. + def MIDELEG = 0x303 // MRW Machine interrupt delegation register. + def MIE = 0x304 // MRW Machine interrupt-enable register. + def MTVEC = 0x305 // MRW Machine trap-handler base address. Machine Trap Handling + def MSCRATCH = 0x340 // MRW Scratch register for machine trap handlers. + def MEPC = 0x341 // MRW Machine exception program counter. + def MCAUSE = 0x342 // MRW Machine trap cause. + def MBADADDR = 0x343 // MRW Machine bad address. + def MIP = 0x344 // MRW Machine interrupt pending. + def MBASE = 0x380 // MRW Base register. + def MBOUND = 0x381 // MRW Bound register. + def MIBASE = 0x382 // MRW Instruction base register. + def MIBOUND = 0x383 // MRW Instruction bound register. + def MDBASE = 0x384 // MRW Data base register. + def MDBOUND = 0x385 // MRW Data bound register. + def MCYCLE = 0xB00 // MRW Machine cycle counter. + def MINSTRET = 0xB02 // MRW Machine instructions-retired counter. + def MCYCLEH = 0xB80 // MRW Upper 32 bits of mcycle, RV32I only. + def MINSTRETH = 0xB82 // MRW Upper 32 bits of minstret, RV32I only. + + val SSTATUS = 0x100 + val SIE = 0x104 + val STVEC = 0x105 + val SCOUNTEREN = 0x106 + val SSCRATCH = 0x140 + val SEPC = 0x141 + val SCAUSE = 0x142 + val SBADADDR = 0x143 + val SIP = 0x144 + val SATP = 0x180 + + def UCYCLE = 0xC00 // UR Machine ucycle counter. + def UCYCLEH = 0xC80 + def UTIME = 0xC01 // rdtime + def UTIMEH = 0xC81 + def UINSTRET = 0xC02 // UR Machine instructions-retired counter. + def UINSTRETH = 0xC82 // UR Upper 32 bits of minstret, RV32I only. 
+ + val FFLAGS = 0x1 + val FRM = 0x2 + val FCSR = 0x3 + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/Services.scala b/VexRiscv/src/main/scala/vexriscv/Services.scala new file mode 100644 index 0000000..140c69b --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/Services.scala @@ -0,0 +1,132 @@ +package vexriscv + +import java.util + +import spinal.core._ +import spinal.lib._ + +import scala.beans.BeanProperty + +trait JumpService{ + def createJumpInterface(stage : Stage, priority : Int = 0) : Flow[UInt] //High priority win +} + +trait IBusFetcher{ + def haltIt() : Unit + def incoming() : Bool + def pcValid(stage : Stage) : Bool + def getInjectionPort() : Stream[Bits] + def withRvc() : Boolean + def forceNoDecode() : Unit +} + + +trait DecoderService{ + def add(key : MaskedLiteral,values : Seq[(Stageable[_ <: BaseType],Any)]) + def add(encoding :Seq[(MaskedLiteral,Seq[(Stageable[_ <: BaseType],Any)])]) + def addDefault(key : Stageable[_ <: BaseType], value : Any) + def forceIllegal() : Unit +} + +case class ExceptionCause(codeWidth : Int) extends Bundle{ + val code = UInt(codeWidth bits) + val badAddr = UInt(32 bits) + + def resizeCode(width : Int): ExceptionCause ={ + val ret = ExceptionCause(width) + ret.badAddr := badAddr + ret.code := code.resized + ret + } +} + +trait ExceptionService{ + def newExceptionPort(stage : Stage, priority : Int = 0, codeWidth : Int = 4) : Flow[ExceptionCause] + def isExceptionPending(stage : Stage) : Bool +} + +trait PrivilegeService{ + def isUser() : Bool + def isSupervisor() : Bool + def isMachine() : Bool + def forceMachine() : Unit +} + +case class PrivilegeServiceDefault() extends PrivilegeService{ + override def isUser(): Bool = False + override def isSupervisor(): Bool = False + override def isMachine(): Bool = True + override def forceMachine(): Unit = {} +} + +trait InterruptionInhibitor{ + def inhibateInterrupts() : Unit +} + +trait ExceptionInhibitor{ + def inhibateException() : Unit + def inhibateEbreakException() : Unit +} + + +trait RegFileService{ + def readStage() : Stage +} + + +case class MemoryTranslatorCmd() extends Bundle{ + val isValid = Bool + val isStuck = Bool + val virtualAddress = UInt(32 bits) + val bypassTranslation = Bool +} +case class MemoryTranslatorRsp(p : MemoryTranslatorBusParameter) extends Bundle{ + val physicalAddress = UInt(32 bits) + val isIoAccess = Bool + val isPaging = Bool + val allowRead, allowWrite, allowExecute = Bool + val exception = Bool + val refilling = Bool + val bypassTranslation = Bool + val ways = Vec(MemoryTranslatorRspWay(), p.wayCount) +} +case class MemoryTranslatorRspWay() extends Bundle{ + val sel = Bool() + val physical = UInt(32 bits) +} + +case class MemoryTranslatorBusParameter(wayCount : Int = 0, latency : Int = 0) +case class MemoryTranslatorBus(p : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ + val cmd = Vec(MemoryTranslatorCmd(), p.latency + 1) + val rsp = MemoryTranslatorRsp(p) + val end = Bool + val busy = Bool + + override def asMaster() : Unit = { + out(cmd, end) + in(rsp, busy) + } +} + +trait MemoryTranslator{ + def newTranslationPort(priority : Int, args : Any) : MemoryTranslatorBus +} + + +trait ReportService{ + def add(that : (String,Object)) : Unit +} + +class BusReport{ + @BeanProperty var kind = "" + @BeanProperty var flushInstructions = new util.LinkedList[Int]() + @BeanProperty var info : Object = null +} +class CacheReport { + @BeanProperty var size = 0 + @BeanProperty var bytePerLine = 0 +} + +class DebugReport { + @BeanProperty var 
hardwareBreakpointCount = 0 +}
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/Stage.scala b/VexRiscv/src/main/scala/vexriscv/Stage.scala new file mode 100644 index 0000000..fe20d19 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/Stage.scala @@ -0,0 +1,79 @@ +package vexriscv + +import spinal.core._ +import spinal.lib._ + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + + +class Stageable[T <: Data](_dataType : => T) extends HardType[T](_dataType) with Nameable{ + def dataType = apply() + setWeakName(this.getClass.getSimpleName.replace("$","")) +} + +class Stage() extends Area{ + def outsideCondScope[T](that : => T) : T = { + val body = Component.current.dslBody // Get the head of the current component symboles tree (AST in other words) + val ctx = body.push() // Now all access to the SpinalHDL API will be append to it (instead of the current context) + val swapContext = body.swap() // Empty the symbole tree (but keep a reference to the old content) + val ret = that // Execute the block of code (will be added to the recently empty body) + ctx.restore() // Restore the original context in which this function was called + swapContext.appendBack() // append the original symboles tree to the modified body + ret // return the value returned by that + } + + def input[T <: Data](key : Stageable[T]) : T = { + inputs.getOrElseUpdate(key.asInstanceOf[Stageable[Data]],outsideCondScope{ + val input,inputDefault = key() + inputsDefault(key.asInstanceOf[Stageable[Data]]) = inputDefault + input := inputDefault + input.setPartialName(this, key.getName()) + }).asInstanceOf[T] + } + + def output[T <: Data](key : Stageable[T]) : T = { + outputs.getOrElseUpdate(key.asInstanceOf[Stageable[Data]],outsideCondScope{ + val output,outputDefault = key() + outputsDefault(key.asInstanceOf[Stageable[Data]]) = outputDefault + output := outputDefault + output //.setPartialName(this,"output_" + key.getName()) + }).asInstanceOf[T] + } + + def insert[T <: Data](key : Stageable[T]) : T = inserts.getOrElseUpdate(key.asInstanceOf[Stageable[Data]],outsideCondScope(key())).asInstanceOf[T] //.setPartialName(this,key.getName()) +// def apply[T <: Data](key : Stageable[T]) : T = ??? 
+ + + val arbitration = new Area{ + val haltItself = False //user settable, stalls the instruction, should only be set by the instruction itself + val haltByOther = False //When set, stalls the instruction, should only be set by something other than the stalled instruction + val removeIt = False //When set, unschedules the instruction as if it was never executed (no side effect) + val flushIt = False //When set, unschedules the current instruction + val flushNext = False //When set, unschedules the instructions above in the pipeline + val isValid = Bool //Inform if an instruction is in the current stage + val isStuck = Bool //Inform if the instruction is stuck (haltItself || haltByOther) + val isStuckByOthers = Bool //Inform if the instruction is stuck by somebody else + def isRemoved = removeIt //Inform if the instruction is going to be unscheduled in the current cycle + val isFlushed = Bool //Inform if the instruction is flushed (flushAll set in the current or subsequent stages) + val isMoving = Bool //Inform if the instruction is going somewhere else (next stage or unscheduled) + val isFiring = Bool //Inform if the current instruction will go to the next stage on the next cycle (isValid && !isStuck && !removeIt) + } + + + val inputs = mutable.LinkedHashMap[Stageable[Data],Data]() + val outputs = mutable.LinkedHashMap[Stageable[Data],Data]() + val signals = mutable.LinkedHashMap[Stageable[Data],Data]() + val inserts = mutable.LinkedHashMap[Stageable[Data],Data]() + + val inputsDefault = mutable.LinkedHashMap[Stageable[Data],Data]() + val outputsDefault = mutable.LinkedHashMap[Stageable[Data],Data]() + + val dontSample = mutable.LinkedHashMap[Stageable[_], ArrayBuffer[Bool]]() + + def dontSampleStageable(s : Stageable[_], cond : Bool): Unit ={ + dontSample.getOrElseUpdate(s, ArrayBuffer[Bool]()) += cond + } + def inputInit[T <: BaseType](stageable : Stageable[T],initValue : T) = + Component.current.addPrePopTask(() => inputsDefault(stageable.asInstanceOf[Stageable[Data]]).asInstanceOf[T].getDrivingReg.init(initValue)) +}
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/TestsWorkspace.scala b/VexRiscv/src/main/scala/vexriscv/TestsWorkspace.scala new file mode 100644 index 0000000..c961f05 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/TestsWorkspace.scala @@ -0,0 +1,211 @@ +/* + * SpinalHDL + * Copyright (c) Dolu, All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3.0 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. + */ + +package vexriscv + +import vexriscv.plugin._ +import vexriscv.demo.{GenFull, SimdAddPlugin} +import spinal.core._ +import spinal.lib._ +import vexriscv.ip._ +import spinal.lib.bus.avalon.AvalonMM +import spinal.lib.eda.altera.{InterruptReceiverTag, ResetEmitterTag} +import vexriscv.demo.smp.VexRiscvSmpClusterGen +import vexriscv.ip.fpu.FpuParameter + + +// make clean all SEED=42 MMU=no STOP_ON_ERROR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes SUPERVISOR=yes REDO=1 DHRYSTONE=yes LRSC=yes AMO=yes LINUX_REGRESSION=yes TRACE=yes TRACE_START=1000000000 FLOW_INFO=ye IBUS_DATA_WIDTH=128 DBUS_DATA_WIDTH=128 +// make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=ye REDO=1 DEBUG=ye WITH_USER_IO=yes SEED=42 +object TestsWorkspace { + def main(args: Array[String]) { + SpinalConfig().generateVerilog { + + // make clean all REDO=10 CSR=no MMU=no COREMARK=no RVF=yes RVD=yes REDO=1 DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 DEBUG=ye TRACE=ye +// val config = VexRiscvConfig( +// plugins = List( +// new IBusCachedPlugin( +// prediction = DYNAMIC, +// config = InstructionCacheConfig( +// cacheSize = 4096, +// bytePerLine =32, +// wayCount = 1, +// addressWidth = 32, +// cpuDataWidth = 32, +// memDataWidth = 32, +// catchIllegalAccess = true, +// catchAccessFault = true, +// asyncTagMemory = false, +// twoCycleRam = true, +// twoCycleCache = true +// ), +// memoryTranslatorPortConfig = MmuPortConfig( +// portTlbSize = 4 +// ) +// ), +// new DBusCachedPlugin( +// config = new DataCacheConfig( +// cacheSize = 4096, +// bytePerLine = 32, +// wayCount = 1, +// addressWidth = 32, +// cpuDataWidth = 64, +// memDataWidth = 64, +// catchAccessError = true, +// catchIllegal = true, +// catchUnaligned = true +// ), +// memoryTranslatorPortConfig = MmuPortConfig( +// portTlbSize = 6 +// ) +// ), +// new MmuPlugin( +// virtualRange = _(31 downto 28) === 0xC, +// ioRange = _(31 downto 28) === 0xF +// ), +// new DecoderSimplePlugin( +// catchIllegalInstruction = true +// ), +// new RegFilePlugin( +// regFileReadyKind = plugin.SYNC, +// zeroBoot = false +// ), +// new IntAluPlugin, +// new SrcPlugin( +// separatedAddSub = false, +// executeInsertion = true +// ), +// new FullBarrelShifterPlugin, +// new HazardSimplePlugin( +// 
bypassExecute = true, +// bypassMemory = true, +// bypassWriteBack = true, +// bypassWriteBackBuffer = true, +// pessimisticUseSrc = false, +// pessimisticWriteRegFile = false, +// pessimisticAddressMatch = false +// ), +// new MulPlugin, +// new DivPlugin, +// new CsrPlugin(CsrPluginConfig.small(0x80000020l)), +// new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), +// new BranchPlugin( +// earlyBranch = false, +// catchAddressMisaligned = true +// ), +// new YamlPlugin("cpu0.yaml") +// ) +// ) +// config.plugins += new FpuPlugin( +// externalFpu = false, +// p = FpuParameter( +// withDouble = true +// ) +// ) + +// mkdir buildroot-build +// cd buildroot-build/ +// make O=$PWD BR2_EXTERNAL=../buildroot-spinal-saxon -C ../buildroot saxon_regression_defconfig + + //make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=no TRACE_START=565000000000ll SEED=45 + + //make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no TRACE=yes REDO=100 DEBUG=ye WITH_USER_IO=no FLOW_INFO=no TRACE_START=5600000000000ll SEED=45 STOP_ON_ERROR=ye + + // export IMAGES=/media/data/open/SaxonSoc/artyA7SmpUpdate/buildroot-regression/buildroot-build/images + // make clean all IBUS=CACHED IBUS_DATA_WIDTH=64 COMPRESSED=no DBUS=CACHED DBUS_LOAD_DATA_WIDTH=64 DBUS_STORE_DATA_WIDTH=64 LRSC=yes AMO=yes SUPERVISOR=yes DBUS_EXCLUSIVE=yes DBUS_INVALIDATE=yes MUL=yes DIV=yes RVF=yes RVD=yes DEBUG_PLUGIN=no LINUX_SOC_SMP=yes EMULATOR=$IMAGES/fw_jump.bin VMLINUX=$IMAGES/Image DTB=$IMAGES/linux.dtb RAMDISK=$IMAGES/rootfs.cpio TRACE=yes REDO=1 DEBUG=ye WITH_USER_IO=no FLOW_INFO=no TRACE_START=565000000000ll SEED=45 + val config = VexRiscvSmpClusterGen.vexRiscvConfig( + hartId = 0, + ioRange = _ (31 downto 28) === 0xF, + resetVector = 0x80000000l, + iBusWidth = 64, + dBusWidth = 64, + loadStoreWidth = 64, + iCacheSize = 4096*2, + dCacheSize = 4096*2, + iCacheWays = 2, + dCacheWays = 2, + withFloat = true, + withDouble = true, + externalFpu = false, + simHalt = true + ) + + + println("Args :") + println(config.getRegressionArgs().mkString(" ")) + + + val toplevel = new VexRiscv(config) +// val toplevel = new VexRiscv(configLight) +// val toplevel = new VexRiscv(configTest) + + /*toplevel.rework { + var iBus : AvalonMM = null + for (plugin <- toplevel.config.plugins) plugin match { + case plugin: IBusSimplePlugin => { + plugin.iBus.asDirectionLess() //Unset IO properties of iBus + iBus = master(plugin.iBus.toAvalon()) + .setName("iBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) //Specify a clock domain to the iBus (used by QSysify) + } + case plugin: IBusCachedPlugin => { + plugin.iBus.asDirectionLess() //Unset IO properties of iBus + iBus = master(plugin.iBus.toAvalon()) + .setName("iBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) //Specify a clock domain to the iBus (used by QSysify) + } + case plugin: DBusSimplePlugin => { + plugin.dBus.asDirectionLess() + master(plugin.dBus.toAvalon()) + .setName("dBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) + } + case plugin: DBusCachedPlugin => { + plugin.dBus.asDirectionLess() + master(plugin.dBus.toAvalon()) + 
.setName("dBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) + } + case plugin: DebugPlugin => { + plugin.io.bus.asDirectionLess() + slave(plugin.io.bus.fromAvalon()) + .setName("debugBusAvalon") + .addTag(ClockDomainTag(plugin.debugClockDomain)) + .parent = null //Avoid the io bundle to be interpreted as a QSys conduit + plugin.io.resetOut + .addTag(ResetEmitterTag(plugin.debugClockDomain)) + .parent = null //Avoid the io bundle to be interpreted as a QSys conduit + } + case _ => + } + for (plugin <- toplevel.config.plugins) plugin match { + case plugin: CsrPlugin => { + plugin.externalInterrupt + .addTag(InterruptReceiverTag(iBus, ClockDomain.current)) + plugin.timerInterrupt + .addTag(InterruptReceiverTag(iBus, ClockDomain.current)) + } + case _ => + } + }*/ +// toplevel.writeBack.input(config.PC).addAttribute(Verilator.public) +// toplevel.service(classOf[DecoderSimplePlugin]).bench(toplevel) + // toplevel.children.find(_.isInstanceOf[DataCache]).get.asInstanceOf[DataCache].io.cpu.execute.addAttribute(Verilator.public) + toplevel + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/VexRiscv.scala b/VexRiscv/src/main/scala/vexriscv/VexRiscv.scala new file mode 100644 index 0000000..ed7e37e --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/VexRiscv.scala @@ -0,0 +1,152 @@ +package vexriscv + +import vexriscv.plugin._ +import spinal.core._ + +import scala.collection.mutable.ArrayBuffer +import scala.collection.Seq + +object VexRiscvConfig{ + def apply(withMemoryStage : Boolean, withWriteBackStage : Boolean, plugins : Seq[Plugin[VexRiscv]]): VexRiscvConfig = { + val config = VexRiscvConfig() + config.plugins ++= plugins + config.withMemoryStage = withMemoryStage + config.withWriteBackStage = withWriteBackStage + config + } + + def apply(plugins : Seq[Plugin[VexRiscv]] = ArrayBuffer()) : VexRiscvConfig = apply(true,true,plugins) +} +trait VexRiscvRegressionArg{ + def getVexRiscvRegressionArgs() : Seq[String] +} +case class VexRiscvConfig(){ + var withMemoryStage = true + var withWriteBackStage = true + val plugins = ArrayBuffer[Plugin[VexRiscv]]() + + def add(that : Plugin[VexRiscv]) : this.type = {plugins += that;this} + def find[T](clazz: Class[T]): Option[T] = { + plugins.find(_.getClass == clazz) match { + case Some(x) => Some(x.asInstanceOf[T]) + case None => None + } + } + def get[T](clazz: Class[T]): T = { + plugins.find(_.getClass == clazz) match { + case Some(x) => x.asInstanceOf[T] + } + } + + def withRvc = plugins.find(_.isInstanceOf[IBusFetcher]) match { + case Some(x) => x.asInstanceOf[IBusFetcher].withRvc + case None => false + } + + def withRvf = find(classOf[FpuPlugin]) match { + case Some(x) => true + case None => false + } + + def withRvd = find(classOf[FpuPlugin]) match { + case Some(x) => x.p.withDouble + case None => false + } + + //Default Stageables + object IS_RVC extends Stageable(Bool) + object BYPASSABLE_EXECUTE_STAGE extends Stageable(Bool) + object BYPASSABLE_MEMORY_STAGE extends Stageable(Bool) + object RS1 extends Stageable(Bits(32 bits)) + object RS2 extends Stageable(Bits(32 bits)) + object RS1_USE extends Stageable(Bool) + object RS2_USE extends Stageable(Bool) + object RESULT extends Stageable(UInt(32 bits)) + object PC extends Stageable(UInt(32 bits)) + object PC_CALC_WITHOUT_JUMP extends Stageable(UInt(32 bits)) + object INSTRUCTION extends Stageable(Bits(32 bits)) + object INSTRUCTION_ANTICIPATED extends Stageable(Bits(32 bits)) + object LEGAL_INSTRUCTION extends Stageable(Bool) + object REGFILE_WRITE_VALID extends Stageable(Bool) + 
object REGFILE_WRITE_DATA extends Stageable(Bits(32 bits)) + + object MPP extends PipelineThing[UInt] + object DEBUG_BYPASS_CACHE extends PipelineThing[Bool] + + object SRC1 extends Stageable(Bits(32 bits)) + object SRC2 extends Stageable(Bits(32 bits)) + object SRC_ADD_SUB extends Stageable(Bits(32 bits)) + object SRC_ADD extends Stageable(Bits(32 bits)) + object SRC_SUB extends Stageable(Bits(32 bits)) + object SRC_LESS extends Stageable(Bool) + object SRC_USE_SUB_LESS extends Stageable(Bool) + object SRC_LESS_UNSIGNED extends Stageable(Bool) + object SRC_ADD_ZERO extends Stageable(Bool) + + + object HAS_SIDE_EFFECT extends Stageable(Bool) + + //Formal verification purposes + object FORMAL_HALT extends Stageable(Bool) + object FORMAL_PC_NEXT extends Stageable(UInt(32 bits)) + object FORMAL_MEM_ADDR extends Stageable(UInt(32 bits)) + object FORMAL_MEM_RMASK extends Stageable(Bits(4 bits)) + object FORMAL_MEM_WMASK extends Stageable(Bits(4 bits)) + object FORMAL_MEM_RDATA extends Stageable(Bits(32 bits)) + object FORMAL_MEM_WDATA extends Stageable(Bits(32 bits)) + object FORMAL_INSTRUCTION extends Stageable(Bits(32 bits)) + + + object Src1CtrlEnum extends SpinalEnum(binarySequential){ + val RS, IMU, PC_INCREMENT, URS1 = newElement() //IMU, IMZ IMJB + } + + object Src2CtrlEnum extends SpinalEnum(binarySequential){ + val RS, IMI, IMS, PC = newElement() //TODO remplacing ZERO could avoid 32 muxes if SRC_ADD can be disabled + } + object SRC1_CTRL extends Stageable(Src1CtrlEnum()) + object SRC2_CTRL extends Stageable(Src2CtrlEnum()) + + def getRegressionArgs() : Seq[String] = { + val str = ArrayBuffer[String]() + plugins.foreach{ + case e : VexRiscvRegressionArg => str ++= e.getVexRiscvRegressionArgs() + case _ => + } + str + } +} + + + + +class VexRiscv(val config : VexRiscvConfig) extends Component with Pipeline{ + type T = VexRiscv + import config._ + + //Define stages + def newStage(): Stage = { val s = new Stage; stages += s; s } + val decode = newStage() + val execute = newStage() + val memory = ifGen(config.withMemoryStage) (newStage()) + val writeBack = ifGen(config.withWriteBackStage) (newStage()) + + def stagesFromExecute = stages.dropWhile(_ != execute) + + plugins ++= config.plugins + + //regression usage + val lastStageInstruction = CombInit(stages.last.input(config.INSTRUCTION)).dontSimplifyIt().addAttribute (Verilator.public) + val lastStagePc = CombInit(stages.last.input(config.PC)).dontSimplifyIt().addAttribute(Verilator.public) + val lastStageIsValid = CombInit(stages.last.arbitration.isValid).dontSimplifyIt().addAttribute(Verilator.public) + val lastStageIsFiring = CombInit(stages.last.arbitration.isFiring).dontSimplifyIt().addAttribute(Verilator.public) + + //Verilator perf + decode.arbitration.removeIt.noBackendCombMerge + if(withMemoryStage){ + memory.arbitration.removeIt.noBackendCombMerge + } + execute.arbitration.flushNext.noBackendCombMerge +} + + diff --git a/VexRiscv/src/main/scala/vexriscv/VexRiscvBmbGenerator.scala b/VexRiscv/src/main/scala/vexriscv/VexRiscvBmbGenerator.scala new file mode 100644 index 0000000..9b08f68 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/VexRiscvBmbGenerator.scala @@ -0,0 +1,179 @@ +package vexriscv + +import spinal.core._ +import spinal.lib.bus.bmb.{Bmb, BmbAccessCapabilities, BmbAccessParameter, BmbImplicitDebugDecoder, BmbInterconnectGenerator, BmbInvalidationParameter, BmbParameter} +import spinal.lib.bus.misc.AddressMapping +import spinal.lib.com.jtag.{Jtag, JtagTapInstructionCtrl} +import spinal.lib.generator._ +import 
spinal.lib.{sexport, slave} +import vexriscv.plugin._ +import spinal.core.fiber._ + +object VexRiscvBmbGenerator{ + val DEBUG_NONE = 0 + val DEBUG_JTAG = 1 + val DEBUG_JTAG_CTRL = 2 + val DEBUG_BUS = 3 + val DEBUG_BMB = 4 +} + +case class VexRiscvBmbGenerator()(implicit interconnectSmp: BmbInterconnectGenerator = null) extends Area { + import VexRiscvBmbGenerator._ + + val config = Handle[VexRiscvConfig] + val withDebug = Handle[Int] + val debugClockDomain = Handle[ClockDomain] + val debugReset = Handle[Bool] + val debugAskReset = Handle[() => Unit] + val hardwareBreakpointCount = Handle.sync(0) + + val iBus, dBus = Handle[Bmb] + + val externalInterrupt = Handle[Bool] + val externalSupervisorInterrupt = Handle[Bool] + val timerInterrupt = Handle[Bool] + val softwareInterrupt = Handle[Bool] + + def setTimerInterrupt(that: Handle[Bool]) = Dependable(that, timerInterrupt){timerInterrupt := that} + def setSoftwareInterrupt(that: Handle[Bool]) = Dependable(that, softwareInterrupt){softwareInterrupt := that} + + + def disableDebug() = { + withDebug.load(DEBUG_NONE) + } + + def enableJtag(debugCd : ClockDomainResetGenerator, resetCd : ClockDomainResetGenerator) : Unit = debugCd.rework{ + this.debugClockDomain.load(debugCd.outputClockDomain) + val resetBridge = resetCd.asyncReset(debugReset, ResetSensitivity.HIGH) + debugAskReset.loadNothing() + withDebug.load(DEBUG_JTAG) + } + + def enableJtagInstructionCtrl(debugCd : ClockDomainResetGenerator, resetCd : ClockDomainResetGenerator) : Unit = debugCd.rework{ + this.debugClockDomain.load(debugCd.outputClockDomain) + val resetBridge = resetCd.asyncReset(debugReset, ResetSensitivity.HIGH) + debugAskReset.loadNothing() + withDebug.load(DEBUG_JTAG_CTRL) + } + + def enableDebugBus(debugCd : ClockDomainResetGenerator, resetCd : ClockDomainResetGenerator) : Unit = debugCd.rework{ + this.debugClockDomain.load(debugCd.outputClockDomain) + val resetBridge = resetCd.asyncReset(debugReset, ResetSensitivity.HIGH) + debugAskReset.loadNothing() + withDebug.load(DEBUG_BUS) + } + + val debugBmbAccessSource = Handle[BmbAccessCapabilities] + val debugBmbAccessRequirements = Handle[BmbAccessParameter] + def enableDebugBmb(debugCd : Handle[ClockDomain], resetCd : ClockDomainResetGenerator, mapping : AddressMapping)(implicit debugMaster : BmbImplicitDebugDecoder = null) : Unit = debugCd.on{ + this.debugClockDomain.load(debugCd) + val resetBridge = resetCd.asyncReset(debugReset, ResetSensitivity.HIGH) + debugAskReset.loadNothing() + withDebug.load(DEBUG_BMB) + val slaveModel = debugCd on interconnectSmp.addSlave( + accessSource = debugBmbAccessSource, + accessCapabilities = debugBmbAccessSource.derivate(DebugExtensionBus.getBmbAccessParameter(_)), + accessRequirements = debugBmbAccessRequirements, + bus = debugBmb, + mapping = mapping + ) + debugBmb.derivatedFrom(debugBmbAccessRequirements)(Bmb(_)) + if(debugMaster != null) interconnectSmp.addConnection(debugMaster.bus, debugBmb) + } + + val jtag = Handle(withDebug.get == DEBUG_JTAG generate slave(Jtag())) + val jtagInstructionCtrl = withDebug.produce(withDebug.get == DEBUG_JTAG_CTRL generate JtagTapInstructionCtrl()) + val debugBus = withDebug.produce(withDebug.get == DEBUG_BUS generate DebugExtensionBus()) + val debugBmb = Handle[Bmb] + val jtagClockDomain = Handle[ClockDomain] + + val logic = Handle(new Area { + withDebug.get != DEBUG_NONE generate new Area { + config.add(new DebugPlugin(debugClockDomain, hardwareBreakpointCount)) + } + + val cpu = new VexRiscv(config) + def doExport(value : => Any, postfix : String) = 
{ + sexport(Handle(value).setCompositeName(VexRiscvBmbGenerator.this, postfix)) + } + + doExport(cpu.plugins.exists(_.isInstanceOf[CfuPlugin]), "cfu") + doExport(cpu.plugins.exists(_.isInstanceOf[FpuPlugin]), "fpu") + for (plugin <- cpu.plugins) plugin match { + case plugin: IBusSimplePlugin => iBus.load(plugin.iBus.toBmb()) + case plugin: DBusSimplePlugin => dBus.load(plugin.dBus.toBmb()) + case plugin: IBusCachedPlugin => { + iBus.load(plugin.iBus.toBmb()) + doExport(plugin.config.wayCount, "icacheWays") + doExport(plugin.config.cacheSize, "icacheSize") + doExport(plugin.config.bytePerLine, "bytesPerLine") + } + case plugin: DBusCachedPlugin => { + dBus.load(plugin.dBus.toBmb()) + doExport(plugin.config.wayCount, "dcacheWays") + doExport(plugin.config.cacheSize, "dcacheSize") + doExport(plugin.config.bytePerLine, "bytesPerLine") + } + case plugin: MmuPlugin => { + doExport(true, "mmu") + } + case plugin: StaticMemoryTranslatorPlugin => { + doExport(false, "mmu") + } + case plugin: CsrPlugin => { + doExport(plugin.config.supervisorGen, "supervisor") + externalInterrupt load plugin.externalInterrupt + timerInterrupt load plugin.timerInterrupt + softwareInterrupt load plugin.softwareInterrupt + if (plugin.config.supervisorGen) externalSupervisorInterrupt load plugin.externalInterruptS + } + case plugin: DebugPlugin => plugin.debugClockDomain { + if(debugAskReset.get != null) when(RegNext(plugin.io.resetOut)) { + debugAskReset.get() + } else { + debugReset.load(RegNext(plugin.io.resetOut)) + } + + withDebug.get match { + case DEBUG_JTAG => jtag <> plugin.io.bus.fromJtag() + case DEBUG_JTAG_CTRL => jtagInstructionCtrl <> plugin.io.bus.fromJtagInstructionCtrl(jtagClockDomain, 0) + case DEBUG_BUS => debugBus <> plugin.io.bus + case DEBUG_BMB => debugBmb >> plugin.io.bus.fromBmb() + } + } + case _ => + } + }) + + + logic.soon(debugReset) + + val parameterGenerator = new Generator { + val iBusParameter, dBusParameter = product[BmbParameter] + dependencies += config + + add task { + for (plugin <- config.plugins) plugin match { + case plugin: IBusSimplePlugin => iBusParameter.load(IBusSimpleBus.getBmbParameter()) + case plugin: DBusSimplePlugin => dBusParameter.load(DBusSimpleBus.getBmbParameter()) + case plugin: IBusCachedPlugin => iBusParameter.load(plugin.config.getBmbParameter()) + case plugin: DBusCachedPlugin => dBusParameter.load(plugin.config.getBmbParameter()) + case _ => + } + } + } + + val invalidationSource = Handle[BmbInvalidationParameter] + val invalidationRequirements = Handle[BmbInvalidationParameter] + if(interconnectSmp != null){ + interconnectSmp.addMaster(accessRequirements = parameterGenerator.iBusParameter.derivate(_.access), bus = iBus) + interconnectSmp.addMaster( + accessRequirements = parameterGenerator.dBusParameter.derivate(_.access), + invalidationSource = invalidationSource, + invalidationCapabilities = invalidationSource, + invalidationRequirements = invalidationRequirements, + bus = dBus + ) + } + +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/Briey.scala b/VexRiscv/src/main/scala/vexriscv/demo/Briey.scala new file mode 100644 index 0000000..32e6d62 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/Briey.scala @@ -0,0 +1,490 @@ +package vexriscv.demo + + +import vexriscv.plugin._ +import vexriscv._ +import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.amba3.apb._ +import spinal.lib.bus.amba4.axi._ +import spinal.lib.com.jtag.Jtag +import spinal.lib.com.jtag.sim.JtagTcp +import 
spinal.lib.com.uart.sim.{UartDecoder, UartEncoder} +import spinal.lib.com.uart.{Apb3UartCtrl, Uart, UartCtrlGenerics, UartCtrlMemoryMappedConfig} +import spinal.lib.graphic.RgbConfig +import spinal.lib.graphic.vga.{Axi4VgaCtrl, Axi4VgaCtrlGenerics, Vga} +import spinal.lib.io.TriStateArray +import spinal.lib.memory.sdram.SdramGeneration.SDR +import spinal.lib.memory.sdram._ +import spinal.lib.memory.sdram.sdr.sim.SdramModel +import spinal.lib.memory.sdram.sdr.{Axi4SharedSdramCtrl, IS42x320D, SdramInterface, SdramTimings} +import spinal.lib.misc.HexTools +import spinal.lib.soc.pinsec.{PinsecTimerCtrl, PinsecTimerCtrlExternal} +import spinal.lib.system.debugger.{JtagAxi4SharedDebugger, JtagBridge, SystemDebugger, SystemDebuggerConfig} + +import scala.collection.mutable.ArrayBuffer +import scala.collection.Seq + +case class BrieyConfig(axiFrequency : HertzNumber, + onChipRamSize : BigInt, + sdramLayout: SdramLayout, + sdramTimings: SdramTimings, + cpuPlugins : ArrayBuffer[Plugin[VexRiscv]], + uartCtrlConfig : UartCtrlMemoryMappedConfig) + +object BrieyConfig{ + + def default = { + val config = BrieyConfig( + axiFrequency = 50 MHz, + onChipRamSize = 4 kB, + sdramLayout = IS42x320D.layout, + sdramTimings = IS42x320D.timingGrade7, + uartCtrlConfig = UartCtrlMemoryMappedConfig( + uartCtrlConfig = UartCtrlGenerics( + dataWidthMax = 8, + clockDividerWidth = 20, + preSamplingSize = 1, + samplingSize = 5, + postSamplingSize = 2 + ), + txFifoDepth = 16, + rxFifoDepth = 16 + ), + cpuPlugins = ArrayBuffer( + new PcManagerSimplePlugin(0x80000000l, false), + // new IBusSimplePlugin( + // interfaceKeepData = false, + // catchAccessFault = true + // ), + new IBusCachedPlugin( + resetVector = 0x80000000l, + prediction = STATIC, + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine =32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = true, + twoCycleCache = true + ) + // askMemoryTranslation = true, + // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( + // portTlbSize = 4 + // ) + ), + // new DBusSimplePlugin( + // catchAddressMisaligned = true, + // catchAccessFault = true + // ), + new DBusCachedPlugin( + config = new DataCacheConfig( + cacheSize = 4096, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true + ), + memoryTranslatorPortConfig = null + // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( + // portTlbSize = 6 + // ) + ), + new StaticMemoryTranslatorPlugin( + ioRange = _(31 downto 28) === 0xF + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin, + new MulPlugin, + new DivPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new CsrPlugin( + config = CsrPluginConfig( + catchIllegalAccess = false, + mvendorid = null, + marchid = null, + mimpid = null, + mhartid = null, + misaExtensionsInit = 66, + misaAccess = CsrAccess.NONE, + mtvecAccess = 
CsrAccess.NONE, + mtvecInit = 0x80000020l, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = false, + mcauseAccess = CsrAccess.READ_ONLY, + mbadaddrAccess = CsrAccess.READ_ONLY, + mcycleAccess = CsrAccess.NONE, + minstretAccess = CsrAccess.NONE, + ecallGen = false, + wfiGenAsWait = false, + ucycleAccess = CsrAccess.NONE, + uinstretAccess = CsrAccess.NONE + ) + ), + new YamlPlugin("cpu0.yaml") + ) + ) + config + } +} + + + +class Briey(val config: BrieyConfig) extends Component{ + + //Legacy constructor + def this(axiFrequency: HertzNumber) { + this(BrieyConfig.default.copy(axiFrequency = axiFrequency)) + } + + import config._ + val debug = true + val interruptCount = 4 + def vgaRgbConfig = RgbConfig(5,6,5) + + val io = new Bundle{ + //Clocks / reset + val asyncReset = in Bool() + val axiClk = in Bool() + val vgaClk = in Bool() + + //Main components IO + val jtag = slave(Jtag()) + val sdram = master(SdramInterface(sdramLayout)) + + //Peripherals IO + val gpioA = master(TriStateArray(32 bits)) + val gpioB = master(TriStateArray(32 bits)) + val uart = master(Uart()) + val vga = master(Vga(vgaRgbConfig)) + val timerExternal = in(PinsecTimerCtrlExternal()) + val coreInterrupt = in Bool() + } + + val resetCtrlClockDomain = ClockDomain( + clock = io.axiClk, + config = ClockDomainConfig( + resetKind = BOOT + ) + ) + + val resetCtrl = new ClockingArea(resetCtrlClockDomain) { + val systemResetUnbuffered = False + // val coreResetUnbuffered = False + + //Implement an counter to keep the reset axiResetOrder high 64 cycles + // Also this counter will automaticly do a reset when the system boot. + val systemResetCounter = Reg(UInt(6 bits)) init(0) + when(systemResetCounter =/= U(systemResetCounter.range -> true)){ + systemResetCounter := systemResetCounter + 1 + systemResetUnbuffered := True + } + when(BufferCC(io.asyncReset)){ + systemResetCounter := 0 + } + + //Create all reset used later in the design + val systemReset = RegNext(systemResetUnbuffered) + val axiReset = RegNext(systemResetUnbuffered) + val vgaReset = BufferCC(axiReset) + } + + val axiClockDomain = ClockDomain( + clock = io.axiClk, + reset = resetCtrl.axiReset, + frequency = FixedFrequency(axiFrequency) //The frequency information is used by the SDRAM controller + ) + + val debugClockDomain = ClockDomain( + clock = io.axiClk, + reset = resetCtrl.systemReset, + frequency = FixedFrequency(axiFrequency) + ) + + val vgaClockDomain = ClockDomain( + clock = io.vgaClk, + reset = resetCtrl.vgaReset + ) + + val axi = new ClockingArea(axiClockDomain) { + val ram = Axi4SharedOnChipRam( + dataWidth = 32, + byteCount = onChipRamSize, + idWidth = 4 + ) + + val sdramCtrl = Axi4SharedSdramCtrl( + axiDataWidth = 32, + axiIdWidth = 4, + layout = sdramLayout, + timing = sdramTimings, + CAS = 3 + ) + + + val apbBridge = Axi4SharedToApb3Bridge( + addressWidth = 20, + dataWidth = 32, + idWidth = 4 + ) + + val gpioACtrl = Apb3Gpio( + gpioWidth = 32, + withReadSync = true + ) + val gpioBCtrl = Apb3Gpio( + gpioWidth = 32, + withReadSync = true + ) + val timerCtrl = PinsecTimerCtrl() + + + val uartCtrl = Apb3UartCtrl(uartCtrlConfig) + uartCtrl.io.apb.addAttribute(Verilator.public) + + + val vgaCtrlConfig = Axi4VgaCtrlGenerics( + axiAddressWidth = 32, + axiDataWidth = 32, + burstLength = 8, + frameSizeMax = 2048*1512*2, + fifoSize = 512, + rgbConfig = vgaRgbConfig, + vgaClock = vgaClockDomain + ) + val vgaCtrl = Axi4VgaCtrl(vgaCtrlConfig) + + + + val core = new Area{ + val config = VexRiscvConfig( + plugins = cpuPlugins += new DebugPlugin(debugClockDomain) + ) 
+ + val cpu = new VexRiscv(config) + var iBus : Axi4ReadOnly = null + var dBus : Axi4Shared = null + for(plugin <- config.plugins) plugin match{ + case plugin : IBusSimplePlugin => iBus = plugin.iBus.toAxi4ReadOnly() + case plugin : IBusCachedPlugin => iBus = plugin.iBus.toAxi4ReadOnly() + case plugin : DBusSimplePlugin => dBus = plugin.dBus.toAxi4Shared() + case plugin : DBusCachedPlugin => dBus = plugin.dBus.toAxi4Shared(true) + case plugin : CsrPlugin => { + plugin.externalInterrupt := BufferCC(io.coreInterrupt) + plugin.timerInterrupt := timerCtrl.io.interrupt + } + case plugin : DebugPlugin => debugClockDomain{ + resetCtrl.axiReset setWhen(RegNext(plugin.io.resetOut)) + io.jtag <> plugin.io.bus.fromJtag() + } + case _ => + } + } + + + val axiCrossbar = Axi4CrossbarFactory() + + axiCrossbar.addSlaves( + ram.io.axi -> (0x80000000L, onChipRamSize), + sdramCtrl.io.axi -> (0x40000000L, sdramLayout.capacity), + apbBridge.io.axi -> (0xF0000000L, 1 MB) + ) + + axiCrossbar.addConnections( + core.iBus -> List(ram.io.axi, sdramCtrl.io.axi), + core.dBus -> List(ram.io.axi, sdramCtrl.io.axi, apbBridge.io.axi), + vgaCtrl.io.axi -> List( sdramCtrl.io.axi) + ) + + + axiCrossbar.addPipelining(apbBridge.io.axi)((crossbar,bridge) => { + crossbar.sharedCmd.halfPipe() >> bridge.sharedCmd + crossbar.writeData.halfPipe() >> bridge.writeData + crossbar.writeRsp << bridge.writeRsp + crossbar.readRsp << bridge.readRsp + }) + + axiCrossbar.addPipelining(sdramCtrl.io.axi)((crossbar,ctrl) => { + crossbar.sharedCmd.halfPipe() >> ctrl.sharedCmd + crossbar.writeData >/-> ctrl.writeData + crossbar.writeRsp << ctrl.writeRsp + crossbar.readRsp << ctrl.readRsp + }) + + axiCrossbar.addPipelining(ram.io.axi)((crossbar,ctrl) => { + crossbar.sharedCmd.halfPipe() >> ctrl.sharedCmd + crossbar.writeData >/-> ctrl.writeData + crossbar.writeRsp << ctrl.writeRsp + crossbar.readRsp << ctrl.readRsp + }) + + axiCrossbar.addPipelining(vgaCtrl.io.axi)((ctrl,crossbar) => { + ctrl.readCmd.halfPipe() >> crossbar.readCmd + ctrl.readRsp << crossbar.readRsp + }) + + axiCrossbar.addPipelining(core.dBus)((cpu,crossbar) => { + cpu.sharedCmd >> crossbar.sharedCmd + cpu.writeData >> crossbar.writeData + cpu.writeRsp << crossbar.writeRsp + cpu.readRsp <-< crossbar.readRsp //Data cache directly use read responses without buffering, so pipeline it for FMax + }) + + axiCrossbar.build() + + + val apbDecoder = Apb3Decoder( + master = apbBridge.io.apb, + slaves = List( + gpioACtrl.io.apb -> (0x00000, 4 kB), + gpioBCtrl.io.apb -> (0x01000, 4 kB), + uartCtrl.io.apb -> (0x10000, 4 kB), + timerCtrl.io.apb -> (0x20000, 4 kB), + vgaCtrl.io.apb -> (0x30000, 4 kB) + ) + ) + } + + io.gpioA <> axi.gpioACtrl.io.gpio + io.gpioB <> axi.gpioBCtrl.io.gpio + io.timerExternal <> axi.timerCtrl.io.external + io.uart <> axi.uartCtrl.io.uart + io.sdram <> axi.sdramCtrl.io.sdram + io.vga <> axi.vgaCtrl.io.vga +} + +//DE1-SoC +object Briey{ + def main(args: Array[String]) { + val config = SpinalConfig() + config.generateVerilog({ + val toplevel = new Briey(BrieyConfig.default) + toplevel.axi.vgaCtrl.vga.ctrl.io.error.addAttribute(Verilator.public) + toplevel.axi.vgaCtrl.vga.ctrl.io.frameStart.addAttribute(Verilator.public) + toplevel + }) + } +} + +//DE1-SoC with memory init +object BrieyWithMemoryInit{ + def main(args: Array[String]) { + val config = SpinalConfig() + config.generateVerilog({ + val toplevel = new Briey(BrieyConfig.default) + toplevel.axi.vgaCtrl.vga.ctrl.io.error.addAttribute(Verilator.public) + 
toplevel.axi.vgaCtrl.vga.ctrl.io.frameStart.addAttribute(Verilator.public) + HexTools.initRam(toplevel.axi.ram.ram, "src/main/ressource/hex/muraxDemo.hex", 0x80000000l) + toplevel + }) + } +} + + +//DE0-Nano +object BrieyDe0Nano{ + def main(args: Array[String]) { + object IS42x160G { + def layout = SdramLayout( + generation = SDR, + bankWidth = 2, + columnWidth = 9, + rowWidth = 13, + dataWidth = 16 + ) + + def timingGrade7 = SdramTimings( + bootRefreshCount = 8, + tPOW = 100 us, + tREF = 64 ms, + tRC = 60 ns, + tRFC = 60 ns, + tRAS = 37 ns, + tRP = 15 ns, + tRCD = 15 ns, + cMRD = 2, + tWR = 10 ns, + cWR = 1 + ) + } + val config = SpinalConfig() + config.generateVerilog({ + val toplevel = new Briey(BrieyConfig.default.copy(sdramLayout = IS42x160G.layout)) + toplevel + }) + } +} + + + +import spinal.core.sim._ +object BrieySim { + def main(args: Array[String]): Unit = { + val simSlowDown = false + SimConfig.allOptimisation.compile(new Briey(BrieyConfig.default)).doSimUntilVoid{dut => + val mainClkPeriod = (1e12/dut.config.axiFrequency.toDouble).toLong + val jtagClkPeriod = mainClkPeriod*4 + val uartBaudRate = 115200 + val uartBaudPeriod = (1e12/uartBaudRate).toLong + + val clockDomain = ClockDomain(dut.io.axiClk, dut.io.asyncReset) + clockDomain.forkStimulus(mainClkPeriod) + + val tcpJtag = JtagTcp( + jtag = dut.io.jtag, + jtagClkPeriod = jtagClkPeriod + ) + + val uartTx = UartDecoder( + uartPin = dut.io.uart.txd, + baudPeriod = uartBaudPeriod + ) + + val uartRx = UartEncoder( + uartPin = dut.io.uart.rxd, + baudPeriod = uartBaudPeriod + ) + + val sdram = SdramModel( + dut.io.sdram, + dut.config.sdramLayout, + clockDomain + ) + + dut.io.coreInterrupt #= false + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/CustomCsrDemoPlugin.scala b/VexRiscv/src/main/scala/vexriscv/demo/CustomCsrDemoPlugin.scala new file mode 100644 index 0000000..a763c83 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/CustomCsrDemoPlugin.scala @@ -0,0 +1,63 @@ +package vexriscv.demo + +import spinal.core._ +import spinal.lib.io.TriStateArray +import spinal.lib.{Flow, master} +import vexriscv.plugin.{CsrInterface, Plugin} +import vexriscv.{DecoderService, Stageable, VexRiscv} + + + +class CustomCsrDemoPlugin extends Plugin[VexRiscv]{ + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + pipeline plug new Area{ + val instructionCounter = Reg(UInt(32 bits)) + val cycleCounter = Reg(UInt(32 bits)) + + cycleCounter := cycleCounter + 1 + when(writeBack.arbitration.isFiring) { + instructionCounter := instructionCounter + 1 + } + + val csrService = pipeline.service(classOf[CsrInterface]) + csrService.rw(0xB04, instructionCounter) + csrService.r(0xB05, cycleCounter) + csrService.onWrite(0xB06){ + instructionCounter := 0 + } + csrService.onRead(0xB07){ + instructionCounter := 0x40000000 + } + } + } +} + + +class CustomCsrDemoGpioPlugin extends Plugin[VexRiscv]{ + var gpio : TriStateArray = null + + + override def setup(pipeline: VexRiscv): Unit = { + gpio = master(TriStateArray(32 bits)).setName("gpio") + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + pipeline plug new Area{ + val writeReg, writeEnableReg = Reg(Bits(32 bits)) + + val csrService = pipeline.service(classOf[CsrInterface]) + csrService.rw(0xB08, writeReg) + csrService.rw(0xB09, writeEnableReg) + csrService.r(0xB0A, gpio.read) + + gpio.writeEnable := writeEnableReg + gpio.write := writeReg + } + } +} diff --git 
a/VexRiscv/src/main/scala/vexriscv/demo/CustomInstruction.scala b/VexRiscv/src/main/scala/vexriscv/demo/CustomInstruction.scala new file mode 100644 index 0000000..dc35997 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/CustomInstruction.scala @@ -0,0 +1,75 @@ +package vexriscv.demo + +import spinal.core._ +import vexriscv.plugin.Plugin +import vexriscv.{Stageable, DecoderService, VexRiscv} + +//This plugin example will add a new instruction named SIMD_ADD which does the following : +// +//RD : Regfile Destination, RS : Regfile Source +//RD( 7 downto 0) = RS1( 7 downto 0) + RS2( 7 downto 0) +//RD(16 downto 8) = RS1(16 downto 8) + RS2(16 downto 8) +//RD(23 downto 16) = RS1(23 downto 16) + RS2(23 downto 16) +//RD(31 downto 24) = RS1(31 downto 24) + RS2(31 downto 24) +// +//Instruction encoding : +//0000011----------000-----0110011 +// |RS2||RS1| |RD | +// +//Note : RS1, RS2, RD positions follow the RISC-V spec and are common for all instructions of the ISA + +class SimdAddPlugin extends Plugin[VexRiscv]{ + //Define the concept of IS_SIMD_ADD signals, which specify if the current instruction is destined for this plugin + object IS_SIMD_ADD extends Stageable(Bool) + + //Callback to setup the plugin and ask for different services + override def setup(pipeline: VexRiscv): Unit = { + import pipeline.config._ + + //Retrieve the DecoderService instance + val decoderService = pipeline.service(classOf[DecoderService]) + + //Specify the IS_SIMD_ADD default value when instructions are decoded + decoderService.addDefault(IS_SIMD_ADD, False) + + //Specify the instruction decoding which should be applied when the instruction matches the 'key' pattern + decoderService.add( + //Bit pattern of the new SIMD_ADD instruction + key = M"0000011----------000-----0110011", + + //Decoding specification when the 'key' pattern is recognized in the instruction + List( + IS_SIMD_ADD -> True, + REGFILE_WRITE_VALID -> True, //Enable the register file write + BYPASSABLE_EXECUTE_STAGE -> True, //Notify the hazard management unit that the instruction result is already accessible in the EXECUTE stage (Bypass ready) + BYPASSABLE_MEMORY_STAGE -> True, //Same as above but for the memory stage + RS1_USE -> True, //Notify the hazard management unit that this instruction uses the RS1 value + RS2_USE -> True //Same as above but for RS2. + ) + ) + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + //Add a new scope on the execute stage (used to give a name to signals) + execute plug new Area { + //Define some signals used internally to the plugin + val rs1 = execute.input(RS1).asUInt //32 bits UInt value of the regfile[RS1] + val rs2 = execute.input(RS2).asUInt + val rd = UInt(32 bits) + + //Do some computation + rd(7 downto 0) := rs1(7 downto 0) + rs2(7 downto 0) + rd(16 downto 8) := rs1(16 downto 8) + rs2(16 downto 8) + rd(23 downto 16) := rs1(23 downto 16) + rs2(23 downto 16) + rd(31 downto 24) := rs1(31 downto 24) + rs2(31 downto 24) + + //When the instruction is a SIMD_ADD one, then write the result into the register file data path.
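// (Worked example of the encoding documented at the top of this file: with funct7=0000011,
// funct3=000, opcode=0110011 and the standard R-type rd/rs1/rs2 field positions, the word for
// "SIMD_ADD x3, x1, x2" works out to
//   (0x03 << 25) | (2 << 20) | (1 << 15) | (3 << 7) | 0x33  ==  0x062081B3
// which matches the M"0000011----------000-----0110011" key above and therefore sets
// IS_SIMD_ADD during decode.)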
+ when(execute.input(IS_SIMD_ADD)) { + execute.output(REGFILE_WRITE_DATA) := rd.asBits + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/FormalSimple.scala b/VexRiscv/src/main/scala/vexriscv/demo/FormalSimple.scala new file mode 100644 index 0000000..9a4167e --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/FormalSimple.scala @@ -0,0 +1,65 @@ +package vexriscv.demo + +import vexriscv.plugin._ +import vexriscv.{plugin, VexRiscv, VexRiscvConfig} +import spinal.core._ + +/** + * Created by spinalvm on 15.06.17. + */ +object FormalSimple extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new FormalPlugin, + new HaltOnExceptionPlugin, + new IBusSimplePlugin( + resetVector = 0x00000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = DYNAMIC_TARGET, + catchAccessFault = false, + compressedGen = true + ), + new DBusSimplePlugin( + catchAddressMisaligned = true, + catchAccessFault = false + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true, + forceLegalInstructionComputation = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = false + ), + new FullBarrelShifterPlugin, + new HazardSimplePlugin( + bypassExecute = false, + bypassMemory = false, + bypassWriteBack = false, + bypassWriteBackBuffer = false, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + SpinalConfig( + defaultConfigForClockDomains = ClockDomainConfig( + resetKind = spinal.core.SYNC, + resetActiveLevel = spinal.core.HIGH + ) + ).generateVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenCustomCsr.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenCustomCsr.scala new file mode 100644 index 0000000..11db86d --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenCustomCsr.scala @@ -0,0 +1,62 @@ +package vexriscv.demo + +import spinal.core._ +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} + +/** + * Created by spinalvm on 15.06.17. 
+ */ + +//make clean run DBUS=SIMPLE IBUS=SIMPLE CSR=no MMU=no DEBUG_PLUGIN=no MUL=no DIV=no CUSTOM_CSR=yes +object GenCustomCsr extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new CustomCsrDemoPlugin, + new CsrPlugin(CsrPluginConfig.small), + new CustomCsrDemoGpioPlugin, + new IBusSimplePlugin( + resetVector = 0x00000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = false + ), + new FullBarrelShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenCustomInterrupt.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenCustomInterrupt.scala new file mode 100644 index 0000000..d0d9e48 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenCustomInterrupt.scala @@ -0,0 +1,72 @@ +package vexriscv.demo + +import spinal.core._ +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} + +/** + * Created by spinalvm on 15.06.17. + */ +object GenCustomInterrupt extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new UserInterruptPlugin( + interruptName = "miaou", + code = 20 + ), + new UserInterruptPlugin( + interruptName = "rawrrr", + code = 24 + ), + new CsrPlugin( + CsrPluginConfig.smallest.copy( + xtvecModeGen = true, + mtvecAccess = CsrAccess.WRITE_ONLY + ) + ), + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenCustomSimdAdd.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenCustomSimdAdd.scala new file mode 100644 index 0000000..8d9d6be --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenCustomSimdAdd.scala @@ -0,0 +1,58 @@ +package vexriscv.demo + +import spinal.core._ +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} + +/** + * Created by spinalvm on 15.06.17. 
+ */ +object GenCustomSimdAdd extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new SimdAddPlugin, + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = false + ), + new FullBarrelShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenDeterministicVex.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenDeterministicVex.scala new file mode 100644 index 0000000..943ba16 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenDeterministicVex.scala @@ -0,0 +1,66 @@ +package vexriscv.demo + +import spinal.core._ +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} + +/** + * Created by spinalvm on 15.06.17. + */ +object GenDeterministicVex extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = STATIC, + catchAccessFault = true, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = true, + catchAccessFault = true, + earlyInjection = false + ), + new StaticMemoryTranslatorPlugin( + ioRange = _(31 downto 28) === 0xF + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin(earlyInjection = true), + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new MulPlugin, + new DivPlugin, + new CsrPlugin(CsrPluginConfig.small), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = true, + catchAddressMisaligned = true + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenFull.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenFull.scala new file mode 100644 index 0000000..eb1dba3 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenFull.scala @@ -0,0 +1,92 @@ +package vexriscv.demo + +import vexriscv.plugin._ +import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import vexriscv.{plugin, VexRiscv, VexRiscvConfig} +import spinal.core._ + +/** + * Created by spinalvm on 15.06.17. 
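 *
 * Added note (illustrative, not part of the original patch): GenFull is the cached + MMU
 * flavour, and it builds its VexRiscvConfig in a separate `def config`, so the same plugin
 * mix can be reused outside this App. A minimal sketch (the object name and target
 * directory below are hypothetical):
 *
 *   object GenFullToRtl extends App {
 *     SpinalConfig(targetDirectory = "rtl").generateVerilog(new VexRiscv(GenFull.config))
 *   }
 *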
+ */ +object GenFull extends App{ + def config = VexRiscvConfig( + plugins = List( + new IBusCachedPlugin( + prediction = DYNAMIC, + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine =32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = true, + twoCycleCache = true + ), + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = 4 + ) + ), + new DBusCachedPlugin( + config = new DataCacheConfig( + cacheSize = 4096, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true + ), + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = 6 + ) + ), + new MmuPlugin( + virtualRange = _(31 downto 28) === 0xC, + ioRange = _(31 downto 28) === 0xF + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new MulPlugin, + new DivPlugin, + new CsrPlugin(CsrPluginConfig.small(0x80000020l)), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new YamlPlugin("cpu0.yaml") + ) + ) + + def cpu() = new VexRiscv( + config + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenFullNoMmu.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenFullNoMmu.scala new file mode 100644 index 0000000..00ba8c9 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenFullNoMmu.scala @@ -0,0 +1,87 @@ +package vexriscv.demo + +import vexriscv.plugin._ +import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import vexriscv.{plugin, VexRiscv, VexRiscvConfig} +import spinal.core._ + +/** + * Created by spinalvm on 15.06.17. 
+ */ +object GenFullNoMmu extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new PcManagerSimplePlugin( + resetVector = 0x80000000l, + relaxedPcCalculation = false + ), + new IBusCachedPlugin( + prediction = STATIC, + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine =32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = true, + twoCycleCache = true + ) + ), + new DBusCachedPlugin( + config = new DataCacheConfig( + cacheSize = 4096, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true + ) + ), + new StaticMemoryTranslatorPlugin( + ioRange = _(31 downto 28) === 0xF + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new MulPlugin, + new DivPlugin, + new CsrPlugin(CsrPluginConfig.small), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenFullNoMmuMaxPerf.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenFullNoMmuMaxPerf.scala new file mode 100644 index 0000000..6c892f0 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenFullNoMmuMaxPerf.scala @@ -0,0 +1,88 @@ +package vexriscv.demo + +import spinal.core._ +import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} + +/** + * Created by spinalvm on 15.06.17. 
+ */ +object GenFullNoMmuMaxPerf extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new PcManagerSimplePlugin( + resetVector = 0x80000000l, + relaxedPcCalculation = false + ), + new IBusCachedPlugin( + prediction = DYNAMIC_TARGET, + historyRamSizeLog2 = 8, + config = InstructionCacheConfig( + cacheSize = 4096*2, + bytePerLine =32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = false, + twoCycleCache = true + ) + ), + new DBusCachedPlugin( + config = new DataCacheConfig( + cacheSize = 4096*2, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true + ) + ), + new StaticMemoryTranslatorPlugin( + ioRange = _(31 downto 28) === 0xF + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin(earlyInjection = true), + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new MulPlugin, + new DivPlugin, + new CsrPlugin(CsrPluginConfig.small), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenFullNoMmuNoCache.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenFullNoMmuNoCache.scala new file mode 100644 index 0000000..77ed87a --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenFullNoMmuNoCache.scala @@ -0,0 +1,63 @@ +package vexriscv.demo + +import vexriscv.plugin._ +import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import vexriscv.{plugin, VexRiscv, VexRiscvConfig} +import spinal.core._ + +/** + * Created by spinalvm on 15.06.17. 
+ */ +object GenFullNoMmuNoCache extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = STATIC, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new MulPlugin, + new DivPlugin, + new CsrPlugin(CsrPluginConfig.small), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenFullNoMmuNoCacheSimpleMul.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenFullNoMmuNoCacheSimpleMul.scala new file mode 100644 index 0000000..f1e9874 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenFullNoMmuNoCacheSimpleMul.scala @@ -0,0 +1,63 @@ +package vexriscv.demo + +import vexriscv.plugin._ +import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import vexriscv.{plugin, VexRiscv, VexRiscvConfig} +import spinal.core._ + +/** + * Created by spinalvm on 15.06.17. + */ +object GenFullNoMmuNoCacheSimpleMul extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = STATIC, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new MulSimplePlugin, + new DivPlugin, + new CsrPlugin(CsrPluginConfig.small), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenMicroNoCsr.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenMicroNoCsr.scala new file mode 100644 index 0000000..bcd1b77 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenMicroNoCsr.scala @@ -0,0 +1,61 @@ +package vexriscv.demo + +import vexriscv.plugin._ +import vexriscv.{plugin, VexRiscv, VexRiscvConfig} +import spinal.core._ + +/** + * Created by spinalvm on 15.06.17. 
+ */ +object GenMicroNoCsr extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + withMemoryStage = false, + withWriteBackStage = false, + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false, + earlyInjection = false + ), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false, + writeRfInMemoryStage = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = false + ), + new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = false, + bypassMemory = false, + bypassWriteBack = false, + bypassWriteBackBuffer = false, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = true, + catchAddressMisaligned = false + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + SpinalConfig(mergeAsyncProcess = false).generateVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenNoCacheNoMmuMaxPerf.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenNoCacheNoMmuMaxPerf.scala new file mode 100644 index 0000000..9bca107 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenNoCacheNoMmuMaxPerf.scala @@ -0,0 +1,68 @@ +package vexriscv.demo + +import spinal.core._ +import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} + +/** + * Created by spinalvm on 15.06.17. + */ +object GenNoCacheNoMmuMaxPerf extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = DYNAMIC_TARGET, + historyRamSizeLog2 = 8, + catchAccessFault = true, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = true, + catchAccessFault = true, + earlyInjection = false + ), + new StaticMemoryTranslatorPlugin( + ioRange = _(31 downto 28) === 0xF + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin(earlyInjection = true), + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new MulPlugin, + new MulDivIterativePlugin(genMul = false, genDiv = true, mulUnrollFactor = 1, divUnrollFactor = 1,dhrystoneOpt = false), + new CsrPlugin(CsrPluginConfig.small), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenSecure.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenSecure.scala new file mode 100644 index 0000000..8b2cd55 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenSecure.scala @@ -0,0 +1,87 @@ +package vexriscv.demo + +import vexriscv.plugin._ +import 
vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import vexriscv.{plugin, VexRiscv, VexRiscvConfig} +import spinal.core._ + +object GenSecure extends App { + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new IBusCachedPlugin( + resetVector = 0x80000000l, + prediction = STATIC, + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = true, + twoCycleCache = true + ) + ), + new DBusCachedPlugin( + config = new DataCacheConfig( + cacheSize = 4096, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true + ) + ), + new PmpPlugin( + regions = 16, + granularity = 32, + ioRange = _(31 downto 28) === 0xf + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new MulDivIterativePlugin( + genMul = true, + genDiv = true, + mulUnrollFactor = 1, + divUnrollFactor = 1 + ), + new CsrPlugin(CsrPluginConfig.secure(0x00000020l)), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenSmallAndProductive.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenSmallAndProductive.scala new file mode 100644 index 0000000..9bd6f72 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenSmallAndProductive.scala @@ -0,0 +1,59 @@ +package vexriscv.demo + +import vexriscv.plugin._ +import vexriscv.{plugin, VexRiscv, VexRiscvConfig} +import spinal.core._ + +/** + * Created by spinalvm on 15.06.17. 
+ */ +object GenSmallAndProductive extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new CsrPlugin(CsrPluginConfig.smallest), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenSmallAndProductiveCfu.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenSmallAndProductiveCfu.scala new file mode 100644 index 0000000..d28e318 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenSmallAndProductiveCfu.scala @@ -0,0 +1,87 @@ +package vexriscv.demo + +import spinal.core._ +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} + +/** + * Created by spinalvm on 15.06.17. + */ +object GenSmallAndProductiveCfu extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new CsrPlugin(CsrPluginConfig.smallest), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false + ), + new CfuPlugin( + stageCount = 1, + allowZeroLatency = true, + encodings = List( + CfuPluginEncoding ( + instruction = M"-------------------------0001011", + functionId = List(14 downto 12), + input2Kind = CfuPlugin.Input2Kind.RS + ) + ), + busParameter = CfuBusParameter( + CFU_VERSION = 0, + CFU_INTERFACE_ID_W = 0, + CFU_FUNCTION_ID_W = 3, + CFU_REORDER_ID_W = 0, + CFU_REQ_RESP_ID_W = 0, + CFU_INPUTS = 2, + CFU_INPUT_DATA_W = 32, + CFU_OUTPUTS = 1, + CFU_OUTPUT_DATA_W = 32, + CFU_FLOW_REQ_READY_ALWAYS = false, + CFU_FLOW_RESP_READY_ALWAYS = false, + CFU_WITH_STATUS = true, + CFU_RAW_INSN_W = 32, + CFU_CFU_ID_W = 4, + CFU_STATE_INDEX_NUM = 5 + ) + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenSmallAndProductiveICache.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenSmallAndProductiveICache.scala new file mode 100644 index 0000000..9cad30d --- /dev/null +++ 
b/VexRiscv/src/main/scala/vexriscv/demo/GenSmallAndProductiveICache.scala @@ -0,0 +1,71 @@ +package vexriscv.demo + +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} +import spinal.core._ +import vexriscv.ip.InstructionCacheConfig + +/** + * Created by spinalvm on 15.06.17. + */ +object GenSmallAndProductiveICache extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new PcManagerSimplePlugin( + resetVector = 0x80000000l, + relaxedPcCalculation = false + ), + new IBusCachedPlugin( + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = false, + catchAccessFault = false, + asyncTagMemory = false, + twoCycleRam = false, + twoCycleCache = true + ) + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new CsrPlugin(CsrPluginConfig.smallest), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenSmallAndProductiveVfu.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenSmallAndProductiveVfu.scala new file mode 100644 index 0000000..81ca61b --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenSmallAndProductiveVfu.scala @@ -0,0 +1,64 @@ +package vexriscv.demo + +import spinal.core._ +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} + +/** + * Created by spinalvm on 15.06.17. 
+ */ +object GenSmallAndProductiveVfu extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new CsrPlugin(CsrPluginConfig.smallest), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false + ), + new VfuPlugin( + stageCount = 2, + allowZeroLatency = false, + parameter = VfuParameter() + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenSmallest.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenSmallest.scala new file mode 100644 index 0000000..9813ccf --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenSmallest.scala @@ -0,0 +1,59 @@ +package vexriscv.demo + +import vexriscv.plugin._ +import vexriscv.{plugin, VexRiscv, VexRiscvConfig} +import spinal.core._ + +/** + * Created by spinalvm on 15.06.17. + */ +object GenSmallest extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new CsrPlugin(CsrPluginConfig.smallest), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = false + ), + new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = false, + bypassMemory = false, + bypassWriteBack = false, + bypassWriteBackBuffer = false, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + SpinalVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenSmallestNoCsr.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenSmallestNoCsr.scala new file mode 100644 index 0000000..cd1ee31 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenSmallestNoCsr.scala @@ -0,0 +1,64 @@ +package vexriscv.demo + +import vexriscv.plugin._ +import vexriscv.{plugin, VexRiscv, VexRiscvConfig} +import spinal.core._ + +/** + * Created by spinalvm on 15.06.17. 
+ */ +object GenSmallestNoCsr extends App{ + def cpu() = new VexRiscv( + config = VexRiscvConfig( + plugins = List( +// new PcManagerSimplePlugin( +// resetVector = 0x00000000l, +// relaxedPcCalculation = false +// ), + + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false, + earlyInjection = false + ), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false, + writeRfInMemoryStage = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = false + ), + new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = false, + bypassMemory = false, + bypassWriteBack = false, + bypassWriteBackBuffer = false, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + SpinalConfig(mergeAsyncProcess = false).generateVerilog(cpu()) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/GenTwoThreeStage.scala b/VexRiscv/src/main/scala/vexriscv/demo/GenTwoThreeStage.scala new file mode 100644 index 0000000..c3dd0db --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/GenTwoThreeStage.scala @@ -0,0 +1,72 @@ +package vexriscv.demo + +import spinal.core.SpinalVerilog +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} +import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusSimplePlugin, DecoderSimplePlugin, DivPlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusSimplePlugin, IntAluPlugin, LightShifterPlugin, MulPlugin, MulSimplePlugin, NONE, RegFilePlugin, SrcPlugin, YamlPlugin} + +object GenTwoThreeStage extends App{ + def cpu(withMulDiv : Boolean, + bypass : Boolean, + barrielShifter : Boolean, + withMemoryStage : Boolean) = new VexRiscv( + config = VexRiscvConfig( + withMemoryStage = withMemoryStage, + withWriteBackStage = false, + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = false, + injectorStage = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new CsrPlugin(CsrPluginConfig.smallest), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + readInExecute = true, + zeroBoot = true, + x0Init = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new HazardSimplePlugin( + bypassExecute = bypass, + bypassMemory = bypass, + bypassWriteBack = bypass, + bypassWriteBackBuffer = bypass, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = true, + catchAddressMisaligned = false + ), + new YamlPlugin("cpu0.yaml") + ) ++ (if(!withMulDiv) Nil else List( + new MulSimplePlugin, + new DivPlugin + )) ++ List(if(!barrielShifter) + new LightShifterPlugin + else + new FullBarrelShifterPlugin( + earlyInjection = true + ) + ) + ) + ) + + SpinalVerilog(cpu(true,true,true,true)) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/Linux.scala 
b/VexRiscv/src/main/scala/vexriscv/demo/Linux.scala new file mode 100644 index 0000000..8508a67 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/Linux.scala @@ -0,0 +1,514 @@ +/* + * SpinalHDL + * Copyright (c) Dolu, All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3.0 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. + */ + +package vexriscv.demo + +import spinal.core._ +import spinal.lib.eda.bench.{AlteraStdTargets, Bench, Rtl, XilinxStdTargets} +import spinal.lib.eda.icestorm.IcestormStdTargets +import spinal.lib.master +import vexriscv._ +import vexriscv.ip._ +import vexriscv.plugin._ + +/* +prerequired stuff => +- JAVA JDK >= 8 +- SBT +- Verilator + +Setup things => +git clone https://github.com/SpinalHDL/SpinalHDL.git -b dev +git clone https://github.com/SpinalHDL/VexRiscv.git -b linux +cd VexRiscv + +Run regressions => +sbt "runMain vexriscv.demo.LinuxGen -r" +cd src/test/cpp/regression +make clean run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yes SUPERVISOR=yes MMU=yes CSR=yes DEBUG_PLUGIN=no COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=10 TRACE=no COREMARK=yes LINUX_REGRESSION=yes + +Run linux in simulation (Require the machine mode emulator compiled in SIM mode) => +sbt "runMain vexriscv.demo.LinuxGen" +cd src/test/cpp/regression +export BUILDROOT=/home/miaou/pro/riscv/buildrootSpinal +make clean run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD SUPERVISOR=yes CSR=yes DEBUG_PLUGIN=no COMPRESSED=no LRSC=yes AMO=yes REDO=0 DHRYSTONE=no LINUX_SOC=yes EMULATOR=../../../main/c/emulator/build/emulator.bin VMLINUX=$BUILDROOT/output/images/Image DTB=$BUILDROOT/board/spinal/vexriscv_sim/rv32.dtb RAMDISK=$BUILDROOT/output/images/rootfs.cpio WITH_USER_IO=yes TRACE=no FLOW_INFO=no + +Run linux with QEMU (Require the machine mode emulator compiled in QEMU mode) +export BUILDROOT=/home/miaou/pro/riscv/buildrootSpinal +qemu-system-riscv32 -nographic -machine virt -m 1536M -device loader,file=src/main/c/emulator/build/emulator.bin,addr=0x80000000,cpu-num=0 -device loader,file=$BUILDROOT/board/spinal/vexriscv_sim/rv32.dtb,addr=0xC3000000 -device loader,file=$BUILDROOT/output/images/Image,addr=0xC0000000 -device loader,file=$BUILDROOT/output/images/rootfs.cpio,addr=0xc2000000 + + +Buildroot => +git clone https://github.com/SpinalHDL/buildroot.git -b vexriscv +cd buildroot +make spinal_vexriscv_sim_defconfig +make -j$(nproc) +output/host/bin/riscv32-linux-objcopy -O binary output/images/vmlinux output/images/Image + +After changing a kernel config into buildroot => +cd buildroot +make spinal_vexriscv_sim_defconfig +make linux-dirclean linux-rebuild -j8 +output/host/bin/riscv32-linux-objcopy -O binary output/images/vmlinux output/images/Image + +Compiling the machine mode emulator (check the config.h file to know the mode) => +cd src/main/c/emulator +make clean all + +Changing the emulator mode => +Edit the src/main/c/emulator/src/config.h file, and comment/uncomment the SIM/QEMU flags + +Other commands (Memo): +decompile file and 
split it +riscv64-unknown-elf-objdump -S -d vmlinux > vmlinux.asm; split -b 1M vmlinux.asm + +Kernel compilation command => +ARCH=riscv CROSS_COMPILE=riscv32-unknown-linux-gnu- make menuconfig +ARCH=riscv CROSS_COMPILE=riscv32-unknown-linux-gnu- make -j`nproc`; riscv32-unknown-linux-gnu-objcopy -O binary vmlinux vmlinux.bin + +Generate a DTB from a DTS => +dtc -O dtb -o rv32.dtb rv32.dts + +https://github.com/riscv/riscv-qemu/wiki#build-and-install + + +memo : +export DATA=/home/miaou/Downloads/Binaries-master +cd src/test/cpp/regression +rm VexRiscv.v +cp $DATA/VexRiscv.v ../../../.. +make run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD SUPERVISOR=yes CSR=yes COMPRESSED=no LRSC=yes AMO=yes REDO=0 DHRYSTONE=no LINUX_SOC=yes EMULATOR=$DATA/emulator.bin VMLINUX=$DATA/vmlinux.bin DTB=$DATA/rv32.dtb RAMDISK=$DATA/rootfs.cpio TRACE=no FLOW_INFO=no + +make clean run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=no SUPERVISOR=yes CSR=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes MMU=yes REDO=1 TRACE=no LINUX_REGRESSION=yes + +qemu-system-riscv32 -nographic -machine virt -m 1536M -device loader,file=$DATA/emulator.bin,addr=0x80000000,cpu-num=0 -device loader,file=$DATA/rv32.dtb,addr=0xC3000000 -device loader,file=$DATA/vmlinux.bin,addr=0xC0000000 -device loader,file=$DATA/rootfs.cpio,addr=0xc2000000 + + +make run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yess SUPERVISOR=yes CSR=yes COMPRESSED=yes MUL=yes DIV=yes LRSC=yes AMO=yes REDO=1 TRACE=no LINUX_REGRESSION=yes + +program ../../../main/c/emulator/build/emulator.bin 0x80000000 verify + soc.loadBin(EMULATOR, 0x80000000); + soc.loadBin(VMLINUX, 0xC0000000); + soc.loadBin(DTB, 0xC3000000); + soc.loadBin(RAMDISK, 0xC2000000); + +export BUILDROOT=/home/miaou/pro/riscv/buildrootSpinal +make run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD SUPERVISOR=yes CSR=yes COMPRESSED=no LRSC=yes AMO=yes REDO=0 DHRYSTONE=no LINUX_SOC=yes +EMULATOR=../../../main/c/emulator/build/emulator.bin +VMLINUX=/home/miaou/pro/riscv/buildrootSpinal/output/images/Image +DTB=/home/miaou/pro/riscv/buildrootSpinal/board/spinal/vexriscv_sim/rv32.dtb +RAMDISK=/home/miaou/pro/riscv/buildrootSpinal/output/images/rootfs.cpio TRACE=no FLOW_INFO=no + +make run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD SUPERVISOR=yes CSR=yes COMPRESSED=no LRSC=yes AMO=yes REDO=0 DHRYSTONE=no LINUX_SOC=yes DEBUG_PLUGIN_EXTERNAL=yes + +rm -rf cpio +mkdir cpio +cd cpio +sudo cpio -i < ../rootfs.cpio +cd .. + +rm rootfs.cpio +cd cpio +sudo find | sudo cpio -H newc -o > ../rootfs.cpio +cd .. 
+ +make clean run IBUS=CACHED DBUS=CACHED DEBUG_PLUGIN=STD DHRYSTONE=yes SUPERVISOR=yes MMU=yes CSR=yes COMPRESSED=no MUL=yes DIV=yes LRSC=yes AMO=yes REDO=10 TRACE=no COREMARK=yes LINUX_REGRESSION=yes RUN_HEX=~/pro/riscv/zephyr/samples/synchronization/build/zephyr/zephyr.hex + + +*/ + + +object LinuxGen { + def configFull(litex : Boolean, withMmu : Boolean, withSmp : Boolean = false) = { + val config = VexRiscvConfig( + plugins = List( + //Uncomment the whole IBusSimplePlugin and comment IBusCachedPlugin if you want uncached iBus config +// new IBusSimplePlugin( +// resetVector = 0x80000000l, +// cmdForkOnSecondStage = false, +// cmdForkPersistence = false, +// prediction = DYNAMIC_TARGET, +// historyRamSizeLog2 = 10, +// catchAccessFault = true, +// compressedGen = true, +// busLatencyMin = 1, +// injectorStage = true, +// memoryTranslatorPortConfig = withMmu generate MmuPortConfig( +// portTlbSize = 4 +// ) +// ), + + //Uncomment the whole IBusCachedPlugin and comment IBusSimplePlugin if you want cached iBus config + new IBusCachedPlugin( + resetVector = 0x80000000l, + compressedGen = false, + prediction = STATIC, + injectorStage = false, + config = InstructionCacheConfig( + cacheSize = 4096*1, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = false, + twoCycleCache = true +// ) + ), + memoryTranslatorPortConfig = withMmu generate MmuPortConfig( + portTlbSize = 4 + ) + ), + // ).newTightlyCoupledPort(TightlyCoupledPortParameter("iBusTc", a => a(30 downto 28) === 0x0 && a(5))), +// new DBusSimplePlugin( +// catchAddressMisaligned = true, +// catchAccessFault = true, +// earlyInjection = false, +// withLrSc = true, +// memoryTranslatorPortConfig = withMmu generate MmuPortConfig( +// portTlbSize = 4 +// ) +// ), + new DBusCachedPlugin( + dBusCmdMasterPipe = true, + dBusCmdSlavePipe = true, + dBusRspSlavePipe = true, + config = new DataCacheConfig( + cacheSize = 4096*1, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true, + withExclusive = withSmp, + withInvalidate = withSmp, + withLrSc = true, + withAmo = true +// ) + ), + memoryTranslatorPortConfig = withMmu generate MmuPortConfig( + portTlbSize = 4 + ) + ), + + // new MemoryTranslatorPlugin( + // tlbSize = 32, + // virtualRange = _(31 downto 28) === 0xC, + // ioRange = _(31 downto 28) === 0xF + // ), + + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = true + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false + ), + new FullBarrelShifterPlugin(earlyInjection = false), + // new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + // new HazardSimplePlugin(false, true, false, true), + // new HazardSimplePlugin(false, false, false, false), + new MulPlugin, + new MulDivIterativePlugin( + genMul = false, + genDiv = true, + mulUnrollFactor = 32, + divUnrollFactor = 1 + ), + // new DivPlugin, + new CsrPlugin(CsrPluginConfig.linuxMinimal(0x80000020l).copy(ebreakGen = false)), + // new CsrPlugin(//CsrPluginConfig.all2(0x80000020l).copy(ebreakGen = true)/* + // CsrPluginConfig( + // 
catchIllegalAccess = false, + // mvendorid = null, + // marchid = null, + // mimpid = null, + // mhartid = null, + // misaExtensionsInit = 0, + // misaAccess = CsrAccess.READ_ONLY, + // mtvecAccess = CsrAccess.WRITE_ONLY, + // mtvecInit = 0x80000020l, + // mepcAccess = CsrAccess.READ_WRITE, + // mscratchGen = true, + // mcauseAccess = CsrAccess.READ_ONLY, + // mbadaddrAccess = CsrAccess.READ_ONLY, + // mcycleAccess = CsrAccess.NONE, + // minstretAccess = CsrAccess.NONE, + // ecallGen = true, + // ebreakGen = true, + // wfiGenAsWait = false, + // wfiGenAsNop = true, + // ucycleAccess = CsrAccess.NONE + // )), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true, + fenceiGenAsAJump = false + ), + new YamlPlugin("cpu0.yaml") + ) + ) + if(withMmu) config.plugins += new MmuPlugin( + ioRange = (x => if(litex) x(31 downto 28) === 0xB || x(31 downto 28) === 0xE || x(31 downto 28) === 0xF else x(31 downto 28) === 0xF) + ) else { + config.plugins += new StaticMemoryTranslatorPlugin( + ioRange = _(31 downto 28) === 0xF + ) + } + config + } + + + + def main(args: Array[String]) { +// import spinal.core.sim._ +// SimConfig.withConfig(SpinalConfig(mergeAsyncProcess = false, anonymSignalPrefix = "zz_")).allOptimisation.compile(new VexRiscv(configFull)).doSimUntilVoid{ dut => +// dut.clockDomain.forkStimulus(10) +// dut.clockDomain.forkSimSpeedPrinter(4) +// var iBus : InstructionCacheMemBus = null +// +// dut.plugins.foreach{ +// case plugin: IBusCachedPlugin => iBus = plugin.iBus +// case _ => +// } +// dut.clockDomain.onSamplings{ +//// iBus.cmd.ready.randomize() +// iBus.rsp.data #= 0x13 +// } +// } + + SpinalConfig(mergeAsyncProcess = false, anonymSignalPrefix = "_zz").generateVerilog { + + + val toplevel = new VexRiscv(configFull( + litex = !args.contains("-r"), + withMmu = true + )) +// val toplevel = new VexRiscv(configLight) +// val toplevel = new VexRiscv(configTest) + + /*toplevel.rework { + var iBus : AvalonMM = null + for (plugin <- toplevel.config.plugins) plugin match { + case plugin: IBusSimplePlugin => { + plugin.iBus.asDirectionLess() //Unset IO properties of iBus + iBus = master(plugin.iBus.toAvalon()) + .setName("iBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) //Specify a clock domain to the iBus (used by QSysify) + } + case plugin: IBusCachedPlugin => { + plugin.iBus.asDirectionLess() //Unset IO properties of iBus + iBus = master(plugin.iBus.toAvalon()) + .setName("iBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) //Specify a clock domain to the iBus (used by QSysify) + } + case plugin: DBusSimplePlugin => { + plugin.dBus.asDirectionLess() + master(plugin.dBus.toAvalon()) + .setName("dBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) + } + case plugin: DBusCachedPlugin => { + plugin.dBus.asDirectionLess() + master(plugin.dBus.toAvalon()) + .setName("dBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) + } + case plugin: DebugPlugin => { + plugin.io.bus.asDirectionLess() + slave(plugin.io.bus.fromAvalon()) + .setName("debugBusAvalon") + .addTag(ClockDomainTag(plugin.debugClockDomain)) + .parent = null //Avoid the io bundle to be interpreted as a QSys conduit + plugin.io.resetOut + .addTag(ResetEmitterTag(plugin.debugClockDomain)) + .parent = null //Avoid the io bundle to be interpreted as a QSys conduit + } + case _ => + } + for (plugin <- toplevel.config.plugins) plugin match { + case plugin: CsrPlugin => { + plugin.externalInterrupt + 
.addTag(InterruptReceiverTag(iBus, ClockDomain.current)) + plugin.timerInterrupt + .addTag(InterruptReceiverTag(iBus, ClockDomain.current)) + } + case _ => + } + }*/ +// toplevel.writeBack.input(config.PC).addAttribute(Verilator.public) +// toplevel.service(classOf[DecoderSimplePlugin]).bench(toplevel) + // toplevel.children.find(_.isInstanceOf[DataCache]).get.asInstanceOf[DataCache].io.cpu.execute.addAttribute(Verilator.public) + + +// toplevel.rework { +// for (plugin <- toplevel.config.plugins) plugin match { +// case plugin: IBusSimplePlugin => { +// plugin.iBus.setAsDirectionLess().unsetName() //Unset IO properties of iBus +// val iBus = master(IBusSimpleBus()).setName("iBus") +// +// iBus.cmd << plugin.iBus.cmd.halfPipe() +// iBus.rsp.stage >> plugin.iBus.rsp +// } +// case plugin: DBusSimplePlugin => { +// plugin.dBus.setAsDirectionLess().unsetName() +// val dBus = master(DBusSimpleBus()).setName("dBus") +// val pending = RegInit(False) setWhen(plugin.dBus.cmd.fire) clearWhen(plugin.dBus.rsp.ready) +// dBus.cmd << plugin.dBus.cmd.haltWhen(pending).halfPipe() +// plugin.dBus.rsp := RegNext(dBus.rsp) +// plugin.dBus.rsp.ready clearWhen(!pending) +// } +// +// case _ => +// } +// } + + toplevel + } + } +} + +object LinuxSyntesisBench extends App{ + val withoutMmu = new Rtl { + override def getName(): String = "VexRiscv Without Mmu" + override def getRtlPath(): String = "VexRiscvWithoutMmu.v" + SpinalConfig(inlineRom=true).generateVerilog(new VexRiscv(LinuxGen.configFull(litex = false, withMmu = false)).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val withMmu = new Rtl { + override def getName(): String = "VexRiscv With Mmu" + override def getRtlPath(): String = "VexRiscvWithMmu.v" + SpinalConfig(inlineRom=true).generateVerilog(new VexRiscv(LinuxGen.configFull(litex = false, withMmu = true)).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val rtls = List(withoutMmu,withMmu) + // val rtls = List(smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache) + // val rtls = List(smallAndProductive, smallAndProductiveWithICache, fullNoMmuMaxPerf, fullNoMmu, full) + // val rtls = List(fullNoMmu) + + val targets = XilinxStdTargets( + vivadoArtix7Path = "/media/miaou/HD/linux/Xilinx/Vivado/2018.3/bin" + ) ++ AlteraStdTargets( + quartusCycloneIVPath = "/media/miaou/HD/linux/intelFPGA_lite/18.1/quartus/bin", + quartusCycloneVPath = "/media/miaou/HD/linux/intelFPGA_lite/18.1/quartus/bin" + ) //++ IcestormStdTargets().take(1) + + Bench(rtls, targets, "/media/miaou/HD/linux/tmp") +} + +object LinuxSim extends App{ + import spinal.core.sim._ + + SimConfig.allOptimisation.compile(new VexRiscv(LinuxGen.configFull(litex = false, withMmu = true))).doSim{dut => +// dut.clockDomain.forkStimulus(10) +// dut.clockDomain.forkSimSpeedPrinter() +// dut.plugins.foreach{ +// case p : IBusSimplePlugin => dut.clockDomain.onRisingEdges{ +// p.iBus.cmd.ready #= ! p.iBus.cmd.ready.toBoolean +//// p.iBus.rsp.valid.randomize() +//// p.iBus.rsp.inst.randomize() +//// p.iBus.rsp.error.randomize() +// } +// case p : DBusSimplePlugin => dut.clockDomain.onRisingEdges{ +// p.dBus.cmd.ready #= ! 
p.dBus.cmd.ready.toBoolean +//// p.dBus.cmd.ready.randomize() +//// p.dBus.rsp.ready.randomize() +//// p.dBus.rsp.data.randomize() +//// p.dBus.rsp.error.randomize() +// } +// case _ => +// } +// sleep(10*10000000) + + + var cycleCounter = 0l + var lastTime = System.nanoTime() + + + + + var iBus : IBusSimpleBus = null + var dBus : DBusSimpleBus = null + dut.plugins.foreach{ + case p : IBusSimplePlugin => + iBus = p.iBus +// p.iBus.rsp.valid.randomize() +// p.iBus.rsp.inst.randomize() +// p.iBus.rsp.error.randomize() + case p : DBusSimplePlugin => + dBus = p.dBus +// p.dBus.cmd.ready.randomize() +// p.dBus.rsp.ready.randomize() +// p.dBus.rsp.data.randomize() +// p.dBus.rsp.error.randomize() + case _ => + } + + dut.clockDomain.resetSim #= false + dut.clockDomain.clockSim #= false + sleep(1) + dut.clockDomain.resetSim #= true + sleep(1) + + def f(): Unit ={ + cycleCounter += 1 + + if((cycleCounter & 8191) == 0){ + val currentTime = System.nanoTime() + val deltaTime = (currentTime - lastTime)*1e-9 + if(deltaTime > 2.0) { + println(f"[Info] Simulation speed : ${cycleCounter / deltaTime * 1e-3}%4.0f kcycles/s") + lastTime = currentTime + cycleCounter = 0 + } + } + dut.clockDomain.clockSim #= false + iBus.cmd.ready #= ! iBus.cmd.ready.toBoolean + dBus.cmd.ready #= ! dBus.cmd.ready.toBoolean + delayed(1)(f2) + } + def f2(): Unit ={ + dut.clockDomain.clockSim #= true + delayed(1)(f) + } + + delayed(1)(f) + + sleep(100000000) + } +}
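A short aside on reuse of the generator above: LinuxGen.configFull(...) is a plain configuration factory, and LinuxSyntesisBench below already consumes it directly. As a minimal sketch (assuming this sbt project with SpinalHDL on the classpath; the object name and definition name here are illustrative only, not part of the repository), the same factory can drive a tiny standalone Verilog generator:

package vexriscv.demo

import spinal.core._
import vexriscv.VexRiscv

// Sketch only: reuse LinuxGen.configFull exactly as LinuxSyntesisBench does,
// but emit a single Verilog file with an inlined boot ROM.
object LinuxGenSketch extends App {
  SpinalConfig(inlineRom = true).generateVerilog(
    new VexRiscv(LinuxGen.configFull(litex = false, withMmu = true))
      .setDefinitionName("VexRiscvLinuxSketch") // hypothetical name, illustration only
  )
}

Note that the component is constructed inside the generateVerilog call, as in the repository code, since SpinalHDL components must be instantiated during elaboration.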
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/demo/Murax.scala b/VexRiscv/src/main/scala/vexriscv/demo/Murax.scala new file mode 100644 index 0000000..dbff45b --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/Murax.scala @@ -0,0 +1,589 @@ +package vexriscv.demo + +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.amba3.apb._ +import spinal.lib.bus.misc.SizeMapping +import spinal.lib.bus.simple.PipelinedMemoryBus +import spinal.lib.com.jtag.Jtag +import spinal.lib.com.spi.ddr.SpiXdrMaster +import spinal.lib.com.uart._ +import spinal.lib.io.{InOutWrapper, TriStateArray} +import spinal.lib.misc.{InterruptCtrl, Prescaler, Timer} +import spinal.lib.soc.pinsec.{PinsecTimerCtrl, PinsecTimerCtrlExternal} +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} +import spinal.lib.com.spi.ddr._ +import spinal.lib.bus.simple._ +import scala.collection.mutable.ArrayBuffer +import scala.collection.Seq + +/** + * Created by PIC32F_USER on 28/07/2017. + * + * Murax is a very light SoC which could work without any external component. + * - ICE40-hx8k + icestorm => 53 Mhz, 2142 LC + * - 0.37 DMIPS/Mhz + * - 8 kB of on-chip ram + * - JTAG debugger (eclipse/GDB/openocd ready) + * - Interrupt support + * - APB bus for peripherals + * - 32 GPIO pin + * - one 16 bits prescaler, two 16 bits timers + * - one UART with tx/rx fifo + */ + + +case class MuraxConfig(coreFrequency : HertzNumber, + onChipRamSize : BigInt, + onChipRamHexFile : String, + pipelineDBus : Boolean, + pipelineMainBus : Boolean, + pipelineApbBridge : Boolean, + gpioWidth : Int, + uartCtrlConfig : UartCtrlMemoryMappedConfig, + xipConfig : SpiXdrMasterCtrl.MemoryMappingParameters, + hardwareBreakpointCount : Int, + cpuPlugins : ArrayBuffer[Plugin[VexRiscv]]){ + require(pipelineApbBridge || pipelineMainBus, "At least pipelineMainBus or pipelineApbBridge should be enable to avoid wipe transactions") + val genXip = xipConfig != null + +} + + + +object MuraxConfig{ + def default : MuraxConfig = default(false, false) + def default(withXip : Boolean = false, bigEndian : Boolean = false) = MuraxConfig( + coreFrequency = 12 MHz, + onChipRamSize = 8 kB, + onChipRamHexFile = null, + pipelineDBus = true, + pipelineMainBus = false, + pipelineApbBridge = true, + gpioWidth = 32, + xipConfig = ifGen(withXip) (SpiXdrMasterCtrl.MemoryMappingParameters( + SpiXdrMasterCtrl.Parameters(8, 12, SpiXdrParameter(2, 2, 1)).addFullDuplex(0,1,false), + cmdFifoDepth = 32, + rspFifoDepth = 32, + xip = SpiXdrMasterCtrl.XipBusParameters(addressWidth = 24, lengthWidth = 2) + )), + hardwareBreakpointCount = if(withXip) 3 else 0, + cpuPlugins = ArrayBuffer( //DebugPlugin added by the toplevel + new IBusSimplePlugin( + resetVector = if(withXip) 0xF001E000l else 0x80000000l, + cmdForkOnSecondStage = true, + cmdForkPersistence = withXip, //Required by the Xip controller + prediction = NONE, + catchAccessFault = false, + compressedGen = false, + bigEndian = bigEndian + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false, + earlyInjection = false, + bigEndian = bigEndian + ), + new CsrPlugin(CsrPluginConfig.smallest(mtvecInit = if(withXip) 0xE0040020l else 0x80000020l)), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = false + ), + new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = 
false, + bypassMemory = false, + bypassWriteBack = false, + bypassWriteBackBuffer = false, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false + ), + new YamlPlugin("cpu0.yaml") + ), + uartCtrlConfig = UartCtrlMemoryMappedConfig( + uartCtrlConfig = UartCtrlGenerics( + dataWidthMax = 8, + clockDividerWidth = 20, + preSamplingSize = 1, + samplingSize = 3, + postSamplingSize = 1 + ), + initConfig = UartCtrlInitConfig( + baudrate = 115200, + dataLength = 7, //7 => 8 bits + parity = UartParityType.NONE, + stop = UartStopType.ONE + ), + busCanWriteClockDividerConfig = false, + busCanWriteFrameConfig = false, + txFifoDepth = 16, + rxFifoDepth = 16 + ) + + ) + + def fast = { + val config = default + + //Replace HazardSimplePlugin to get datapath bypass + config.cpuPlugins(config.cpuPlugins.indexWhere(_.isInstanceOf[HazardSimplePlugin])) = new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true + ) +// config.cpuPlugins(config.cpuPlugins.indexWhere(_.isInstanceOf[LightShifterPlugin])) = new FullBarrelShifterPlugin() + + config + } +} + + +case class Murax(config : MuraxConfig) extends Component{ + import config._ + + val io = new Bundle { + //Clocks / reset + val asyncReset = in Bool() + val mainClk = in Bool() + + //Main components IO + val jtag = slave(Jtag()) + + //Peripherals IO + val gpioA = master(TriStateArray(gpioWidth bits)) + val uart = master(Uart()) + + val xip = ifGen(genXip)(master(SpiXdrMaster(xipConfig.ctrl.spi))) + } + + + val resetCtrlClockDomain = ClockDomain( + clock = io.mainClk, + config = ClockDomainConfig( + resetKind = BOOT + ) + ) + + val resetCtrl = new ClockingArea(resetCtrlClockDomain) { + val mainClkResetUnbuffered = False + + //Implement an counter to keep the reset axiResetOrder high 64 cycles + // Also this counter will automatically do a reset when the system boot. + val systemClkResetCounter = Reg(UInt(6 bits)) init(0) + when(systemClkResetCounter =/= U(systemClkResetCounter.range -> true)){ + systemClkResetCounter := systemClkResetCounter + 1 + mainClkResetUnbuffered := True + } + when(BufferCC(io.asyncReset)){ + systemClkResetCounter := 0 + } + + //Create all reset used later in the design + val mainClkReset = RegNext(mainClkResetUnbuffered) + val systemReset = RegNext(mainClkResetUnbuffered) + } + + + val systemClockDomain = ClockDomain( + clock = io.mainClk, + reset = resetCtrl.systemReset, + frequency = FixedFrequency(coreFrequency) + ) + + val debugClockDomain = ClockDomain( + clock = io.mainClk, + reset = resetCtrl.mainClkReset, + frequency = FixedFrequency(coreFrequency) + ) + + val system = new ClockingArea(systemClockDomain) { + val pipelinedMemoryBusConfig = PipelinedMemoryBusConfig( + addressWidth = 32, + dataWidth = 32 + ) + + val bigEndianDBus = config.cpuPlugins.exists(_ match{ case plugin : DBusSimplePlugin => plugin.bigEndian case _ => false}) + + //Arbiter of the cpu dBus/iBus to drive the mainBus + //Priority to dBus, !! cmd transactions can change on the fly !! 
+ val mainBusArbiter = new MuraxMasterArbiter(pipelinedMemoryBusConfig, bigEndianDBus) + + //Instanciate the CPU + val cpu = new VexRiscv( + config = VexRiscvConfig( + plugins = cpuPlugins += new DebugPlugin(debugClockDomain, hardwareBreakpointCount) + ) + ) + + //Checkout plugins used to instanciate the CPU to connect them to the SoC + val timerInterrupt = False + val externalInterrupt = False + for(plugin <- cpu.plugins) plugin match{ + case plugin : IBusSimplePlugin => + mainBusArbiter.io.iBus.cmd <> plugin.iBus.cmd + mainBusArbiter.io.iBus.rsp <> plugin.iBus.rsp + case plugin : DBusSimplePlugin => { + if(!pipelineDBus) + mainBusArbiter.io.dBus <> plugin.dBus + else { + mainBusArbiter.io.dBus.cmd << plugin.dBus.cmd.halfPipe() + mainBusArbiter.io.dBus.rsp <> plugin.dBus.rsp + } + } + case plugin : CsrPlugin => { + plugin.externalInterrupt := externalInterrupt + plugin.timerInterrupt := timerInterrupt + } + case plugin : DebugPlugin => plugin.debugClockDomain{ + resetCtrl.systemReset setWhen(RegNext(plugin.io.resetOut)) + io.jtag <> plugin.io.bus.fromJtag() + } + case _ => + } + + + + //****** MainBus slaves ******** + val mainBusMapping = ArrayBuffer[(PipelinedMemoryBus,SizeMapping)]() + val ram = new MuraxPipelinedMemoryBusRam( + onChipRamSize = onChipRamSize, + onChipRamHexFile = onChipRamHexFile, + pipelinedMemoryBusConfig = pipelinedMemoryBusConfig, + bigEndian = bigEndianDBus + ) + mainBusMapping += ram.io.bus -> (0x80000000l, onChipRamSize) + + val apbBridge = new PipelinedMemoryBusToApbBridge( + apb3Config = Apb3Config( + addressWidth = 20, + dataWidth = 32 + ), + pipelineBridge = pipelineApbBridge, + pipelinedMemoryBusConfig = pipelinedMemoryBusConfig + ) + mainBusMapping += apbBridge.io.pipelinedMemoryBus -> (0xF0000000l, 1 MB) + + + + //******** APB peripherals ********* + val apbMapping = ArrayBuffer[(Apb3, SizeMapping)]() + val gpioACtrl = Apb3Gpio(gpioWidth = gpioWidth, withReadSync = true) + io.gpioA <> gpioACtrl.io.gpio + apbMapping += gpioACtrl.io.apb -> (0x00000, 4 kB) + + val uartCtrl = Apb3UartCtrl(uartCtrlConfig) + uartCtrl.io.uart <> io.uart + externalInterrupt setWhen(uartCtrl.io.interrupt) + apbMapping += uartCtrl.io.apb -> (0x10000, 4 kB) + + val timer = new MuraxApb3Timer() + timerInterrupt setWhen(timer.io.interrupt) + apbMapping += timer.io.apb -> (0x20000, 4 kB) + + val xip = ifGen(genXip)(new Area{ + val ctrl = Apb3SpiXdrMasterCtrl(xipConfig) + ctrl.io.spi <> io.xip + externalInterrupt setWhen(ctrl.io.interrupt) + apbMapping += ctrl.io.apb -> (0x1F000, 4 kB) + + val accessBus = new PipelinedMemoryBus(PipelinedMemoryBusConfig(24,32)) + mainBusMapping += accessBus -> (0xE0000000l, 16 MB) + + ctrl.io.xip.fromPipelinedMemoryBus() << accessBus + val bootloader = Apb3Rom("src/main/c/murax/xipBootloader/crt.bin") + apbMapping += bootloader.io.apb -> (0x1E000, 4 kB) + }) + + + + //******** Memory mappings ********* + val apbDecoder = Apb3Decoder( + master = apbBridge.io.apb, + slaves = apbMapping.toSeq + ) + + val mainBusDecoder = new Area { + val logic = new MuraxPipelinedMemoryBusDecoder( + master = mainBusArbiter.io.masterBus, + specification = mainBusMapping.toSeq, + pipelineMaster = pipelineMainBus + ) + } + } +} + + + +object Murax{ + def main(args: Array[String]) { + SpinalVerilog(Murax(MuraxConfig.default)) + } +} + +object MuraxCfu{ + def main(args: Array[String]) { + SpinalVerilog{ + val config = MuraxConfig.default + config.cpuPlugins += new CfuPlugin( + stageCount = 1, + allowZeroLatency = true, + encodings = List( + CfuPluginEncoding ( + instruction = 
M"-------------------------0001011", + functionId = List(14 downto 12), + input2Kind = CfuPlugin.Input2Kind.RS + ) + ), + busParameter = CfuBusParameter( + CFU_VERSION = 0, + CFU_INTERFACE_ID_W = 0, + CFU_FUNCTION_ID_W = 3, + CFU_REORDER_ID_W = 0, + CFU_REQ_RESP_ID_W = 0, + CFU_INPUTS = 2, + CFU_INPUT_DATA_W = 32, + CFU_OUTPUTS = 1, + CFU_OUTPUT_DATA_W = 32, + CFU_FLOW_REQ_READY_ALWAYS = false, + CFU_FLOW_RESP_READY_ALWAYS = false, + CFU_WITH_STATUS = true, + CFU_RAW_INSN_W = 32, + CFU_CFU_ID_W = 4, + CFU_STATE_INDEX_NUM = 5 + ) + ) + + val toplevel = Murax(config) + + toplevel.rework { + for (plugin <- toplevel.system.cpu.plugins) plugin match { + case plugin: CfuPlugin => plugin.bus.toIo().setName("miaou") + case _ => + } + } + + toplevel + } + } +} + + +object Murax_iCE40_hx8k_breakout_board_xip{ + + case class SB_GB() extends BlackBox{ + val USER_SIGNAL_TO_GLOBAL_BUFFER = in Bool() + val GLOBAL_BUFFER_OUTPUT = out Bool() + } + + case class SB_IO_SCLK() extends BlackBox{ + addGeneric("PIN_TYPE", B"010000") + val PACKAGE_PIN = out Bool() + val OUTPUT_CLK = in Bool() + val CLOCK_ENABLE = in Bool() + val D_OUT_0 = in Bool() + val D_OUT_1 = in Bool() + setDefinitionName("SB_IO") + } + + case class SB_IO_DATA() extends BlackBox{ + addGeneric("PIN_TYPE", B"110000") + val PACKAGE_PIN = inout(Analog(Bool)) + val CLOCK_ENABLE = in Bool() + val INPUT_CLK = in Bool() + val OUTPUT_CLK = in Bool() + val OUTPUT_ENABLE = in Bool() + val D_OUT_0 = in Bool() + val D_OUT_1 = in Bool() + val D_IN_0 = out Bool() + val D_IN_1 = out Bool() + setDefinitionName("SB_IO") + } + + case class Murax_iCE40_hx8k_breakout_board_xip() extends Component{ + val io = new Bundle { + val mainClk = in Bool() + val jtag_tck = in Bool() + val jtag_tdi = in Bool() + val jtag_tdo = out Bool() + val jtag_tms = in Bool() + val uart_txd = out Bool() + val uart_rxd = in Bool() + + val mosi = inout(Analog(Bool)) + val miso = inout(Analog(Bool)) + val sclk = out Bool() + val spis = out Bool() + + val led = out Bits(8 bits) + } + val murax = Murax(MuraxConfig.default(withXip = true).copy(onChipRamSize = 8 kB)) + murax.io.asyncReset := False + + val mainClkBuffer = SB_GB() + mainClkBuffer.USER_SIGNAL_TO_GLOBAL_BUFFER <> io.mainClk + mainClkBuffer.GLOBAL_BUFFER_OUTPUT <> murax.io.mainClk + + val jtagClkBuffer = SB_GB() + jtagClkBuffer.USER_SIGNAL_TO_GLOBAL_BUFFER <> io.jtag_tck + jtagClkBuffer.GLOBAL_BUFFER_OUTPUT <> murax.io.jtag.tck + + io.led <> murax.io.gpioA.write(7 downto 0) + + murax.io.jtag.tdi <> io.jtag_tdi + murax.io.jtag.tdo <> io.jtag_tdo + murax.io.jtag.tms <> io.jtag_tms + murax.io.gpioA.read <> 0 + murax.io.uart.txd <> io.uart_txd + murax.io.uart.rxd <> io.uart_rxd + + + + val xip = new ClockingArea(murax.systemClockDomain) { + RegNext(murax.io.xip.ss.asBool) <> io.spis + + val sclkIo = SB_IO_SCLK() + sclkIo.PACKAGE_PIN <> io.sclk + sclkIo.CLOCK_ENABLE := True + + sclkIo.OUTPUT_CLK := ClockDomain.current.readClockWire + sclkIo.D_OUT_0 <> murax.io.xip.sclk.write(0) + sclkIo.D_OUT_1 <> RegNext(murax.io.xip.sclk.write(1)) + + val datas = for ((data, pin) <- (murax.io.xip.data, List(io.mosi, io.miso)).zipped) yield new Area { + val dataIo = SB_IO_DATA() + dataIo.PACKAGE_PIN := pin + dataIo.CLOCK_ENABLE := True + + dataIo.OUTPUT_CLK := ClockDomain.current.readClockWire + dataIo.OUTPUT_ENABLE <> data.writeEnable + dataIo.D_OUT_0 <> data.write(0) + dataIo.D_OUT_1 <> RegNext(data.write(1)) + + dataIo.INPUT_CLK := ClockDomain.current.readClockWire + data.read(0) := dataIo.D_IN_0 + data.read(1) := RegNext(dataIo.D_IN_1) + } + } + + 
} + + def main(args: Array[String]) { + SpinalVerilog(Murax_iCE40_hx8k_breakout_board_xip()) + } +} + +object MuraxDhrystoneReady{ + def main(args: Array[String]) { + SpinalVerilog(Murax(MuraxConfig.fast.copy(onChipRamSize = 256 kB))) + } +} + +object MuraxDhrystoneReadyMulDivStatic{ + def main(args: Array[String]) { + SpinalVerilog({ + val config = MuraxConfig.fast.copy(onChipRamSize = 256 kB) + config.cpuPlugins += new MulPlugin + config.cpuPlugins += new DivPlugin + config.cpuPlugins.remove(config.cpuPlugins.indexWhere(_.isInstanceOf[BranchPlugin])) + config.cpuPlugins +=new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false + ) + config.cpuPlugins += new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = true, + cmdForkPersistence = false, + prediction = STATIC, + catchAccessFault = false, + compressedGen = false + ) + config.cpuPlugins.remove(config.cpuPlugins.indexWhere(_.isInstanceOf[LightShifterPlugin])) + config.cpuPlugins += new FullBarrelShifterPlugin + Murax(config) + }) + } +} + +//Will blink led and echo UART RX to UART TX (in the verilator sim, type some text and press enter to send UART frame to the Murax RX pin) +object MuraxWithRamInit{ + def main(args: Array[String]) { + SpinalVhdl(Murax(MuraxConfig.default.copy(onChipRamSize = 4 kB, onChipRamHexFile = "src/main/ressource/hex/muraxDemo.hex"))) + } +} + +object Murax_arty{ + def main(args: Array[String]) { + val hex = "src/main/c/murax/hello_world/build/hello_world.hex" + SpinalVerilog(Murax(MuraxConfig.default(false).copy(coreFrequency = 100 MHz,onChipRamSize = 32 kB, onChipRamHexFile = hex))) + } +} + + +object MuraxAsicBlackBox extends App{ + println("Warning this soc do not has any rom to boot on.") + val config = SpinalConfig() + config.addStandardMemBlackboxing(blackboxAll) + config.generateVerilog(Murax(MuraxConfig.default())) +} + + +object de1_murax_franz{ + + case class de1_murax_franz() extends Component{ + val io = new Bundle { + val jtag_tck = in Bool() + val jtag_tdi = in Bool() + val jtag_tdo = out Bool() + val jtag_tms = in Bool() + val uart_txd = out Bool() + val uart_rxd = in Bool() + + val KEY0 = in Bool() + val CLOCK_50 = in Bool() + + val LEDR = out Bits(8 bits) + } + noIoPrefix() + + val murax = Murax(MuraxConfig.default.copy( + coreFrequency = 50 MHz, + onChipRamSize = 4 kB, + onChipRamHexFile = "src/main/ressource/hex/muraxDemo.hex")) + + io.LEDR <> murax.io.gpioA.write(7 downto 0) + + murax.io.jtag.tck <> io.jtag_tck + murax.io.jtag.tdi <> io.jtag_tdi + murax.io.jtag.tdo <> io.jtag_tdo + murax.io.jtag.tms <> io.jtag_tms + murax.io.gpioA.read <> 0 + murax.io.uart.txd <> io.uart_txd + murax.io.uart.rxd <> io.uart_rxd + murax.io.asyncReset <> ! 
io.KEY0 + murax.io.mainClk <> io.CLOCK_50 + + } + + def main(args: Array[String]) { + SpinalVhdl(de1_murax_franz()) + } +} + + + + diff --git a/VexRiscv/src/main/scala/vexriscv/demo/MuraxUtiles.scala b/VexRiscv/src/main/scala/vexriscv/demo/MuraxUtiles.scala new file mode 100644 index 0000000..22bc438 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/MuraxUtiles.scala @@ -0,0 +1,174 @@ +package vexriscv.demo + +import java.nio.{ByteBuffer, ByteOrder} + +import spinal.core._ +import spinal.lib.bus.amba3.apb.{Apb3, Apb3Config, Apb3SlaveFactory} +import spinal.lib.bus.misc.SizeMapping +import spinal.lib.misc.{HexTools, InterruptCtrl, Prescaler, Timer} +import spinal.lib._ +import spinal.lib.bus.simple._ +import vexriscv.plugin.{DBusSimpleBus, IBusSimpleBus} + +class MuraxMasterArbiter(pipelinedMemoryBusConfig : PipelinedMemoryBusConfig, bigEndian : Boolean = false) extends Component{ + val io = new Bundle{ + val iBus = slave(IBusSimpleBus(null)) + val dBus = slave(DBusSimpleBus(bigEndian)) + val masterBus = master(PipelinedMemoryBus(pipelinedMemoryBusConfig)) + } + + io.masterBus.cmd.valid := io.iBus.cmd.valid || io.dBus.cmd.valid + io.masterBus.cmd.write := io.dBus.cmd.valid && io.dBus.cmd.wr + io.masterBus.cmd.address := io.dBus.cmd.valid ? io.dBus.cmd.address | io.iBus.cmd.pc + io.masterBus.cmd.data := io.dBus.cmd.data + io.masterBus.cmd.mask := io.dBus.genMask(io.dBus.cmd) + io.iBus.cmd.ready := io.masterBus.cmd.ready && !io.dBus.cmd.valid + io.dBus.cmd.ready := io.masterBus.cmd.ready + + + val rspPending = RegInit(False) clearWhen(io.masterBus.rsp.valid) + val rspTarget = RegInit(False) + when(io.masterBus.cmd.fire && !io.masterBus.cmd.write){ + rspTarget := io.dBus.cmd.valid + rspPending := True + } + + when(rspPending && !io.masterBus.rsp.valid){ + io.iBus.cmd.ready := False + io.dBus.cmd.ready := False + io.masterBus.cmd.valid := False + } + + io.iBus.rsp.valid := io.masterBus.rsp.valid && !rspTarget + io.iBus.rsp.inst := io.masterBus.rsp.data + io.iBus.rsp.error := False + + io.dBus.rsp.ready := io.masterBus.rsp.valid && rspTarget + io.dBus.rsp.data := io.masterBus.rsp.data + io.dBus.rsp.error := False +} + + +case class MuraxPipelinedMemoryBusRam(onChipRamSize : BigInt, onChipRamHexFile : String, pipelinedMemoryBusConfig : PipelinedMemoryBusConfig, bigEndian : Boolean = false) extends Component{ + val io = new Bundle{ + val bus = slave(PipelinedMemoryBus(pipelinedMemoryBusConfig)) + } + + val ram = Mem(Bits(32 bits), onChipRamSize / 4) + io.bus.rsp.valid := RegNext(io.bus.cmd.fire && !io.bus.cmd.write) init(False) + io.bus.rsp.data := ram.readWriteSync( + address = (io.bus.cmd.address >> 2).resized, + data = io.bus.cmd.data, + enable = io.bus.cmd.valid, + write = io.bus.cmd.write, + mask = io.bus.cmd.mask + ) + io.bus.cmd.ready := True + + if(onChipRamHexFile != null){ + HexTools.initRam(ram, onChipRamHexFile, 0x80000000l) + if(bigEndian) + // HexTools.initRam (incorrectly) assumes little endian byte ordering + for((word, wordIndex) <- ram.initialContent.zipWithIndex) + ram.initialContent(wordIndex) = + ((word & 0xffl) << 24) | + ((word & 0xff00l) << 8) | + ((word & 0xff0000l) >> 8) | + ((word & 0xff000000l) >> 24) + } +} + + + +case class Apb3Rom(onChipRamBinFile : String) extends Component{ + import java.nio.file.{Files, Paths} + val byteArray = Files.readAllBytes(Paths.get(onChipRamBinFile)) + val wordCount = (byteArray.length+3)/4 + val buffer = ByteBuffer.wrap(Files.readAllBytes(Paths.get(onChipRamBinFile))).order(ByteOrder.LITTLE_ENDIAN); + val wordArray = (0 until 
wordCount).map(i => { + val v = buffer.getInt + if(v < 0) BigInt(v.toLong & 0xFFFFFFFFl) else BigInt(v) + }) + + val io = new Bundle{ + val apb = slave(Apb3(log2Up(wordCount*4),32)) + } + + val rom = Mem(Bits(32 bits), wordCount) initBigInt(wordArray) +// io.apb.PRDATA := rom.readSync(io.apb.PADDR >> 2) + io.apb.PRDATA := rom.readAsync(RegNext(io.apb.PADDR >> 2)) + io.apb.PREADY := True +} + + + +class MuraxPipelinedMemoryBusDecoder(master : PipelinedMemoryBus, val specification : Seq[(PipelinedMemoryBus,SizeMapping)], pipelineMaster : Boolean) extends Area{ + val masterPipelined = PipelinedMemoryBus(master.config) + if(!pipelineMaster) { + masterPipelined.cmd << master.cmd + masterPipelined.rsp >> master.rsp + } else { + masterPipelined.cmd <-< master.cmd + masterPipelined.rsp >> master.rsp + } + + val slaveBuses = specification.map(_._1) + val memorySpaces = specification.map(_._2) + + val hits = for((slaveBus, memorySpace) <- specification) yield { + val hit = memorySpace.hit(masterPipelined.cmd.address) + slaveBus.cmd.valid := masterPipelined.cmd.valid && hit + slaveBus.cmd.payload := masterPipelined.cmd.payload.resized + hit + } + val noHit = !hits.orR + masterPipelined.cmd.ready := (hits,slaveBuses).zipped.map(_ && _.cmd.ready).orR || noHit + + val rspPending = RegInit(False) clearWhen(masterPipelined.rsp.valid) setWhen(masterPipelined.cmd.fire && !masterPipelined.cmd.write) + val rspNoHit = RegNext(False) init(False) setWhen(noHit) + val rspSourceId = RegNextWhen(OHToUInt(hits), masterPipelined.cmd.fire) + masterPipelined.rsp.valid := slaveBuses.map(_.rsp.valid).orR || (rspPending && rspNoHit) + masterPipelined.rsp.payload := slaveBuses.map(_.rsp.payload).read(rspSourceId) + + when(rspPending && !masterPipelined.rsp.valid) { //Only one pending read request is allowed + masterPipelined.cmd.ready := False + slaveBuses.foreach(_.cmd.valid := False) + } +} + +class MuraxApb3Timer extends Component{ + val io = new Bundle { + val apb = slave(Apb3( + addressWidth = 8, + dataWidth = 32 + )) + val interrupt = out Bool() + } + + val prescaler = Prescaler(16) + val timerA,timerB = Timer(16) + + val busCtrl = Apb3SlaveFactory(io.apb) + val prescalerBridge = prescaler.driveFrom(busCtrl,0x00) + + val timerABridge = timerA.driveFrom(busCtrl,0x40)( + ticks = List(True, prescaler.io.overflow), + clears = List(timerA.io.full) + ) + + val timerBBridge = timerB.driveFrom(busCtrl,0x50)( + ticks = List(True, prescaler.io.overflow), + clears = List(timerB.io.full) + ) + + val interruptCtrl = InterruptCtrl(2) + val interruptCtrlBridge = interruptCtrl.driveFrom(busCtrl,0x10) + interruptCtrl.io.inputs(0) := timerA.io.full + interruptCtrl.io.inputs(1) := timerB.io.full + io.interrupt := interruptCtrl.io.pendings.orR +} + + +object MuraxApb3TimerGen extends App{ + SpinalVhdl(new MuraxApb3Timer()) +}
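On the bigEndian fix-up in MuraxPipelinedMemoryBusRam above: HexTools.initRam fills the RAM assuming little-endian byte order, so each 32-bit word of the initial content is byte-swapped in place. A self-contained sketch of that arithmetic in plain Scala (no SpinalHDL needed; the object name and sample value are illustrative only):

// Sketch only: the same shift/mask expression used above, applied to a Long
// instead of the Mem initial content, to show the byte-order reversal of one word.
object EndianSwapSketch extends App {
  def swap32(word: Long): Long =
    ((word & 0xffL) << 24) |
    ((word & 0xff00L) << 8) |
    ((word & 0xff0000L) >> 8) |
    ((word & 0xff000000L) >> 24)

  assert(swap32(0x11223344L) == 0x44332211L)
  println(f"0x11223344 -> 0x${swap32(0x11223344L)}%08x") // prints 0x11223344 -> 0x44332211
}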
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/demo/OpenRoad.scala b/VexRiscv/src/main/scala/vexriscv/demo/OpenRoad.scala new file mode 100644 index 0000000..3938eff --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/OpenRoad.scala @@ -0,0 +1,103 @@ +package vexriscv.demo + +import spinal.core._ +import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import vexriscv.{Riscv, VexRiscv, VexRiscvConfig, plugin} +import vexriscv.plugin.{BranchPlugin, CsrAccess, CsrPlugin, CsrPluginConfig, DBusCachedPlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusCachedPlugin, IntAluPlugin, MmuPlugin, MmuPortConfig, MulDivIterativePlugin, MulPlugin, RegFilePlugin, STATIC, SrcPlugin, YamlPlugin} + +object OpenRoad extends App{ + + def linuxConfig = VexRiscvConfig( + withMemoryStage = true, + withWriteBackStage = true, + List( + // new SingleInstructionLimiterPlugin(), + new IBusCachedPlugin( + resetVector = 0, + compressedGen = false, + prediction = vexriscv.plugin.NONE, + injectorStage = false, + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine = 64, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = true, + twoCycleRam = false, + twoCycleCache = true + ), + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = 4 + ) + ), + new DBusCachedPlugin( + dBusCmdMasterPipe = true, + dBusCmdSlavePipe = true, + dBusRspSlavePipe = true, + config = new DataCacheConfig( + cacheSize = 4096, + bytePerLine = 64, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true, + asyncTagMemory = true, + withLrSc = true, + withAmo = true + // ) + ), + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = 4 + ) + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false, + x0Init = true + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false + ), + new FullBarrelShifterPlugin(earlyInjection = true), + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new MulDivIterativePlugin( + genMul = true, + genDiv = true, + mulUnrollFactor = 32, + divUnrollFactor = 8 + ), + new CsrPlugin(CsrPluginConfig.openSbi(0,Riscv.misaToInt("imas")).copy(ebreakGen = false, mtvecAccess = CsrAccess.READ_WRITE)), //mtvecAccess read required by freertos + + new BranchPlugin( + earlyBranch = true, + catchAddressMisaligned = true, + fenceiGenAsAJump = false + ), + new MmuPlugin( + ioRange = (x => x(31)) + ), + new YamlPlugin("cpu0.yaml") + ) + ) + + SpinalConfig().addStandardMemBlackboxing(blackboxAllWhatsYouCan).generateVerilog(new VexRiscv(linuxConfig).setDefinitionName("VexRiscvMsuI4D4")) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/SynthesisBench.scala b/VexRiscv/src/main/scala/vexriscv/demo/SynthesisBench.scala new file mode 100644 index 0000000..6a044ea --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/SynthesisBench.scala @@ -0,0 +1,500 @@ +package vexriscv.demo + +import spinal.core._ +import spinal.lib._ +import spinal.lib.eda.bench._ +import spinal.lib.eda.icestorm.IcestormStdTargets +import spinal.lib.eda.xilinx.VivadoFlow +import spinal.lib.io.InOutWrapper +import 
vexriscv.demo.smp.VexRiscvSmpClusterGen +import vexriscv.plugin.CsrAccess.{READ_ONLY, READ_WRITE, WRITE_ONLY} +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} +import vexriscv.plugin.{BranchPlugin, CsrPlugin, CsrPluginConfig, DBusSimplePlugin, DecoderSimplePlugin, FullBarrelShifterPlugin, HazardSimplePlugin, IBusSimplePlugin, IntAluPlugin, LightShifterPlugin, NONE, RegFilePlugin, SrcPlugin, YamlPlugin} + +import scala.collection.mutable.ArrayBuffer +import scala.util.Random + +/** + * Created by PIC32F_USER on 16/07/2017. + */ +object VexRiscvSynthesisBench { + def main(args: Array[String]) { + + def wrap(that : => Component) : Component = that +// def wrap(that : => Component) : Component = { +// val c = that +// c.getAllIo.foreach(io => KeepAttribute(io.asDirectionLess())) +// c +// } +// Wrap with input/output registers +// def wrap(that : => Component) : Component = { +// //new WrapWithReg.Wrapper(that) +// val c = that +// c.rework { +// for (e <- c.getOrdredNodeIo) { +// if (e.isInput) { +// e.asDirectionLess() +// e := RegNext(RegNext(in(cloneOf(e)))) +// +// } else { +// e.asDirectionLess() +// out(cloneOf(e)) := RegNext(RegNext(e)) +// } +// } +// } +// c +// } + + // Wrap to do a decoding bench +// def wrap(that : => VexRiscv) : VexRiscv = { +// val top = that +// top.service(classOf[DecoderSimplePlugin]).bench(top) +// top +// } + + val twoStage = new Rtl { + override def getName(): String = "VexRiscv two stages" + override def getRtlPath(): String = "VexRiscvTwoStages.v" + SpinalVerilog(wrap(GenTwoThreeStage.cpu( + withMulDiv = false, + bypass = false, + barrielShifter = false, + withMemoryStage = false + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + val twoStageBarell = new Rtl { + override def getName(): String = "VexRiscv two stages with barriel" + override def getRtlPath(): String = "VexRiscvTwoStagesBar.v" + SpinalVerilog(wrap(GenTwoThreeStage.cpu( + withMulDiv = false, + bypass = true, + barrielShifter = true, + withMemoryStage = false + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + val twoStageMulDiv = new Rtl { + override def getName(): String = "VexRiscv two stages with Mul Div" + override def getRtlPath(): String = "VexRiscvTwoStagesMD.v" + SpinalVerilog(wrap(GenTwoThreeStage.cpu( + withMulDiv = true, + bypass = false, + barrielShifter = false, + withMemoryStage = false + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + val twoStageAll = new Rtl { + override def getName(): String = "VexRiscv two stages with Mul Div fast" + override def getRtlPath(): String = "VexRiscvTwoStagesMDfast.v" + SpinalVerilog(wrap(GenTwoThreeStage.cpu( + withMulDiv = true, + bypass = true, + barrielShifter = true, + withMemoryStage = false + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + + + val threeStage = new Rtl { + override def getName(): String = "VexRiscv three stages" + override def getRtlPath(): String = "VexRiscvThreeStages.v" + SpinalVerilog(wrap(GenTwoThreeStage.cpu( + withMulDiv = false, + bypass = false, + barrielShifter = false, + withMemoryStage = true + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + val threeStageBarell = new Rtl { + override def getName(): String = "VexRiscv three stages with barriel" + override def getRtlPath(): String = "VexRiscvThreeStagesBar.v" + SpinalVerilog(wrap(GenTwoThreeStage.cpu( + withMulDiv = false, + bypass = true, + barrielShifter = true, + withMemoryStage = true + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + val threeStageMulDiv = new Rtl { + override def 
getName(): String = "VexRiscv three stages with Mul Div" + override def getRtlPath(): String = "VexRiscvThreeStagesMD.v" + SpinalVerilog(wrap(GenTwoThreeStage.cpu( + withMulDiv = true, + bypass = false, + barrielShifter = false, + withMemoryStage = true + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + val threeStageAll = new Rtl { + override def getName(): String = "VexRiscv three stages with Mul Div fast" + override def getRtlPath(): String = "VexRiscvThreeStagesMDfast.v" + SpinalVerilog(wrap(GenTwoThreeStage.cpu( + withMulDiv = true, + bypass = true, + barrielShifter = true, + withMemoryStage = true + )).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val smallestNoCsr = new Rtl { + override def getName(): String = "VexRiscv smallest no CSR" + override def getRtlPath(): String = "VexRiscvSmallestNoCsr.v" + SpinalVerilog(wrap(GenSmallestNoCsr.cpu()).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val smallest = new Rtl { + override def getName(): String = "VexRiscv smallest" + override def getRtlPath(): String = "VexRiscvSmallest.v" + SpinalVerilog(wrap(GenSmallest.cpu()).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val smallAndProductive = new Rtl { + override def getName(): String = "VexRiscv small and productive" + override def getRtlPath(): String = "VexRiscvSmallAndProductive.v" + SpinalVerilog(wrap(GenSmallAndProductive.cpu()).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val smallAndProductiveWithICache = new Rtl { + override def getName(): String = "VexRiscv small and productive with instruction cache" + override def getRtlPath(): String = "VexRiscvSmallAndProductiveICache.v" + SpinalVerilog(wrap(GenSmallAndProductiveICache.cpu()).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val fullNoMmuNoCache = new Rtl { + override def getName(): String = "VexRiscv full no MMU no cache" + override def getRtlPath(): String = "VexRiscvFullNoMmuNoCache.v" + SpinalVerilog(wrap(GenFullNoMmuNoCache.cpu()).setDefinitionName(getRtlPath().split("\\.").head)) + } + val fullNoMmu = new Rtl { + override def getName(): String = "VexRiscv full no MMU" + override def getRtlPath(): String = "VexRiscvFullNoMmu.v" + SpinalVerilog(wrap(GenFullNoMmu.cpu()).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val noCacheNoMmuMaxPerf= new Rtl { + override def getName(): String = "VexRiscv no cache no MMU max perf" + override def getRtlPath(): String = "VexRiscvNoCacheNoMmuMaxPerf.v" + SpinalVerilog(wrap(GenNoCacheNoMmuMaxPerf.cpu()).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val fullNoMmuMaxPerf= new Rtl { + override def getName(): String = "VexRiscv full no MMU max perf" + override def getRtlPath(): String = "VexRiscvFullNoMmuMaxPerf.v" + SpinalVerilog(wrap(GenFullNoMmuMaxPerf.cpu()).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val full = new Rtl { + override def getName(): String = "VexRiscv full with MMU" + override def getRtlPath(): String = "VexRiscvFull.v" + SpinalVerilog(wrap(GenFull.cpu()).setDefinitionName(getRtlPath().split("\\.").head)) + } + + + val linuxBalanced = new Rtl { + override def getName(): String = "VexRiscv linux balanced" + override def getRtlPath(): String = "VexRiscvLinuxBalanced.v" + SpinalConfig(inlineRom = true).generateVerilog(wrap(new VexRiscv(LinuxGen.configFull(false, true))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val linuxBalancedSmp = new Rtl { + override def getName(): String = "VexRiscv linux balanced SMP" + override def getRtlPath(): String = 
"VexRiscvLinuxBalancedSmp.v" + SpinalConfig(inlineRom = true).generateVerilog(wrap(new VexRiscv(LinuxGen.configFull(false, true, withSmp = true))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val linuxFpuSmp = new Rtl { + override def getName(): String = "VexRiscv linux Fpu SMP" + override def getRtlPath(): String = "VexRiscvLinuxFpuSmp.v" + SpinalConfig(inlineRom = true).generateVerilog(wrap(new VexRiscv( + VexRiscvSmpClusterGen.vexRiscvConfig( + hartId = 0, + ioRange = _ (31 downto 28) === 0xF, + resetVector = 0x80000000l, + iBusWidth = 64, + dBusWidth = 64, + loadStoreWidth = 64, + iCacheSize = 4096*2, + dCacheSize = 4096*2, + iCacheWays = 2, + dCacheWays = 2, + withFloat = true, + withDouble = true, + externalFpu = false, + simHalt = true + ))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val linuxFpuSmpNoDecoder = new Rtl { + override def getName(): String = "VexRiscv linux Fpu SMP without decoder" + override def getRtlPath(): String = "VexRiscvLinuxFpuSmpNoDecoder.v" + SpinalConfig(inlineRom = true).generateVerilog(wrap(new VexRiscv( + VexRiscvSmpClusterGen.vexRiscvConfig( + hartId = 0, + ioRange = _ (31 downto 28) === 0xF, + resetVector = 0x80000000l, + iBusWidth = 64, + dBusWidth = 64, + loadStoreWidth = 64, + iCacheSize = 4096*2, + dCacheSize = 4096*2, + iCacheWays = 2, + dCacheWays = 2, + withFloat = true, + withDouble = true, + externalFpu = false, + simHalt = true, + decoderIsolationBench = true + ))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val linuxFpuSmpStupidDecoder = new Rtl { + override def getName(): String = "VexRiscv linux Fpu SMP stupid decoder" + override def getRtlPath(): String = "VexRiscvLinuxFpuSmpStupidDecoder.v" + SpinalConfig(inlineRom = true).generateVerilog(wrap(new VexRiscv( + VexRiscvSmpClusterGen.vexRiscvConfig( + hartId = 0, + ioRange = _ (31 downto 28) === 0xF, + resetVector = 0x80000000l, + iBusWidth = 64, + dBusWidth = 64, + loadStoreWidth = 64, + iCacheSize = 4096*2, + dCacheSize = 4096*2, + iCacheWays = 2, + dCacheWays = 2, + withFloat = true, + withDouble = true, + externalFpu = false, + simHalt = true, + decoderStupid = true + ))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + + + val rtls = List( +// linuxFpuSmp, linuxFpuSmpNoDecoder, linuxFpuSmpStupidDecoder + twoStage, twoStageBarell, twoStageMulDiv, twoStageAll, + threeStage, threeStageBarell, threeStageMulDiv, threeStageAll, + smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, fullNoMmu, full, linuxBalanced, linuxBalancedSmp + ) +// val rtls = List(linuxBalanced, linuxBalancedSmp) +// val rtls = List(smallest) + val targets = XilinxStdTargets() ++ AlteraStdTargets() ++ IcestormStdTargets().take(1) ++ List( + new Target { + override def getFamilyName(): String = "Kintex UltraScale" + override def synthesise(rtl: Rtl, workspace: String): Report = { + VivadoFlow( + frequencyTarget = 50 MHz, + vivadoPath=sys.env.getOrElse("VIVADO_ARTIX_7_BIN", null), + workspacePath=workspace + "_area", + rtl=rtl, + family=getFamilyName(), + device="xcku035-fbva900-3-e" + ) + } + }, + new Target { + override def getFamilyName(): String = "Kintex UltraScale" + override def synthesise(rtl: Rtl, workspace: String): Report = { + VivadoFlow( + frequencyTarget = 800 MHz, + vivadoPath=sys.env.getOrElse("VIVADO_ARTIX_7_BIN", null), + workspacePath=workspace + "_fmax", + rtl=rtl, + family=getFamilyName(), + device="xcku035-fbva900-3-e" + ) + } + }, + new Target { + override def 
getFamilyName(): String = "Kintex UltraScale+" + override def synthesise(rtl: Rtl, workspace: String): Report = { + VivadoFlow( + frequencyTarget = 50 MHz, + vivadoPath=sys.env.getOrElse("VIVADO_ARTIX_7_BIN", null), + workspacePath=workspace + "_area", + rtl=rtl, + family=getFamilyName(), + device="xcku3p-ffvd900-3-e" + ) + } + }, + new Target { + override def getFamilyName(): String = "Kintex UltraScale+" + override def synthesise(rtl: Rtl, workspace: String): Report = { + VivadoFlow( + frequencyTarget = 800 MHz, + vivadoPath=sys.env.getOrElse("VIVADO_ARTIX_7_BIN", null), + workspacePath=workspace + "_fmax", + rtl=rtl, + family=getFamilyName(), + device="xcku3p-ffvd900-3-e" + ) + } + } + ) + // val targets = IcestormStdTargets() + Bench(rtls, targets) + } +} + + +object BrieySynthesisBench { + def main(args: Array[String]) { + val briey = new Rtl { + override def getName(): String = "Briey" + override def getRtlPath(): String = "Briey.v" + SpinalVerilog({ + val briey = InOutWrapper(new Briey(BrieyConfig.default).setDefinitionName(getRtlPath().split("\\.").head)) + briey.io.axiClk.setName("clk") + briey + }) + } + + + val rtls = List(briey) + + val targets = XilinxStdTargets() ++ AlteraStdTargets() ++ IcestormStdTargets().take(1) + + Bench(rtls, targets) + } +} + + + + +object MuraxSynthesisBench { + def main(args: Array[String]) { + val murax = new Rtl { + override def getName(): String = "Murax" + override def getRtlPath(): String = "Murax.v" + SpinalVerilog({ + val murax = InOutWrapper(new Murax(MuraxConfig.default.copy(gpioWidth = 8)).setDefinitionName(getRtlPath().split("\\.").head)) + murax.io.mainClk.setName("clk") + murax + }) + } + + + val muraxFast = new Rtl { + override def getName(): String = "MuraxFast" + override def getRtlPath(): String = "MuraxFast.v" + SpinalVerilog({ + val murax = InOutWrapper(new Murax(MuraxConfig.fast.copy(gpioWidth = 8)).setDefinitionName(getRtlPath().split("\\.").head)) + murax.io.mainClk.setName("clk") + murax + }) + } + + val rtls = List(murax, muraxFast) + + val targets = XilinxStdTargets() ++ AlteraStdTargets() ++ IcestormStdTargets().take(1) + + Bench(rtls, targets) + } +} + +object AllSynthesisBench { + def main(args: Array[String]): Unit = { + VexRiscvSynthesisBench.main(args) + BrieySynthesisBench.main(args) + MuraxSynthesisBench.main(args) + + } +} + + + +object VexRiscvCustomSynthesisBench { + def main(args: Array[String]) { + + + def gen(csr : CsrPlugin) = new VexRiscv( + config = VexRiscvConfig( + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), + new DecoderSimplePlugin( + catchIllegalInstruction = false + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + csr, + new FullBarrelShifterPlugin(), + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = false + ), + new YamlPlugin("cpu0.yaml") + ) + ) + ) + + + val fixedMtvec = new Rtl { + override def getName(): String = "Fixed MTVEC" + override def getRtlPath(): String = 
"fixedMtvec.v" + SpinalVerilog(gen(new CsrPlugin(CsrPluginConfig.smallest(0x80000000l))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val writeOnlyMtvec = new Rtl { + override def getName(): String = "write only MTVEC" + override def getRtlPath(): String = "woMtvec.v" + SpinalVerilog(gen(new CsrPlugin(CsrPluginConfig.smallest(null).copy(mtvecAccess = WRITE_ONLY))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val readWriteMtvec = new Rtl { + override def getName(): String = "read write MTVEC" + override def getRtlPath(): String = "wrMtvec.v" + SpinalVerilog(gen(new CsrPlugin(CsrPluginConfig.smallest(null).copy(mtvecAccess = READ_WRITE))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + val fixedMtvecRoCounter = new Rtl { + override def getName(): String = "Fixed MTVEC, read only mcycle/minstret" + override def getRtlPath(): String = "fixedMtvecRoCounter.v" + SpinalVerilog(gen(new CsrPlugin(CsrPluginConfig.smallest(0x80000000l).copy(mcycleAccess = READ_ONLY, minstretAccess = READ_ONLY))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + + val rwMtvecRoCounter = new Rtl { + override def getName(): String = "read write MTVEC, read only mcycle/minstret" + override def getRtlPath(): String = "readWriteMtvecRoCounter.v" + SpinalVerilog(gen(new CsrPlugin(CsrPluginConfig.smallest(null).copy(mtvecAccess = READ_WRITE, mcycleAccess = READ_ONLY, minstretAccess = READ_ONLY))).setDefinitionName(getRtlPath().split("\\.").head)) + } + + + // val rtls = List(twoStage, twoStageBarell, twoStageMulDiv, twoStageAll, smallestNoCsr, smallest, smallAndProductive, smallAndProductiveWithICache, fullNoMmuNoCache, noCacheNoMmuMaxPerf, fullNoMmuMaxPerf, fullNoMmu, full, linuxBalanced, linuxBalancedSmp) + val rtls = List(fixedMtvec, writeOnlyMtvec, readWriteMtvec,fixedMtvecRoCounter, rwMtvecRoCounter) + // val rtls = List(smallest) + val targets = XilinxStdTargets() ++ AlteraStdTargets() ++ IcestormStdTargets().take(1) + + // val targets = IcestormStdTargets() + Bench(rtls, targets) + } +}
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvAhbLite3.scala b/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvAhbLite3.scala new file mode 100644 index 0000000..f817fb3 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvAhbLite3.scala @@ -0,0 +1,180 @@ +package vexriscv.demo + + +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.avalon.AvalonMM +import spinal.lib.com.jtag.{Jtag, JtagTapInstructionCtrl} +import spinal.lib.eda.altera.{InterruptReceiverTag, QSysify, ResetEmitterTag} +import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} + +/** + * Created by spinalvm on 14.07.17. + */ +//class VexRiscvAvalon(debugClockDomain : ClockDomain) extends Component{ +// +//} + +//make clean run DBUS=SIMPLE_AHBLITE3 IBUS=SIMPLE_AHBLITE3 MMU=no CSR=no DEBUG_PLUGIN=STD + +object VexRiscvAhbLite3{ + def main(args: Array[String]) { + val report = SpinalConfig(mode = if(args.contains("--vhdl")) VHDL else Verilog).generate{ + + //CPU configuration + val cpuConfig = VexRiscvConfig( + plugins = List( + new IBusSimplePlugin( + resetVector = 0x80000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = true, + prediction = STATIC, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ), +// new IBusCachedPlugin( +// config = InstructionCacheConfig( +// cacheSize = 4096, +// bytePerLine =32, +// wayCount = 1, +// addressWidth = 32, +// cpuDataWidth = 32, +// memDataWidth = 32, +// catchIllegalAccess = true, +// catchAccessFault = true, +// catchMemoryTranslationMiss = true, +// asyncTagMemory = false, +// twoCycleRam = true +// ) +// // askMemoryTranslation = true, +// // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( +// // portTlbSize = 4 +// // ) +// ), +// new DBusCachedPlugin( +// config = new DataCacheConfig( +// cacheSize = 4096, +// bytePerLine = 32, +// wayCount = 1, +// addressWidth = 32, +// cpuDataWidth = 32, +// memDataWidth = 32, +// catchAccessError = true, +// catchIllegal = true, +// catchUnaligned = true, +// catchMemoryTranslationMiss = true +// ), +// memoryTranslatorPortConfig = null +// // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( +// // portTlbSize = 6 +// // ) +// ), + new StaticMemoryTranslatorPlugin( + ioRange = _(31 downto 28) === 0xF + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin, + new MulPlugin, + new DivPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new CsrPlugin( + config = CsrPluginConfig( + catchIllegalAccess = false, + mvendorid = null, + marchid = null, + mimpid = null, + mhartid = null, + misaExtensionsInit = 66, + misaAccess = CsrAccess.NONE, + mtvecAccess = CsrAccess.NONE, + mtvecInit = 0x00000020l, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = false, + mcauseAccess = CsrAccess.READ_ONLY, + mbadaddrAccess = 
CsrAccess.READ_ONLY, + mcycleAccess = CsrAccess.NONE, + minstretAccess = CsrAccess.NONE, + ecallGen = false, + wfiGenAsWait = false, + ucycleAccess = CsrAccess.NONE, + uinstretAccess = CsrAccess.NONE + ) + ), + new YamlPlugin("cpu0.yaml") + ) + ) + + //CPU instanciation + val cpu = new VexRiscv(cpuConfig) + + //CPU modifications to be an AhbLite3 one + cpu.rework { + for (plugin <- cpuConfig.plugins) plugin match { + case plugin: IBusSimplePlugin => { + plugin.iBus.setAsDirectionLess() //Unset IO properties of iBus + master(plugin.iBus.toAhbLite3Master()).setName("iBusAhbLite3") + } + case plugin: DBusSimplePlugin => { + plugin.dBus.setAsDirectionLess() + master(plugin.dBus.toAhbLite3Master(avoidWriteToReadHazard = true)).setName("dBusAhbLite3") + } +// case plugin: IBusCachedPlugin => { +// plugin.iBus.setAsDirectionLess() //Unset IO properties of iBus +// iBus = master(plugin.iBus.toAvalon()) +// .setName("iBusAvalon") +// .addTag(ClockDomainTag(ClockDomain.current)) //Specify a clock domain to the iBus (used by QSysify) +// } +// case plugin: DBusCachedPlugin => { +// plugin.dBus.setAsDirectionLess() +// master(plugin.dBus.toAvalon()) +// .setName("dBusAvalon") +// .addTag(ClockDomainTag(ClockDomain.current)) +// } + case plugin: DebugPlugin if args.contains("--jtag")=> plugin.debugClockDomain { + plugin.io.bus.setAsDirectionLess() + val jtag = slave(new Jtag()).setName("jtag") + jtag <> plugin.io.bus.fromJtag() + +// // On Artix FPGA jtag : +// val jtagCtrl = JtagTapInstructionCtrl() +// val tap = jtagCtrl.fromXilinxBscane2(userId = 1) +// jtagCtrl <> plugin.io.bus.fromJtagInstructionCtrl(ClockDomain(tap.TCK)) + } + case _ => + } + } + cpu + } + } +} + diff --git a/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvAvalonForSim.scala b/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvAvalonForSim.scala new file mode 100644 index 0000000..b2c3f69 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvAvalonForSim.scala @@ -0,0 +1,196 @@ +package vexriscv.demo + +import vexriscv.plugin._ +import vexriscv.{VexRiscv, plugin, VexRiscvConfig} +import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.amba3.apb.Apb3 +import spinal.lib.bus.amba4.axi.{Axi4Shared, Axi4ReadOnly} +import spinal.lib.bus.avalon.AvalonMM +import spinal.lib.eda.altera.{ResetEmitterTag, InterruptReceiverTag, QSysify} + +/** + * Created by spinalvm on 14.07.17. 
+ */ +//class VexRiscvAvalon(debugClockDomain : ClockDomain) extends Component{ +// +//} + +//make clean run DBUS=CACHED_AVALON IBUS=CACHED_AVALON MMU=no CSR=no DEBUG_PLUGIN=AVALON + +object VexRiscvAvalonForSim{ + def main(args: Array[String]) { + val report = SpinalVerilog{ + + //CPU configuration + val cpuConfig = VexRiscvConfig( + plugins = List( + /* new IBusSimplePlugin( + resetVector = 0x00000000l, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = STATIC, + catchAccessFault = false, + compressedGen = false + ), + new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false + ),*/ + new IBusCachedPlugin( + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine =32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = true + ) + // askMemoryTranslation = true, + // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( + // portTlbSize = 4 + // ) + ), + new DBusCachedPlugin( + config = new DataCacheConfig( + cacheSize = 4096, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true + ), + memoryTranslatorPortConfig = null + // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( + // portTlbSize = 6 + // ) + ), + new StaticMemoryTranslatorPlugin( + ioRange = _(31 downto 28) === 0xF + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin, + new MulPlugin, + new DivPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new CsrPlugin( + config = CsrPluginConfig( + catchIllegalAccess = false, + mvendorid = null, + marchid = null, + mimpid = null, + mhartid = null, + misaExtensionsInit = 66, + misaAccess = CsrAccess.NONE, + mtvecAccess = CsrAccess.NONE, + mtvecInit = 0x00000020l, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = false, + mcauseAccess = CsrAccess.READ_ONLY, + mbadaddrAccess = CsrAccess.READ_ONLY, + mcycleAccess = CsrAccess.NONE, + minstretAccess = CsrAccess.NONE, + ecallGen = false, + wfiGenAsWait = false, + ucycleAccess = CsrAccess.NONE, + uinstretAccess = CsrAccess.NONE + ) + ), + new YamlPlugin("cpu0.yaml") + ) + ) + + //CPU instanciation + val cpu = new VexRiscv(cpuConfig) + + //CPU modifications to be an Avalon one + //cpu.setDefinitionName("VexRiscvAvalon") + cpu.rework { + var iBus : AvalonMM = null + for (plugin <- cpuConfig.plugins) plugin match { + case plugin: IBusSimplePlugin => { + plugin.iBus.setAsDirectionLess() //Unset IO properties of iBus + iBus = master(plugin.iBus.toAvalon()) + .setName("iBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) //Specify a clock domain to the iBus (used by QSysify) + } + case plugin: IBusCachedPlugin => { + plugin.iBus.setAsDirectionLess() //Unset IO properties of iBus + iBus = master(plugin.iBus.toAvalon()) + .setName("iBusAvalon") + 
.addTag(ClockDomainTag(ClockDomain.current)) //Specify a clock domain to the iBus (used by QSysify) + } + case plugin: DBusSimplePlugin => { + plugin.dBus.setAsDirectionLess() + master(plugin.dBus.toAvalon()) + .setName("dBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) + } + case plugin: DBusCachedPlugin => { + plugin.dBus.setAsDirectionLess() + master(plugin.dBus.toAvalon()) + .setName("dBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) + } + case plugin: DebugPlugin => plugin.debugClockDomain { + plugin.io.bus.setAsDirectionLess() + slave(plugin.io.bus.fromAvalon()) + .setName("debugBusAvalon") + .addTag(ClockDomainTag(plugin.debugClockDomain)) + .parent = null //Avoid the io bundle to be interpreted as a QSys conduit + plugin.io.resetOut + .addTag(ResetEmitterTag(plugin.debugClockDomain)) + .parent = null //Avoid the io bundle to be interpreted as a QSys conduit + } + case _ => + } + for (plugin <- cpuConfig.plugins) plugin match { + case plugin: CsrPlugin => { + plugin.externalInterrupt + .addTag(InterruptReceiverTag(iBus, ClockDomain.current)) + plugin.timerInterrupt + .addTag(InterruptReceiverTag(iBus, ClockDomain.current)) + } + case _ => + } + } + cpu + } + + //Generate the QSys TCL script to integrate the CPU + QSysify(report.toplevel) + } +} + diff --git a/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvAvalonWithIntegratedJtag.scala b/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvAvalonWithIntegratedJtag.scala new file mode 100644 index 0000000..063d945 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvAvalonWithIntegratedJtag.scala @@ -0,0 +1,191 @@ +package vexriscv.demo + +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.avalon.AvalonMM +import spinal.lib.com.jtag.Jtag +import spinal.lib.eda.altera.{InterruptReceiverTag, QSysify, ResetEmitterTag} +import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} + +/** + * Created by spinalvm on 14.07.17. 
+ */ +//class VexRiscvAvalon(debugClockDomain : ClockDomain) extends Component{ +// +//} + + +object VexRiscvAvalonWithIntegratedJtag{ + def main(args: Array[String]) { + val report = SpinalVerilog{ + + //CPU configuration + val cpuConfig = VexRiscvConfig( + plugins = List( + new PcManagerSimplePlugin(0x00000000l, false), +// new IBusSimplePlugin( +// interfaceKeepData = false, +// catchAccessFault = false +// ), +// new DBusSimplePlugin( +// catchAddressMisaligned = false, +// catchAccessFault = false +// ), + new IBusCachedPlugin( + prediction = STATIC, + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine =32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = true, + twoCycleCache = true + ) + // askMemoryTranslation = true, + // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( + // portTlbSize = 4 + // ) + ), + new DBusCachedPlugin( + config = new DataCacheConfig( + cacheSize = 4096, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true + ), + memoryTranslatorPortConfig = null + // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( + // portTlbSize = 6 + // ) + ), + new StaticMemoryTranslatorPlugin( + ioRange = _(31 downto 28) === 0xF + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin, + new MulPlugin, + new DivPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new CsrPlugin( + config = CsrPluginConfig( + catchIllegalAccess = false, + mvendorid = null, + marchid = null, + mimpid = null, + mhartid = null, + misaExtensionsInit = 66, + misaAccess = CsrAccess.NONE, + mtvecAccess = CsrAccess.NONE, + mtvecInit = 0x00000020l, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = false, + mcauseAccess = CsrAccess.READ_ONLY, + mbadaddrAccess = CsrAccess.READ_ONLY, + mcycleAccess = CsrAccess.NONE, + minstretAccess = CsrAccess.NONE, + ecallGen = false, + wfiGenAsWait = false, + ucycleAccess = CsrAccess.NONE, + uinstretAccess = CsrAccess.NONE + ) + ), + new YamlPlugin("cpu0.yaml") + ) + ) + + //CPU instanciation + val cpu = new VexRiscv(cpuConfig) + + //CPU modifications to be an Avalon one + cpu.setDefinitionName("VexRiscvAvalon") + cpu.rework { + var iBus : AvalonMM = null + for (plugin <- cpuConfig.plugins) plugin match { + case plugin: IBusSimplePlugin => { + plugin.iBus.setAsDirectionLess() //Unset IO properties of iBus + iBus = master(plugin.iBus.toAvalon()) + .setName("iBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) //Specify a clock domain to the iBus (used by QSysify) + } + case plugin: IBusCachedPlugin => { + plugin.iBus.setAsDirectionLess() //Unset IO properties of iBus + iBus = master(plugin.iBus.toAvalon()) + .setName("iBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) //Specify a clock domain to the iBus (used by QSysify) + } 
+ case plugin: DBusSimplePlugin => { + plugin.dBus.setAsDirectionLess() + master(plugin.dBus.toAvalon()) + .setName("dBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) + } + case plugin: DBusCachedPlugin => { + plugin.dBus.setAsDirectionLess() + master(plugin.dBus.toAvalon()) + .setName("dBusAvalon") + .addTag(ClockDomainTag(ClockDomain.current)) + } + case plugin: DebugPlugin => plugin.debugClockDomain { + plugin.io.bus.setAsDirectionLess() + val jtag = slave(new Jtag()) + .setName("jtag") + jtag <> plugin.io.bus.fromJtag() + plugin.io.resetOut + .addTag(ResetEmitterTag(plugin.debugClockDomain)) + .parent = null //Avoid the io bundle to be interpreted as a QSys conduit + } + case _ => + } + for (plugin <- cpuConfig.plugins) plugin match { + case plugin: CsrPlugin => { + plugin.externalInterrupt + .addTag(InterruptReceiverTag(iBus, ClockDomain.current)) + plugin.timerInterrupt + .addTag(InterruptReceiverTag(iBus, ClockDomain.current)) + } + case _ => + } + } + cpu + } + + //Generate the QSys TCL script to integrate the CPU + QSysify(report.toplevel) + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvAxi4WithIntegratedJtag.scala b/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvAxi4WithIntegratedJtag.scala new file mode 100644 index 0000000..67556e9 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvAxi4WithIntegratedJtag.scala @@ -0,0 +1,189 @@ +package vexriscv.demo + +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.amba4.axi.Axi4ReadOnly +import spinal.lib.bus.avalon.AvalonMM +import spinal.lib.com.jtag.Jtag +import spinal.lib.eda.altera.{InterruptReceiverTag, QSysify, ResetEmitterTag} +import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} + +/** + * Created by spinalvm on 14.07.17. 
+ */ +//class VexRiscvAvalon(debugClockDomain : ClockDomain) extends Component{ +// +//} + + +object VexRiscvAxi4WithIntegratedJtag{ + def main(args: Array[String]) { + val report = SpinalVerilog{ + + //CPU configuration + val cpuConfig = VexRiscvConfig( + plugins = List( + new PcManagerSimplePlugin(0x00000000l, false), +// new IBusSimplePlugin( +// interfaceKeepData = false, +// catchAccessFault = false +// ), +// new DBusSimplePlugin( +// catchAddressMisaligned = false, +// catchAccessFault = false +// ), + new IBusCachedPlugin( + prediction = STATIC, + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine =32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = true, + twoCycleCache = true + ) + // askMemoryTranslation = true, + // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( + // portTlbSize = 4 + // ) + ), + new DBusCachedPlugin( + config = new DataCacheConfig( + cacheSize = 4096, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true + ), + memoryTranslatorPortConfig = null + // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( + // portTlbSize = 6 + // ) + ), + new StaticMemoryTranslatorPlugin( + ioRange = _(31 downto 28) === 0xF + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new FullBarrelShifterPlugin, + new MulPlugin, + new DivPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new CsrPlugin( + config = CsrPluginConfig( + catchIllegalAccess = false, + mvendorid = null, + marchid = null, + mimpid = null, + mhartid = null, + misaExtensionsInit = 66, + misaAccess = CsrAccess.NONE, + mtvecAccess = CsrAccess.NONE, + mtvecInit = 0x00000020l, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = false, + mcauseAccess = CsrAccess.READ_ONLY, + mbadaddrAccess = CsrAccess.READ_ONLY, + mcycleAccess = CsrAccess.NONE, + minstretAccess = CsrAccess.NONE, + ecallGen = false, + wfiGenAsWait = false, + ucycleAccess = CsrAccess.NONE, + uinstretAccess = CsrAccess.NONE + ) + ), + new YamlPlugin("cpu0.yaml") + ) + ) + + //CPU instanciation + val cpu = new VexRiscv(cpuConfig) + + //CPU modifications to be an Avalon one + cpu.setDefinitionName("VexRiscvAxi4") + cpu.rework { + var iBus : Axi4ReadOnly = null + for (plugin <- cpuConfig.plugins) plugin match { + case plugin: IBusSimplePlugin => { + plugin.iBus.setAsDirectionLess() //Unset IO properties of iBus + iBus = master(plugin.iBus.toAxi4ReadOnly().toFullConfig()) + .setName("iBusAxi") + .addTag(ClockDomainTag(ClockDomain.current)) //Specify a clock domain to the iBus (used by QSysify) + } + case plugin: IBusCachedPlugin => { + plugin.iBus.setAsDirectionLess() //Unset IO properties of iBus + iBus = master(plugin.iBus.toAxi4ReadOnly().toFullConfig()) + .setName("iBusAxi") + .addTag(ClockDomainTag(ClockDomain.current)) //Specify a clock 
domain to the iBus (used by QSysify) + } + case plugin: DBusSimplePlugin => { + plugin.dBus.setAsDirectionLess() + master(plugin.dBus.toAxi4Shared().toAxi4().toFullConfig()) + .setName("dBusAxi") + .addTag(ClockDomainTag(ClockDomain.current)) + } + case plugin: DBusCachedPlugin => { + plugin.dBus.setAsDirectionLess() + master(plugin.dBus.toAxi4Shared().toAxi4().toFullConfig()) + .setName("dBusAxi") + .addTag(ClockDomainTag(ClockDomain.current)) + } + case plugin: DebugPlugin => plugin.debugClockDomain { + plugin.io.bus.setAsDirectionLess() + val jtag = slave(new Jtag()) + .setName("jtag") + jtag <> plugin.io.bus.fromJtag() + plugin.io.resetOut + .addTag(ResetEmitterTag(plugin.debugClockDomain)) + .parent = null //Avoid the io bundle to be interpreted as a QSys conduit + } + case _ => + } + for (plugin <- cpuConfig.plugins) plugin match { + case plugin: CsrPlugin => { + plugin.externalInterrupt + .addTag(InterruptReceiverTag(iBus, ClockDomain.current)) + plugin.timerInterrupt + .addTag(InterruptReceiverTag(iBus, ClockDomain.current)) + } + case _ => + } + } + cpu + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvCachedWishboneForSim.scala b/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvCachedWishboneForSim.scala new file mode 100644 index 0000000..88cad3d --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/VexRiscvCachedWishboneForSim.scala @@ -0,0 +1,144 @@ +package vexriscv.demo + +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.avalon.AvalonMM +import spinal.lib.eda.altera.{InterruptReceiverTag, QSysify, ResetEmitterTag} +import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig} +import vexriscv.plugin._ +import vexriscv.{VexRiscv, VexRiscvConfig, plugin} + +/** + * Created by spinalvm on 14.07.17. + */ +//class VexRiscvAvalon(debugClockDomain : ClockDomain) extends Component{ +// +//} + + +// make clean run DBUS=CACHED_WISHBONE IBUS=CACHED_WISHBONE MMU=no CSR=no DEBUG_PLUGIN=no +object VexRiscvCachedWishboneForSim{ + def main(args: Array[String]) { + val report = SpinalVerilog{ + + //CPU configuration + val cpuConfig = VexRiscvConfig( + plugins = List( +// new IBusSimplePlugin( +// resetVector = 0x80000000l, +// prediction = STATIC +// ), +// new DBusSimplePlugin( +// catchAddressMisaligned = false, +// catchAccessFault = false +// ), + new IBusCachedPlugin( + resetVector = 0x80000000l, + prediction = STATIC, + config = InstructionCacheConfig( + cacheSize = 4096, + bytePerLine =32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = true + ) + // askMemoryTranslation = true, + // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( + // portTlbSize = 4 + // ) + ), + new DBusCachedPlugin( + config = new DataCacheConfig( + cacheSize = 4096, + bytePerLine = 32, + wayCount = 1, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = 32, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true + ), + dBusCmdMasterPipe = true, //required for wishbone + memoryTranslatorPortConfig = null + // memoryTranslatorPortConfig = MemoryTranslatorPortConfig( + // portTlbSize = 6 + // ) + ), + new StaticMemoryTranslatorPlugin( + ioRange = _(31 downto 28) === 0xF + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true + ), + new RegFilePlugin( + regFileReadyKind = plugin.SYNC, + zeroBoot = false + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false, + executeInsertion = true + ), + new 
FullBarrelShifterPlugin, + new MulPlugin, + new DivPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), +// new DebugPlugin(ClockDomain.current.clone(reset = Bool().setName("debugReset"))), + new BranchPlugin( + earlyBranch = false, + catchAddressMisaligned = true + ), + new CsrPlugin( + config = CsrPluginConfig.small(mtvecInit = 0x80000020l) + ), + new YamlPlugin("cpu0.yaml") + ) + ) + + //CPU instanciation + val cpu = new VexRiscv(cpuConfig) + + //CPU modifications to be an Avalon one + //cpu.setDefinitionName("VexRiscvAvalon") + cpu.rework { + for (plugin <- cpuConfig.plugins) plugin match { + case plugin: IBusSimplePlugin => { + plugin.iBus.setAsDirectionLess() //Unset IO properties of iBus + master(plugin.iBus.toWishbone()).setName("iBusWishbone") + } + case plugin: IBusCachedPlugin => { + plugin.iBus.setAsDirectionLess() + master(plugin.iBus.toWishbone()).setName("iBusWishbone") + } + case plugin: DBusSimplePlugin => { + plugin.dBus.setAsDirectionLess() + master(plugin.dBus.toWishbone()).setName("dBusWishbone") + } + case plugin: DBusCachedPlugin => { + plugin.dBus.setAsDirectionLess() + master(plugin.dBus.toWishbone()).setName("dBusWishbone") + } + case _ => + } + } + cpu + } + + //Generate the QSys TCL script to integrate the CPU + QSysify(report.toplevel) + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/smp/Misc.scala b/VexRiscv/src/main/scala/vexriscv/demo/smp/Misc.scala new file mode 100644 index 0000000..58bad63 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/smp/Misc.scala @@ -0,0 +1,289 @@ +package vexriscv.demo.smp + + +import spinal.core._ +import spinal.core.fiber._ +import spinal.lib.bus.bmb._ +import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig, WishboneSlaveFactory} +import spinal.lib.com.jtag.Jtag +import spinal.lib._ +import spinal.lib.bus.bmb.sim.{BmbMemoryMultiPort, BmbMemoryTester} +import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} +import spinal.lib.eda.bench.Bench +import spinal.lib.generator._ +import spinal.lib.misc.Clint +import spinal.lib.sim.{SimData, SparseMemory, StreamDriver, StreamMonitor, StreamReadyRandomizer} +import vexriscv.{VexRiscv, VexRiscvConfig} +import vexriscv.plugin.{CsrPlugin, DBusCachedPlugin, DebugPlugin, IBusCachedPlugin} + +import scala.collection.mutable +import scala.util.Random + +case class LiteDramNativeParameter(addressWidth : Int, dataWidth : Int) + +case class LiteDramNativeCmd(p : LiteDramNativeParameter) extends Bundle{ + val we = Bool() + val addr = UInt(p.addressWidth bits) +} + +case class LiteDramNativeWData(p : LiteDramNativeParameter) extends Bundle{ + val data = Bits(p.dataWidth bits) + val we = Bits(p.dataWidth/8 bits) +} + +case class LiteDramNativeRData(p : LiteDramNativeParameter) extends Bundle{ + val data = Bits(p.dataWidth bits) +} + + +case class LiteDramNative(p : LiteDramNativeParameter) extends Bundle with IMasterSlave { + val cmd = Stream(LiteDramNativeCmd(p)) + val wdata = Stream(LiteDramNativeWData(p)) + val rdata = Stream(LiteDramNativeRData(p)) + override def asMaster(): Unit = { + master(cmd, wdata) + slave(rdata) + } + + def fromBmb(bmb : Bmb, wdataFifoSize : Int, rdataFifoSize : Int) = { + val bridge = BmbToLiteDram( + bmbParameter = bmb.p, + liteDramParameter = this.p, + wdataFifoSize = wdataFifoSize, + rdataFifoSize = rdataFifoSize + ) + bridge.io.input << bmb + 
bridge.io.output <> this + bridge + } + + def simSlave(ram : SparseMemory,cd : ClockDomain, bmb : Bmb = null): Unit ={ + import spinal.core.sim._ + def bus = this + case class Cmd(address : Long, we : Boolean) + case class WData(data : BigInt, we : Long) + val cmdQueue = mutable.Queue[Cmd]() + val wdataQueue = mutable.Queue[WData]() + val rdataQueue = mutable.Queue[BigInt]() + + + case class Ref(address : Long, data : BigInt, we : Long, time : Long) + val ref = mutable.Queue[Ref]() + if(bmb != null) StreamMonitor(bmb.cmd, cd){p => + if(bmb.cmd.opcode.toInt == 1) ref.enqueue(Ref(p.fragment.address.toLong, p.fragment.data.toBigInt, p.fragment.mask.toLong, simTime())) + } + + var writeCmdCounter, writeDataCounter = 0 + StreamReadyRandomizer(bus.cmd, cd).factor = 0.5f + StreamMonitor(bus.cmd, cd) { t => + cmdQueue.enqueue(Cmd(t.addr.toLong * (p.dataWidth/8) , t.we.toBoolean)) + if(t.we.toBoolean) writeCmdCounter += 1 + } + + StreamReadyRandomizer(bus.wdata, cd).factor = 0.5f + StreamMonitor(bus.wdata, cd) { p => + writeDataCounter += 1 + // if(p.data.toBigInt == BigInt("00000002000000020000000200000002",16)){ + // println("ASD") + // } + wdataQueue.enqueue(WData(p.data.toBigInt, p.we.toLong)) + } + + // new SimStreamAssert(cmd,cd) + // new SimStreamAssert(wdata,cd) + // new SimStreamAssert(rdata,cd) + + cd.onSamplings{ + if(writeDataCounter-writeCmdCounter > 2){ + println("miaou") + } + if(cmdQueue.nonEmpty && Random.nextFloat() < 0.5){ + val cmd = cmdQueue.head + if(cmd.we){ + if(wdataQueue.nonEmpty){ + // if(cmd.address == 0xc02ae850l) { + // println(s"! $writeCmdCounter $writeDataCounter") + // } + cmdQueue.dequeue() + val wdata = wdataQueue.dequeue() + val raw = wdata.data.toByteArray + val left = wdata.data.toByteArray.size-1 + if(bmb != null){ + assert(ref.nonEmpty) + assert((ref.head.address & 0xFFFFFFF0l) == cmd.address) + assert(ref.head.data == wdata.data) + assert(ref.head.we == wdata.we) + ref.dequeue() + } + // if(cmd.address == 0xc02ae850l) { + // println(s"$cmd $wdata ${simTime()}") + // } + for(i <- 0 until p.dataWidth/8){ + + + if(((wdata.we >> i) & 1) != 0) { + // if(cmd.address == 0xc02ae850l) { + // println(s"W $i ${ if (left - i >= 0) raw(left - i) else 0}") + // } + ram.write(cmd.address + i, if (left - i >= 0) raw(left - i) else 0) + } + } + } + } else { + cmdQueue.dequeue() + val value = new Array[Byte](p.dataWidth/8+1) + val left = value.size-1 + for(i <- 0 until p.dataWidth/8) { + value(left-i) = ram.read(cmd.address+i) + } + rdataQueue.enqueue(BigInt(value)) + } + } + } + + StreamDriver(bus.rdata, cd){ p => + if(rdataQueue.isEmpty){ + false + } else { + p.data #= rdataQueue.dequeue() + true + } + } + } +} + + + +case class BmbToLiteDram(bmbParameter : BmbParameter, + liteDramParameter : LiteDramNativeParameter, + wdataFifoSize : Int, + rdataFifoSize : Int) extends Component{ + val io = new Bundle { + val input = slave(Bmb(bmbParameter)) + val output = master(LiteDramNative(liteDramParameter)) + } + + val resized = io.input.resize(liteDramParameter.dataWidth) + val unburstified = resized.unburstify() + case class Context() extends Bundle { + val context = Bits(unburstified.p.access.contextWidth bits) + val source = UInt(unburstified.p.access.sourceWidth bits) + val isWrite = Bool() + } + + assert(isPow2(rdataFifoSize)) + val pendingRead = Reg(UInt(log2Up(rdataFifoSize) + 1 bits)) init(0) + + val halt = Bool() + val (cmdFork, dataFork) = StreamFork2(unburstified.cmd.haltWhen(halt)) + val outputCmd = Stream(LiteDramNativeCmd(liteDramParameter)) + 
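// Stall new commands once rdataFifoSize read responses are already pending, and shift the BMB byte address down to a LiteDRAM word address
 +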
outputCmd.arbitrationFrom(cmdFork.haltWhen(pendingRead.msb)) + outputCmd.addr := (cmdFork.address >> log2Up(liteDramParameter.dataWidth/8)).resized + outputCmd.we := cmdFork.isWrite + + io.output.cmd <-< outputCmd + + if(bmbParameter.access.canWrite) { + val wData = Stream(LiteDramNativeWData(liteDramParameter)) + wData.arbitrationFrom(dataFork.throwWhen(dataFork.isRead)) + wData.data := dataFork.data + wData.we := dataFork.mask + io.output.wdata << wData.queueLowLatency(wdataFifoSize, latency = 1) + } else { + dataFork.ready := True + io.output.wdata.valid := False + io.output.wdata.data.assignDontCare() + io.output.wdata.we.assignDontCare() + } + + val cmdContext = Stream(Context()) + cmdContext.valid := unburstified.cmd.fire + cmdContext.context := unburstified.cmd.context + cmdContext.source := unburstified.cmd.source + cmdContext.isWrite := unburstified.cmd.isWrite + halt := !cmdContext.ready + + val rspContext = cmdContext.queue(rdataFifoSize) + val rdataFifo = io.output.rdata.queueLowLatency(rdataFifoSize, latency = 1) + val writeTocken = CounterUpDown( + stateCount = rdataFifoSize*2, + incWhen = io.output.wdata.fire, + decWhen = rspContext.fire && rspContext.isWrite + ) + val canRspWrite = writeTocken =/= 0 + val canRspRead = CombInit(rdataFifo.valid) + + rdataFifo.ready := unburstified.rsp.fire && !rspContext.isWrite + rspContext.ready := unburstified.rsp.fire + unburstified.rsp.valid := rspContext.valid && (rspContext.isWrite ? canRspWrite | canRspRead) + unburstified.rsp.setSuccess() + unburstified.rsp.last := True + unburstified.rsp.source := rspContext.source + unburstified.rsp.context := rspContext.context + unburstified.rsp.data := rdataFifo.data + + + pendingRead := pendingRead + U(outputCmd.fire && !outputCmd.we) - U(rdataFifo.fire) +} + +object BmbToLiteDramTester extends App{ + import spinal.core.sim._ + SimConfig.withWave.compile(BmbToLiteDram( + bmbParameter = BmbParameter( + addressWidth = 20, + dataWidth = 32, + lengthWidth = 6, + sourceWidth = 4, + contextWidth = 16 + ), + liteDramParameter = LiteDramNativeParameter( + addressWidth = 20, + dataWidth = 128 + ), + wdataFifoSize = 16, + rdataFifoSize = 16 + )).doSimUntilVoid(seed = 42){dut => + val tester = new BmbMemoryTester(dut.io.input, dut.clockDomain, rspCounterTarget = 3000) + dut.io.output.simSlave(tester.memory.memory, dut.clockDomain) + } +} + +case class BmbToLiteDramGenerator(mapping : AddressMapping)(implicit interconnect : BmbInterconnectGenerator) extends Area{ + val liteDramParameter = Handle[LiteDramNativeParameter] + val bmb = Handle(logic.io.input) + val dram = Handle(logic.io.output.toIo) + + val accessSource = Handle[BmbAccessCapabilities] + val accessRequirements = Handle[BmbAccessParameter] + interconnect.addSlave( + accessSource = accessSource, + accessCapabilities = accessSource, + accessRequirements = accessRequirements, + bus = bmb, + mapping = mapping + ) + val logic = Handle(BmbToLiteDram( + bmbParameter = accessRequirements.toBmbParameter(), + liteDramParameter = liteDramParameter, + wdataFifoSize = 32, + rdataFifoSize = 32 + )) +} + +case class BmbToWishboneGenerator(mapping : AddressMapping)(implicit interconnect : BmbInterconnectGenerator) extends Area{ + val bmb = Handle(logic.io.input) + val wishbone = Handle(logic.io.output) + + val accessSource = Handle[BmbAccessCapabilities] + val accessRequirements = Handle[BmbAccessParameter] + interconnect.addSlave( + accessSource = accessSource, + accessCapabilities = accessSource, + accessRequirements = accessRequirements, + bus = bmb, + 
mapping = mapping + ) + val logic = Handle(BmbToWishbone( + p = accessRequirements.toBmbParameter() + )) +} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala b/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala new file mode 100644 index 0000000..ec2aa50 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpCluster.scala @@ -0,0 +1,748 @@ +package vexriscv.demo.smp + +import spinal.core +import spinal.core._ +import spinal.core.sim.{onSimEnd, simSuccess} +import spinal.lib._ +import spinal.lib.bus.bmb.sim.BmbMemoryAgent +import spinal.lib.bus.bmb._ +import spinal.lib.bus.misc.{DefaultMapping, SizeMapping} +import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig, WishboneToBmb, WishboneToBmbGenerator} +import spinal.lib.com.jtag.{Jtag, JtagInstructionDebuggerGenerator, JtagTapInstructionCtrl} +import spinal.lib.com.jtag.sim.JtagTcp +import spinal.lib.com.jtag.xilinx.Bscane2BmbMasterGenerator +import spinal.lib.generator._ +import spinal.core.fiber._ +import spinal.idslplugin.PostInitCallback +import spinal.lib.misc.plic.PlicMapping +import spinal.lib.system.debugger.SystemDebuggerConfig +import vexriscv.ip.{DataCacheAck, DataCacheConfig, DataCacheMemBus, InstructionCache, InstructionCacheConfig} +import vexriscv.plugin._ +import vexriscv.{Riscv, VexRiscv, VexRiscvBmbGenerator, VexRiscvConfig, plugin} + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer +import spinal.lib.generator._ +import vexriscv.ip.fpu.FpuParameter + +case class VexRiscvSmpClusterParameter(cpuConfigs : Seq[VexRiscvConfig], + jtagHeaderIgnoreWidth : Int, + withExclusiveAndInvalidation : Boolean, + forcePeripheralWidth : Boolean = true, + outOfOrderDecoder : Boolean = true, + fpu : Boolean = false) + +class VexRiscvSmpClusterBase(p : VexRiscvSmpClusterParameter) extends Area with PostInitCallback{ + val cpuCount = p.cpuConfigs.size + + val debugCd = ClockDomainResetGenerator() + debugCd.holdDuration.load(4095) + debugCd.makeExternal() + + val systemCd = ClockDomainResetGenerator() + systemCd.holdDuration.load(63) + systemCd.setInput(debugCd) + + + val ctx = systemCd.outputClockDomain.push() + override def postInitCallback(): VexRiscvSmpClusterBase.this.type = { + ctx.restore() + this + } + + implicit val interconnect = BmbInterconnectGenerator() + + val debugBridge = debugCd.outputClockDomain on JtagInstructionDebuggerGenerator(p.jtagHeaderIgnoreWidth) + debugBridge.jtagClockDomain.load(ClockDomain.external("jtag", withReset = false)) + + val debugPort = Handle(debugBridge.logic.jtagBridge.io.ctrl.toIo) + + val dBusCoherent = BmbBridgeGenerator() + val dBusNonCoherent = BmbBridgeGenerator() + + val smp = p.withExclusiveAndInvalidation generate new Area{ + val exclusiveMonitor = BmbExclusiveMonitorGenerator() + interconnect.addConnection(dBusCoherent.bmb, exclusiveMonitor.input) + + val invalidationMonitor = BmbInvalidateMonitorGenerator() + interconnect.addConnection(exclusiveMonitor.output, invalidationMonitor.input) + interconnect.addConnection(invalidationMonitor.output, dBusNonCoherent.bmb) + if(p.outOfOrderDecoder) interconnect.masters(invalidationMonitor.output).withOutOfOrderDecoder() + } + + val noSmp = !p.withExclusiveAndInvalidation generate new Area{ + interconnect.addConnection(dBusCoherent.bmb, dBusNonCoherent.bmb) + } + + val cores = for(cpuId <- 0 until cpuCount) yield new Area{ + val cpu = VexRiscvBmbGenerator() + cpu.config.load(p.cpuConfigs(cpuId)) + interconnect.addConnection( + cpu.dBus -> 
List(dBusCoherent.bmb) + ) + cpu.enableDebugBmb( + debugCd = debugCd.outputClockDomain, + resetCd = systemCd, + mapping = SizeMapping(cpuId*0x1000, 0x1000) + ) + interconnect.addConnection(debugBridge.bmb, cpu.debugBmb) + } +} + + +class VexRiscvSmpClusterWithPeripherals(p : VexRiscvSmpClusterParameter) extends VexRiscvSmpClusterBase(p) { + val peripheralBridge = BmbToWishboneGenerator(DefaultMapping) + val peripheral = Handle(peripheralBridge.logic.io.output.toIo) + if(p.forcePeripheralWidth) interconnect.slaves(peripheralBridge.bmb).forceAccessSourceDataWidth(32) + + val plic = BmbPlicGenerator()(interconnect = null) + plic.priorityWidth.load(2) + plic.mapping.load(PlicMapping.sifive) + + val plicWishboneBridge = new Generator{ + dependencies += plic.ctrl + + plic.accessRequirements.load(BmbAccessParameter( + addressWidth = 22, + dataWidth = 32 + ).addSources(1, BmbSourceParameter( + contextWidth = 0, + lengthWidth = 2, + alignment = BmbParameter.BurstAlignement.LENGTH + ))) + + val logic = add task new Area{ + val bridge = WishboneToBmb(WishboneConfig(20, 32)) + bridge.io.output >> plic.ctrl + } + } + val plicWishbone = plicWishboneBridge.produceIo(plicWishboneBridge.logic.bridge.io.input) + + val clint = BmbClintGenerator(0)(interconnect = null) + val clintWishboneBridge = new Generator{ + dependencies += clint.ctrl + + clint.accessRequirements.load(BmbAccessParameter( + addressWidth = 16, + dataWidth = 32 + ).addSources(1, BmbSourceParameter( + contextWidth = 0, + lengthWidth = 2, + alignment = BmbParameter.BurstAlignement.LENGTH + ))) + + val logic = add task new Area{ + val bridge = WishboneToBmb(WishboneConfig(14, 32)) + bridge.io.output >> clint.ctrl + } + } + val clintWishbone = clintWishboneBridge.produceIo(clintWishboneBridge.logic.bridge.io.input) + + val interrupts = in Bits(32 bits) + for(i <- 1 to 31) yield plic.addInterrupt(interrupts(i), i) + + for ((core, cpuId) <- cores.zipWithIndex) { + core.cpu.setTimerInterrupt(clint.timerInterrupt(cpuId)) + core.cpu.setSoftwareInterrupt(clint.softwareInterrupt(cpuId)) + plic.priorityWidth.load(2) + plic.mapping.load(PlicMapping.sifive) + plic.addTarget(core.cpu.externalInterrupt) + plic.addTarget(core.cpu.externalSupervisorInterrupt) + List(clint.logic, core.cpu.logic).produce { + for (plugin <- core.cpu.config.plugins) plugin match { + case plugin: CsrPlugin if plugin.utime != null => plugin.utime := clint.logic.io.time + case _ => + } + } + } + + clint.cpuCount.load(cpuCount) +} + + +object VexRiscvSmpClusterGen { + def vexRiscvConfig(hartId : Int, + ioRange : UInt => Bool = (x => x(31 downto 28) === 0xF), + resetVector : Long = 0x80000000l, + iBusWidth : Int = 128, + dBusWidth : Int = 64, + loadStoreWidth : Int = 32, + coherency : Boolean = true, + atomic : Boolean = true, + iCacheSize : Int = 8192, + dCacheSize : Int = 8192, + iCacheWays : Int = 2, + dCacheWays : Int = 2, + iBusRelax : Boolean = false, + injectorStage : Boolean = false, + earlyBranch : Boolean = false, + earlyShifterInjection : Boolean = true, + dBusCmdMasterPipe : Boolean = false, + withMmu : Boolean = true, + withSupervisor : Boolean = true, + withFloat : Boolean = false, + withDouble : Boolean = false, + externalFpu : Boolean = true, + simHalt : Boolean = false, + decoderIsolationBench : Boolean = false, + decoderStupid : Boolean = false, + regfileRead : RegFileReadKind = plugin.ASYNC, + rvc : Boolean = false, + iTlbSize : Int = 4, + dTlbSize : Int = 4, + prediction : BranchPrediction = vexriscv.plugin.NONE, + withDataCache : Boolean = true, + 
withInstructionCache : Boolean = true, + forceMisa : Boolean = false, + forceMscratch : Boolean = false + ) = { + assert(iCacheSize/iCacheWays <= 4096, "Instruction cache ways can't be bigger than 4096 bytes") + assert(dCacheSize/dCacheWays <= 4096, "Data cache ways can't be bigger than 4096 bytes") + assert(!(withDouble && !withFloat)) + + val csrConfig = if(withSupervisor){ + CsrPluginConfig.openSbi(mhartid = hartId, misa = Riscv.misaToInt(s"ima${if(withFloat) "f" else ""}${if(withDouble) "d" else ""}s")).copy(utimeAccess = CsrAccess.READ_ONLY) + } else { + CsrPluginConfig( + catchIllegalAccess = true, + mvendorid = null, + marchid = null, + mimpid = null, + mhartid = hartId, + misaExtensionsInit = Riscv.misaToInt(s"ima${if(withFloat) "f" else ""}${if(withDouble) "d" else ""}s"), + misaAccess = if(forceMisa) CsrAccess.WRITE_ONLY else CsrAccess.NONE, + mtvecAccess = CsrAccess.READ_WRITE, + mtvecInit = null, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = forceMscratch, + mcauseAccess = CsrAccess.READ_ONLY, + mbadaddrAccess = CsrAccess.READ_ONLY, + mcycleAccess = CsrAccess.NONE, + minstretAccess = CsrAccess.NONE, + ecallGen = true, + ebreakGen = true, + wfiGenAsWait = false, + wfiGenAsNop = true, + ucycleAccess = CsrAccess.NONE + ) + } + val config = VexRiscvConfig( + plugins = List( + if(withMmu)new MmuPlugin( + ioRange = ioRange + )else new StaticMemoryTranslatorPlugin( + ioRange = ioRange + ), + //Uncomment the whole IBusCachedPlugin and comment IBusSimplePlugin if you want cached iBus config + if(withInstructionCache) new IBusCachedPlugin( + resetVector = resetVector, + compressedGen = rvc, + prediction = prediction, + historyRamSizeLog2 = 9, + relaxPredictorAddress = true, + injectorStage = injectorStage, + relaxedPcCalculation = iBusRelax, + config = InstructionCacheConfig( + cacheSize = iCacheSize, + bytePerLine = 64, + wayCount = iCacheWays, + addressWidth = 32, + cpuDataWidth = 32, + memDataWidth = iBusWidth, + catchIllegalAccess = true, + catchAccessFault = true, + asyncTagMemory = false, + twoCycleRam = false, + twoCycleCache = true, + reducedBankWidth = true + ), + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = iTlbSize, + latency = 1, + earlyRequireMmuLockup = true, + earlyCacheHits = true + ) + ) else new IBusSimplePlugin( + resetVector = resetVector, + cmdForkOnSecondStage = false, + cmdForkPersistence = false, + prediction = NONE, + catchAccessFault = false, + compressedGen = rvc, + busLatencyMin = 2, + vecRspBuffer = true + ), + if(withDataCache) new DBusCachedPlugin( + dBusCmdMasterPipe = dBusCmdMasterPipe || dBusWidth == 32, + dBusCmdSlavePipe = true, + dBusRspSlavePipe = true, + relaxedMemoryTranslationRegister = true, + config = new DataCacheConfig( + cacheSize = dCacheSize, + bytePerLine = 64, + wayCount = dCacheWays, + addressWidth = 32, + cpuDataWidth = loadStoreWidth, + memDataWidth = dBusWidth, + catchAccessError = true, + catchIllegal = true, + catchUnaligned = true, + withLrSc = atomic, + withAmo = atomic, + withExclusive = coherency, + withInvalidate = coherency, + withWriteAggregation = dBusWidth > 32 + ), + memoryTranslatorPortConfig = MmuPortConfig( + portTlbSize = dTlbSize, + latency = 1, + earlyRequireMmuLockup = true, + earlyCacheHits = true + ) + ) else new DBusSimplePlugin( + catchAddressMisaligned = false, + catchAccessFault = false, + earlyInjection = false + ), + new DecoderSimplePlugin( + catchIllegalInstruction = true, + decoderIsolationBench = decoderIsolationBench, + stupidDecoder = decoderStupid + ), + new RegFilePlugin( + 
regFileReadyKind = regfileRead, + zeroBoot = false, + x0Init = true + ), + new IntAluPlugin, + new SrcPlugin( + separatedAddSub = false + ), + new FullBarrelShifterPlugin(earlyInjection = earlyShifterInjection), + // new LightShifterPlugin, + new HazardSimplePlugin( + bypassExecute = true, + bypassMemory = true, + bypassWriteBack = true, + bypassWriteBackBuffer = true, + pessimisticUseSrc = false, + pessimisticWriteRegFile = false, + pessimisticAddressMatch = false + ), + new MulPlugin, + new MulDivIterativePlugin( + genMul = false, + genDiv = true, + mulUnrollFactor = 32, + divUnrollFactor = 1 + ), + new CsrPlugin(csrConfig), + new BranchPlugin( + earlyBranch = earlyBranch, + catchAddressMisaligned = true, + fenceiGenAsAJump = false + ), + new YamlPlugin(s"cpu$hartId.yaml") + ) + ) + + if(withFloat) config.plugins += new FpuPlugin( + externalFpu = externalFpu, + simHalt = simHalt, + p = FpuParameter(withDouble = withDouble) + ) + config + } + + +// def vexRiscvCluster(cpuCount : Int, resetVector : Long = 0x80000000l) = VexRiscvSmpCluster( +// debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), +// p = VexRiscvSmpClusterParameter( +// cpuConfigs = List.tabulate(cpuCount) { +// vexRiscvConfig(_, resetVector = resetVector) +// } +// ) +// ) +// def main(args: Array[String]): Unit = { +// SpinalVerilog { +// vexRiscvCluster(4) +// } +// } +} +// +// +// +//object VexRiscvSmpClusterTestInfrastructure{ +// val REPORT_OFFSET = 0xF8000000 +// val REPORT_THREAD_ID = 0x00 +// val REPORT_THREAD_COUNT = 0x04 +// val REPORT_END = 0x08 +// val REPORT_BARRIER_START = 0x0C +// val REPORT_BARRIER_END = 0x10 +// val REPORT_CONSISTENCY_VALUES = 0x14 +// +// val PUTC = 0x00 +// val GETC = 0x04 +// val CLINT_ADDR = 0x10000 +// val CLINT_IPI_ADDR = CLINT_ADDR+0x0000 +// val CLINT_CMP_ADDR = CLINT_ADDR+0x4000 +// val CLINT_TIME_ADDR = CLINT_ADDR+0xBFF8 +// +// def ram(dut : VexRiscvSmpCluster, withStall : Boolean) = { +// import spinal.core.sim._ +// val cpuCount = dut.cpus.size +// val ram = new BmbMemoryAgent(0x100000000l){ +// case class Report(hart : Int, code : Int, data : Int){ +// override def toString: String = { +// f"CPU:$hart%2d ${code}%3x -> $data%3d" +// } +// } +// val reports = ArrayBuffer.fill(cpuCount)(ArrayBuffer[Report]()) +// +// +// val writeTable = mutable.HashMap[Int, Int => Unit]() +// val readTable = mutable.HashMap[Int, () => Int]() +// def onWrite(address : Int)(body : Int => Unit) = writeTable(address) = body +// def onRead(address : Int)(body : => Int) = readTable(address) = () => body +// +// var writeData = 0 +// var readData = 0 +// var reportWatchdog = 0 +// val cpuEnd = Array.fill(cpuCount)(false) +// val barriers = mutable.HashMap[Int, Int]() +// var consistancyCounter = 0 +// var consistancyLast = 0 +// var consistancyA = 0 +// var consistancyB = 0 +// var consistancyAB = 0 +// var consistancyNone = 0 +// +// onSimEnd{ +// for((list, hart) <- reports.zipWithIndex){ +// println(f"\n\n**** CPU $hart%2d ****") +// for((report, reportId) <- list.zipWithIndex){ +// println(f" $reportId%3d : ${report.code}%3x -> ${report.data}%3d") +// } +// } +// +// println(s"consistancy NONE:$consistancyNone A:$consistancyA B:$consistancyB AB:$consistancyAB") +// } +// +// override def setByte(address: Long, value: Byte): Unit = { +// if((address & 0xF0000000l) != 0xF0000000l) return super.setByte(address, value) +// val byteId = address & 3 +// val mask = 0xFF << (byteId*8) +// writeData = (writeData & ~mask) | ((value.toInt << (byteId*8)) & mask) +// if(byteId 
!= 3) return +// val offset = (address & ~0xF0000000l)-3 +// // println(s"W[0x${offset.toHexString}] = $writeData @${simTime()}") +// offset match { +// case _ if offset >= 0x8000000 && offset < 0x9000000 => { +// val report = Report( +// hart = ((offset & 0xFF0000) >> 16).toInt, +// code = (offset & 0x00FFFF).toInt, +// data = writeData +// ) +//// println(report) +// reports(report.hart) += report +// reportWatchdog += 1 +// import report._ +// code match { +// case REPORT_THREAD_ID => assert(data == hart) +// case REPORT_THREAD_COUNT => assert(data == cpuCount) +// case REPORT_END => assert(data == 0); assert(cpuEnd(hart) == false); cpuEnd(hart) = true; if(!cpuEnd.exists(_ == false)) simSuccess() +// case REPORT_BARRIER_START => { +// val counter = barriers.getOrElse(data, 0) +// assert(counter < cpuCount) +// barriers(data) = counter + 1 +// } +// case REPORT_BARRIER_END => { +// val counter = barriers.getOrElse(data, 0) +// assert(counter == cpuCount) +// } +// case REPORT_CONSISTENCY_VALUES => consistancyCounter match { +// case 0 => { +// consistancyCounter = 1 +// consistancyLast = data +// } +// case 1 => { +// consistancyCounter = 0 +// (data, consistancyLast) match { +// case (666, 0) => consistancyA += 1 +// case (0, 666) => consistancyB += 1 +// case (666, 666) => consistancyAB += 1 +// case (0,0) => consistancyNone += 1; simFailure("Consistancy issue :(") +// } +// } +// } +// } +// } +// case _ => writeTable.get(offset.toInt) match { +// case Some(x) => x(writeData) +// case _ => simFailure(f"\n\nWrite at ${address-3}%8x with $writeData%8x") +// } +// } +// } +// +// override def getByte(address: Long): Byte = { +// if((address & 0xF0000000l) != 0xF0000000l) return super.getByte(address) +// val byteId = address & 3 +// val offset = (address & ~0xF0000000l) +// if(byteId == 0) readData = readTable.get(offset.toInt) match { +// case Some(x) => x() +// case _ => simFailure(f"\n\nRead at $address%8x") +// } +// (readData >> (byteId*8)).toByte +// } +// +// val clint = new { +// val cmp = Array.fill(cpuCount)(0l) +// var time = 0l +// periodicaly(100){ +// time += 10 +// var timerInterrupts = 0l +// for(i <- 0 until cpuCount){ +// if(cmp(i) < time) timerInterrupts |= 1l << i +// } +// dut.io.timerInterrupts #= timerInterrupts +// } +// +//// delayed(200*1000000){ +//// dut.io.softwareInterrupts #= 0xE +//// enableSimWave() +//// println("force IPI") +//// } +// } +// +// onWrite(PUTC)(data => print(data.toChar)) +// onRead(GETC)( if(System.in.available() != 0) System.in.read() else -1) +// +// dut.io.softwareInterrupts #= 0 +// dut.io.timerInterrupts #= 0 +// dut.io.externalInterrupts #= 0 +// dut.io.externalSupervisorInterrupts #= 0 +// onRead(CLINT_TIME_ADDR)(clint.time.toInt) +// onRead(CLINT_TIME_ADDR+4)((clint.time >> 32).toInt) +// for(hartId <- 0 until cpuCount){ +// onWrite(CLINT_IPI_ADDR + hartId*4) {data => +// val mask = 1l << hartId +// val value = (dut.io.softwareInterrupts.toLong & ~mask) | (if(data == 1) mask else 0) +// dut.io.softwareInterrupts #= value +// } +//// onRead(CLINT_CMP_ADDR + hartId*8)(clint.cmp(hartId).toInt) +//// onRead(CLINT_CMP_ADDR + hartId*8+4)((clint.cmp(hartId) >> 32).toInt) +// onWrite(CLINT_CMP_ADDR + hartId*8){data => clint.cmp(hartId) = (clint.cmp(hartId) & 0xFFFFFFFF00000000l) | data} +// onWrite(CLINT_CMP_ADDR + hartId*8+4){data => clint.cmp(hartId) = (clint.cmp(hartId) & 0x00000000FFFFFFFFl) | (data.toLong << 32)} +// } +// +// +// +// } +// dut.io.iMems.foreach(ram.addPort(_,0,dut.clockDomain,true, withStall)) +// 
ram.addPort(dut.io.dMem,0,dut.clockDomain,true, withStall) +// ram +// } +// def init(dut : VexRiscvSmpCluster): Unit ={ +// import spinal.core.sim._ +// dut.clockDomain.forkStimulus(10) +// dut.debugClockDomain.forkStimulus(10) +// dut.io.debugBus.cmd.valid #= false +// } +//} +// +//object VexRiscvSmpClusterTest extends App{ +// import spinal.core.sim._ +// +// val simConfig = SimConfig +// simConfig.withWave +// simConfig.allOptimisation +// simConfig.addSimulatorFlag("--threads 1") +// +// val cpuCount = 4 +// val withStall = true +// +// simConfig.compile(VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount)).doSimUntilVoid(seed = 42){dut => +// disableSimWave() +// SimTimeout(100000000l*10*cpuCount) +// dut.clockDomain.forkSimSpeedPrinter(1.0) +// VexRiscvSmpClusterTestInfrastructure.init(dut) +// val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall) +// ram.memory.loadBin(0x80000000l, "src/test/cpp/raw/smp/build/smp.bin") +// periodicaly(20000*10){ +// assert(ram.reportWatchdog != 0) +// ram.reportWatchdog = 0 +// } +// } +//} +// +//// echo "echo 10000 | dhrystone >> log" > test +//// time sh test & +//// top -b -n 1 +// +//// TODO +//// MultiChannelFifo.toStream arbitration +//// BmbDecoderOutOfOrder arbitration +//// DataCache to bmb invalidation that are more than single line +//object VexRiscvSmpClusterOpenSbi extends App{ +// import spinal.core.sim._ +// +// val simConfig = SimConfig +// simConfig.withWave +// simConfig.allOptimisation +// simConfig.addSimulatorFlag("--threads 1") +// +// val cpuCount = 2 +// val withStall = false +// +// def gen = { +// val dut = VexRiscvSmpClusterGen.vexRiscvCluster(cpuCount, resetVector = 0x80000000l) +// dut.cpus.foreach{cpu => +// cpu.core.children.foreach{ +// case cache : InstructionCache => cache.io.cpu.decode.simPublic() +// case _ => +// } +// } +// dut +// } +// +// simConfig.workspaceName("rawr_4c").compile(gen).doSimUntilVoid(seed = 42){dut => +//// dut.clockDomain.forkSimSpeedPrinter(1.0) +// VexRiscvSmpClusterTestInfrastructure.init(dut) +// val ram = VexRiscvSmpClusterTestInfrastructure.ram(dut, withStall) +//// ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_payload.bin") +// +//// ram.memory.loadBin(0x40F00000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/fw_jump.bin") +//// ram.memory.loadBin(0x40000000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/Image") +//// ram.memory.loadBin(0x40EF0000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/dtb") +//// ram.memory.loadBin(0x41000000l, "/media/data/open/litex_smp/litex_vexriscv_smp/images/rootfs.cpio") +// +// ram.memory.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") +// ram.memory.loadBin(0xC0000000l, "../buildroot/output/images/Image") +// ram.memory.loadBin(0xC1000000l, "../buildroot/output/images/dtb") +// ram.memory.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") +// +// import spinal.core.sim._ +// var iMemReadBytes, dMemReadBytes, dMemWriteBytes, iMemSequencial,iMemRequests, iMemPrefetchHit = 0l +// var reportTimer = 0 +// var reportCycle = 0 +// val iMemFetchDelta = mutable.HashMap[Long, Long]() +// var iMemFetchDeltaSorted : Seq[(Long, Long)] = null +// var dMemWrites, dMemWritesCached = 0l +// val dMemWriteCacheCtx = List(4,8,16,32,64).map(bytes => new { +// var counter = 0l +// var address = 0l +// val mask = ~((1 << log2Up(bytes))-1) +// }) +// +// import java.io._ +// val csv = new PrintWriter(new File("bench.csv" )) +// 
val iMemCtx = Array.tabulate(cpuCount)(i => new { +// var sequencialPrediction = 0l +// val cache = dut.cpus(i).core.children.find(_.isInstanceOf[InstructionCache]).head.asInstanceOf[InstructionCache].io.cpu.decode +// var lastAddress = 0l +// }) +// dut.clockDomain.onSamplings{ +// dut.io.time #= simTime()/10 +// +// +// for(i <- 0 until cpuCount; iMem = dut.io.iMems(i); ctx = iMemCtx(i)){ +//// if(iMem.cmd.valid.toBoolean && iMem.cmd.ready.toBoolean){ +//// val length = iMem.cmd.length.toInt + 1 +//// val address = iMem.cmd.address.toLong +//// iMemReadBytes += length +//// iMemRequests += 1 +//// } +// if(ctx.cache.isValid.toBoolean && !ctx.cache.mmuRefilling.toBoolean && !ctx.cache.mmuException.toBoolean){ +// val address = ctx.cache.physicalAddress.toLong +// val length = ctx.cache.p.bytePerLine.toLong +// val mask = ~(length-1) +// if(ctx.cache.cacheMiss.toBoolean) { +// iMemReadBytes += length +// if ((address & mask) == (ctx.sequencialPrediction & mask)) { +// iMemSequencial += 1 +// } +// } +// if(!ctx.cache.isStuck.toBoolean) { +// ctx.sequencialPrediction = address + length +// } +// } +// +// if(iMem.cmd.valid.toBoolean && iMem.cmd.ready.toBoolean){ +// val address = iMem.cmd.address.toLong +// iMemRequests += 1 +// if(iMemCtx(i).lastAddress + ctx.cache.p.bytePerLine == address){ +// iMemPrefetchHit += 1 +// } +// val delta = address-iMemCtx(i).lastAddress +// iMemFetchDelta(delta) = iMemFetchDelta.getOrElse(delta, 0l) + 1l +// if(iMemRequests % 1000 == 999) iMemFetchDeltaSorted = iMemFetchDelta.toSeq.sortBy(_._1) +// iMemCtx(i).lastAddress = address +// } +// } +// if(dut.io.dMem.cmd.valid.toBoolean && dut.io.dMem.cmd.ready.toBoolean){ +// if(dut.io.dMem.cmd.opcode.toInt == Bmb.Cmd.Opcode.WRITE){ +// dMemWriteBytes += dut.io.dMem.cmd.length.toInt+1 +// val address = dut.io.dMem.cmd.address.toLong +// dMemWrites += 1 +// for(ctx <- dMemWriteCacheCtx){ +// if((address & ctx.mask) == (ctx.address & ctx.mask)){ +// ctx.counter += 1 +// } else { +// ctx.address = address +// } +// } +// }else { +// dMemReadBytes += dut.io.dMem.cmd.length.toInt+1 +// for(ctx <- dMemWriteCacheCtx) ctx.address = -1 +// } +// } +// reportTimer = reportTimer + 1 +// reportCycle = reportCycle + 1 +// if(reportTimer == 400000){ +// reportTimer = 0 +//// println(f"\n** c=${reportCycle} ir=${iMemReadBytes*1e-6}%5.2f dr=${dMemReadBytes*1e-6}%5.2f dw=${dMemWriteBytes*1e-6}%5.2f **\n") +// +// +// csv.write(s"$reportCycle,$iMemReadBytes,$dMemReadBytes,$dMemWriteBytes,$iMemRequests,$iMemSequencial,$dMemWrites,${dMemWriteCacheCtx.map(_.counter).mkString(",")},$iMemPrefetchHit\n") +// csv.flush() +// reportCycle = 0 +// iMemReadBytes = 0 +// dMemReadBytes = 0 +// dMemWriteBytes = 0 +// iMemRequests = 0 +// iMemSequencial = 0 +// dMemWrites = 0 +// iMemPrefetchHit = 0 +// for(ctx <- dMemWriteCacheCtx) ctx.counter = 0 +// } +// } +// +// +//// fork{ +//// disableSimWave() +//// val atMs = 3790 +//// val durationMs = 5 +//// sleep(atMs*1000000) +//// enableSimWave() +//// println("** enableSimWave **") +//// sleep(durationMs*1000000) +//// println("** disableSimWave **") +//// while(true) { +//// disableSimWave() +//// sleep(100000 * 10) +//// enableSimWave() +//// sleep( 100 * 10) +//// } +////// simSuccess() +//// } +// +// fork{ +// while(true) { +// disableSimWave() +// sleep(100000 * 10) +// enableSimWave() +// sleep( 100 * 10) +// } +// } +// } +//} diff --git a/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala b/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala new 
file mode 100644 index 0000000..4cd4917 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexCluster.scala @@ -0,0 +1,322 @@ +package vexriscv.demo.smp + +import spinal.core._ +import spinal.core.fiber._ +import spinal.lib.bus.bmb._ +import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} +import spinal.lib.bus.wishbone.{WishboneConfig, WishboneToBmbGenerator} +import spinal.lib.generator.GeneratorComponent +import spinal.lib.sim.SparseMemory +import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig +import vexriscv.ip.fpu.{FpuCore, FpuParameter} +import vexriscv.plugin.{AesPlugin, DBusCachedPlugin, FpuPlugin} + + +case class VexRiscvLitexSmpClusterParameter( cluster : VexRiscvSmpClusterParameter, + liteDram : LiteDramNativeParameter, + liteDramMapping : AddressMapping, + coherentDma : Boolean, + wishboneMemory : Boolean, + cpuPerFpu : Int) + + +class VexRiscvLitexSmpCluster(p : VexRiscvLitexSmpClusterParameter) extends VexRiscvSmpClusterWithPeripherals(p.cluster) { + val iArbiter = BmbBridgeGenerator() + val iBridge = !p.wishboneMemory generate BmbToLiteDramGenerator(p.liteDramMapping) + val dBridge = !p.wishboneMemory generate BmbToLiteDramGenerator(p.liteDramMapping) + + for(core <- cores) interconnect.addConnection(core.cpu.iBus -> List(iArbiter.bmb)) + !p.wishboneMemory generate interconnect.addConnection( + iArbiter.bmb -> List(iBridge.bmb), + dBusNonCoherent.bmb -> List(dBridge.bmb) + ) + interconnect.addConnection( + iArbiter.bmb -> List(peripheralBridge.bmb), + dBusNonCoherent.bmb -> List(peripheralBridge.bmb) + ) + + val fpuGroups = (cores.reverse.grouped(p.cpuPerFpu)).toList.reverse + val fpu = p.cluster.fpu generate { for(group <- fpuGroups) yield new Area{ + val extraStage = group.size > 2 + + val logic = Handle{ + new FpuCore( + portCount = group.size, + p = FpuParameter( + withDouble = true, + asyncRegFile = false, + schedulerM2sPipe = extraStage + ) + ) + } + + val connect = Handle{ + for(i <- 0 until group.size; + vex = group(i).cpu.logic.cpu; + port = logic.io.port(i)) { + val plugin = vex.service(classOf[FpuPlugin]) + plugin.port.cmd.pipelined(m2s = false, s2m = false) >> port.cmd + plugin.port.commit.pipelined(m2s = extraStage, s2m = false) >> port.commit + plugin.port.completion := port.completion.m2sPipe() + plugin.port.rsp << port.rsp + } + } + }} + + if(p.cluster.withExclusiveAndInvalidation) interconnect.masters(dBusNonCoherent.bmb).withOutOfOrderDecoder() + + if(!p.wishboneMemory) { + dBridge.liteDramParameter.load(p.liteDram) + iBridge.liteDramParameter.load(p.liteDram) + } + + // Coherent DMA interface + val dma = p.coherentDma generate new Area { + val bridge = WishboneToBmbGenerator() + val wishbone = Handle(bridge.logic.io.input.toIo) + val dataWidth = p.cluster.cpuConfigs.head.find(classOf[DBusCachedPlugin]).get.config.memDataWidth + bridge.config.load(WishboneConfig( + addressWidth = 32 - log2Up(dataWidth / 8), + dataWidth = dataWidth, + useSTALL = true, + selWidth = dataWidth/8 + )) + interconnect.addConnection(bridge.bmb, dBusCoherent.bmb) + } + + // Interconnect pipelining (FMax) + for(core <- cores) { + interconnect.setPipelining(core.cpu.dBus)(cmdValid = true, cmdReady = true, rspValid = true, invValid = true, ackValid = true, syncValid = true) + interconnect.setPipelining(core.cpu.iBus)(cmdHalfRate = true, rspValid = true) + interconnect.setPipelining(iArbiter.bmb)(cmdHalfRate = true, rspValid = true) + } + interconnect.setPipelining(dBusCoherent.bmb)(cmdValid = true, cmdReady = true) + 
interconnect.setPipelining(dBusNonCoherent.bmb)(cmdValid = true, cmdReady = true, rspValid = true) + interconnect.setPipelining(peripheralBridge.bmb)(cmdHalfRate = !p.wishboneMemory, cmdValid = p.wishboneMemory, cmdReady = p.wishboneMemory, rspValid = true) + if(!p.wishboneMemory) { + interconnect.setPipelining(iBridge.bmb)(cmdHalfRate = true) + interconnect.setPipelining(dBridge.bmb)(cmdReady = true) + } +} + + +object VexRiscvLitexSmpClusterCmdGen extends App { + var cpuCount = 1 + var iBusWidth = 64 + var dBusWidth = 64 + var iCacheSize = 8192 + var dCacheSize = 8192 + var iCacheWays = 2 + var dCacheWays = 2 + var liteDramWidth = 128 + var coherentDma = false + var wishboneMemory = false + var outOfOrderDecoder = true + var aesInstruction = false + var fpu = false + var cpuPerFpu = 4 + var rvc = false + var netlistDirectory = "." + var netlistName = "VexRiscvLitexSmpCluster" + var iTlbSize = 4 + var dTlbSize = 4 + assert(new scopt.OptionParser[Unit]("VexRiscvLitexSmpClusterCmdGen") { + help("help").text("prints this usage text") + opt[Unit]("coherent-dma") action { (v, c) => coherentDma = true } + opt[String]("cpu-count") action { (v, c) => cpuCount = v.toInt } + opt[String]("ibus-width") action { (v, c) => iBusWidth = v.toInt } + opt[String]("dbus-width") action { (v, c) => dBusWidth = v.toInt } + opt[String]("icache-size") action { (v, c) => iCacheSize = v.toInt } + opt[String]("dcache-size") action { (v, c) => dCacheSize = v.toInt } + opt[String]("icache-ways") action { (v, c) => iCacheWays = v.toInt } + opt[String]("dcache-ways") action { (v, c) => dCacheWays = v.toInt } + opt[String]("litedram-width") action { (v, c) => liteDramWidth = v.toInt } + opt[String]("netlist-directory") action { (v, c) => netlistDirectory = v } + opt[String]("netlist-name") action { (v, c) => netlistName = v } + opt[String]("aes-instruction") action { (v, c) => aesInstruction = v.toBoolean } + opt[String]("out-of-order-decoder") action { (v, c) => outOfOrderDecoder = v.toBoolean } + opt[String]("wishbone-memory" ) action { (v, c) => wishboneMemory = v.toBoolean } + opt[String]("fpu" ) action { (v, c) => fpu = v.toBoolean } + opt[String]("cpu-per-fpu") action { (v, c) => cpuPerFpu = v.toInt } + opt[String]("rvc") action { (v, c) => rvc = v.toBoolean } + opt[String]("itlb-size") action { (v, c) => iTlbSize = v.toInt } + opt[String]("dtlb-size") action { (v, c) => dTlbSize = v.toInt } + }.parse(args)) + + val coherency = coherentDma || cpuCount > 1 + def parameter = VexRiscvLitexSmpClusterParameter( + cluster = VexRiscvSmpClusterParameter( + cpuConfigs = List.tabulate(cpuCount) { hartId => { + val c = vexRiscvConfig( + hartId = hartId, + ioRange = address => address.msb, + resetVector = 0, + iBusWidth = iBusWidth, + dBusWidth = dBusWidth, + iCacheSize = iCacheSize, + dCacheSize = dCacheSize, + iCacheWays = iCacheWays, + dCacheWays = dCacheWays, + coherency = coherency, + iBusRelax = true, + earlyBranch = true, + withFloat = fpu, + withDouble = fpu, + externalFpu = fpu, + loadStoreWidth = if(fpu) 64 else 32, + rvc = rvc, + injectorStage = rvc, + iTlbSize = iTlbSize, + dTlbSize = dTlbSize + ) + if(aesInstruction) c.add(new AesPlugin) + c + }}, + withExclusiveAndInvalidation = coherency, + forcePeripheralWidth = !wishboneMemory, + outOfOrderDecoder = outOfOrderDecoder, + fpu = fpu, + jtagHeaderIgnoreWidth = 0 + ), + liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = liteDramWidth), + liteDramMapping = SizeMapping(0x40000000l, 0x40000000l), + coherentDma = coherentDma, + wishboneMemory = 
wishboneMemory, + cpuPerFpu = cpuPerFpu + ) + + def dutGen = { + val toplevel = new Component { + val body = new VexRiscvLitexSmpCluster( + p = parameter + ) + body.setName("") + } + toplevel + } + + val genConfig = SpinalConfig(targetDirectory = netlistDirectory, inlineRom = true).addStandardMemBlackboxing(blackboxByteEnables) + genConfig.generateVerilog(dutGen.setDefinitionName(netlistName)) + +} + + +//object VexRiscvLitexSmpClusterGen extends App { +// for(cpuCount <- List(1,2,4,8)) { +// def parameter = VexRiscvLitexSmpClusterParameter( +// cluster = VexRiscvSmpClusterParameter( +// cpuConfigs = List.tabulate(cpuCount) { hartId => +// vexRiscvConfig( +// hartId = hartId, +// ioRange = address => address.msb, +// resetVector = 0 +// ) +// }, +// withExclusiveAndInvalidation = true +// ), +// liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), +// liteDramMapping = SizeMapping(0x40000000l, 0x40000000l), +// coherentDma = false +// ) +// +// def dutGen = { +// val toplevel = new VexRiscvLitexSmpCluster( +// p = parameter +// ).toComponent() +// toplevel +// } +// +// val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) +// // genConfig.generateVerilog(Bench.compressIo(dutGen)) +// genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpCluster_${cpuCount}c")) +// } +//} + +////addAttribute("""mark_debug = "true"""") +object VexRiscvLitexSmpClusterOpenSbi extends App{ + import spinal.core.sim._ + + val simConfig = SimConfig + simConfig.withWave + simConfig.allOptimisation + + val cpuCount = 2 + + def parameter = VexRiscvLitexSmpClusterParameter( + cluster = VexRiscvSmpClusterParameter( + cpuConfigs = List.tabulate(cpuCount) { hartId => + vexRiscvConfig( + hartId = hartId, + ioRange = address => address(31 downto 28) === 0xF, + resetVector = 0x80000000l + ) + }, + withExclusiveAndInvalidation = true, + jtagHeaderIgnoreWidth = 0 + ), + liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), + liteDramMapping = SizeMapping(0x80000000l, 0x70000000l), + coherentDma = false, + wishboneMemory = false, + cpuPerFpu = 4 + ) + + def dutGen = { + import GeneratorComponent.toGenerator + val top = new Component { + val body = new VexRiscvLitexSmpCluster( + p = parameter + ) + } + top.rework{ + top.body.clintWishbone.setAsDirectionLess.allowDirectionLessIo + top.body.peripheral.setAsDirectionLess.allowDirectionLessIo.simPublic() + + val hit = (top.body.peripheral.ADR <<2 >= 0xF0010000l && top.body.peripheral.ADR<<2 < 0xF0020000l) + top.body.clintWishbone.CYC := top.body.peripheral.CYC && hit + top.body.clintWishbone.STB := top.body.peripheral.STB + top.body.clintWishbone.WE := top.body.peripheral.WE + top.body.clintWishbone.ADR := top.body.peripheral.ADR.resized + top.body.clintWishbone.DAT_MOSI := top.body.peripheral.DAT_MOSI + top.body.peripheral.DAT_MISO := top.body.clintWishbone.DAT_MISO + top.body.peripheral.ACK := top.body.peripheral.CYC && (!hit || top.body.clintWishbone.ACK) + top.body.peripheral.ERR := False + } + top + } + + simConfig.compile(dutGen).doSimUntilVoid(seed = 42){dut => + dut.body.debugCd.inputClockDomain.get.forkStimulus(10) + + val ram = SparseMemory() + ram.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") + ram.loadBin(0xC0000000l, "../buildroot/output/images/Image") + ram.loadBin(0xC1000000l, "../buildroot/output/images/dtb") + ram.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") + + + dut.body.iBridge.dram.simSlave(ram, 
dut.body.debugCd.inputClockDomain) + dut.body.dBridge.dram.simSlave(ram, dut.body.debugCd.inputClockDomain/*, dut.body.dMemBridge.unburstified*/) + + dut.body.interrupts #= 0 + + dut.body.debugCd.inputClockDomain.get.onFallingEdges{ + if(dut.body.peripheral.CYC.toBoolean){ + (dut.body.peripheral.ADR.toLong << 2) match { + case 0xF0000000l => print(dut.body.peripheral.DAT_MOSI.toLong.toChar) + case 0xF0000004l => dut.body.peripheral.DAT_MISO #= (if(System.in.available() != 0) System.in.read() else 0xFFFFFFFFl) + case _ => + } + } + } + + fork{ + while(true) { + disableSimWave() + sleep(100000 * 10) + enableSimWave() + sleep( 100 * 10) + } + } + } +}
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala b/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala new file mode 100644 index 0000000..e662dfe --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/demo/smp/VexRiscvSmpLitexMpCluster.scala @@ -0,0 +1,409 @@ +package vexriscv.demo.smp + +import spinal.core._ +import spinal.lib.bus.bmb._ +import spinal.lib.bus.misc.{AddressMapping, DefaultMapping, SizeMapping} +import spinal.lib.bus.wishbone.{WishboneConfig, WishboneToBmbGenerator} +import spinal.lib.sim.SparseMemory +import vexriscv.demo.smp.VexRiscvSmpClusterGen.vexRiscvConfig + +//case class VexRiscvLitexSmpMpClusterParameter( cluster : VexRiscvSmpClusterParameter, +// liteDram : LiteDramNativeParameter, +// liteDramMapping : AddressMapping) +// +//class VexRiscvLitexSmpMpCluster(p : VexRiscvLitexSmpMpClusterParameter) extends VexRiscvSmpClusterWithPeripherals(p.cluster) { +// val iArbiter = BmbBridgeGenerator() +// val iBridge = BmbToLiteDramGenerator(p.liteDramMapping) +// val dBridge = BmbToLiteDramGenerator(p.liteDramMapping) +// +// for(core <- cores) interconnect.addConnection(core.cpu.iBus -> List(iArbiter.bmb)) +// interconnect.addConnection( +// iArbiter.bmb -> List(iBridge.bmb, peripheralBridge.bmb), +// invalidationMonitor.output -> List(dBridge.bmb, peripheralBridge.bmb) +// ) +// interconnect.masters(invalidationMonitor.output).withOutOfOrderDecoder() +// +// dBridge.liteDramParameter.load(p.liteDram) +// iBridge.liteDramParameter.load(p.liteDram) +// +// // Interconnect pipelining (FMax) +// for(core <- cores) { +// interconnect.setPipelining(core.cpu.dBus)(cmdValid = true, cmdReady = true, rspValid = true) +// interconnect.setPipelining(core.cpu.iBus)(cmdHalfRate = true, rspValid = true) +// interconnect.setPipelining(iArbiter.bmb)(cmdHalfRate = true, rspValid = true) +// } +// interconnect.setPipelining(invalidationMonitor.output)(cmdValid = true, cmdReady = true, rspValid = true) +// interconnect.setPipelining(peripheralBridge.bmb)(cmdHalfRate = true, rspValid = true) +//} +// +// +//object VexRiscvLitexSmpMpClusterGen extends App { +// for(cpuCount <- List(1,2,4,8)) { +// def parameter = VexRiscvLitexSmpMpClusterParameter( +// cluster = VexRiscvSmpClusterParameter( +// cpuConfigs = List.tabulate(cpuCount) { hartId => +// vexRiscvConfig( +// hartId = hartId, +// ioRange = address => address.msb, +// resetVector = 0 +// ) +// } +// ), +// liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), +// liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) +// ) +// +// def dutGen = { +// val toplevel = new VexRiscvLitexSmpMpCluster( +// p = parameter +// ).toComponent() +// toplevel +// } +// +// val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) +// // genConfig.generateVerilog(Bench.compressIo(dutGen)) +// genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpMpCluster_${cpuCount}c")) +// } +//} + + + +// +////addAttribute("""mark_debug = "true"""") +//class VexRiscvLitexSmpMpCluster(val p : VexRiscvLitexSmpMpClusterParameter, +// val debugClockDomain : ClockDomain, +// val jtagClockDomain : ClockDomain) extends Component{ +// +// val peripheralWishboneConfig = WishboneConfig( +// addressWidth = 30, +// dataWidth = 32, +// selWidth = 4, +// useERR = true, +// useBTE = true, +// useCTI = true +// ) +// +// val cpuCount = p.cluster.cpuConfigs.size +// +// val io = new Bundle { +// val dMem = Vec(master(LiteDramNative(p.liteDram)), 
cpuCount) +// val iMem = Vec(master(LiteDramNative(p.liteDram)), cpuCount) +// val peripheral = master(Wishbone(peripheralWishboneConfig)) +// val clint = slave(Wishbone(Clint.getWisboneConfig())) +// val plic = slave(Wishbone(WishboneConfig(addressWidth = 20, dataWidth = 32))) +// val interrupts = in Bits(32 bits) +// val jtagInstruction = slave(JtagTapInstructionCtrl()) +// val debugReset = out Bool() +// } +// val clint = Clint(cpuCount) +// clint.driveFrom(WishboneSlaveFactory(io.clint)) +// +// val cluster = VexRiscvSmpCluster(p.cluster, debugClockDomain) +// cluster.io.debugReset <> io.debugReset +// cluster.io.timerInterrupts <> B(clint.harts.map(_.timerInterrupt)) +// cluster.io.softwareInterrupts <> B(clint.harts.map(_.softwareInterrupt)) +// cluster.io.time := clint.time +// +// val debug = debugClockDomain on new Area{ +// val jtagConfig = SystemDebuggerConfig() +// +// val jtagBridge = new JtagBridgeNoTap(jtagConfig, jtagClockDomain) +// jtagBridge.io.ctrl << io.jtagInstruction +// +// val debugger = new SystemDebugger(jtagConfig) +// debugger.io.remote <> jtagBridge.io.remote +// +// cluster.io.debugBus << debugger.io.mem.toBmb() +// +//// io.jtagInstruction.allowDirectionLessIo.setAsDirectionLess +//// val bridge = Bscane2BmbMaster(1) +//// cluster.io.debugBus << bridge.io.bmb +// +// +//// val bscane2 = BSCANE2(usedId) +//// val jtagClockDomain = ClockDomain(bscane2.TCK) +//// +//// val jtagBridge = new JtagBridgeNoTap(jtagConfig, jtagClockDomain) +//// jtagBridge.io.ctrl << bscane2.toJtagTapInstructionCtrl() +//// +//// val debugger = new SystemDebugger(jtagConfig) +//// debugger.io.remote <> jtagBridge.io.remote +//// +//// io.bmb << debugger.io.mem.toBmb() +// } +// +// val dBusDecoder = BmbDecoderOutOfOrder( +// p = cluster.io.dMem.p, +// mappings = Seq(DefaultMapping, p.liteDramMapping), +// capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p), +// pendingRspTransactionMax = 32 +// ) +//// val dBusDecoder = BmbDecoderOut( +//// p = cluster.io.dMem.p, +//// mappings = Seq(DefaultMapping, p.liteDramMapping), +//// capabilities = Seq(cluster.io.dMem.p, cluster.io.dMem.p), +//// pendingMax = 31 +//// ) +// dBusDecoder.io.input << cluster.io.dMem.pipelined(cmdValid = true, cmdReady = true, rspValid = true) +// +// +// val perIBus = for(id <- 0 until cpuCount) yield new Area{ +// val decoder = BmbDecoder( +// p = cluster.io.iMems(id).p, +// mappings = Seq(DefaultMapping, p.liteDramMapping), +// capabilities = Seq(cluster.io.iMems(id).p,cluster.io.iMems(id).p), +// pendingMax = 15 +// ) +// +// decoder.io.input << cluster.io.iMems(id) +// io.iMem(id).fromBmb(decoder.io.outputs(1).pipelined(cmdHalfRate = true), wdataFifoSize = 0, rdataFifoSize = 32) +// val toPeripheral = decoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) +// } +// +// val dBusDecoderToPeripheral = dBusDecoder.io.outputs(0).resize(dataWidth = 32).pipelined(cmdHalfRate = true, rspValid = true) +// +// val peripheralAccessLength = Math.max(perIBus(0).toPeripheral.p.lengthWidth, dBusDecoder.io.outputs(0).p.lengthWidth) +// val peripheralArbiter = BmbArbiter( +// p = dBusDecoder.io.outputs(0).p.copy( +// sourceWidth = List(perIBus(0).toPeripheral, dBusDecoderToPeripheral).map(_.p.sourceWidth).max + log2Up(cpuCount + 1), +// contextWidth = List(perIBus(0).toPeripheral, dBusDecoderToPeripheral).map(_.p.contextWidth).max, +// lengthWidth = peripheralAccessLength, +// dataWidth = 32 +// ), +// portCount = cpuCount+1, +// lowerFirstPriority = true +// ) +// +// for(id <- 0 
until cpuCount){ +// peripheralArbiter.io.inputs(id) << perIBus(id).toPeripheral +// } +// peripheralArbiter.io.inputs(cpuCount) << dBusDecoderToPeripheral +// +// val peripheralWishbone = peripheralArbiter.io.output.pipelined(cmdValid = true).toWishbone() +// io.peripheral << peripheralWishbone +// +// +// val dBusDemux = BmbSourceDecoder(dBusDecoder.io.outputs(1).p) +// dBusDemux.io.input << dBusDecoder.io.outputs(1).pipelined(cmdValid = true, cmdReady = true,rspValid = true) +// val dMemBridge = for(id <- 0 until cpuCount) yield { +// io.dMem(id).fromBmb(dBusDemux.io.outputs(id), wdataFifoSize = 32, rdataFifoSize = 32) +// } +// +// +// val plic = new Area{ +// val priorityWidth = 2 +// +// val gateways = for(i <- 1 until 32) yield PlicGatewayActiveHigh( +// source = io.interrupts(i), +// id = i, +// priorityWidth = priorityWidth +// ) +// +// val bus = WishboneSlaveFactory(io.plic) +// +// val targets = for(i <- 0 until cpuCount) yield new Area{ +// val machine = PlicTarget( +// gateways = gateways, +// priorityWidth = priorityWidth +// ) +// val supervisor = PlicTarget( +// gateways = gateways, +// priorityWidth = priorityWidth +// ) +// +// cluster.io.externalInterrupts(i) := machine.iep +// cluster.io.externalSupervisorInterrupts(i) := supervisor.iep +// } +// +// val bridge = PlicMapper(bus, PlicMapping.sifive)( +// gateways = gateways, +// targets = targets.flatMap(t => List(t.machine, t.supervisor)) +// ) +// } +//// +//// io.dMem.foreach(_.cmd.valid.addAttribute("""mark_debug = "true"""")) +//// io.dMem.foreach(_.cmd.ready.addAttribute("""mark_debug = "true"""")) +//// io.iMem.foreach(_.cmd.valid.addAttribute("""mark_debug = "true"""")) +//// io.iMem.foreach(_.cmd.ready.addAttribute("""mark_debug = "true"""")) +//// +//// cluster.io.dMem.cmd.valid.addAttribute("""mark_debug = "true"""") +//// cluster.io.dMem.cmd.ready.addAttribute("""mark_debug = "true"""") +//// cluster.io.dMem.rsp.valid.addAttribute("""mark_debug = "true"""") +//// cluster.io.dMem.rsp.ready.addAttribute("""mark_debug = "true"""") +//} +// +//object VexRiscvLitexSmpMpClusterGen extends App { +// for(cpuCount <- List(1,2,4,8)) { +// def parameter = VexRiscvLitexSmpMpClusterParameter( +// cluster = VexRiscvSmpClusterParameter( +// cpuConfigs = List.tabulate(cpuCount) { hartId => +// vexRiscvConfig( +// hartId = hartId, +// ioRange = address => address.msb, +// resetVector = 0 +// ) +// } +// ), +// liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), +// liteDramMapping = SizeMapping(0x40000000l, 0x40000000l) +// ) +// +// def dutGen = { +// val toplevel = new VexRiscvLitexSmpMpCluster( +// p = parameter, +// debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), +// jtagClockDomain = ClockDomain.external("jtag", withReset = false) +// ) +// toplevel +// } +// +// val genConfig = SpinalConfig().addStandardMemBlackboxing(blackboxByteEnables) +// // genConfig.generateVerilog(Bench.compressIo(dutGen)) +// genConfig.generateVerilog(dutGen.setDefinitionName(s"VexRiscvLitexSmpMpCluster_${cpuCount}c")) +// } +// +//} +// +// +//object VexRiscvLitexSmpMpClusterOpenSbi extends App{ +// import spinal.core.sim._ +// +// val simConfig = SimConfig +// simConfig.withWave +// simConfig.withFstWave +// simConfig.allOptimisation +// +// val cpuCount = 2 +// +// def parameter = VexRiscvLitexSmpMpClusterParameter( +// cluster = VexRiscvSmpClusterParameter( +// cpuConfigs = List.tabulate(cpuCount) { hartId => +// vexRiscvConfig( +// hartId = hartId, +// ioRange = address => address(31 
downto 28) === 0xF, +// resetVector = 0x80000000l +// ) +// } +// ), +// liteDram = LiteDramNativeParameter(addressWidth = 32, dataWidth = 128), +// liteDramMapping = SizeMapping(0x80000000l, 0x70000000l) +// ) +// +// def dutGen = { +// val top = new VexRiscvLitexSmpMpCluster( +// p = parameter, +// debugClockDomain = ClockDomain.current.copy(reset = Bool().setName("debugResetIn")), +// jtagClockDomain = ClockDomain.external("jtag", withReset = false) +// ){ +// io.jtagInstruction.allowDirectionLessIo.setAsDirectionLess +// val jtag = slave(Jtag()) +// jtagClockDomain.readClockWire.setAsDirectionLess() := jtag.tck +// val jtagLogic = jtagClockDomain on new Area{ +// val tap = new JtagTap(jtag, 4) +// val idcodeArea = tap.idcode(B"x10001FFF")(1) +// val wrapper = tap.map(io.jtagInstruction, instructionId = 2) +// } +// } +// top.rework{ +// top.io.clint.setAsDirectionLess.allowDirectionLessIo +// top.io.peripheral.setAsDirectionLess.allowDirectionLessIo.simPublic() +// +// val hit = (top.io.peripheral.ADR <<2 >= 0xF0010000l && top.io.peripheral.ADR<<2 < 0xF0020000l) +// top.io.clint.CYC := top.io.peripheral.CYC && hit +// top.io.clint.STB := top.io.peripheral.STB +// top.io.clint.WE := top.io.peripheral.WE +// top.io.clint.ADR := top.io.peripheral.ADR.resized +// top.io.clint.DAT_MOSI := top.io.peripheral.DAT_MOSI +// top.io.peripheral.DAT_MISO := top.io.clint.DAT_MISO +// top.io.peripheral.ACK := top.io.peripheral.CYC && (!hit || top.io.clint.ACK) +// top.io.peripheral.ERR := False +// +//// top.dMemBridge.unburstified.cmd.simPublic() +// } +// top +// } +// simConfig.compile(dutGen).doSimUntilVoid(seed = 42){dut => +// dut.clockDomain.forkStimulus(10) +// fork { +// dut.debugClockDomain.resetSim #= false +// sleep (0) +// dut.debugClockDomain.resetSim #= true +// sleep (10) +// dut.debugClockDomain.resetSim #= false +// } +// +// JtagTcp(dut.jtag, 10*20) +// +// val ram = SparseMemory() +// ram.loadBin(0x80000000l, "../opensbi/build/platform/spinal/vexriscv/sim/smp/firmware/fw_jump.bin") +// ram.loadBin(0xC0000000l, "../buildroot/output/images/Image") +// ram.loadBin(0xC1000000l, "../buildroot/output/images/dtb") +// ram.loadBin(0xC2000000l, "../buildroot/output/images/rootfs.cpio") +// +// for(id <- 0 until cpuCount) { +// dut.io.iMem(id).simSlave(ram, dut.clockDomain) +// dut.io.dMem(id).simSlave(ram, dut.clockDomain) +// } +// +// dut.io.interrupts #= 0 +// +// +//// val stdin = mutable.Queue[Byte]() +//// def stdInPush(str : String) = stdin ++= str.toCharArray.map(_.toByte) +//// fork{ +//// sleep(4000*1000000l) +//// stdInPush("root\n") +//// sleep(1000*1000000l) +//// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +//// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +//// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +//// stdInPush("ping localhost -i 0.01 > /dev/null &\n") +//// sleep(500*1000000l) +//// while(true){ +//// sleep(500*1000000l) +//// stdInPush("uptime\n") +//// printf("\n** uptime **") +//// } +//// } +// dut.clockDomain.onFallingEdges { +// if (dut.io.peripheral.CYC.toBoolean) { +// (dut.io.peripheral.ADR.toLong << 2) match { +// case 0xF0000000l => print(dut.io.peripheral.DAT_MOSI.toLong.toChar) +// case 0xF0000004l => dut.io.peripheral.DAT_MISO #= (if (System.in.available() != 0) System.in.read() else 0xFFFFFFFFl) +// case _ => +// // case 0xF0000004l => { +// // val c = if(stdin.nonEmpty) { +// // stdin.dequeue().toInt & 0xFF +// // } else { +// // 0xFFFFFFFFl +// // } +// // dut.io.peripheral.DAT_MISO #= c +// // } +// // case _ => +// // } +// 
// println(f"${dut.io.peripheral.ADR.toLong}%x") +// } +// } +// } +// +// fork{ +// val at = 0 +// val duration = 1000 +// while(simTime() < at*1000000l) { +// disableSimWave() +// sleep(100000 * 10) +// enableSimWave() +// sleep( 200 * 10) +// } +// println("\n\n********************") +// sleep(duration*1000000l) +// println("********************\n\n") +// while(true) { +// disableSimWave() +// sleep(100000 * 10) +// enableSimWave() +// sleep( 400 * 10) +// } +// } +// } +//}
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/ip/DataCache.scala b/VexRiscv/src/main/scala/vexriscv/ip/DataCache.scala new file mode 100644 index 0000000..2b70400 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/ip/DataCache.scala @@ -0,0 +1,1184 @@ +package vexriscv.ip + +import vexriscv._ +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.amba4.axi.{Axi4Config, Axi4Shared} +import spinal.lib.bus.avalon.{AvalonMM, AvalonMMConfig} +import spinal.lib.bus.bmb.{Bmb, BmbAccessParameter, BmbCmd, BmbInvalidationParameter, BmbParameter, BmbSourceParameter} +import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig} +import spinal.lib.bus.simple._ +import vexriscv.plugin.DBusSimpleBus + + +case class DataCacheConfig(cacheSize : Int, + bytePerLine : Int, + wayCount : Int, + addressWidth : Int, + cpuDataWidth : Int, + var rfDataWidth : Int = -1, //-1 mean cpuDataWidth + memDataWidth : Int, + catchAccessError : Boolean, + catchIllegal : Boolean, + catchUnaligned : Boolean, + earlyWaysHits : Boolean = true, + earlyDataMux : Boolean = false, + tagSizeShift : Int = 0, //Used to force infering ram + withLrSc : Boolean = false, + withAmo : Boolean = false, + withExclusive : Boolean = false, + withInvalidate : Boolean = false, + pendingMax : Int = 64, + directTlbHit : Boolean = false, + mergeExecuteMemory : Boolean = false, + asyncTagMemory : Boolean = false, + withWriteAggregation : Boolean = false){ + + if(rfDataWidth == -1) rfDataWidth = cpuDataWidth + assert(!(mergeExecuteMemory && (earlyDataMux || earlyWaysHits))) + assert(!(earlyDataMux && !earlyWaysHits)) + assert(isPow2(pendingMax)) + assert(rfDataWidth <= memDataWidth) + + def lineCount = cacheSize/bytePerLine/wayCount + def sizeMax = log2Up(bytePerLine) + def sizeWidth = log2Up(sizeMax + 1) + val aggregationWidth = if(withWriteAggregation) log2Up(memDataBytes+1) else 0 + def withWriteResponse = withExclusive + def burstSize = bytePerLine*8/memDataWidth + val burstLength = bytePerLine/(cpuDataWidth/8) + def catchSomething = catchUnaligned || catchIllegal || catchAccessError + def withInternalAmo = withAmo && !withExclusive + def withInternalLrSc = withLrSc && !withExclusive + def withExternalLrSc = withLrSc && withExclusive + def withExternalAmo = withAmo && withExclusive + def cpuDataBytes = cpuDataWidth/8 + def rfDataBytes = rfDataWidth/8 + def memDataBytes = memDataWidth/8 + def getAxi4SharedConfig() = Axi4Config( + addressWidth = addressWidth, + dataWidth = memDataWidth, + useId = false, + useRegion = false, + useBurst = false, + useLock = false, + useQos = false + ) + + + def getAvalonConfig() = AvalonMMConfig.bursted( + addressWidth = addressWidth, + dataWidth = memDataWidth, + burstCountWidth = log2Up(burstSize + 1)).copy( + useByteEnable = true, + constantBurstBehavior = true, + burstOnBurstBoundariesOnly = true, + useResponse = true, + maximumPendingReadTransactions = 2 + ) + + def getWishboneConfig() = WishboneConfig( + addressWidth = 32-log2Up(memDataWidth/8), + dataWidth = memDataWidth, + selWidth = memDataBytes, + useSTALL = false, + useLOCK = false, + useERR = true, + useRTY = false, + tgaWidth = 0, + tgcWidth = 0, + tgdWidth = 0, + useBTE = true, + useCTI = true + ) + + def getBmbParameter() = BmbParameter( + BmbAccessParameter( + addressWidth = 32, + dataWidth = memDataWidth + ).addSources(1, BmbSourceParameter( + lengthWidth = log2Up(this.bytePerLine), + contextWidth = (if(!withWriteResponse) 1 else 0) + aggregationWidth, + alignment = BmbParameter.BurstAlignement.LENGTH, + 
canExclusive = withExclusive, + withCachedRead = true, + canInvalidate = withInvalidate, + canSync = withInvalidate + )), + BmbInvalidationParameter( + invalidateLength = log2Up(this.bytePerLine), + invalidateAlignment = BmbParameter.BurstAlignement.LENGTH + ) + ) +} + +object DataCacheCpuExecute{ + implicit def implArgs(that : DataCacheCpuExecute) = that.args +} + +case class DataCacheCpuExecute(p : DataCacheConfig) extends Bundle with IMasterSlave{ + val isValid = Bool + val address = UInt(p.addressWidth bit) + val haltIt = Bool + val args = DataCacheCpuExecuteArgs(p) + val refilling = Bool + + override def asMaster(): Unit = { + out(isValid, args, address) + in(haltIt, refilling) + } +} + +case class DataCacheCpuExecuteArgs(p : DataCacheConfig) extends Bundle{ + val wr = Bool + val size = UInt(log2Up(log2Up(p.cpuDataBytes)+1) bits) + val isLrsc = p.withLrSc generate Bool() + val isAmo = p.withAmo generate Bool() + val amoCtrl = p.withAmo generate new Bundle { + val swap = Bool() + val alu = Bits(3 bits) + } + + val totalyConsistent = Bool() //Only for AMO/LRSC +} + +case class DataCacheCpuMemory(p : DataCacheConfig, mmu : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ + val isValid = Bool + val isStuck = Bool + val isWrite = Bool + val address = UInt(p.addressWidth bit) + val mmuRsp = MemoryTranslatorRsp(mmu) + + override def asMaster(): Unit = { + out(isValid, isStuck, address) + in(isWrite) + out(mmuRsp) + } +} + + +case class FenceFlags() extends Bundle { + val SW,SR,SO,SI,PW,PR,PO,PI = Bool() + val FM = Bits(4 bits) + + def SL = SR || SI + def SS = SW || SO + def PL = PR || PI + def PS = PW || PO + def forceAll(): Unit ={ + List(SW,SR,SO,SI,PW,PR,PO,PI).foreach(_ := True) + } + def clearAll(): Unit ={ + List(SW,SR,SO,SI,PW,PR,PO,PI).foreach(_ := False) + } +} + +case class DataCacheCpuWriteBack(p : DataCacheConfig) extends Bundle with IMasterSlave{ + val isValid = Bool() + val isStuck = Bool() + val isFiring = Bool() + val isUser = Bool() + val haltIt = Bool() + val isWrite = Bool() + val storeData = Bits(p.cpuDataWidth bit) + val data = Bits(p.cpuDataWidth bit) + val address = UInt(p.addressWidth bit) + val mmuException, unalignedAccess, accessError = Bool() + val keepMemRspData = Bool() //Used by external AMO to avoid having an internal buffer + val fence = FenceFlags() + val exclusiveOk = Bool() + + override def asMaster(): Unit = { + out(isValid,isStuck,isUser, address, fence, storeData, isFiring) + in(haltIt, data, mmuException, unalignedAccess, accessError, isWrite, keepMemRspData, exclusiveOk) + } +} + +case class DataCacheFlush(lineCount : Int) extends Bundle{ + val singleLine = Bool() + val lineId = UInt(log2Up(lineCount) bits) +} + +case class DataCacheCpuBus(p : DataCacheConfig, mmu : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ + val execute = DataCacheCpuExecute(p) + val memory = DataCacheCpuMemory(p, mmu) + val writeBack = DataCacheCpuWriteBack(p) + + val redo = Bool() + val flush = Stream(DataCacheFlush(p.lineCount)) + + override def asMaster(): Unit = { + master(execute) + master(memory) + master(writeBack) + master(flush) + in(redo) + } +} + + +case class DataCacheMemCmd(p : DataCacheConfig) extends Bundle{ + val wr = Bool + val uncached = Bool + val address = UInt(p.addressWidth bit) + val data = Bits(p.cpuDataWidth bits) + val mask = Bits(p.cpuDataWidth/8 bits) + val size = UInt(p.sizeWidth bits) //... 1 => 2 bytes ... 2 => 4 bytes ... 
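+                      //i.e. size is the log2 of the access byte count; size === log2Up(bytePerLine) requests a full cache-line burst (see isBurst/beatCount below)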
+ val exclusive = p.withExclusive generate Bool() + val last = Bool + +// def beatCountMinusOne = size.muxListDc((0 to p.sizeMax).map(i => i -> U((1 << i)/p.memDataBytes))) +// def beatCount = size.muxListDc((0 to p.sizeMax).map(i => i -> U((1 << i)/p.memDataBytes-1))) + + //Utilities which does quite a few assumtions about the bus utilisation + def byteCountMinusOne = size.muxListDc((0 to p.sizeMax).map(i => i -> U((1 << i)-1, log2Up(p.bytePerLine) bits))) + def beatCountMinusOne = (size === log2Up(p.bytePerLine)) ? U(p.burstSize-1) | U(0) + def beatCount = (size === log2Up(p.bytePerLine)) ? U(p.burstSize) | U(1) + def isBurst = size === log2Up(p.bytePerLine) +} +case class DataCacheMemRsp(p : DataCacheConfig) extends Bundle{ + val aggregated = UInt(p.aggregationWidth bits) + val last = Bool() + val data = Bits(p.memDataWidth bit) + val error = Bool + val exclusive = p.withExclusive generate Bool() +} +case class DataCacheInv(p : DataCacheConfig) extends Bundle{ + val enable = Bool() + val address = UInt(p.addressWidth bit) +} +case class DataCacheAck(p : DataCacheConfig) extends Bundle{ + val hit = Bool() +} + +case class DataCacheSync(p : DataCacheConfig) extends Bundle{ + val aggregated = UInt(p.aggregationWidth bits) +} + +case class DataCacheMemBus(p : DataCacheConfig) extends Bundle with IMasterSlave{ + val cmd = Stream (DataCacheMemCmd(p)) + val rsp = Flow (DataCacheMemRsp(p)) + + val inv = p.withInvalidate generate Stream(Fragment(DataCacheInv(p))) + val ack = p.withInvalidate generate Stream(Fragment(DataCacheAck(p))) + val sync = p.withInvalidate generate Stream(DataCacheSync(p)) + + override def asMaster(): Unit = { + master(cmd) + slave(rsp) + + if(p.withInvalidate) { + slave(inv) + master(ack) + slave(sync) + } + } + + def toAxi4Shared(stageCmd : Boolean = false, pendingWritesMax : Int = 7): Axi4Shared = { + val axi = Axi4Shared(p.getAxi4SharedConfig()).setName("dbus_axi") + + val cmdPreFork = if (stageCmd) cmd.stage.stage().s2mPipe() else cmd + + val pendingWrites = CounterUpDown( + stateCount = pendingWritesMax + 1, + incWhen = cmdPreFork.fire && cmdPreFork.wr, + decWhen = axi.writeRsp.fire + ) + + val hazard = (pendingWrites =/= 0 && !cmdPreFork.wr) || pendingWrites === pendingWritesMax + val (cmdFork, dataFork) = StreamFork2(cmdPreFork.haltWhen(hazard)) + val cmdStage = cmdFork.throwWhen(RegNextWhen(!cmdFork.last,cmdFork.fire).init(False)) + val dataStage = dataFork.throwWhen(!dataFork.wr) + + axi.sharedCmd.arbitrationFrom(cmdStage) + axi.sharedCmd.write := cmdStage.wr + axi.sharedCmd.prot := "010" + axi.sharedCmd.cache := "1111" + axi.sharedCmd.size := log2Up(p.memDataBytes) + axi.sharedCmd.addr := cmdStage.address + axi.sharedCmd.len := cmdStage.beatCountMinusOne.resized + + axi.writeData.arbitrationFrom(dataStage) + axi.writeData.data := dataStage.data + axi.writeData.strb := dataStage.mask + axi.writeData.last := dataStage.last + + rsp.valid := axi.r.valid + rsp.error := !axi.r.isOKAY() + rsp.data := axi.r.data + + axi.r.ready := True + axi.b.ready := True + + axi + } + + + def toAvalon(): AvalonMM = { + val avalonConfig = p.getAvalonConfig() + val mm = AvalonMM(avalonConfig) + mm.read := cmd.valid && !cmd.wr + mm.write := cmd.valid && cmd.wr + mm.address := cmd.address(cmd.address.high downto log2Up(p.memDataWidth/8)) @@ U(0,log2Up(p.memDataWidth/8) bits) + mm.burstCount := cmd.beatCount + mm.byteEnable := cmd.mask + mm.writeData := cmd.data + + cmd.ready := mm.waitRequestn + rsp.valid := mm.readDataValid + rsp.data := mm.readData + rsp.error := mm.response =/= 
AvalonMM.Response.OKAY + + mm + } + + def toWishbone(): Wishbone = { + val wishboneConfig = p.getWishboneConfig() + val bus = Wishbone(wishboneConfig) + val counter = Reg(UInt(log2Up(p.burstSize) bits)) init(0) + val addressShift = log2Up(p.memDataWidth/8) + + val cmdBridge = Stream (DataCacheMemCmd(p)) + val isBurst = cmdBridge.isBurst + cmdBridge.valid := cmd.valid + cmdBridge.address := (isBurst ? (cmd.address(31 downto widthOf(counter) + addressShift) @@ counter @@ U(0, addressShift bits)) | (cmd.address(31 downto addressShift) @@ U(0, addressShift bits))) + cmdBridge.wr := cmd.wr + cmdBridge.mask := cmd.mask + cmdBridge.data := cmd.data + cmdBridge.size := cmd.size + cmdBridge.last := !isBurst || counter === p.burstSize-1 + cmd.ready := cmdBridge.ready && (cmdBridge.wr || cmdBridge.last) + + + when(cmdBridge.fire){ + counter := counter + 1 + when(cmdBridge.last){ + counter := 0 + } + } + + + bus.ADR := cmdBridge.address >> addressShift + bus.CTI := Mux(isBurst, cmdBridge.last ? B"111" | B"010", B"000") + bus.BTE := B"00" + bus.SEL := cmdBridge.wr ? cmdBridge.mask | B((1 << p.memDataBytes)-1) + bus.WE := cmdBridge.wr + bus.DAT_MOSI := cmdBridge.data + + cmdBridge.ready := cmdBridge.valid && bus.ACK + bus.CYC := cmdBridge.valid + bus.STB := cmdBridge.valid + + rsp.valid := RegNext(cmdBridge.valid && !bus.WE && bus.ACK) init(False) + rsp.data := RegNext(bus.DAT_MISO) + rsp.error := False //TODO + bus + } + + + + def toPipelinedMemoryBus(): PipelinedMemoryBus = { + val bus = PipelinedMemoryBus(32,32) + + val counter = Reg(UInt(log2Up(p.burstSize) bits)) init(0) + when(bus.cmd.fire){ counter := counter + 1 } + when( cmd.fire && cmd.last){ counter := 0 } + + bus.cmd.valid := cmd.valid + bus.cmd.address := (cmd.address(31 downto 2) | counter.resized) @@ U"00" + bus.cmd.write := cmd.wr + bus.cmd.mask := cmd.mask + bus.cmd.data := cmd.data + cmd.ready := bus.cmd.ready && (cmd.wr || counter === p.burstSize-1) + rsp.valid := bus.rsp.valid + rsp.data := bus.rsp.payload.data + rsp.error := False + bus + } + + + def toBmb(syncPendingMax : Int = 32, + timeoutCycles : Int = 16) : Bmb = new Area{ + setCompositeName(DataCacheMemBus.this, "Bridge", true) + val pipelinedMemoryBusConfig = p.getBmbParameter() + val bus = Bmb(pipelinedMemoryBusConfig).setCompositeName(this,"toBmb", true) + + case class Context() extends Bundle{ + val isWrite = !p.withWriteResponse generate Bool() + val rspCount = (p.aggregationWidth != 0) generate UInt(p.aggregationWidth bits) + } + + + def sizeToLength(size : UInt) = size.muxListDc((0 to log2Up(p.cpuDataBytes)).map(i => U(i) -> U((1 << i)-1, log2Up(p.cpuDataBytes) bits))) + + val withoutWriteBuffer = if(p.aggregationWidth == 0) new Area { + val busCmdContext = Context() + + bus.cmd.valid := cmd.valid + bus.cmd.last := cmd.last + bus.cmd.opcode := (cmd.wr ? 
B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ)) + bus.cmd.address := cmd.address.resized + bus.cmd.data := cmd.data + bus.cmd.length := cmd.byteCountMinusOne + bus.cmd.mask := cmd.mask + if (p.withExclusive) bus.cmd.exclusive := cmd.exclusive + if (!p.withWriteResponse) busCmdContext.isWrite := cmd.wr + bus.cmd.context := B(busCmdContext) + + cmd.ready := bus.cmd.ready + if(p.withInvalidate) sync.arbitrationFrom(bus.sync) + } + + val withWriteBuffer = if(p.aggregationWidth != 0) new Area { + val buffer = new Area { + val stream = cmd.toEvent().m2sPipe() + val address = Reg(UInt(p.addressWidth bits)) + val length = Reg(UInt(pipelinedMemoryBusConfig.access.lengthWidth bits)) + val write = Reg(Bool) + val exclusive = Reg(Bool) + val data = Reg(Bits(p.memDataWidth bits)) + val mask = Reg(Bits(p.memDataWidth/8 bits)) init(0) + } + + val aggregationRange = log2Up(p.memDataWidth/8)-1 downto log2Up(p.cpuDataWidth/8) + val tagRange = p.addressWidth-1 downto aggregationRange.high+1 + val aggregationEnabled = Reg(Bool) + val aggregationCounter = Reg(UInt(p.aggregationWidth bits)) init(0) + val aggregationCounterFull = aggregationCounter === aggregationCounter.maxValue + val timer = Reg(UInt(log2Up(timeoutCycles)+1 bits)) init(0) + val timerFull = timer.msb + val hit = cmd.address(tagRange) === buffer.address(tagRange) + val cmdExclusive = if(p.withExclusive) cmd.exclusive else False + val canAggregate = cmd.valid && cmd.wr && !cmd.uncached && !cmdExclusive && !timerFull && !aggregationCounterFull && (!buffer.stream.valid || aggregationEnabled && hit) + val doFlush = cmd.valid && !canAggregate || timerFull || aggregationCounterFull || !aggregationEnabled +// val canAggregate = False +// val doFlush = True + val busCmdContext = Context() + val halt = False + + when(cmd.fire){ + aggregationCounter := aggregationCounter + 1 + } + when(buffer.stream.valid && !timerFull){ + timer := timer + 1 + } + when(bus.cmd.fire || !buffer.stream.valid){ + buffer.mask := 0 + aggregationCounter := 0 + timer := 0 + } + + buffer.stream.ready := (bus.cmd.ready && doFlush || canAggregate) && !halt + bus.cmd.valid := buffer.stream.valid && doFlush && !halt + bus.cmd.last := True + bus.cmd.opcode := (buffer.write ? 
B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ)) + bus.cmd.address := buffer.address + bus.cmd.length := buffer.length + bus.cmd.data := buffer.data + bus.cmd.mask := buffer.mask + + if (p.withExclusive) bus.cmd.exclusive := buffer.exclusive + bus.cmd.context.removeAssignments() := B(busCmdContext) + if (!p.withWriteResponse) busCmdContext.isWrite := bus.cmd.isWrite + busCmdContext.rspCount := aggregationCounter + + val aggregationSel = cmd.address(aggregationRange) + when(cmd.fire){ + val dIn = cmd.data.subdivideIn(8 bits) + val dReg = buffer.data.subdivideIn(8 bits) + for(byteId <- 0 until p.memDataBytes){ + when(aggregationSel === byteId / p.cpuDataBytes && cmd.mask(byteId % p.cpuDataBytes)){ + dReg.write(byteId, dIn(byteId % p.cpuDataBytes)) + buffer.mask(byteId) := True + } + } + } + + when(cmd.fire){ + buffer.write := cmd.wr + buffer.address := cmd.address.resized + buffer.length := cmd.byteCountMinusOne + if (p.withExclusive) buffer.exclusive := cmd.exclusive + + when(cmd.wr && !cmd.uncached && !cmdExclusive){ + aggregationEnabled := True + buffer.address(aggregationRange.high downto 0) := 0 + buffer.length := p.memDataBytes-1 + } otherwise { + aggregationEnabled := False + } + } + + + val rspCtx = bus.rsp.context.as(Context()) + rsp.aggregated := rspCtx.rspCount + + val syncLogic = p.withInvalidate generate new Area{ + val cmdCtx = Stream(UInt(p.aggregationWidth bits)) + cmdCtx.valid := bus.cmd.fire && bus.cmd.isWrite + cmdCtx.payload := aggregationCounter + halt setWhen(!cmdCtx.ready) + + val syncCtx = cmdCtx.queue(syncPendingMax).s2mPipe().m2sPipe() //Assume latency of sync is at least 3 cycles + syncCtx.ready := bus.sync.fire + + sync.arbitrationFrom(bus.sync) + sync.aggregated := syncCtx.payload + } + } + + + rsp.valid := bus.rsp.valid + if(!p.withWriteResponse) rsp.valid clearWhen(bus.rsp.context(0)) + rsp.data := bus.rsp.data + rsp.error := bus.rsp.isError + rsp.last := bus.rsp.last + if(p.withExclusive) rsp.exclusive := bus.rsp.exclusive + bus.rsp.ready := True + + val invalidateLogic = p.withInvalidate generate new Area{ + val beatCountMinusOne = bus.inv.transferBeatCountMinusOne(p.bytePerLine) + val counter = Reg(UInt(widthOf(beatCountMinusOne) bits)) init(0) + + inv.valid := bus.inv.valid + inv.address := bus.inv.address + (counter << log2Up(p.bytePerLine)) + inv.enable := bus.inv.all + inv.last := counter === beatCountMinusOne + bus.inv.ready := inv.last && inv.ready + + if(widthOf(counter) != 0) when(inv.fire){ + counter := counter + 1 + when(inv.last){ + counter := 0 + } + } + + bus.ack.arbitrationFrom(ack.throwWhen(!ack.last)) + } + }.bus + +} + +object DataCacheExternalAmoStates extends SpinalEnum{ + val LR_CMD, LR_RSP, SC_CMD, SC_RSP = newElement(); +} + +//If external amo, mem rsp should stay +class DataCache(val p : DataCacheConfig, mmuParameter : MemoryTranslatorBusParameter) extends Component{ + import p._ + + val io = new Bundle{ + val cpu = slave(DataCacheCpuBus(p, mmuParameter)) + val mem = master(DataCacheMemBus(p)) + } + + val haltCpu = False + val lineWidth = bytePerLine*8 + val lineCount = cacheSize/bytePerLine + val wordWidth = cpuDataWidth + val wordWidthLog2 = log2Up(wordWidth) + val wordPerLine = lineWidth/wordWidth + val bytePerWord = wordWidth/8 + val wayLineCount = lineCount/wayCount + val wayLineLog2 = log2Up(wayLineCount) + val wayWordCount = wayLineCount * wordPerLine + val memWordPerLine = lineWidth/memDataWidth + val memTransactionPerLine = p.bytePerLine / (p.memDataWidth/8) + val bytePerMemWord = memDataWidth/8 + val wayMemWordCount = 
wayLineCount * memWordPerLine + + val tagRange = addressWidth-1 downto log2Up(wayLineCount*bytePerLine) + val lineRange = tagRange.low-1 downto log2Up(bytePerLine) + val cpuWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerWord) + val memWordRange = log2Up(bytePerLine)-1 downto log2Up(bytePerMemWord) + val hitRange = tagRange.high downto lineRange.low + val memWordToCpuWordRange = log2Up(bytePerMemWord)-1 downto log2Up(bytePerWord) + val cpuWordToRfWordRange = log2Up(bytePerWord)-1 downto log2Up(p.rfDataBytes) + + + class LineInfo() extends Bundle{ + val valid, error = Bool() + val address = UInt(tagRange.length bit) + } + + val tagsReadCmd = Flow(UInt(log2Up(wayLineCount) bits)) + val tagsInvReadCmd = withInvalidate generate Flow(UInt(log2Up(wayLineCount) bits)) + val tagsWriteCmd = Flow(new Bundle{ + val way = Bits(wayCount bits) + val address = UInt(log2Up(wayLineCount) bits) + val data = new LineInfo() + }) + + val tagsWriteLastCmd = RegNext(tagsWriteCmd) + + val dataReadCmd = Flow(UInt(log2Up(wayMemWordCount) bits)) + val dataWriteCmd = Flow(new Bundle{ + val way = Bits(wayCount bits) + val address = UInt(log2Up(wayMemWordCount) bits) + val data = Bits(memDataWidth bits) + val mask = Bits(memDataWidth/8 bits) + }) + + + val ways = for(i <- 0 until wayCount) yield new Area{ + val tags = Mem(new LineInfo(), wayLineCount) + val data = Mem(Bits(memDataWidth bit), wayMemWordCount) + + //Reads + val tagsReadRsp = asyncTagMemory match { + case false => tags.readSync(tagsReadCmd.payload, tagsReadCmd.valid && !io.cpu.memory.isStuck) + case true => tags.readAsync(RegNextWhen(tagsReadCmd.payload, io.cpu.execute.isValid && !io.cpu.memory.isStuck)) + } + val dataReadRspMem = data.readSync(dataReadCmd.payload, dataReadCmd.valid && !io.cpu.memory.isStuck) + val dataReadRspSel = if(mergeExecuteMemory) io.cpu.writeBack.address else io.cpu.memory.address + val dataReadRsp = dataReadRspMem.subdivideIn(cpuDataWidth bits).read(dataReadRspSel(memWordToCpuWordRange)) + + val tagsInvReadRsp = withInvalidate generate(asyncTagMemory match { + case false => tags.readSync(tagsInvReadCmd.payload, tagsInvReadCmd.valid) + case true => tags.readAsync(RegNextWhen(tagsInvReadCmd.payload, tagsInvReadCmd.valid)) + }) + + //Writes + when(tagsWriteCmd.valid && tagsWriteCmd.way(i)){ + tags.write(tagsWriteCmd.address, tagsWriteCmd.data) + } + when(dataWriteCmd.valid && dataWriteCmd.way(i)){ + data.write( + address = dataWriteCmd.address, + data = dataWriteCmd.data, + mask = dataWriteCmd.mask + ) + } + } + + + tagsReadCmd.valid := False + tagsReadCmd.payload.assignDontCare() + dataReadCmd.valid := False + dataReadCmd.payload.assignDontCare() + tagsWriteCmd.valid := False + tagsWriteCmd.payload.assignDontCare() + dataWriteCmd.valid := False + dataWriteCmd.payload.assignDontCare() + + when(io.cpu.execute.isValid && !io.cpu.memory.isStuck){ + tagsReadCmd.valid := True + dataReadCmd.valid := True + tagsReadCmd.payload := io.cpu.execute.address(lineRange) + dataReadCmd.payload := io.cpu.execute.address(lineRange.high downto memWordRange.low) + } + + def collisionProcess(readAddress : UInt, readMask : Bits): Bits ={ + val ret = Bits(wayCount bits) + val readAddressAligned = (readAddress >> log2Up(memDataWidth/cpuDataWidth)) + val dataWriteMaskAligned = dataWriteCmd.mask.subdivideIn(memDataWidth/cpuDataWidth slices).read(readAddress(log2Up(memDataWidth/cpuDataWidth)-1 downto 0)) + for(i <- 0 until wayCount){ + ret(i) := dataWriteCmd.valid && dataWriteCmd.way(i) && dataWriteCmd.address === readAddressAligned && (readMask & 
dataWriteMaskAligned) =/= 0 + } + ret + } + + + io.cpu.execute.haltIt := False + + val rspSync = True + val rspLast = True + val memCmdSent = RegInit(False) setWhen (io.mem.cmd.fire) clearWhen (!io.cpu.writeBack.isStuck) + val pending = withExclusive generate new Area{ + val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + val counterNext = counter + U(io.mem.cmd.fire && io.mem.cmd.last) - ((io.mem.rsp.valid && io.mem.rsp.last) ? (io.mem.rsp.aggregated +^ 1) | 0) + counter := counterNext + + val done = RegNext(counterNext === 0) + val full = RegNext(counter.msb) //Has margin + val last = RegNext(counterNext === 1) //Equivalent to counter === 1 but pipelined + + if(!withInvalidate) { + io.cpu.execute.haltIt setWhen(full) + } + + rspSync clearWhen (!last || !memCmdSent) + rspLast clearWhen (!last) + } + + val sync = withInvalidate generate new Area{ + io.mem.sync.ready := True + val syncCount = io.mem.sync.aggregated +^ 1 + val syncContext = new Area{ + val history = Mem(Bool, pendingMax) + val wPtr, rPtr = Reg(UInt(log2Up(pendingMax)+1 bits)) init(0) + when(io.mem.cmd.fire && io.mem.cmd.wr){ + history.write(wPtr.resized, io.mem.cmd.uncached) + wPtr := wPtr + 1 + } + + when(io.mem.sync.fire){ + rPtr := rPtr + syncCount + } + val uncached = history.readAsync(rPtr.resized) + val full = RegNext(wPtr - rPtr >= pendingMax-1) + io.cpu.execute.haltIt setWhen(full) + } + + def pending(inc : Bool, dec : Bool) = new Area { + val pendingSync = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + val pendingSyncNext = pendingSync + U(io.mem.cmd.fire && io.mem.cmd.wr && inc) - ((io.mem.sync.fire && dec) ? syncCount | 0) + pendingSync := pendingSyncNext + } + + val writeCached = pending(inc = !io.mem.cmd.uncached, dec = !syncContext.uncached) + val writeUncached = pending(inc = io.mem.cmd.uncached, dec = syncContext.uncached) + + def track(load : Bool, uncached : Boolean) = new Area { + val counter = Reg(UInt(log2Up(pendingMax) + 1 bits)) init(0) + counter := counter - ((io.mem.sync.fire && counter =/= 0 && (if(uncached) syncContext.uncached else !syncContext.uncached)) ? 
syncCount | 0) + when(load){ counter := (if(uncached) writeUncached.pendingSyncNext else writeCached.pendingSyncNext) } + + val busy = counter =/= 0 + } + + val w2w = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SW, uncached = false) + val w2r = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SR, uncached = false) + val w2i = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SI, uncached = false) + val w2o = track(load = io.cpu.writeBack.fence.PW && io.cpu.writeBack.fence.SO, uncached = false) + val o2w = track(load = io.cpu.writeBack.fence.PO && io.cpu.writeBack.fence.SW, uncached = true) + val o2r = track(load = io.cpu.writeBack.fence.PO && io.cpu.writeBack.fence.SR, uncached = true) + //Assume o2i and o2o are ordered by the interconnect + + val notTotalyConsistent = w2w.busy || w2r.busy || w2i.busy || w2o.busy || o2w.busy || o2r.busy + } + + + + + val stage0 = new Area{ +// val mask = io.cpu.execute.size.mux ( +// U(0) -> B"0001", +// U(1) -> B"0011", +// default -> B"1111" +// ) |<< io.cpu.execute.address(1 downto 0) + + val mask = io.cpu.execute.size.muxListDc((0 to log2Up(p.cpuDataBytes)).map(i => U(i) -> B((1 << (1 << i)) -1, p.cpuDataBytes bits))) |<< io.cpu.execute.address(log2Up(p.cpuDataBytes)-1 downto 0) + + + val dataColisions = collisionProcess(io.cpu.execute.address(lineRange.high downto cpuWordRange.low), mask) + val wayInvalidate = B(0, wayCount bits) //Used if invalidate enabled + + val isAmo = if(withAmo) io.cpu.execute.isAmo else False + } + + val stageA = new Area{ + def stagePipe[T <: Data](that : T) = if(mergeExecuteMemory) CombInit(that) else RegNextWhen(that, !io.cpu.memory.isStuck) + val request = stagePipe(io.cpu.execute.args) + val mask = stagePipe(stage0.mask) + io.cpu.memory.isWrite := request.wr + + val isAmo = if(withAmo) request.isAmo else False + val isLrsc = if(withAmo) request.isLrsc else False + val consistancyCheck = (withInvalidate || withWriteResponse) generate new Area { + val hazard = False + val w = sync.w2w.busy || sync.o2w.busy + val r = stagePipe(sync.w2r.busy || sync.o2r.busy) || sync.w2r.busy || sync.o2r.busy // As it use the cache, need to check against the execute stage status too + val o = CombInit(sync.w2o.busy) + val i = CombInit(sync.w2i.busy) + + val s = io.cpu.memory.mmuRsp.isIoAccess ? o | w + val l = io.cpu.memory.mmuRsp.isIoAccess ? i | r + + when(isAmo? (s || l) | (request.wr ? s | l)){ + hazard := True + } + when(request.totalyConsistent && (sync.notTotalyConsistent || io.cpu.writeBack.isValid && io.cpu.writeBack.isWrite)){ + hazard := True + } + } + + val wayHits = earlyWaysHits generate Bits(wayCount bits) + val indirectTlbHitGen = (earlyWaysHits && !directTlbHit) generate new Area { + wayHits := B(ways.map(way => (io.cpu.memory.mmuRsp.physicalAddress(tagRange) === way.tagsReadRsp.address && way.tagsReadRsp.valid))) + } + val directTlbHitGen = (earlyWaysHits && directTlbHit) generate new Area { + val wayTlbHits = for (way <- ways) yield for (tlb <- io.cpu.memory.mmuRsp.ways) yield { + way.tagsReadRsp.address === tlb.physical(tagRange) && tlb.sel + } + val translatedHits = B(wayTlbHits.map(_.orR)) + val bypassHits = B(ways.map(_.tagsReadRsp.address === io.cpu.memory.address(tagRange))) + wayHits := (io.cpu.memory.mmuRsp.bypassTranslation ? bypassHits | translatedHits) & B(ways.map(_.tagsReadRsp.valid)) + } + + val dataMux = earlyDataMux generate MuxOH(wayHits, ways.map(_.dataReadRsp)) + val wayInvalidate = stagePipe(stage0. 
wayInvalidate) + val dataColisions = if(mergeExecuteMemory){ + stagePipe(stage0.dataColisions) + } else { + //Assume the writeback stage will never be unstall memory acces while memory stage is stalled + stagePipe(stage0.dataColisions) | collisionProcess(io.cpu.memory.address(lineRange.high downto cpuWordRange.low), mask) + } + } + + val stageB = new Area { + def stagePipe[T <: Data](that : T) = RegNextWhen(that, !io.cpu.writeBack.isStuck) + def ramPipe[T <: Data](that : T) = if(mergeExecuteMemory) CombInit(that) else RegNextWhen(that, !io.cpu.writeBack.isStuck) + val request = RegNextWhen(stageA.request, !io.cpu.writeBack.isStuck) + val mmuRspFreeze = False + val mmuRsp = RegNextWhen(io.cpu.memory.mmuRsp, !io.cpu.writeBack.isStuck && !mmuRspFreeze) + val tagsReadRsp = ways.map(w => ramPipe(w.tagsReadRsp)) + val dataReadRsp = !earlyDataMux generate ways.map(w => ramPipe(w.dataReadRsp)) + val wayInvalidate = stagePipe(stageA. wayInvalidate) + val consistancyHazard = if(stageA.consistancyCheck != null) stagePipe(stageA.consistancyCheck.hazard) else False + val dataColisions = stagePipe(stageA.dataColisions) +// val unaligned = if(!catchUnaligned) False else stagePipe((stageA.request.size === 2 && io.cpu.memory.address(1 downto 0) =/= 0) || (stageA.request.size === 1 && io.cpu.memory.address(0 downto 0) =/= 0)) + val unaligned = if(!catchUnaligned) False else stagePipe((1 to log2Up(p.cpuDataBytes)).map(i => stageA.request.size === i && io.cpu.memory.address(i-1 downto 0) =/= 0).orR) + val waysHitsBeforeInvalidate = if(earlyWaysHits) stagePipe(B(stageA.wayHits)) else B(tagsReadRsp.map(tag => mmuRsp.physicalAddress(tagRange) === tag.address && tag.valid).asBits()) + val waysHits = waysHitsBeforeInvalidate & ~wayInvalidate + val waysHit = waysHits.orR + val dataMux = if(earlyDataMux) stagePipe(stageA.dataMux) else MuxOH(waysHits, dataReadRsp) + val mask = stagePipe(stageA.mask) + + //Loader interface + val loaderValid = False + + val ioMemRspMuxed = io.mem.rsp.data.subdivideIn(cpuDataWidth bits).read(io.cpu.writeBack.address(memWordToCpuWordRange)) + + io.cpu.writeBack.haltIt := True + + //Evict the cache after reset logics + val flusher = new Area { + val waitDone = RegInit(False) clearWhen(io.cpu.flush.ready) + val hold = False + val counter = Reg(UInt(lineRange.size + 1 bits)) init(0) + when(!counter.msb) { + tagsWriteCmd.valid := True + tagsWriteCmd.address := counter.resized + tagsWriteCmd.way.setAll() + tagsWriteCmd.data.valid := False + io.cpu.execute.haltIt := True + when(!hold) { + counter := counter + 1 + when(io.cpu.flush.singleLine){ + counter.msb := True + } + } + } + + io.cpu.flush.ready := waitDone && counter.msb + + val start = RegInit(True) //Used to relax timings + start := !waitDone && !start && io.cpu.flush.valid && !io.cpu.execute.isValid && !io.cpu.memory.isValid && !io.cpu.writeBack.isValid && !io.cpu.redo + + when(start){ + waitDone := True + counter := 0 + when(io.cpu.flush.singleLine){ + counter := U"0" @@ io.cpu.flush.lineId + } + } + } + + val lrSc = withInternalLrSc generate new Area{ + val reserved = RegInit(False) + when(io.cpu.writeBack.isValid && io.cpu.writeBack.isFiring){ + reserved setWhen(request.isLrsc) + reserved clearWhen(request.wr) + } + } + + val isAmo = if(withAmo) request.isAmo else False + val isAmoCached = if(withInternalAmo) isAmo else False + val isExternalLsrc = if(withExternalLrSc) request.isLrsc else False + val isExternalAmo = if(withExternalAmo) request.isAmo else False + + val requestDataBypass = CombInit(io.cpu.writeBack.storeData) + import 
DataCacheExternalAmoStates._ + val amo = withAmo generate new Area{ + def rf = io.cpu.writeBack.storeData(p.rfDataWidth-1 downto 0) + def memLarger = if(withInternalAmo) dataMux else ioMemRspMuxed + def mem = memLarger.subdivideIn(rfDataWidth bits).read(io.cpu.writeBack.address(cpuWordToRfWordRange)) + val compare = request.amoCtrl.alu.msb + val unsigned = request.amoCtrl.alu(2 downto 1) === B"11" + val addSub = (rf.asSInt + Mux(compare, ~mem, mem).asSInt + Mux(compare, S(1), S(0))).asBits + val less = Mux(rf.msb === mem.msb, addSub.msb, Mux(unsigned, mem.msb, rf.msb)) + val selectRf = request.amoCtrl.swap ? True | (request.amoCtrl.alu.lsb ^ less) + + val result = (request.amoCtrl.alu | (request.amoCtrl.swap ## B"00")).mux( + B"000" -> addSub, + B"001" -> (rf ^ mem), + B"010" -> (rf | mem), + B"011" -> (rf & mem), + default -> (selectRf ? rf | mem) + ) + // val resultRegValid = RegNext(True) clearWhen(!io.cpu.writeBack.isStuck) + // val resultReg = RegNext(result) + val resultReg = Reg(Bits(32 bits)) + + val internal = withInternalAmo generate new Area{ + val resultRegValid = RegNext(io.cpu.writeBack.isStuck) + resultReg := result + } + val external = !withInternalAmo generate new Area{ + val state = RegInit(LR_CMD) + } + } + + + val cpuWriteToCache = False + when(cpuWriteToCache){ + dataWriteCmd.valid setWhen(request.wr && waysHit) + dataWriteCmd.address := mmuRsp.physicalAddress(lineRange.high downto memWordRange.low) + dataWriteCmd.data.subdivideIn(cpuDataWidth bits).foreach(_ := requestDataBypass) + dataWriteCmd.mask := 0 + dataWriteCmd.mask.subdivideIn(cpuDataWidth/8 bits).write(io.cpu.writeBack.address(memWordToCpuWordRange), mask) + dataWriteCmd.way := waysHits + } + + val badPermissions = (!mmuRsp.allowWrite && request.wr) || (!mmuRsp.allowRead && (!request.wr || isAmo)) + val loadStoreFault = io.cpu.writeBack.isValid && (mmuRsp.exception || badPermissions) + + io.cpu.redo := False + io.cpu.writeBack.accessError := False + io.cpu.writeBack.mmuException := loadStoreFault && (if(catchIllegal) mmuRsp.isPaging else False) + io.cpu.writeBack.unalignedAccess := io.cpu.writeBack.isValid && unaligned + io.cpu.writeBack.isWrite := request.wr + + + io.mem.cmd.valid := False + io.mem.cmd.address := mmuRsp.physicalAddress + io.mem.cmd.last := True + io.mem.cmd.wr := request.wr + io.mem.cmd.mask := mask + io.mem.cmd.data := requestDataBypass + io.mem.cmd.uncached := mmuRsp.isIoAccess + io.mem.cmd.size := request.size.resized + if(withExternalLrSc) io.mem.cmd.exclusive := request.isLrsc || isAmo + + + val bypassCache = mmuRsp.isIoAccess || isExternalLsrc || isExternalAmo + + io.cpu.writeBack.keepMemRspData := False + when(io.cpu.writeBack.isValid) { + when(isExternalAmo){ + if(withExternalAmo) switch(amo.external.state){ + is(LR_CMD){ + io.mem.cmd.valid := True + io.mem.cmd.wr := False + when(io.mem.cmd.ready) { + amo.external.state := LR_RSP + } + } + is(LR_RSP){ + when(io.mem.rsp.valid && pending.last) { + amo.external.state := SC_CMD + amo.resultReg := amo.result + } + } + is(SC_CMD){ + io.mem.cmd.valid := True + when(io.mem.cmd.ready) { + amo.external.state := SC_RSP + } + } + is(SC_RSP){ + io.cpu.writeBack.keepMemRspData := True + when(io.mem.rsp.valid) { + amo.external.state := LR_CMD + when(io.mem.rsp.exclusive){ //Success + cpuWriteToCache := True + io.cpu.writeBack.haltIt := False + } + } + } + } + } elsewhen(mmuRsp.isIoAccess || isExternalLsrc) { + val waitResponse = !request.wr + if(withExternalLrSc) waitResponse setWhen(request.isLrsc) + + 
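+          //Uncached (IO) and external LR/SC accesses bypass the cache: plain writes release the write-back
+          //stage as soon as the command is accepted, while reads and LR/SC wait for the memory response.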
io.cpu.writeBack.haltIt.clearWhen(waitResponse ? (io.mem.rsp.valid && rspSync) | io.mem.cmd.ready) + + io.mem.cmd.valid := !memCmdSent + + if(withInternalLrSc) when(request.isLrsc && !lrSc.reserved){ + io.mem.cmd.valid := False + io.cpu.writeBack.haltIt := False + } + } otherwise { + when(waysHit || request.wr && !isAmoCached) { //Do not require a cache refill ? + cpuWriteToCache := True + + //Write through + io.mem.cmd.valid setWhen(request.wr) + io.cpu.writeBack.haltIt clearWhen(!request.wr || io.mem.cmd.ready) + + if(withInternalAmo) when(isAmo){ + when(!amo.internal.resultRegValid) { + io.mem.cmd.valid := False + dataWriteCmd.valid := False + io.cpu.writeBack.haltIt := True + } + } + + //On write to read dataColisions + when((!request.wr || isAmoCached) && (dataColisions & waysHits) =/= 0){ + io.cpu.redo := True + if(withAmo) io.mem.cmd.valid := False + } + + if(withInternalLrSc) when(request.isLrsc && !lrSc.reserved){ + io.mem.cmd.valid := False + dataWriteCmd.valid := False + io.cpu.writeBack.haltIt := False + } + } otherwise { //Do refill + //Emit cmd + io.mem.cmd.valid setWhen(!memCmdSent) + io.mem.cmd.wr := False + io.mem.cmd.address(0, lineRange.low bits) := 0 + io.mem.cmd.size := log2Up(p.bytePerLine) + + loaderValid setWhen(io.mem.cmd.ready) + } + } + } + + when(bypassCache){ + io.cpu.writeBack.data := ioMemRspMuxed + def isLast = if(pending != null) pending.last else True + if(catchAccessError) io.cpu.writeBack.accessError := !request.wr && isLast && io.mem.rsp.valid && io.mem.rsp.error + } otherwise { + io.cpu.writeBack.data := dataMux + if(catchAccessError) io.cpu.writeBack.accessError := (waysHits & B(tagsReadRsp.map(_.error))) =/= 0 || (loadStoreFault && !mmuRsp.isPaging) + } + + if(withLrSc) { + val success = if(withInternalLrSc)lrSc.reserved else io.mem.rsp.exclusive + io.cpu.writeBack.exclusiveOk := success + when(request.isLrsc && request.wr){ + // io.cpu.writeBack.data := B(!success).resized + if(withExternalLrSc) when(io.cpu.writeBack.isValid && io.mem.rsp.valid && rspSync && success && waysHit){ + cpuWriteToCache := True + } + } + } + if(withAmo) when(request.isAmo){ + requestDataBypass.subdivideIn(p.rfDataWidth bits).foreach(_ := amo.resultReg) + } + + //remove side effects on exceptions + when(consistancyHazard || mmuRsp.refilling || io.cpu.writeBack.accessError || io.cpu.writeBack.mmuException || io.cpu.writeBack.unalignedAccess){ + io.mem.cmd.valid := False + tagsWriteCmd.valid := False + dataWriteCmd.valid := False + loaderValid := False + io.cpu.writeBack.haltIt := False + if(withInternalLrSc) lrSc.reserved := lrSc.reserved + if(withExternalAmo) amo.external.state := LR_CMD + } + io.cpu.redo setWhen(io.cpu.writeBack.isValid && (mmuRsp.refilling || consistancyHazard)) + + assert(!(io.cpu.writeBack.isValid && !io.cpu.writeBack.haltIt && io.cpu.writeBack.isStuck), "writeBack stuck by another plugin is not allowed", ERROR) + } + + val loader = new Area{ + val valid = RegInit(False) setWhen(stageB.loaderValid) + val baseAddress = stageB.mmuRsp.physicalAddress + + val counter = Counter(memTransactionPerLine) + val waysAllocator = Reg(Bits(wayCount bits)) init(1) + val error = RegInit(False) + val kill = False + val killReg = RegInit(False) setWhen(kill) + + when(valid && io.mem.rsp.valid && rspLast){ + dataWriteCmd.valid := True + dataWriteCmd.address := baseAddress(lineRange) @@ counter + dataWriteCmd.data := io.mem.rsp.data + dataWriteCmd.mask.setAll() + dataWriteCmd.way := waysAllocator + error := error | io.mem.rsp.error + counter.increment() + } + + val done = 
CombInit(counter.willOverflow) + if(withInvalidate) done setWhen(valid && pending.counter === 0) //Used to solve invalidate write request at the same time + + when(done){ + valid := False + + //Update tags + tagsWriteCmd.valid := True + tagsWriteCmd.address := baseAddress(lineRange) + tagsWriteCmd.data.valid := !(kill || killReg) + tagsWriteCmd.data.address := baseAddress(tagRange) + tagsWriteCmd.data.error := error || (io.mem.rsp.valid && io.mem.rsp.error) + tagsWriteCmd.way := waysAllocator + + error := False + killReg := False + } + + when(!valid){ + waysAllocator := (waysAllocator ## waysAllocator.msb).resized + } + + io.cpu.redo setWhen(valid.rise()) + io.cpu.execute.refilling := valid + + stageB.mmuRspFreeze setWhen(stageB.loaderValid || valid) + } + + val invalidate = withInvalidate generate new Area{ + val s0 = new Area{ + val input = io.mem.inv + tagsInvReadCmd.valid := input.fire + tagsInvReadCmd.payload := input.address(lineRange) + + val loaderTagHit = input.address(tagRange) === loader.baseAddress(tagRange) + val loaderLineHit = input.address(lineRange) === loader.baseAddress(lineRange) + when(input.valid && input.enable && loader.valid && loaderLineHit && loaderTagHit){ + loader.kill := True + } + } + val s1 = new Area{ + val input = s0.input.stage() + val loaderValid = RegNextWhen(loader.valid, s0.input.ready) + val loaderWay = RegNextWhen(loader.waysAllocator, s0.input.ready) + val loaderTagHit = RegNextWhen(s0.loaderTagHit, s0.input.ready) + val loaderLineHit = RegNextWhen(s0.loaderLineHit, s0.input.ready) + val invalidations = Bits(wayCount bits) + + var wayHits = B(ways.map(way => (input.address(tagRange) === way.tagsInvReadRsp.address && way.tagsInvReadRsp.valid))) & ~invalidations + + //Handle invalider read during loader write hazard + when(loaderValid && loaderLineHit && !loaderTagHit){ + wayHits \= wayHits & ~loaderWay + } + } + val s2 = new Area{ + val input = s1.input.stage() + val wayHits = RegNextWhen(s1.wayHits, s1.input.ready) + val wayHit = wayHits.orR + + when(input.valid && input.enable) { + //Manage invalidate write during cpu read hazard + when(input.address(lineRange) === io.cpu.execute.address(lineRange)) { + stage0.wayInvalidate := wayHits + } + + //Invalidate cache tag + when(wayHit) { + tagsWriteCmd.valid := True + stageB.flusher.hold := True + tagsWriteCmd.address := input.address(lineRange) + tagsWriteCmd.data.valid := False + tagsWriteCmd.way := wayHits + loader.done := False //Hold loader tags write + } + } + io.mem.ack.arbitrationFrom(input) + io.mem.ack.hit := wayHit + io.mem.ack.last := input.last + + //Manage invalidation read during write hazard + s1.invalidations := RegNextWhen((input.valid && input.enable && input.address(lineRange) === s0.input.address(lineRange)) ? 
wayHits | 0, s0.input.ready) + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/ip/InstructionCache.scala b/VexRiscv/src/main/scala/vexriscv/ip/InstructionCache.scala new file mode 100644 index 0000000..e09712c --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/ip/InstructionCache.scala @@ -0,0 +1,487 @@ +package vexriscv.ip + +import vexriscv._ +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.amba4.axi.{Axi4Config, Axi4ReadOnly} +import spinal.lib.bus.avalon.{AvalonMM, AvalonMMConfig} +import spinal.lib.bus.bmb.{Bmb, BmbAccessParameter, BmbParameter, BmbSourceParameter} +import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig} +import spinal.lib.bus.simple._ +import vexriscv.plugin.{IBusSimpleBus, IBusSimplePlugin} + + +case class InstructionCacheConfig( cacheSize : Int, + bytePerLine : Int, + wayCount : Int, + addressWidth : Int, + cpuDataWidth : Int, + memDataWidth : Int, + catchIllegalAccess : Boolean, + catchAccessFault : Boolean, + asyncTagMemory : Boolean, + twoCycleCache : Boolean = true, + twoCycleRam : Boolean = false, + twoCycleRamInnerMux : Boolean = false, + preResetFlush : Boolean = false, + bypassGen : Boolean = false, + reducedBankWidth : Boolean = false){ + + assert(!(twoCycleRam && !twoCycleCache)) + + def burstSize = bytePerLine*8/memDataWidth + def catchSomething = catchAccessFault || catchIllegalAccess + + def getAxi4Config() = Axi4Config( + addressWidth = addressWidth, + dataWidth = memDataWidth, + useId = false, + useRegion = false, + useLock = false, + useQos = false, + useSize = false + ) + + def getAvalonConfig() = AvalonMMConfig.bursted( + addressWidth = addressWidth, + dataWidth = memDataWidth, + burstCountWidth = log2Up(burstSize + 1)).getReadOnlyConfig.copy( + useResponse = true, + constantBurstBehavior = true + ) + + def getPipelinedMemoryBusConfig() = PipelinedMemoryBusConfig( + addressWidth = 32, + dataWidth = 32 + ) + + def getWishboneConfig() = WishboneConfig( + addressWidth = 32-log2Up(memDataWidth/8), + dataWidth = memDataWidth, + selWidth = memDataWidth/8, + useSTALL = false, + useLOCK = false, + useERR = true, + useRTY = false, + tgaWidth = 0, + tgcWidth = 0, + tgdWidth = 0, + useBTE = true, + useCTI = true + ) + + def getBmbParameter() = BmbParameter( + BmbAccessParameter( + addressWidth = 32, + dataWidth = memDataWidth + ).addSources(1, BmbSourceParameter( + lengthWidth = log2Up(this.bytePerLine), + contextWidth = 0, + canWrite = false, + alignment = BmbParameter.BurstAlignement.LENGTH, + maximumPendingTransaction = 1 + )) + ) +} + + + +case class InstructionCacheCpuPrefetch(p : InstructionCacheConfig) extends Bundle with IMasterSlave{ + val isValid = Bool + val haltIt = Bool + val pc = UInt(p.addressWidth bit) + + override def asMaster(): Unit = { + out(isValid, pc) + in(haltIt) + } +} + +trait InstructionCacheCommons{ + val isValid : Bool + val isStuck : Bool + val pc : UInt + val physicalAddress : UInt + val data : Bits + val cacheMiss, error, mmuRefilling, mmuException, isUser : Bool +} + +case class InstructionCacheCpuFetch(p : InstructionCacheConfig, mmuParameter : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave with InstructionCacheCommons { + val isValid = Bool() + val isStuck = Bool() + val isRemoved = Bool() + val pc = UInt(p.addressWidth bits) + val data = Bits(p.cpuDataWidth bits) + val dataBypassValid = p.bypassGen generate Bool() + val dataBypass = p.bypassGen generate Bits(p.cpuDataWidth bits) + val mmuRsp = MemoryTranslatorRsp(mmuParameter) + val physicalAddress = UInt(p.addressWidth bits) + 
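+  //With a single-cycle cache (twoCycleCache = false) the hit/miss and MMU status is reported here in the
+  //fetch stage; with twoCycleCache it is reported one stage later, on InstructionCacheCpuDecode.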
val cacheMiss, error, mmuRefilling, mmuException, isUser = ifGen(!p.twoCycleCache)(Bool) + + override def asMaster(): Unit = { + out(isValid, isStuck, isRemoved, pc) + inWithNull(error,mmuRefilling,mmuException,data, cacheMiss,physicalAddress) + outWithNull(isUser, dataBypass, dataBypassValid) + out(mmuRsp) + } +} + + +case class InstructionCacheCpuDecode(p : InstructionCacheConfig) extends Bundle with IMasterSlave with InstructionCacheCommons { + val isValid = Bool + val isStuck = Bool + val pc = UInt(p.addressWidth bits) + val physicalAddress = UInt(p.addressWidth bits) + val data = Bits(p.cpuDataWidth bits) + val cacheMiss, error, mmuRefilling, mmuException, isUser = ifGen(p.twoCycleCache)(Bool) + + override def asMaster(): Unit = { + out(isValid, isStuck, pc) + outWithNull(isUser) + inWithNull(error, mmuRefilling, mmuException,data, cacheMiss, physicalAddress) + } +} + +case class InstructionCacheCpuBus(p : InstructionCacheConfig, mmuParameter : MemoryTranslatorBusParameter) extends Bundle with IMasterSlave{ + val prefetch = InstructionCacheCpuPrefetch(p) + val fetch = InstructionCacheCpuFetch(p, mmuParameter) + val decode = InstructionCacheCpuDecode(p) + val fill = Flow(UInt(p.addressWidth bits)) + + override def asMaster(): Unit = { + master(prefetch, fetch, decode, fill) + } +} + +case class InstructionCacheMemCmd(p : InstructionCacheConfig) extends Bundle{ + val address = UInt(p.addressWidth bit) + val size = UInt(log2Up(log2Up(p.bytePerLine) + 1) bits) +} + +case class InstructionCacheMemRsp(p : InstructionCacheConfig) extends Bundle{ + val data = Bits(p.memDataWidth bit) + val error = Bool +} + +case class InstructionCacheMemBus(p : InstructionCacheConfig) extends Bundle with IMasterSlave{ + val cmd = Stream (InstructionCacheMemCmd(p)) + val rsp = Flow (InstructionCacheMemRsp(p)) + + override def asMaster(): Unit = { + master(cmd) + slave(rsp) + } + + def toAxi4ReadOnly(): Axi4ReadOnly = { + val axiConfig = p.getAxi4Config() + val mm = Axi4ReadOnly(axiConfig) + + mm.readCmd.valid := cmd.valid + mm.readCmd.len := p.burstSize-1 + mm.readCmd.addr := cmd.address + mm.readCmd.prot := "110" + mm.readCmd.cache := "1111" + mm.readCmd.setBurstINCR() + cmd.ready := mm.readCmd.ready + rsp.valid := mm.readRsp.valid + rsp.data := mm.readRsp.data + rsp.error := !mm.readRsp.isOKAY() + mm.readRsp.ready := True + mm + } + + def toAvalon(): AvalonMM = { + val avalonConfig = p.getAvalonConfig() + val mm = AvalonMM(avalonConfig) + mm.read := cmd.valid + mm.burstCount := U(p.burstSize) + mm.address := cmd.address + cmd.ready := mm.waitRequestn + rsp.valid := mm.readDataValid + rsp.data := mm.readData + rsp.error := mm.response =/= AvalonMM.Response.OKAY + mm + } + + + def toPipelinedMemoryBus(): PipelinedMemoryBus = { + val pipelinedMemoryBusConfig = p.getPipelinedMemoryBusConfig() + val bus = PipelinedMemoryBus(pipelinedMemoryBusConfig) + val counter = Counter(p.burstSize, bus.cmd.fire) + bus.cmd.valid := cmd.valid + bus.cmd.address := cmd.address(31 downto widthOf(counter.value) + 2) @@ counter @@ U"00" + bus.cmd.write := False + bus.cmd.mask.assignDontCare() + bus.cmd.data.assignDontCare() + cmd.ready := counter.willOverflow + rsp.valid := bus.rsp.valid + rsp.data := bus.rsp.payload.data + rsp.error := False + bus + } + + + def toWishbone(): Wishbone = { + val wishboneConfig = p.getWishboneConfig() + val bus = Wishbone(wishboneConfig) + val counter = Reg(UInt(log2Up(p.burstSize) bits)) init(0) + val pending = counter =/= 0 + val lastCycle = counter === counter.maxValue + + bus.ADR := (cmd.address 
>> widthOf(counter) + log2Up(p.memDataWidth/8)) @@ counter + bus.CTI := lastCycle ? B"111" | B"010" + bus.BTE := "00" + bus.SEL.setAll() + bus.WE := False + bus.DAT_MOSI.assignDontCare() + bus.CYC := False + bus.STB := False + when(cmd.valid || pending){ + bus.CYC := True + bus.STB := True + when(bus.ACK){ + counter := counter + 1 + } + } + + cmd.ready := cmd.valid && bus.ACK + rsp.valid := RegNext(bus.CYC && bus.ACK) init(False) + rsp.data := RegNext(bus.DAT_MISO) + rsp.error := False //TODO + bus + } + + def toBmb() : Bmb = { + val busParameter = p.getBmbParameter + val bus = Bmb(busParameter).setCompositeName(this,"toBmb", true) + bus.cmd.arbitrationFrom(cmd) + bus.cmd.opcode := Bmb.Cmd.Opcode.READ + bus.cmd.address := cmd.address.resized + bus.cmd.length := p.bytePerLine - 1 + bus.cmd.last := True + rsp.valid := bus.rsp.valid + rsp.data := bus.rsp.data + rsp.error := bus.rsp.isError + bus.rsp.ready := True + bus + } +} + + +case class InstructionCacheFlushBus() extends Bundle with IMasterSlave{ + val cmd = Event + val rsp = Bool + + override def asMaster(): Unit = { + master(cmd) + in(rsp) + } +} + +class InstructionCache(p : InstructionCacheConfig, mmuParameter : MemoryTranslatorBusParameter) extends Component{ + import p._ + val io = new Bundle{ + val flush = in Bool() + val cpu = slave(InstructionCacheCpuBus(p, mmuParameter)) + val mem = master(InstructionCacheMemBus(p)) + } + + val lineWidth = bytePerLine*8 + val lineCount = cacheSize/bytePerLine + val cpuWordWidth = cpuDataWidth + val memWordPerLine = lineWidth/memDataWidth + val bytePerCpuWord = cpuWordWidth/8 + val wayLineCount = lineCount/wayCount + + val tagRange = addressWidth-1 downto log2Up(wayLineCount*bytePerLine) + val lineRange = tagRange.low-1 downto log2Up(bytePerLine) + + case class LineTag() extends Bundle{ + val valid = Bool + val error = Bool + val address = UInt(tagRange.length bit) + } + + val bankCount = wayCount + val bankWidth = if(!reducedBankWidth) memDataWidth else Math.max(cpuDataWidth, memDataWidth/wayCount) + val bankByteSize = cacheSize/bankCount + val bankWordCount = bankByteSize*8/bankWidth + val bankWordToCpuWordRange = log2Up(bankWidth/8)-1 downto log2Up(bytePerCpuWord) + val memToBankRatio = bankWidth*bankCount / memDataWidth + + val banks = Seq.fill(bankCount)(Mem(Bits(bankWidth bits), bankWordCount)) + + val ways = Seq.fill(wayCount)(new Area{ + val tags = Mem(LineTag(),wayLineCount) + + if(preResetFlush){ + tags.initBigInt(List.fill(wayLineCount)(BigInt(0))) + } + }) + + + val lineLoader = new Area{ + val fire = False + val valid = RegInit(False) clearWhen(fire) + val address = KeepAttribute(Reg(UInt(addressWidth bits))) + val hadError = RegInit(False) clearWhen(fire) + val flushPending = RegInit(True) + + when(io.cpu.fill.valid){ + valid := True + address := io.cpu.fill.payload + } + + io.cpu.prefetch.haltIt := valid || flushPending + + val flushCounter = Reg(UInt(log2Up(wayLineCount) + 1 bit)) + when(!flushCounter.msb){ + io.cpu.prefetch.haltIt := True + flushCounter := flushCounter + 1 + } + when(!RegNext(flushCounter.msb)){ + io.cpu.prefetch.haltIt := True + } + + when(io.flush){ + io.cpu.prefetch.haltIt := True + flushPending := True + } + + when(flushPending && !(valid || io.cpu.fetch.isValid) ){ + flushCounter := 0 + flushPending := False + } + + + + val cmdSent = RegInit(False) setWhen(io.mem.cmd.fire) clearWhen(fire) + io.mem.cmd.valid := valid && !cmdSent + io.mem.cmd.address := address(tagRange.high downto lineRange.low) @@ U(0,lineRange.low bit) + io.mem.cmd.size := 
log2Up(p.bytePerLine) + + val wayToAllocate = Counter(wayCount, !valid) + val wordIndex = KeepAttribute(Reg(UInt(log2Up(memWordPerLine) bits)) init(0)) + + + val write = new Area{ + val tag = ways.map(_.tags.writePort) + val data = banks.map(_.writePort) + } + + for(wayId <- 0 until wayCount){ + val wayHit = wayToAllocate === wayId + val tag = write.tag(wayId) + tag.valid := ((wayHit && fire) || !flushCounter.msb) + tag.address := (flushCounter.msb ? address(lineRange) | flushCounter(flushCounter.high-1 downto 0)) + tag.data.valid := flushCounter.msb + tag.data.error := hadError || io.mem.rsp.error + tag.data.address := address(tagRange) + } + + for((writeBank, bankId) <- write.data.zipWithIndex){ + if(!reducedBankWidth) { + writeBank.valid := io.mem.rsp.valid && wayToAllocate === bankId + writeBank.address := address(lineRange) @@ wordIndex + writeBank.data := io.mem.rsp.data + } else { + val sel = U(bankId) - wayToAllocate.value + val groupSel = wayToAllocate(log2Up(bankCount)-1 downto log2Up(bankCount/memToBankRatio)) + val subSel = sel(log2Up(bankCount/memToBankRatio) -1 downto 0) + writeBank.valid := io.mem.rsp.valid && groupSel === (bankId >> log2Up(bankCount/memToBankRatio)) + writeBank.address := address(lineRange) @@ wordIndex @@ (subSel) + writeBank.data := io.mem.rsp.data.subdivideIn(bankCount/memToBankRatio slices)(subSel) + } + } + + + when(io.mem.rsp.valid) { + wordIndex := (wordIndex + 1).resized + hadError.setWhen(io.mem.rsp.error) + when(wordIndex === wordIndex.maxValue) { + fire := True + } + } + } + + val fetchStage = new Area{ + val read = new Area{ + val banksValue = for(bank <- banks) yield new Area{ + val dataMem = bank.readSync(io.cpu.prefetch.pc(lineRange.high downto log2Up(bankWidth/8)), !io.cpu.fetch.isStuck) + val data = if(!twoCycleRamInnerMux) dataMem.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange)) else dataMem + } + + val waysValues = for((way, wayId) <- ways.zipWithIndex) yield new Area{ + val tag = if(asyncTagMemory) { + way.tags.readAsync(io.cpu.fetch.pc(lineRange)) + }else { + way.tags.readSync(io.cpu.prefetch.pc(lineRange), !io.cpu.fetch.isStuck) + } +// val data = CombInit(banksValue(wayId).data) + } + } + + + val hit = (!twoCycleRam) generate new Area{ + val hits = read.waysValues.map(way => way.tag.valid && way.tag.address === io.cpu.fetch.mmuRsp.physicalAddress(tagRange)) + val valid = Cat(hits).orR + val wayId = OHToUInt(hits) + val bankId = if(!reducedBankWidth) wayId else (wayId >> log2Up(bankCount/memToBankRatio)) @@ ((wayId + (io.cpu.fetch.mmuRsp.physicalAddress(log2Up(bankWidth/8), log2Up(bankCount) bits))).resize(log2Up(bankCount/memToBankRatio))) + val error = read.waysValues.map(_.tag.error).read(wayId) + val data = read.banksValue.map(_.data).read(bankId) + val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(data) else data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange)) + io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? io.cpu.fetch.dataBypass | word) else word) + if(twoCycleCache){ + io.cpu.decode.data := RegNextWhen(io.cpu.fetch.data,!io.cpu.decode.isStuck) + } + } + + if(twoCycleRam && wayCount == 1){ + val cacheData = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) CombInit(read.banksValue.head.data) else read.banksValue.head.data.subdivideIn(cpuDataWidth bits).read(io.cpu.fetch.pc(bankWordToCpuWordRange)) + io.cpu.fetch.data := (if(p.bypassGen) (io.cpu.fetch.dataBypassValid ? 
io.cpu.fetch.dataBypass | cacheData) else cacheData) + } + + io.cpu.fetch.physicalAddress := io.cpu.fetch.mmuRsp.physicalAddress + + val resolution = ifGen(!twoCycleCache)( new Area{ + val mmuRsp = io.cpu.fetch.mmuRsp + + io.cpu.fetch.cacheMiss := !hit.valid + io.cpu.fetch.error := hit.error || (!mmuRsp.isPaging && (mmuRsp.exception || !mmuRsp.allowExecute)) + io.cpu.fetch.mmuRefilling := mmuRsp.refilling + io.cpu.fetch.mmuException := !mmuRsp.refilling && mmuRsp.isPaging && (mmuRsp.exception || !mmuRsp.allowExecute) + }) + } + + + + val decodeStage = ifGen(twoCycleCache) (new Area{ + def stage[T <: Data](that : T) = RegNextWhen(that,!io.cpu.decode.isStuck) + val mmuRsp = stage(io.cpu.fetch.mmuRsp) + + val hit = if(!twoCycleRam) new Area{ + val valid = stage(fetchStage.hit.valid) + val error = stage(fetchStage.hit.error) + } else new Area{ + val tags = fetchStage.read.waysValues.map(way => stage(way.tag)) + val hits = tags.map(tag => tag.valid && tag.address === mmuRsp.physicalAddress(tagRange)) + val valid = Cat(hits).orR + val wayId = OHToUInt(hits) + val bankId = if(!reducedBankWidth) wayId else (wayId >> log2Up(bankCount/memToBankRatio)) @@ ((wayId + (mmuRsp.physicalAddress(log2Up(bankWidth/8), log2Up(bankCount) bits))).resize(log2Up(bankCount/memToBankRatio))) + val error = tags(wayId).error + val data = fetchStage.read.banksValue.map(bank => stage(bank.data)).read(bankId) + val word = if(cpuDataWidth == memDataWidth || !twoCycleRamInnerMux) data else data.subdivideIn(cpuDataWidth bits).read(io.cpu.decode.pc(bankWordToCpuWordRange)) + if(p.bypassGen) when(stage(io.cpu.fetch.dataBypassValid)){ + word := stage(io.cpu.fetch.dataBypass) + } + io.cpu.decode.data := word + } + + io.cpu.decode.cacheMiss := !hit.valid + io.cpu.decode.error := hit.error || (!mmuRsp.isPaging && (mmuRsp.exception || !mmuRsp.allowExecute)) + io.cpu.decode.mmuRefilling := mmuRsp.refilling + io.cpu.decode.mmuException := !mmuRsp.refilling && mmuRsp.isPaging && (mmuRsp.exception || !mmuRsp.allowExecute) + io.cpu.decode.physicalAddress := mmuRsp.physicalAddress + }) +} + diff --git a/VexRiscv/src/main/scala/vexriscv/ip/fpu/FpuCore.scala b/VexRiscv/src/main/scala/vexriscv/ip/fpu/FpuCore.scala new file mode 100644 index 0000000..657b2fb --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/ip/fpu/FpuCore.scala @@ -0,0 +1,1944 @@ +package vexriscv.ip.fpu + +import spinal.core._ +import spinal.lib._ +import spinal.lib.eda.bench.{Bench, Rtl, XilinxStdTargets} +import spinal.lib.math.UnsignedDivider + +import scala.collection.mutable.ArrayBuffer + +object FpuDivSqrtIterationState extends SpinalEnum{ + val IDLE, YY, XYY, Y2_XYY, DIV, _15_XYY2, Y_15_XYY2, Y_15_XYY2_RESULT, SQRT = newElement() +} + + +case class FpuCore( portCount : Int, p : FpuParameter) extends Component{ + val io = new Bundle { + val port = Vec(slave(FpuPort(p)), portCount) + } + + val portCountWidth = log2Up(portCount) + val Source = HardType(UInt(portCountWidth bits)) + val exponentOne = (1 << p.internalExponentSize-1) - 1 + val exponentF32Subnormal = exponentOne-127 + val exponentF64Subnormal = exponentOne-1023 + val exponentF32Infinity = exponentOne+127+1 + val exponentF64Infinity = exponentOne+1023+1 + + + + def whenDouble(format : FpuFormat.C)(yes : => Unit)(no : => Unit): Unit ={ + if(p.withDouble) when(format === FpuFormat.DOUBLE) { yes } otherwise{ no } + if(!p.withDouble) no + } + + def muxDouble[T <: Data](format : FpuFormat.C)(yes : => T)(no : => T): T ={ + if(p.withDouble) ((format === FpuFormat.DOUBLE) ? 
{ yes } | { no }) + else no + } + + case class RfReadInput() extends Bundle{ + val source = Source() + val opcode = p.Opcode() + val rs1, rs2, rs3 = p.rfAddress() + val rd = p.rfAddress() + val arg = p.Arg() + val roundMode = FpuRoundMode() + val format = p.withDouble generate FpuFormat() + } + + case class RfReadOutput() extends Bundle{ + val source = Source() + val opcode = p.Opcode() + val rs1, rs2, rs3 = p.internalFloating() + val rd = p.rfAddress() + val arg = p.Arg() + val roundMode = FpuRoundMode() + val format = p.withDouble generate FpuFormat() + val rs1Boxed, rs2Boxed = p.withDouble generate Bool() + } + + + case class LoadInput() extends Bundle{ + val source = Source() + val rd = p.rfAddress() + val i2f = Bool() + val arg = Bits(2 bits) + val roundMode = FpuRoundMode() + val format = p.withDouble generate FpuFormat() + } + + case class ShortPipInput() extends Bundle{ + val source = Source() + val opcode = p.Opcode() + val rs1, rs2 = p.internalFloating() + val rd = p.rfAddress() + val value = Bits(32 bits) + val arg = Bits(2 bits) + val roundMode = FpuRoundMode() + val format = p.withDouble generate FpuFormat() + val rs1Boxed, rs2Boxed = p.withDouble generate Bool() + } + + class MulInput() extends Bundle{ + val source = Source() + val rs1, rs2, rs3 = p.internalFloating() + val rd = p.rfAddress() + val add = Bool() + val divSqrt = Bool() + val msb1, msb2 = Bool() //allow usage of msb bits of mul + val roundMode = FpuRoundMode() + val format = p.withDouble generate FpuFormat() + } + + + case class DivSqrtInput() extends Bundle{ + val source = Source() + val rs1, rs2 = p.internalFloating() + val rd = p.rfAddress() + val div = Bool() + val roundMode = FpuRoundMode() + val format = p.withDouble generate FpuFormat() + } + + case class DivInput() extends Bundle{ + val source = Source() + val rs1, rs2 = p.internalFloating() + val rd = p.rfAddress() + val roundMode = FpuRoundMode() + val format = p.withDouble generate FpuFormat() + } + + + case class SqrtInput() extends Bundle{ + val source = Source() + val rs1 = p.internalFloating() + val rd = p.rfAddress() + val roundMode = FpuRoundMode() + val format = p.withDouble generate FpuFormat() + } + + + val addExtraBits = 2 + case class AddInput() extends Bundle{ + val source = Source() + val rs1, rs2 = FpuFloat(exponentSize = p.internalExponentSize, mantissaSize = p.internalMantissaSize+addExtraBits) + val rd = p.rfAddress() + val roundMode = FpuRoundMode() + val format = p.withDouble generate FpuFormat() + val needCommit = Bool() + } + + + class MergeInput() extends Bundle{ + val source = Source() + val rd = p.rfAddress() + val value = p.writeFloating() + val scrap = Bool() + val roundMode = FpuRoundMode() + val format = p.withDouble generate FpuFormat() + val NV = Bool() + val DZ = Bool() + } + + case class RoundOutput() extends Bundle{ + val source = Source() + val rd = p.rfAddress() + val value = p.internalFloating() + val format = p.withDouble generate FpuFormat() + val NV, NX, OF, UF, DZ = Bool() + val write = Bool() + } + + val rf = new Area{ + case class Entry() extends Bundle{ + val value = p.internalFloating() + val boxed = p.withDouble generate Bool() + } + val ram = Mem(Entry(), 32*portCount) + + val init = new Area{ + val counter = Reg(UInt(6 bits)) init(0) + val done = CombInit(counter.msb) + when(!done){ + counter := counter + 1 + } + def apply(port : Flow[MemWriteCmd[Bool]]) = { + port.valid := !done + port.address := counter.resized + port.data := False + port + } + } + + val scoreboards = Array.fill(portCount)(new Area{ + 
val target, hit = Mem(Bool, 32) // XOR + val writes = Mem(Bool, 32) + + val targetWrite = init(target.writePort) + val hitWrite = init(hit.writePort) + }) + } + + val commitFork = new Area{ + val load, commit = Vec(Stream(FpuCommit(p)), portCount) + for(i <- 0 until portCount){ + val fork = new StreamFork(FpuCommit(p), 2, synchronous = true) + fork.io.input << io.port(i).commit + fork.io.outputs(0) >> load(i) + fork.io.outputs(1).pipelined(m2s = false, s2m = true) >> commit(i) //Pipelining here is light, as it only use the flags of the payload + } + } + + class Tracker(width : Int) extends Area{ + val counter = Reg(UInt(width bits)) init(0) + val full = counter.andR + val notEmpty = counter.orR + val inc = False + val dec = False + counter := counter + U(inc) - U(dec) + } + + class CommitArea(source : Int) extends Area{ + val pending = new Tracker(4) + val add, mul, div, sqrt, short = new Tracker(4) + val input = commitFork.commit(source).haltWhen(List(add, mul, div, sqrt, short).map(_.full).orR || !pending.notEmpty).toFlow + + when(input.fire){ + add.inc setWhen(List(FpuOpcode.ADD).map(input.opcode === _).orR) + mul.inc setWhen(List(FpuOpcode.MUL, FpuOpcode.FMA).map(input.opcode === _).orR) + div.inc setWhen(List(FpuOpcode.DIV).map(input.opcode === _).orR) + sqrt.inc setWhen(List(FpuOpcode.SQRT).map(input.opcode === _).orR) + short.inc setWhen(List(FpuOpcode.SGNJ, FpuOpcode.MIN_MAX, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR) + rf.scoreboards(source).writes(input.rd) := input.write + pending.dec := True + } + } + + val commitLogic = for(source <- 0 until portCount) yield new CommitArea(source) + + def commitConsume(what : CommitArea => Tracker, source : UInt, fire : Bool) : Bool = { + for(i <- 0 until portCount) what(commitLogic(i)).dec setWhen(fire && source === i) + commitLogic.map(what(_).notEmpty).read(source) + } + + + val scheduler = for(portId <- 0 until portCount; + scoreboard = rf.scoreboards(portId)) yield new Area{ + val input = io.port(portId).cmd.pipelined(s2m = true) + val useRs1, useRs2, useRs3, useRd = False + switch(input.opcode){ + is(p.Opcode.LOAD) { useRd := True } + is(p.Opcode.STORE) { useRs2 := True } + is(p.Opcode.ADD) { useRd := True; useRs1 := True; useRs2 := True } + is(p.Opcode.MUL) { useRd := True; useRs1 := True; useRs2 := True } + is(p.Opcode.DIV) { useRd := True; useRs1 := True; useRs2 := True } + is(p.Opcode.SQRT) { useRd := True; useRs1 := True } + is(p.Opcode.FMA) { useRd := True; useRs1 := True; useRs2 := True; useRs3 := True } + is(p.Opcode.I2F) { useRd := True } + is(p.Opcode.F2I) { useRs1 := True } + is(p.Opcode.MIN_MAX) { useRd := True; useRs1 := True; useRs2 := True } + is(p.Opcode.CMP) { useRs1 := True; useRs2 := True } + is(p.Opcode.SGNJ) { useRd := True; useRs1 := True; useRs2 := True } + is(p.Opcode.FMV_X_W) { useRs1 := True } + is(p.Opcode.FMV_W_X) { useRd := True } + is(p.Opcode.FCLASS ) { useRs1 := True } + is(p.Opcode.FCVT_X_X ) { useRd := True; useRs1 := True } + } + + val uses = List(useRs1, useRs2, useRs3, useRd) + val regs = List(input.rs1, input.rs2, input.rs3, input.rd) + val rfHits = regs.map(scoreboard.hit.readAsync(_)) + val rfTargets = regs.map(scoreboard.target.readAsync(_)) + val rfBusy = (rfHits, rfTargets).zipped.map(_ ^ _) + + val hits = (0 to 3).map(id => uses(id) && rfBusy(id)) + val hazard = hits.orR || !rf.init.done || commitLogic(portId).pending.full + val output = input.haltWhen(hazard) + when(input.opcode === p.Opcode.STORE){ + output.rs1 := input.rs2 //Datapath optimisation to unify rs source in the store 
pipeline + } + when(input.valid && rf.init.done){ + scoreboard.targetWrite.address := input.rd + scoreboard.targetWrite.data := !rfTargets.last + } + when(output.fire && useRd){ + scoreboard.targetWrite.valid := True + commitLogic(portId).pending.inc := True + } + } + + + val cmdArbiter = new Area{ + val arbiter = StreamArbiterFactory.noLock.roundRobin.build(FpuCmd(p), portCount) + arbiter.io.inputs <> Vec(scheduler.map(_.output.pipelined(m2s = p.schedulerM2sPipe))) + + val output = arbiter.io.output.swapPayload(RfReadInput()) + output.source := arbiter.io.chosen + output.payload.assignSomeByName(arbiter.io.output.payload) + } + + val read = new Area{ + val s0 = cmdArbiter.output.pipelined() + val s1 = s0.m2sPipe() + val output = s1.swapPayload(RfReadOutput()) + val rs = if(p.asyncRegFile){ + List(s1.rs1, s1.rs2, s1.rs3).map(a => rf.ram.readAsync(s1.source @@ a)) + } else { + List(s0.rs1, s0.rs2, s0.rs3).map(a => rf.ram.readSync(s0.source @@ a, enable = !output.isStall)) + } + output.source := s1.source + output.opcode := s1.opcode + output.arg := s1.arg + output.roundMode := s1.roundMode + output.rd := s1.rd + output.rs1 := rs(0).value + output.rs2 := rs(1).value + output.rs3 := rs(2).value + if(p.withDouble){ + output.rs1Boxed := rs(0).boxed + output.rs2Boxed := rs(1).boxed + output.format := s1.format + val store = s1.opcode === FpuOpcode.STORE ||s1.opcode === FpuOpcode.FMV_X_W + val sgnjBypass = s1.opcode === FpuOpcode.SGNJ && s1.format === FpuFormat.DOUBLE + when(!sgnjBypass) { + when(store) { //Pass through + output.format := rs(0).boxed ? FpuFormat.FLOAT | FpuFormat.DOUBLE + } elsewhen (s1.format === FpuFormat.FLOAT =/= rs(0).boxed) { + output.rs1.setNanQuiet + output.rs1.sign := False + } + } + when(s1.format === FpuFormat.FLOAT =/= rs(1).boxed) { + output.rs2.setNanQuiet + output.rs2.sign := False + } + when(s1.format === FpuFormat.FLOAT =/= rs(2).boxed) { + output.rs3.setNanQuiet + } + } + } + + val decode = new Area{ + val input = read.output/*.s2mPipe()*/.combStage() + input.ready := False + + val loadHit = List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X, FpuOpcode.I2F).map(input.opcode === _).orR + val load = Stream(LoadInput()) + load.valid := input.valid && loadHit + input.ready setWhen(loadHit && load.ready) + load.payload.assignSomeByName(input.payload) + load.i2f := input.opcode === FpuOpcode.I2F + + val shortPipHit = List(FpuOpcode.STORE, FpuOpcode.F2I, FpuOpcode.CMP, FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FMV_X_W, FpuOpcode.FCLASS, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR + val shortPip = Stream(ShortPipInput()) + input.ready setWhen(shortPipHit && shortPip.ready) + shortPip.valid := input.valid && shortPipHit + shortPip.payload.assignSomeByName(input.payload) + + val divSqrtHit = input.opcode === p.Opcode.DIV || input.opcode === p.Opcode.SQRT + val divSqrt = Stream(DivSqrtInput()) + if(p.withDivSqrt) { + input.ready setWhen (divSqrtHit && divSqrt.ready) + divSqrt.valid := input.valid && divSqrtHit + divSqrt.payload.assignSomeByName(input.payload) + divSqrt.div := input.opcode === p.Opcode.DIV + } + + val divHit = input.opcode === p.Opcode.DIV + val div = Stream(DivInput()) + if(p.withDiv) { + input.ready setWhen (divHit && div.ready) + div.valid := input.valid && divHit + div.payload.assignSomeByName(input.payload) + } + + val sqrtHit = input.opcode === p.Opcode.SQRT + val sqrt = Stream(SqrtInput()) + if(p.withSqrt) { + input.ready setWhen (sqrtHit && sqrt.ready) + sqrt.valid := input.valid && sqrtHit + sqrt.payload.assignSomeByName(input.payload) + } + + + val 
fmaHit = input.opcode === p.Opcode.FMA + val mulHit = input.opcode === p.Opcode.MUL || fmaHit + val mul = Stream(new MulInput()) + val divSqrtToMul = Stream(new MulInput()) + if(!p.withDivSqrt){ + divSqrtToMul.valid := False + divSqrtToMul.payload.assignDontCare() + } + + if(p.withMul) { + input.ready setWhen (mulHit && mul.ready && !divSqrtToMul.valid) + mul.valid := input.valid && mulHit || divSqrtToMul.valid + + divSqrtToMul.ready := mul.ready + mul.payload := divSqrtToMul.payload + when(!divSqrtToMul.valid) { + mul.payload.assignSomeByName(input.payload) + mul.add := fmaHit + mul.divSqrt := False + mul.msb1 := True + mul.msb2 := True + mul.rs2.sign.allowOverride(); + mul.rs2.sign := input.rs2.sign ^ input.arg(0) + mul.rs3.sign.allowOverride(); + mul.rs3.sign := input.rs3.sign ^ input.arg(1) + } + } + + val addHit = input.opcode === p.Opcode.ADD + val add = Stream(AddInput()) + val mulToAdd = Stream(AddInput()) + + + if(p.withAdd) { + input.ready setWhen (addHit && add.ready && !mulToAdd.valid) + add.valid := input.valid && addHit || mulToAdd.valid + + mulToAdd.ready := add.ready + add.payload := mulToAdd.payload + when(!mulToAdd.valid) { + add.source := input.source + add.rd := input.rd + add.roundMode := input.roundMode + if(p.withDouble) add.format := input.format + add.needCommit := True + add.rs1.special := input.rs1.special + add.rs2.special := input.rs2.special + add.rs1.exponent := input.rs1.exponent + add.rs2.exponent := input.rs2.exponent + add.rs1.sign := input.rs1.sign + add.rs2.sign := input.rs2.sign ^ input.arg(0) + add.rs1.mantissa := input.rs1.mantissa << addExtraBits + add.rs2.mantissa := input.rs2.mantissa << addExtraBits + } + } + } + + val load = new Area{ + + case class S0() extends Bundle{ + val source = Source() + val rd = p.rfAddress() + val value = p.storeLoadType() + val i2f = Bool() + val arg = Bits(2 bits) + val roundMode = FpuRoundMode() + val format = p.withDouble generate FpuFormat() + } + + val s0 = new Area{ + val input = decode.load.pipelined(m2s = true, s2m = true).stage() + val filtred = commitFork.load.map(port => port.takeWhen(List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X, FpuOpcode.I2F).map(_ === port.opcode).orR)) + def feed = filtred(input.source) + val hazard = !feed.valid + + + val output = input.haltWhen(hazard).swapPayload(S0()) + filtred.foreach(_.ready := False) + feed.ready := input.valid && output.ready + output.source := input.source + output.rd := input.rd + output.value := feed.value + output.i2f := input.i2f + output.arg := input.arg + output.roundMode := input.roundMode + if(p.withDouble) { + output.format := input.format + when(!input.i2f && input.format === FpuFormat.DOUBLE && output.value(63 downto 32).andR){ //Detect boxing + output.format := FpuFormat.FLOAT + } + } + } + + val s1 = new Area{ + val input = s0.output.stage() + val busy = False + + val f32 = new Area{ + val mantissa = input.value(0, 23 bits).asUInt + val exponent = input.value(23, 8 bits).asUInt + val sign = input.value(31) + } + val f64 = p.withDouble generate new Area{ + val mantissa = input.value(0, 52 bits).asUInt + val exponent = input.value(52, 11 bits).asUInt + val sign = input.value(63) + } + + val recodedExpOffset = UInt(p.internalExponentSize bits) + val passThroughFloat = p.internalFloating() + passThroughFloat.special := False + + whenDouble(input.format){ + passThroughFloat.sign := f64.sign + passThroughFloat.exponent := f64.exponent.resized + passThroughFloat.mantissa := f64.mantissa + recodedExpOffset := exponentF64Subnormal + } { + passThroughFloat.sign := 
f32.sign + passThroughFloat.exponent := f32.exponent.resized + passThroughFloat.mantissa := f32.mantissa << (if (p.withDouble) 29 else 0) + recodedExpOffset := exponentF32Subnormal + } + + + val manZero = passThroughFloat.mantissa === 0 + val expZero = passThroughFloat.exponent === 0 + val expOne = passThroughFloat.exponent(7 downto 0).andR + if(p.withDouble) { + expZero.clearWhen(input.format === FpuFormat.DOUBLE && input.value(62 downto 60) =/= 0) + expOne.clearWhen(input.format === FpuFormat.DOUBLE && input.value(62 downto 60) =/= 7) + } + + val isZero = expZero && manZero + val isSubnormal = expZero && !manZero + val isInfinity = expOne && manZero + val isNan = expOne && !manZero + + + val fsm = new Area{ + val done, boot, patched = Reg(Bool()) + val ohInputWidth = 32 max p.internalMantissaSize + val ohInput = Bits(ohInputWidth bits).assignDontCare() + when(!input.i2f) { + if(!p.withDouble) ohInput := input.value(0, 23 bits) << 9 + if( p.withDouble) ohInput := passThroughFloat.mantissa.asBits + } otherwise { + ohInput(ohInputWidth-32-1 downto 0) := 0 + ohInput(ohInputWidth-32, 32 bits) := input.value(31 downto 0) + } + + val i2fZero = Reg(Bool) + + val shift = new Area{ + val by = Reg(UInt(log2Up(ohInputWidth) bits)) + val input = UInt(ohInputWidth bits).assignDontCare() + var logic = input + for(i <- by.range){ + logic \= by(i) ? (logic |<< (BigInt(1) << i)) | logic + } + val output = RegNextWhen(logic, !done) + } + shift.input := (ohInput.asUInt |<< 1).resized + + when(input.valid && (input.i2f || isSubnormal) && !done){ + busy := True + when(boot){ + when(input.i2f && !patched && input.value(31) && input.arg(0)){ + input.value.getDrivingReg(0, 32 bits) := B(input.value.asUInt.twoComplement(True).resize(32 bits)) + patched := True + } otherwise { + shift.by := OHToUInt(OHMasking.first((ohInput).reversed)) + boot := False + i2fZero := input.value(31 downto 0) === 0 + } + } otherwise { + done := True + } + } + + val expOffset = (UInt(p.internalExponentSize bits)) + expOffset := 0 + when(isSubnormal){ + expOffset := shift.by.resized + } + + when(!input.isStall){ + done := False + boot := True + patched := False + } + } + + + val i2fSign = fsm.patched + val (i2fHigh, i2fLow) = fsm.shift.output.splitAt(if(p.withDouble) 0 else widthOf(input.value)-24) + val scrap = i2fLow =/= 0 + + val recoded = p.internalFloating() + recoded.mantissa := passThroughFloat.mantissa + recoded.exponent := (passThroughFloat.exponent -^ fsm.expOffset + recodedExpOffset).resized + recoded.sign := passThroughFloat.sign + recoded.setNormal + when(isZero){recoded.setZero} + when(isInfinity){recoded.setInfinity} + when(isNan){recoded.setNan} + + val output = input.haltWhen(busy).swapPayload(new MergeInput()) + output.source := input.source + output.roundMode := input.roundMode + if(p.withDouble) { + output.format := input.format + } + output.rd := input.rd + output.value.sign := recoded.sign + output.value.exponent := recoded.exponent + output.value.mantissa := recoded.mantissa @@ U"0" + output.value.special := recoded.special + output.scrap := False + output.NV := False + output.DZ := False + when(input.i2f){ + output.value.sign := i2fSign + output.value.exponent := (U(exponentOne+31) - fsm.shift.by).resized + output.value.setNormal + output.scrap := scrap + when(fsm.i2fZero) { output.value.setZero } + } + + when(input.i2f || isSubnormal){ + output.value.mantissa := U(i2fHigh) @@ (if(p.withDouble) U"0" else U"") + } + } + + } + + val shortPip = new Area{ + val input = decode.shortPip.stage() + + val toFpuRf = 
List(FpuOpcode.MIN_MAX, FpuOpcode.SGNJ, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR + val rfOutput = Stream(new MergeInput()) + + val isCommited = commitConsume(_.short, input.source, input.fire && toFpuRf) + val output = rfOutput.haltWhen(!isCommited) + + val result = p.storeLoadType().assignDontCare() + + val halt = False + val recodedResult = p.storeLoadType() + val f32 = new Area{ + val exp = (input.rs1.exponent - (exponentOne-127)).resize(8 bits) + val man = CombInit(input.rs1.mantissa(if(p.withDouble) 51 downto 29 else 22 downto 0)) + } + val f64 = p.withDouble generate new Area{ + val exp = (input.rs1.exponent - (exponentOne-1023)).resize(11 bits) + val man = CombInit(input.rs1.mantissa) + } + + whenDouble(input.format){ + recodedResult := input.rs1.sign ## f64.exp ## f64.man + } { + recodedResult := (if(p.withDouble) B"xFFFFFFFF" else B"") ## input.rs1.sign ## f32.exp ## f32.man + } + + val expSubnormalThreshold = muxDouble[UInt](input.format)(exponentF64Subnormal)(exponentF32Subnormal) + val expInSubnormalRange = input.rs1.exponent <= expSubnormalThreshold + val isSubnormal = !input.rs1.special && expInSubnormalRange + val isNormal = !input.rs1.special && !expInSubnormalRange + val fsm = new Area{ + val f2iShift = input.rs1.exponent - U(exponentOne) + val isF2i = input.opcode === FpuOpcode.F2I + val needRecoding = List(FpuOpcode.FMV_X_W, FpuOpcode.STORE).map(_ === input.opcode).orR && isSubnormal + val done, boot = Reg(Bool()) + val isZero = input.rs1.isZero// || input.rs1.exponent < exponentOne-1 + + val shift = new Area{ + val by = Reg(UInt(log2Up(p.internalMantissaSize+1 max 33) bits)) + val input = UInt(p.internalMantissaSize+1 max 33 bits).assignDontCare() + var logic = input + val scrap = Reg(Bool) + for(i <- by.range.reverse){ + scrap setWhen(by(i) && logic(0, 1 << i bits) =/= 0) + logic \= by(i) ? 
(logic |>> (BigInt(1) << i)) | logic + } + when(boot){ + scrap := False + } + val output = RegNextWhen(logic, !done) + } + + shift.input := (U(!isZero) @@ input.rs1.mantissa) << (if(p.withDouble) 0 else 9) + + val formatShiftOffset = muxDouble[UInt](input.format)(exponentOne-1023+1)(exponentOne - (if(p.withDouble) (127+34) else (127-10))) + when(input.valid && (needRecoding || isF2i) && !done){ + halt := True + when(boot){ + when(isF2i){ + shift.by := ((U(exponentOne + 31) - input.rs1.exponent).min(U(33)) + (if(p.withDouble) 20 else 0)).resized //TODO merge + } otherwise { + shift.by := (formatShiftOffset - input.rs1.exponent).resized + } + boot := False + } otherwise { + done := True + } + } + + when(!input.isStall){ + done := False + boot := True + } + } + + val mantissaForced = False + val exponentForced = False + val mantissaForcedValue = Bool().assignDontCare() + val exponentForcedValue = Bool().assignDontCare() + val cononicalForced = False + + + when(input.rs1.special){ + switch(input.rs1.exponent(1 downto 0)){ + is(FpuFloat.ZERO){ + mantissaForced := True + exponentForced := True + mantissaForcedValue := False + exponentForcedValue := False + } + is(FpuFloat.INFINITY){ + mantissaForced := True + exponentForced := True + mantissaForcedValue := False + exponentForcedValue := True + } + is(FpuFloat.NAN){ + exponentForced := True + exponentForcedValue := True + when(input.rs1.isCanonical){ + cononicalForced := True + mantissaForced := True + mantissaForcedValue := False + } + } + } + } + + + + when(isSubnormal){ + exponentForced := True + exponentForcedValue := False + recodedResult(0,23 bits) := fsm.shift.output(22 downto 0).asBits + whenDouble(input.format){ + recodedResult(51 downto 23) := fsm.shift.output(51 downto 23).asBits + }{} + } + when(mantissaForced){ + recodedResult(0,23 bits) := (default -> mantissaForcedValue) + whenDouble(input.format){ + recodedResult(23, 52-23 bits) := (default -> mantissaForcedValue) + }{} + } + when(exponentForced){ + whenDouble(input.format){ + recodedResult(52, 11 bits) := (default -> exponentForcedValue) + } { + recodedResult(23, 8 bits) := (default -> exponentForcedValue) + } + } + when(cononicalForced){ + whenDouble(input.format){ + recodedResult(63) := False + recodedResult(51) := True + } { + recodedResult(31) := False + recodedResult(22) := True + } + } + + val rspNv = False + val rspNx = False + + val f2i = new Area{ //Will not work for 64 bits float max value rounding + val unsigned = fsm.shift.output(32 downto 0) >> 1 + val resign = input.arg(0) && input.rs1.sign + val round = fsm.shift.output(0) ## fsm.shift.scrap + val increment = input.roundMode.mux( + FpuRoundMode.RNE -> (round(1) && (round(0) || unsigned(0))), + FpuRoundMode.RTZ -> False, + FpuRoundMode.RDN -> (round =/= 0 && input.rs1.sign), + FpuRoundMode.RUP -> (round =/= 0 && !input.rs1.sign), + FpuRoundMode.RMM -> (round(1)) + ) + val result = (Mux(resign, ~unsigned, unsigned) + (resign ^ increment).asUInt) + val overflow = (input.rs1.exponent > (input.arg(0) ? 
U(exponentOne+30) | U(exponentOne+31)) || input.rs1.isInfinity) && !input.rs1.sign || input.rs1.isNan + val underflow = (input.rs1.exponent > U(exponentOne+31) || input.arg(0) && unsigned.msb && (unsigned(30 downto 0) =/= 0 || increment) || !input.arg(0) && (unsigned =/= 0 || increment) || input.rs1.isInfinity) && input.rs1.sign + val isZero = input.rs1.isZero + if(p.withDouble){ + overflow setWhen(!input.rs1.sign && increment && unsigned(30 downto 0).andR && (input.arg(0) || unsigned(31))) + } + when(isZero){ + result := 0 + } elsewhen(underflow || overflow) { + val low = overflow + val high = input.arg(0) ^ overflow + result := (31 -> high, default -> low) + rspNv := input.valid && input.opcode === FpuOpcode.F2I && fsm.done && !isZero + } otherwise { + rspNx := input.valid && input.opcode === FpuOpcode.F2I && fsm.done && round =/= 0 + } + } + + val bothZero = input.rs1.isZero && input.rs2.isZero + val rs1Equal = input.rs1 === input.rs2 + val rs1AbsSmaller = (input.rs1.exponent @@ input.rs1.mantissa) < (input.rs2.exponent @@ input.rs2.mantissa) + rs1AbsSmaller.setWhen(input.rs2.isInfinity) + rs1AbsSmaller.setWhen(input.rs1.isZero) + rs1AbsSmaller.clearWhen(input.rs2.isZero) + rs1AbsSmaller.clearWhen(input.rs1.isInfinity) + rs1Equal setWhen(input.rs1.sign === input.rs2.sign && input.rs1.isInfinity && input.rs2.isInfinity) + val rs1Smaller = (input.rs1.sign ## input.rs2.sign).mux( + 0 -> rs1AbsSmaller, + 1 -> False, + 2 -> True, + 3 -> (!rs1AbsSmaller && !rs1Equal) + ) + + + val minMaxSelectRs2 = !(((rs1Smaller ^ input.arg(0)) && !input.rs1.isNan || input.rs2.isNan)) + val minMaxSelectNanQuiet = input.rs1.isNan && input.rs2.isNan + val cmpResult = B(rs1Smaller && !bothZero && !input.arg(1) || (rs1Equal || bothZero) && !input.arg(0)) + when(input.rs1.isNan || input.rs2.isNan) { cmpResult := 0 } + val sgnjRs1Sign = CombInit(input.rs1.sign) + val sgnjRs2Sign = CombInit(input.rs2.sign) + if(p.withDouble){ + sgnjRs2Sign setWhen(input.rs2Boxed && input.format === FpuFormat.DOUBLE) + } + val sgnjResult = (sgnjRs1Sign && input.arg(1)) ^ sgnjRs2Sign ^ input.arg(0) + val fclassResult = B(0, 32 bits) + val decoded = input.rs1.decode() + fclassResult(0) := input.rs1.sign && decoded.isInfinity + fclassResult(1) := input.rs1.sign && isNormal + fclassResult(2) := input.rs1.sign && isSubnormal + fclassResult(3) := input.rs1.sign && decoded.isZero + fclassResult(4) := !input.rs1.sign && decoded.isZero + fclassResult(5) := !input.rs1.sign && isSubnormal + fclassResult(6) := !input.rs1.sign && isNormal + fclassResult(7) := !input.rs1.sign && decoded.isInfinity + fclassResult(8) := decoded.isNan && !decoded.isQuiet + fclassResult(9) := decoded.isNan && decoded.isQuiet + + + switch(input.opcode){ + is(FpuOpcode.STORE) { result := recodedResult } + is(FpuOpcode.FMV_X_W) { result := recodedResult } + is(FpuOpcode.F2I) { result(31 downto 0) := f2i.result.asBits } + is(FpuOpcode.CMP) { result(31 downto 0) := cmpResult.resized } + is(FpuOpcode.FCLASS) { result(31 downto 0) := fclassResult.resized } + } + + + rfOutput.valid := input.valid && toFpuRf && !halt + rfOutput.source := input.source + rfOutput.rd := input.rd + rfOutput.roundMode := input.roundMode + if(p.withDouble) rfOutput.format := input.format + rfOutput.scrap := False + rfOutput.value.sign := input.rs1.sign + rfOutput.value.exponent := input.rs1.exponent + rfOutput.value.mantissa := input.rs1.mantissa @@ U"0" + rfOutput.value.special := input.rs1.special + + switch(input.opcode){ + is(FpuOpcode.MIN_MAX){ + when(minMaxSelectRs2) { + rfOutput.value.sign 
:= input.rs2.sign + rfOutput.value.exponent := input.rs2.exponent + rfOutput.value.mantissa := input.rs2.mantissa @@ U"0" + rfOutput.value.special := input.rs2.special + } + when(minMaxSelectNanQuiet){ + rfOutput.value.setNanQuiet + } + } + is(FpuOpcode.SGNJ){ + when(!input.rs1.isNan) { + rfOutput.value.sign := sgnjResult + } + if(p.withDouble) when(input.rs1Boxed && input.format === FpuFormat.DOUBLE){ + rfOutput.value.sign := input.rs1.sign + rfOutput.format := FpuFormat.FLOAT + } + } + if(p.withDouble) is(FpuOpcode.FCVT_X_X){ + rfOutput.format := ((input.format === FpuFormat.FLOAT) ? FpuFormat.DOUBLE | FpuFormat.FLOAT) + when(input.rs1.isNan){ + rfOutput.value.setNanQuiet + } + } + } + + val signalQuiet = input.opcode === FpuOpcode.CMP && input.arg =/= 2 + val rs1Nan = input.rs1.isNan + val rs2Nan = input.rs2.isNan + val rs1NanNv = input.rs1.isNan && (!input.rs1.isQuiet || signalQuiet) + val rs2NanNv = input.rs2.isNan && (!input.rs2.isQuiet || signalQuiet) + val NV = List(FpuOpcode.CMP, FpuOpcode.MIN_MAX, FpuOpcode.FCVT_X_X).map(input.opcode === _).orR && rs1NanNv || + List(FpuOpcode.CMP, FpuOpcode.MIN_MAX).map(input.opcode === _).orR && rs2NanNv + rspNv setWhen(NV) + + val rspStreams = Vec(Stream(FpuRsp(p)), portCount) + input.ready := !halt && (toFpuRf ? rfOutput.ready | rspStreams.map(_.ready).read(input.source)) + for(i <- 0 until portCount){ + def rsp = rspStreams(i) + rsp.valid := input.valid && input.source === i && !toFpuRf && !halt + rsp.value := result + rsp.NV := rspNv + rsp.NX := rspNx + io.port(i).rsp << rsp.stage() + } + + + rfOutput.NV := NV + rfOutput.DZ := False + } + + val mul = p.withMul generate new Area{ + val inWidthA = p.internalMantissaSize+1 + val inWidthB = p.internalMantissaSize+1 + val outWidth = p.internalMantissaSize*2+2 + + case class MulSplit(offsetA : Int, offsetB : Int, widthA : Int, widthB : Int, id : Int){ + val offsetC = offsetA+offsetB + val widthC = widthA + widthB + val endC = offsetC+widthC + } + val splitsUnordered = for(offsetA <- 0 until inWidthA by p.mulWidthA; + offsetB <- 0 until inWidthB by p.mulWidthB; + widthA = (inWidthA - offsetA) min p.mulWidthA; + widthB = (inWidthB - offsetB) min p.mulWidthB) yield { + MulSplit(offsetA, offsetB, widthA, widthB, -1) + } + val splits = splitsUnordered.sortWith(_.endC < _.endC).zipWithIndex.map(e => e._1.copy(id=e._2)) + + class MathWithExp extends MulInput{ + val exp = UInt(p.internalExponentSize+1 bits) + } + val preMul = new Area{ + val input = decode.mul.stage() + val output = input.swapPayload(new MathWithExp()) + output.payload.assignSomeByName(input.payload) + output.exp := input.rs1.exponent +^ input.rs2.exponent + } + class MathWithMul extends MathWithExp{ + val muls = Vec(splits.map(e => UInt(e.widthA + e.widthB bits))) + } + val mul = new Area{ + val input = preMul.output.stage() + val output = input.swapPayload(new MathWithMul()) + val mulA = U(input.msb1) @@ input.rs1.mantissa + val mulB = U(input.msb2) @@ input.rs2.mantissa + output.payload.assignSomeByName(input.payload) + splits.foreach(e => output.muls(e.id) := mulA(e.offsetA, e.widthA bits) * mulB(e.offsetB, e.widthB bits)) + } + + val sumSplitAt = splits.size/2//splits.filter(e => e.endC <= p.internalMantissaSize).size + + class Sum1Output extends MathWithExp{ + val muls2 = Vec(splits.drop(sumSplitAt).map(e => UInt(e.widthA + e.widthB bits))) + val mulC2 = UInt(p.internalMantissaSize*2+2 bits) + } + class Sum2Output extends MathWithExp{ + val mulC = UInt(p.internalMantissaSize*2+2 bits) + } + + val sum1 = new Area { + val input = 
mul.output.stage() + val sum = splits.take(sumSplitAt).map(e => (input.muls(e.id) << e.offsetC).resize(outWidth)).reduceBalancedTree(_ + _) + + val output = input.swapPayload(new Sum1Output()) + output.payload.assignSomeByName(input.payload) + output.mulC2 := sum.resized + output.muls2 := Vec(input.muls.drop(sumSplitAt)) + } + + val sum2 = new Area { + val input = sum1.output.stage() + val sum = input.mulC2 + splits.drop(sumSplitAt).map(e => (input.muls2(e.id-sumSplitAt) << e.offsetC).resize(outWidth)).reduceBalancedTree(_ + _) + + val isCommited = commitConsume(_.mul, input.source, input.fire) + val output = input.haltWhen(!isCommited).swapPayload(new Sum2Output()) + output.payload.assignSomeByName(input.payload) + output.mulC := sum + } + + val norm = new Area{ + val input = sum2.output.stage() + val (mulHigh, mulLow) = input.mulC.splitAt(p.internalMantissaSize-1) + val scrap = mulLow =/= 0 + val needShift = mulHigh.msb + val exp = input.exp + U(needShift) + val man = needShift ? mulHigh(1, p.internalMantissaSize+1 bits) | mulHigh(0, p.internalMantissaSize+1 bits) + scrap setWhen(needShift && mulHigh(0)) + val forceZero = input.rs1.isZero || input.rs2.isZero + val underflowThreshold = muxDouble[UInt](input.format)(exponentOne + exponentOne - 1023 - 53) (exponentOne + exponentOne - 127 - 24) + val underflowExp = muxDouble[UInt](input.format)(exponentOne - 1023 - 54) (exponentOne - 127 - 25) + val forceUnderflow = exp < underflowThreshold + val forceOverflow = input.rs1.isInfinity || input.rs2.isInfinity + val infinitynan = ((input.rs1.isInfinity || input.rs2.isInfinity) && (input.rs1.isZero || input.rs2.isZero)) + val forceNan = input.rs1.isNan || input.rs2.isNan || infinitynan + + val output = p.writeFloating() + output.sign := input.rs1.sign ^ input.rs2.sign + output.exponent := (exp - exponentOne).resized + output.mantissa := man.asUInt + output.setNormal + val NV = False + + when(exp(exp.getWidth-3, 3 bits) >= 5) { output.exponent(p.internalExponentSize-2, 2 bits) := 3 } + + when(forceNan) { + output.setNanQuiet + NV setWhen(infinitynan || input.rs1.isNanSignaling || input.rs2.isNanSignaling) + } elsewhen(forceOverflow) { + output.setInfinity + } elsewhen(forceZero) { + output.setZero + } elsewhen(forceUnderflow) { + output.exponent := underflowExp.resized + } + } + + val result = new Area { + def input = norm.input + def NV = norm.NV + + val notMul = new Area { + val output = Flow(UInt(p.internalMantissaSize + 1 bits)) + output.valid := input.valid && input.divSqrt + output.payload := input.mulC(p.internalMantissaSize, p.internalMantissaSize + 1 bits) + } + + val output = Stream(new MergeInput()) + output.valid := input.valid && !input.add && !input.divSqrt + output.source := input.source + output.rd := input.rd + if (p.withDouble) output.format := input.format + output.roundMode := input.roundMode + output.scrap := norm.scrap + output.value := norm.output + output.NV := NV + output.DZ := False + + val mulToAdd = Stream(AddInput()) + decode.mulToAdd << mulToAdd.stage() + + mulToAdd.valid := input.valid && input.add + mulToAdd.source := input.source + mulToAdd.rs1.mantissa := norm.output.mantissa @@ norm.scrap //FMA Precision lost + mulToAdd.rs1.exponent := norm.output.exponent + mulToAdd.rs1.sign := norm.output.sign + mulToAdd.rs1.special := norm.output.special + mulToAdd.rs2 := input.rs3 + mulToAdd.rs2.mantissa.removeAssignments() := input.rs3.mantissa << addExtraBits + mulToAdd.rd := input.rd + mulToAdd.roundMode := input.roundMode + mulToAdd.needCommit := False + if (p.withDouble) 
mulToAdd.format := input.format + + when(NV){ + mulToAdd.rs1.mantissa.msb := False + } + + input.ready := (input.add ? mulToAdd.ready | output.ready) || input.divSqrt + } + } + + + val div = p.withDiv generate new Area{ + val input = decode.div.halfPipe() + val haltIt = True + val isCommited = RegNext(commitConsume(_.div, input.source, input.fire)) + val output = input.haltWhen(haltIt || !isCommited).swapPayload(new MergeInput()) + + val dividerShift = if(p.withDouble) 0 else 1 + val divider = FpuDiv(p.internalMantissaSize + dividerShift) + divider.io.input.a := input.rs1.mantissa << dividerShift + divider.io.input.b := input.rs2.mantissa << dividerShift + val dividerResult = divider.io.output.result >> dividerShift + val dividerScrap = divider.io.output.remain =/= 0 || divider.io.output.result(0, dividerShift bits) =/= 0 + + val cmdSent = RegInit(False) setWhen(divider.io.input.fire) clearWhen(!haltIt) + divider.io.input.valid := input.valid && !cmdSent + divider.io.output.ready := input.ready + output.payload.assignSomeByName(input.payload) + + val needShift = !dividerResult.msb + val mantissa = needShift ? dividerResult(0, p.internalMantissaSize + 1 bits) | dividerResult(1, p.internalMantissaSize + 1 bits) + val scrap = dividerScrap || !needShift && dividerResult(0) + val exponentOffset = 1 << (p.internalExponentSize + 1) + val exponent = input.rs1.exponent + U(exponentOffset | exponentOne) - input.rs2.exponent - U(needShift) + + output.value.setNormal + output.value.sign := input.rs1.sign ^ input.rs2.sign + output.value.exponent := exponent.resized + output.value.mantissa := mantissa + output.scrap := scrap + when(exponent.takeHigh(2) === 3){ output.value.exponent(p.internalExponentSize-3, 3 bits) := 7} //Handle overflow + + + + val underflowThreshold = muxDouble[UInt](input.format)(exponentOne + exponentOffset - 1023 - 53) (exponentOne + exponentOffset - 127 - 24) + val underflowExp = muxDouble[UInt](input.format)(exponentOne + exponentOffset - 1023 - 54) (exponentOne + exponentOffset - 127 - 25) + val forceUnderflow = exponent < underflowThreshold + val forceOverflow = input.rs1.isInfinity || input.rs2.isZero + val infinitynan = input.rs1.isZero && input.rs2.isZero || input.rs1.isInfinity && input.rs2.isInfinity + val forceNan = input.rs1.isNan || input.rs2.isNan || infinitynan + val forceZero = input.rs1.isZero || input.rs2.isInfinity + + + + output.NV := False + output.DZ := !forceNan && !input.rs1.isInfinity && input.rs2.isZero + + when(exponent(exponent.getWidth-3, 3 bits) === 7) { output.value.exponent(p.internalExponentSize-2, 2 bits) := 3 } + + when(forceNan) { + output.value.setNanQuiet + output.NV setWhen((infinitynan || input.rs1.isNanSignaling || input.rs2.isNanSignaling)) + } elsewhen(forceOverflow) { + output.value.setInfinity + } elsewhen(forceZero) { + output.value.setZero + } elsewhen(forceUnderflow) { + output.value.exponent := underflowExp.resized + } + + + haltIt clearWhen(divider.io.output.valid) + } + + + + val sqrt = p.withSqrt generate new Area{ + val input = decode.sqrt.halfPipe() + val haltIt = True + val isCommited = RegNext(commitConsume(_.sqrt, input.source, input.fire)) + val output = input.haltWhen(haltIt || !isCommited).swapPayload(new MergeInput()) + + val needShift = !input.rs1.exponent.lsb + val sqrt = FpuSqrt(p.internalMantissaSize) + sqrt.io.input.a := (needShift ? 
(U"1" @@ input.rs1.mantissa @@ U"0") | (U"01" @@ input.rs1.mantissa)) + + val cmdSent = RegInit(False) setWhen(sqrt.io.input.fire) clearWhen(!haltIt) + sqrt.io.input.valid := input.valid && !cmdSent + sqrt.io.output.ready := input.ready + output.payload.assignSomeByName(input.payload) + + + val scrap = sqrt.io.output.remain =/= 0 + val exponent = RegNext(exponentOne-exponentOne/2 -1 +^ (input.rs1.exponent >> 1) + U(input.rs1.exponent.lsb)) + + output.value.setNormal + output.value.sign := input.rs1.sign + output.value.exponent := exponent + output.value.mantissa := sqrt.io.output.result + output.scrap := scrap + output.NV := False + output.DZ := False + + val negative = !input.rs1.isNan && !input.rs1.isZero && input.rs1.sign + + when(input.rs1.isInfinity){ + output.value.setInfinity + } + when(negative){ + output.value.setNanQuiet + output.NV := True + } + when(input.rs1.isNan){ + output.value.setNanQuiet + output.NV := !input.rs1.isQuiet + } + when(input.rs1.isZero){ + output.value.setZero + } + + +// val underflowThreshold = muxDouble[UInt](input.format)(exponentOne + exponentOffset - 1023 - 53) (exponentOne + exponentOffset - 127 - 24) +// val underflowExp = muxDouble[UInt](input.format)(exponentOne + exponentOffset - 1023 - 54) (exponentOne + exponentOffset - 127 - 25) +// val forceUnderflow = exponent < underflowThreshold +// val forceOverflow = input.rs1.isInfinity// || input.rs2.isInfinity +// val infinitynan = input.rs1.isZero && input.rs2.isZero +// val forceNan = input.rs1.isNan || input.rs2.isNan || infinitynan +// val forceZero = input.rs1.isZero +// +// +// +// output.NV := False +// output.DZ := !forceNan && input.rs2.isZero +// +// when(exponent(exponent.getWidth-3, 3 bits) === 7) { output.value.exponent(p.internalExponentSize-2, 2 bits) := 3 } +// +// when(forceNan) { +// output.value.setNanQuiet +// output.NV setWhen((infinitynan || input.rs1.isNanSignaling || input.rs2.isNanSignaling)) +// } elsewhen(forceOverflow) { +// output.value.setInfinity +// } elsewhen(forceZero) { +// output.value.setZero +// } elsewhen(forceUnderflow) { +// output.value.exponent := underflowExp.resized +// } + + + haltIt clearWhen(sqrt.io.output.valid) + } + + //divSqrt isn't realy used anymore + val divSqrt = p.withDivSqrt generate new Area { + val input = decode.divSqrt.halfPipe() + assert(false, "Need to implement commit tracking") + val aproxWidth = 8 + val aproxDepth = 64 + val divIterationCount = 3 + val sqrtIterationCount = 3 + + val mulWidth = p.internalMantissaSize + 1 + + import FpuDivSqrtIterationState._ + val state = RegInit(FpuDivSqrtIterationState.IDLE()) + val iteration = Reg(UInt(log2Up(divIterationCount max sqrtIterationCount) bits)) + + decode.divSqrtToMul.valid := False + decode.divSqrtToMul.source := input.source + decode.divSqrtToMul.rs1.assignDontCare() + decode.divSqrtToMul.rs2.assignDontCare() + decode.divSqrtToMul.rs3.assignDontCare() + decode.divSqrtToMul.rd := input.rd + decode.divSqrtToMul.add := False + decode.divSqrtToMul.divSqrt := True + decode.divSqrtToMul.msb1 := True + decode.divSqrtToMul.msb2 := True + decode.divSqrtToMul.rs1.special := False //TODO + decode.divSqrtToMul.rs2.special := False + decode.divSqrtToMul.roundMode := input.roundMode + if(p.withDouble) decode.divSqrtToMul.format := input.format + + + val aprox = new Area { + val rom = Mem(UInt(aproxWidth bits), aproxDepth * 2) + val divTable, sqrtTable = ArrayBuffer[Double]() + for(i <- 0 until aproxDepth){ + val value = 1+(i+0.5)/aproxDepth + divTable += 1/value + } + for(i <- 0 until aproxDepth){ + 
val scale = if(i < aproxDepth/2) 2 else 1 + val value = scale+(scale*(i%(aproxDepth/2)+0.5)/aproxDepth*2) +// println(s"$i => $value" ) + sqrtTable += 1/Math.sqrt(value) + } + val romElaboration = (sqrtTable ++ divTable).map(v => BigInt(((v-0.5)*2*(1 << aproxWidth)).round)) + + rom.initBigInt(romElaboration) + val div = input.rs2.mantissa.takeHigh(log2Up(aproxDepth)) + val sqrt = U(input.rs1.exponent.lsb ## input.rs1.mantissa).takeHigh(log2Up(aproxDepth)) + val address = U(input.div ## (input.div ? div | sqrt)) + val raw = rom.readAsync(address) + val result = U"01" @@ (raw << (mulWidth-aproxWidth-2)) + } + + val divExp = new Area{ + val value = (1 << p.internalExponentSize) - 3 - input.rs2.exponent + } + val sqrtExp = new Area{ + val value = ((1 << p.internalExponentSize-1) + (1 << p.internalExponentSize-2) - 2 -1) - (input.rs1.exponent >> 1) + U(!input.rs1.exponent.lsb) + } + + def mulArg(rs1 : UInt, rs2 : UInt): Unit ={ + decode.divSqrtToMul.rs1.mantissa := rs1.resized + decode.divSqrtToMul.rs2.mantissa := rs2.resized + decode.divSqrtToMul.msb1 := rs1.msb + decode.divSqrtToMul.msb2 := rs2.msb + } + + val mulBuffer = mul.result.notMul.output.toStream.stage + mulBuffer.ready := False + + val iterationValue = Reg(UInt(mulWidth bits)) + + input.ready := False + switch(state){ + is(IDLE){ + iterationValue := aprox.result + iteration := 0 + when(input.valid) { + state := YY + } + } + is(YY){ + decode.divSqrtToMul.valid := True + mulArg(iterationValue, iterationValue) + when(decode.divSqrtToMul.ready) { + state := XYY + } + } + is(XYY){ + decode.divSqrtToMul.valid := mulBuffer.valid + val sqrtIn = !input.rs1.exponent.lsb ? (U"1" @@ input.rs1.mantissa) | ((U"1" @@ input.rs1.mantissa) |>> 1) + val divIn = U"1" @@ input.rs2.mantissa + mulArg(input.div ? divIn| sqrtIn, mulBuffer.payload) + when(mulBuffer.valid && decode.divSqrtToMul.ready) { + state := (input.div ? 
Y2_XYY | _15_XYY2) + mulBuffer.ready := True + } + } + is(Y2_XYY){ + mulBuffer.ready := True + when(mulBuffer.valid) { + iterationValue := ((iterationValue << 1) - mulBuffer.payload).resized + mulBuffer.ready := True + iteration := iteration + 1 + when(iteration =/= divIterationCount-1){ //TODO + state := YY + } otherwise { + state := DIV + } + } + } + is(DIV){ + decode.divSqrtToMul.valid := True + decode.divSqrtToMul.divSqrt := False + decode.divSqrtToMul.rs1 := input.rs1 + decode.divSqrtToMul.rs2.sign := input.rs2.sign + decode.divSqrtToMul.rs2.exponent := divExp.value + iterationValue.msb.asUInt + decode.divSqrtToMul.rs2.mantissa := (iterationValue << 1).resized + val zero = input.rs2.isInfinity + val overflow = input.rs2.isZero + val nan = input.rs2.isNan || (input.rs1.isZero && input.rs2.isZero) + + when(nan){ + decode.divSqrtToMul.rs2.setNanQuiet + } elsewhen(overflow) { + decode.divSqrtToMul.rs2.setInfinity + } elsewhen(zero) { + decode.divSqrtToMul.rs2.setZero + } + when(decode.divSqrtToMul.ready) { + state := IDLE + input.ready := True + } + } + is(_15_XYY2){ + when(mulBuffer.valid) { + state := Y_15_XYY2 + mulBuffer.payload.getDrivingReg := (U"11" << mulWidth-2) - (mulBuffer.payload) + } + } + is(Y_15_XYY2){ + decode.divSqrtToMul.valid := True + mulArg(iterationValue, mulBuffer.payload) + when(decode.divSqrtToMul.ready) { + mulBuffer.ready := True + state := Y_15_XYY2_RESULT + } + } + is(Y_15_XYY2_RESULT){ + iterationValue := mulBuffer.payload + mulBuffer.ready := True + when(mulBuffer.valid) { + iteration := iteration + 1 + when(iteration =/= sqrtIterationCount-1){ + state := YY + } otherwise { + state := SQRT + } + } + } + is(SQRT){ + decode.divSqrtToMul.valid := True + decode.divSqrtToMul.divSqrt := False + decode.divSqrtToMul.rs1 := input.rs1 + decode.divSqrtToMul.rs2.sign := False + decode.divSqrtToMul.rs2.exponent := sqrtExp.value + iterationValue.msb.asUInt + decode.divSqrtToMul.rs2.mantissa := (iterationValue << 1).resized + + val nan = input.rs1.sign && !input.rs1.isZero + + when(nan){ + decode.divSqrtToMul.rs2.setNanQuiet + } + + when(decode.divSqrtToMul.ready) { + state := IDLE + input.ready := True + } + } + } + } + + val add = p.withAdd generate new Area{ + + + class PreShifterOutput extends AddInput{ + val absRs1Bigger = Bool() + val rs1ExponentBigger = Bool() + } + + val preShifter = new Area{ + val input = decode.add.combStage() + val output = input.swapPayload(new PreShifterOutput) + + val exp21 = input.rs2.exponent -^ input.rs1.exponent + val rs1ExponentBigger = (exp21.msb || input.rs2.isZero) && !input.rs1.isZero + val rs1ExponentEqual = input.rs1.exponent === input.rs2.exponent + val rs1MantissaBigger = input.rs1.mantissa > input.rs2.mantissa + val absRs1Bigger = ((rs1ExponentBigger || rs1ExponentEqual && rs1MantissaBigger) && !input.rs1.isZero || input.rs1.isInfinity) && !input.rs2.isInfinity + + output.payload.assignSomeByName(input.payload) + output.absRs1Bigger := absRs1Bigger + output.rs1ExponentBigger := rs1ExponentBigger + } + + class ShifterOutput extends AddInput{ + val xSign, ySign = Bool() + val xMantissa, yMantissa = UInt(p.internalMantissaSize+1+addExtraBits bits) + val xyExponent = UInt(p.internalExponentSize bits) + val xySign = Bool() + val roundingScrap = Bool() + } + + val shifter = new Area { + val input = preShifter.output.stage() + val output = input.swapPayload(new ShifterOutput) + output.payload.assignSomeByName(input.payload) + + val exp21 = input.rs2.exponent -^ input.rs1.exponent + val shiftBy = exp21.asSInt.abs//rs1ExponentBigger ? 
(0-exp21) | exp21 + val shiftOverflow = (shiftBy >= p.internalMantissaSize+1+addExtraBits) + val passThrough = shiftOverflow || (input.rs1.isZero) || (input.rs2.isZero) + + def absRs1Bigger = input.absRs1Bigger + def rs1ExponentBigger = input.rs1ExponentBigger + + //Note that rs1ExponentBigger can be replaced by absRs1Bigger bellow to avoid xsigned two complement in math block at expense of combinatorial path + val xySign = absRs1Bigger ? input.rs1.sign | input.rs2.sign + output.xSign := xySign ^ (rs1ExponentBigger ? input.rs1.sign | input.rs2.sign) + output.ySign := xySign ^ (rs1ExponentBigger ? input.rs2.sign | input.rs1.sign) + val xMantissa = U"1" @@ (rs1ExponentBigger ? input.rs1.mantissa | input.rs2.mantissa) + val yMantissaUnshifted = U"1" @@ (rs1ExponentBigger ? input.rs2.mantissa | input.rs1.mantissa) + var yMantissa = CombInit(yMantissaUnshifted) + val roundingScrap = False + for(i <- log2Up(p.internalMantissaSize) - 1 downto 0){ + roundingScrap setWhen(shiftBy(i) && yMantissa(0, 1 << i bits) =/= 0) + yMantissa \= shiftBy(i) ? (yMantissa |>> (BigInt(1) << i)) | yMantissa + } + when(passThrough) { yMantissa := 0 } + when(shiftOverflow) { roundingScrap := True } + when(input.rs1.special || input.rs2.special){ roundingScrap := False } + output.xyExponent := rs1ExponentBigger ? input.rs1.exponent | input.rs2.exponent + output.xMantissa := xMantissa + output.yMantissa := yMantissa + output.xySign := xySign + output.roundingScrap := roundingScrap + } + + class MathOutput extends ShifterOutput{ + val xyMantissa = UInt(p.internalMantissaSize+1+addExtraBits+1 bits) + } + + val math = new Area { + val input = shifter.output.stage() + val output = input.swapPayload(new MathOutput) + output.payload.assignSomeByName(input.payload) + import input.payload._ + + val xSigned = xMantissa.twoComplement(xSign) //TODO Is that necessary ? 
+ val ySigned = ((ySign ## Mux(ySign, ~yMantissa, yMantissa)).asUInt + (ySign && !roundingScrap).asUInt).asSInt //rounding here + output.xyMantissa := U(xSigned +^ ySigned).trim(1 bits) + + } + + class OhOutput extends MathOutput{ + val shift = UInt(log2Up(p.internalMantissaSize+1+addExtraBits+1) bits) + } + + val oh = new Area { + val input = math.output.stage() + val isCommited = commitConsume(_.add, input.source, input.fire && input.needCommit) + val output = input.haltWhen(input.needCommit && !isCommited).swapPayload(new OhOutput) + output.payload.assignSomeByName(input.payload) + import input.payload._ + + val shiftOh = OHMasking.first(output.xyMantissa.asBools.reverse) //The OhMasking.first can be processed in parallel to the xyMantissa carry chaine +// output.shiftOh := shiftOh + + val shift = OHToUInt(shiftOh) + output.shift := shift + } + + + class NormOutput extends AddInput{ + val mantissa = UInt(p.internalMantissaSize+1+addExtraBits+1 bits) + val exponent = UInt(p.internalExponentSize+1 bits) + val infinityNan, forceNan, forceZero, forceInfinity = Bool() + val xySign, roundingScrap = Bool() + val xyMantissaZero = Bool() + } + + val norm = new Area{ + val input = oh.output.stage() + val output = input.swapPayload(new NormOutput) + output.payload.assignSomeByName(input.payload) + import input.payload._ + + output.mantissa := (xyMantissa |<< shift) + output.exponent := xyExponent -^ shift + 1 + output.forceInfinity := (input.rs1.isInfinity || input.rs2.isInfinity) + output.forceZero := xyMantissa === 0 || (input.rs1.isZero && input.rs2.isZero) + output.infinityNan := (input.rs1.isInfinity && input.rs2.isInfinity && (input.rs1.sign ^ input.rs2.sign)) + output.forceNan := input.rs1.isNan || input.rs2.isNan || output.infinityNan + output.xyMantissaZero := xyMantissa === 0 + } + + val result = new Area { + val input = norm.output.pipelined() + val output = input.swapPayload(new MergeInput()) + import input.payload._ + + output.source := input.source + output.rd := input.rd + output.value.sign := xySign + output.value.mantissa := (mantissa >> addExtraBits).resized + output.value.exponent := exponent.resized + output.value.special := False + output.roundMode := input.roundMode + if (p.withDouble) output.format := input.format + output.scrap := (mantissa(1) | mantissa(0) | roundingScrap) + + output.NV := infinityNan || input.rs1.isNanSignaling || input.rs2.isNanSignaling + output.DZ := False + when(forceNan) { + output.value.setNanQuiet + } elsewhen (forceInfinity) { + output.value.setInfinity + } elsewhen (forceZero) { + output.value.setZero + when(xyMantissaZero || input.rs1.isZero && input.rs2.isZero) { + output.value.sign := input.rs1.sign && input.rs2.sign + } + when((input.rs1.sign || input.rs2.sign) && input.roundMode === FpuRoundMode.RDN) { + output.value.sign := True + } + } + } + } + + + val merge = new Area { + val inputs = ArrayBuffer[Stream[MergeInput]]() + inputs += load.s1.output.stage() + if(p.withSqrt) (inputs += sqrt.output) + if(p.withDiv) (inputs += div.output) + if(p.withAdd) (inputs += add.result.output) + if(p.withMul) (inputs += mul.result.output) + if(p.withShortPipMisc) (inputs += shortPip.output.pipelined(m2s = true)) + val arbitrated = StreamArbiterFactory.lowerFirst.noLock.on(inputs).toFlow + } + + class RoundFront extends MergeInput{ + val mantissaIncrement = Bool() + val roundAdjusted = Bits(2 bits) + val exactMask = UInt(p.internalMantissaSize + 2 bits) + } + + val roundFront = new Area { + val input = merge.arbitrated.stage() + val output = 
input.swapPayload(new RoundFront()) + output.payload.assignSomeByName(input.payload) + + val manAggregate = input.value.mantissa @@ input.scrap + val expBase = muxDouble[UInt](input.format)(exponentF64Subnormal + 1)(exponentF32Subnormal + 1) + val expDif = expBase -^ input.value.exponent + val expSubnormal = !expDif.msb + var discardCount = (expSubnormal ? expDif.resize(log2Up(p.internalMantissaSize) bits) | U(0)) + if (p.withDouble) when(input.format === FpuFormat.FLOAT) { + discardCount \= discardCount + 29 + } + val exactMask = (List(True) ++ (0 until p.internalMantissaSize + 1).map(_ < discardCount)).asBits.asUInt + val roundAdjusted = (True ## (manAggregate >> 1)) (discardCount) ## ((manAggregate & exactMask) =/= 0) + + val mantissaIncrement = !input.value.special && input.roundMode.mux( + FpuRoundMode.RNE -> (roundAdjusted(1) && (roundAdjusted(0) || (U"01" ## (manAggregate >> 2)) (discardCount))), + FpuRoundMode.RTZ -> False, + FpuRoundMode.RDN -> (roundAdjusted =/= 0 && input.value.sign), + FpuRoundMode.RUP -> (roundAdjusted =/= 0 && !input.value.sign), + FpuRoundMode.RMM -> (roundAdjusted(1)) + ) + + output.mantissaIncrement := mantissaIncrement + output.roundAdjusted := roundAdjusted + output.exactMask := exactMask + } + + val roundBack = new Area{ + val input = roundFront.output.stage() + val output = input.swapPayload(RoundOutput()) + import input.payload._ + + val math = p.internalFloating() + val mantissaRange = p.internalMantissaSize downto 1 + val adderMantissa = input.value.mantissa(mantissaRange) & (mantissaIncrement ? ~(exactMask.trim(1) >> 1) | input.value.mantissa(mantissaRange).maxValue) + val adderRightOp = (mantissaIncrement ? (exactMask >> 1)| U(0)).resize(p.internalMantissaSize bits) + val adder = KeepAttribute(KeepAttribute(input.value.exponent @@ adderMantissa) + KeepAttribute(adderRightOp) + KeepAttribute(U(mantissaIncrement))) + math.special := input.value.special + math.sign := input.value.sign + math.exponent := adder(p.internalMantissaSize, p.internalExponentSize bits) + math.mantissa := adder(0, p.internalMantissaSize bits) + + val patched = CombInit(math) + val nx,of,uf = False + + val ufSubnormalThreshold = muxDouble[UInt](input.format)(exponentF64Subnormal)(exponentF32Subnormal) + val ufThreshold = muxDouble[UInt](input.format)(exponentF64Subnormal-52+1)(exponentF32Subnormal-23+1) + val ofThreshold = muxDouble[UInt](input.format)(exponentF64Infinity-1)(exponentF32Infinity-1) + + //catch exact 1.17549435E-38 underflow, but, who realy care ? +// val borringCase = input.value.exponent === ufSubnormalThreshold && roundAdjusted.asUInt < U"11" +// when(!math.special && (math.exponent <= ufSubnormalThreshold || borringCase) && roundAdjusted.asUInt =/= 0){ +// uf := True +// } + val threshold = input.roundMode.mux( + FpuRoundMode.RNE -> U"110", + FpuRoundMode.RTZ -> U"110", + FpuRoundMode.RDN -> (input.value.sign ? U"101" | U"111"), + FpuRoundMode.RUP -> (input.value.sign ? 
U"111" | U"101"), + FpuRoundMode.RMM -> U"110" + ) + val borringRound = (input.value.mantissa(1 downto 0) ## input.scrap) + if(p.withDouble) when(input.format === FpuFormat.FLOAT) { borringRound := (input.value.mantissa(30 downto 29) ## input.value.mantissa(28 downto 0).orR)} + + val borringCase = input.value.exponent === ufSubnormalThreshold && borringRound.asUInt < threshold + when(!math.special && (math.exponent <= ufSubnormalThreshold || borringCase) && roundAdjusted.asUInt =/= 0){ + uf := True + } + when(!math.special && math.exponent > ofThreshold){ + nx := True + of := True + val doMax = input.roundMode.mux( + FpuRoundMode.RNE -> (False), + FpuRoundMode.RTZ -> (True), + FpuRoundMode.RDN -> (!math.sign), + FpuRoundMode.RUP -> (math.sign), + FpuRoundMode.RMM -> (False) + ) + when(doMax){ + patched.exponent := ofThreshold + patched.mantissa.setAll() + } otherwise { + patched.setInfinity + } + } + + + when(!math.special && math.exponent < ufThreshold){ + nx := True + uf := True + val doMin = input.roundMode.mux( + FpuRoundMode.RNE -> (False), + FpuRoundMode.RTZ -> (False), + FpuRoundMode.RDN -> (math.sign), + FpuRoundMode.RUP -> (!math.sign), + FpuRoundMode.RMM -> (False) + ) + when(doMin){ + patched.exponent := ufThreshold.resized + patched.mantissa := 0 + } otherwise { + patched.setZero + } + } + + + nx setWhen(!input.value.special && (roundAdjusted =/= 0)) + val writes = rf.scoreboards.map(_.writes.readAsync(input.rd)) + val write = writes.toList.read(input.source) + output.NX := nx & write + output.OF := of & write + output.UF := uf & write + output.NV := input.NV & write + output.DZ := input.DZ & write + output.source := input.source + output.rd := input.rd + output.write := write + if(p.withDouble) output.format := input.format + output.value := patched + } + + val writeback = new Area{ + val input = roundBack.output.stage() + + for(i <- 0 until portCount){ + val c = io.port(i).completion + c.valid := input.fire && input.source === i + c.flags.NX := input.NX + c.flags.OF := input.OF + c.flags.UF := input.UF + c.flags.NV := input.NV + c.flags.DZ := input.DZ + c.written := input.write + } + + when(input.valid){ + for(i <- 0 until portCount) { + val port = rf.scoreboards(i).hitWrite + port.valid setWhen(input.source === i) + port.address := input.rd + port.data := !rf.scoreboards(i).hit(input.rd) //TODO improve + } + } + + val port = rf.ram.writePort + port.valid := input.valid && input.write + port.address := input.source @@ input.rd + port.data.value := input.value + if(p.withDouble) port.data.boxed := input.format === FpuFormat.FLOAT + + val randomSim = p.sim generate (in UInt(p.internalMantissaSize bits)) + if(p.sim) when(port.data.value.isZero || port.data.value.isInfinity){ + port.data.value.mantissa := randomSim + } + if(p.sim) when(input.value.special){ + port.data.value.exponent(p.internalExponentSize-1 downto 3) := randomSim.resized + when(!input.value.isNan){ + port.data.value.exponent(2 downto 2) := randomSim.resized + } + } + + when(port.valid){ + assert(!(port.data.value.exponent === 0 && !port.data.value.special), "Special violation") + assert(!(port.data.value.exponent === port.data.value.exponent.maxValue && !port.data.value.special), "Special violation") + } + } +} + + + + +object FpuSynthesisBench extends App{ + val payloadType = HardType(Bits(8 bits)) + class Fpu(name : String, portCount : Int, p : FpuParameter) extends Rtl{ + override def getName(): String = "Fpu_" + name + override def getRtlPath(): String = getName() + ".v" + SpinalVerilog(new 
FpuCore(portCount, p){ + + setDefinitionName(Fpu.this.getName()) + }) + } + + class Shifter(width : Int) extends Rtl{ + override def getName(): String = "shifter_" + width + override def getRtlPath(): String = getName() + ".v" + SpinalVerilog(new Component{ + val a = in UInt(width bits) + val sel = in UInt(log2Up(width) bits) + val result = out(a >> sel) + setDefinitionName(Shifter.this.getName()) + }) + } + + class Rotate(width : Int) extends Rtl{ + override def getName(): String = "rotate_" + width + override def getRtlPath(): String = getName() + ".v" + SpinalVerilog(new Component{ + val a = in UInt(width bits) + val sel = in UInt(log2Up(width) bits) + val result = out(Delay(Delay(a,3).rotateLeft(Delay(sel,3)),3)) + setDefinitionName(Rotate.this.getName()) + }) + } + +// rotate2_24 -> +// Artix 7 -> 233 Mhz 96 LUT 167 FF +// Artix 7 -> 420 Mhz 86 LUT 229 FF +// rotate2_32 -> +// Artix 7 -> 222 Mhz 108 LUT 238 FF +// Artix 7 -> 399 Mhz 110 LUT 300 FF +// rotate2_52 -> +// Artix 7 -> 195 Mhz 230 LUT 362 FF +// Artix 7 -> 366 Mhz 225 LUT 486 FF +// rotate2_64 -> +// Artix 7 -> 182 Mhz 257 LUT 465 FF +// Artix 7 -> 359 Mhz 266 LUT 591 FF + class Rotate2(width : Int) extends Rtl{ + override def getName(): String = "rotate2_" + width + override def getRtlPath(): String = getName() + ".v" + SpinalVerilog(new Component{ + val a = in UInt(width bits) + val sel = in UInt(log2Up(width) bits) + val result = out(Delay((U(0, width bits) @@ Delay(a,3)).rotateLeft(Delay(sel,3)),3)) + setDefinitionName(Rotate2.this.getName()) + }) + } + + class Rotate3(width : Int) extends Rtl{ + override def getName(): String = "rotate3_" + width + override def getRtlPath(): String = getName() + ".v" + SpinalVerilog(new Component{ + val a = Delay(in UInt(width bits), 3) + val sel = Delay(in UInt(log2Up(width) bits),3) + // val result = + // val output = Delay(result, 3) + setDefinitionName(Rotate3.this.getName()) + }) + } + + class Div(width : Int) extends Rtl{ + override def getName(): String = "div_" + width + override def getRtlPath(): String = getName() + ".v" + SpinalVerilog(new UnsignedDivider(width,width, false).setDefinitionName(Div.this.getName())) + } + + class Add(width : Int) extends Rtl{ + override def getName(): String = "add_" + width + override def getRtlPath(): String = getName() + ".v" + SpinalVerilog(new Component{ + val a, b = in UInt(width bits) + val result = out(a + b) + setDefinitionName(Add.this.getName()) + }) + } + + class DivSqrtRtl(width : Int) extends Rtl{ + override def getName(): String = "DivSqrt_" + width + override def getRtlPath(): String = getName() + ".v" + SpinalVerilog(new FpuDiv(width).setDefinitionName(DivSqrtRtl.this.getName())) + } + + val rtls = ArrayBuffer[Rtl]() + rtls += new Fpu( + "32", + portCount = 1, + FpuParameter( +// withDivSqrt = false, + withDouble = false + ) + ) + rtls += new Fpu( + "64", + portCount = 1, + FpuParameter( +// withDivSqrt = false, + withDouble = true + ) + ) + +// rtls += new Div(52) +// rtls += new Div(23) +// rtls += new Add(64) +// rtls += new DivSqrtRtl(52) +// rtls += new DivSqrtRtl(23) + + // rtls += new Shifter(24) +// rtls += new Shifter(32) +// rtls += new Shifter(52) +// rtls += new Shifter(64) +// rtls += new Rotate(24) +// rtls += new Rotate(32) +// rtls += new Rotate(52) +// rtls += new Rotate(64) +// rtls += new Rotate3(24) +// rtls += new Rotate3(32) +// rtls += new Rotate3(52) +// rtls += new Rotate3(64) + + val targets = XilinxStdTargets()// ++ AlteraStdTargets() + + + Bench(rtls, targets) +} + +//Fpu_32 -> +//Artix 7 -> 136 Mhz 
1471 LUT 1336 FF +//Artix 7 -> 196 Mhz 1687 LUT 1371 FF +//Fpu_64 -> +//Artix 7 -> 105 Mhz 2822 LUT 2132 FF +//Artix 7 -> 161 Mhz 3114 LUT 2272 FF +// +// +// +//Fpu_32 -> +//Artix 7 -> 128 Mhz 1693 LUT 1481 FF +//Artix 7 -> 203 Mhz 1895 LUT 1481 FF +//Fpu_64 -> +//Artix 7 -> 99 Mhz 3073 LUT 2396 FF +//Artix 7 -> 164 Mhz 3433 LUT 2432 FF + + +//Fpu_32 -> +//Artix 7 -> 112 Mhz 1790 LUT 1666 FF +//Artix 7 -> 158 Mhz 1989 LUT 1701 FF +//Fpu_64 -> +//Artix 7 -> 100 Mhz 3294 LUT 2763 FF +//Artix 7 -> 151 Mhz 3708 LUT 2904 FF + +//Fpu_32 -> +//Artix 7 -> 139 Mhz 1879 LUT 1713 FF +//Artix 7 -> 206 Mhz 2135 LUT 1723 FF +//Fpu_64 -> +//Artix 7 -> 106 Mhz 3502 LUT 2811 FF +//Artix 7 -> 163 Mhz 3905 LUT 2951 FF + +//Fpu_32 -> +//Artix 7 -> 130 Mhz 1889 LUT 1835 FF +//Artix 7 -> 210 Mhz 2131 LUT 1845 FF +//Fpu_64 -> +//Artix 7 -> 106 Mhz 3322 LUT 3023 FF +//Artix 7 -> 161 Mhz 3675 LUT 3163 FF + +//Fpu_32 -> +//Artix 7 -> 132 Mhz 1891 LUT 1837 FF +//Artix 7 -> 209 Mhz 2132 LUT 1847 FF +//Fpu_64 -> +//Artix 7 -> 105 Mhz 3348 LUT 3024 FF +//Artix 7 -> 162 Mhz 3712 LUT 3165 FF + +//Fpu_32 -> +//Artix 7 -> 128 Mhz 1796 LUT 1727 FF +//Artix 7 -> 208 Mhz 2049 LUT 1727 FF +//Fpu_64 -> +//Artix 7 -> 109 Mhz 3417 LUT 2913 FF +//Artix 7 -> 168 Mhz 3844 LUT 3053 FF + +/* +testfloat -tininessafter -all1 > all1.txt +cat all1.txt | grep "Errors found in" + +testfloat -tininessafter -all2 > all2.txt +cat all2.txt | grep "Errors found in" + +testfloat -tininessafter -f32_mulAdd > fma.txt + +testfloat -tininessafter -all2 -level 2 -checkall > all2.txt + + + +all1 => +Errors found in f32_to_ui64_rx_minMag: +Errors found in f32_to_i64_rx_minMag: +Errors found in f64_to_ui64_rx_minMag: +Errors found in f64_to_i64_rx_minMag: + +all2 => + + +Errors found in f32_mulAdd, rounding min: ++00.7FFFFF +67.000001 -01.000000 + => -01.000000 ...ux expected -01.000000 ....x ++67.000001 +00.7FFFFF -01.000000 + => -01.000000 ...ux expected -01.000000 ....x +-00.7FFFFF -67.000001 -01.000000 + => -01.000000 ...ux expected -01.000000 ....x +-67.000001 -00.7FFFFF -01.000000 + => -01.000000 ...ux expected -01.000000 ....x +Errors found in f32_mulAdd, rounding max: ++00.7FFFFF -67.000001 +01.000000 + => +01.000000 ...ux expected +01.000000 ....x ++67.000001 -00.7FFFFF +01.000000 + => +01.000000 ...ux expected +01.000000 ....x ++66.7FFFFE -01.000001 +01.000000 + => +01.000000 ...ux expected +01.000000 ....x +-00.7FFFFF +67.000001 +01.000000 + => +01.000000 ...ux expected +01.000000 ....x +-67.000001 +00.7FFFFF +01.000000 + => +01.000000 ...ux expected +01.000000 ....x + + + + */
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/ip/fpu/FpuDiv.scala b/VexRiscv/src/main/scala/vexriscv/ip/fpu/FpuDiv.scala new file mode 100644 index 0000000..7c9e713 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/ip/fpu/FpuDiv.scala @@ -0,0 +1,140 @@ +package vexriscv.ip.fpu + + +import spinal.core._ +import spinal.lib.math.{UnsignedDividerCmd, UnsignedDividerRsp} +import spinal.lib._ +import spinal.lib.sim.{StreamDriver, StreamMonitor, StreamReadyRandomizer} + +import scala.collection.mutable +import scala.util.Random + +case class FpuDivCmd(mantissaWidth : Int) extends Bundle{ + val a,b = UInt(mantissaWidth bits) +} + +case class FpuDivRsp(mantissaWidth : Int) extends Bundle{ + val result = UInt(mantissaWidth+1 + 2 bits) + val remain = UInt(mantissaWidth+1 bits) +} + +case class FpuDiv(val mantissaWidth : Int) extends Component { + assert(mantissaWidth % 2 == 0) + val io = new Bundle{ + val input = slave Stream(FpuDivCmd(mantissaWidth)) + val output = master Stream(FpuDivRsp(mantissaWidth)) + } + + val iterations = (mantissaWidth+2+2)/2 + val counter = Reg(UInt(log2Up(iterations) bits)) + val busy = RegInit(False) clearWhen(io.output.fire) + val done = RegInit(False) setWhen(busy && counter === iterations-1) clearWhen(io.output.fire) + + val shifter = Reg(UInt(mantissaWidth + 3 bits)) + val result = Reg(UInt(mantissaWidth+1+2 bits)) + + val div1, div3 = Reg(UInt(mantissaWidth+3 bits)) + val div2 = div1 |<< 1 + + val sub1 = shifter -^ div1 + val sub2 = shifter -^ div2 + val sub3 = shifter -^ div3 + + io.output.valid := done + io.output.result := (result << 0).resized + io.output.remain := (shifter >> 2).resized + io.input.ready := !busy + + when(!done){ + counter := counter + 1 + val sel = CombInit(shifter) + result := result |<< 2 + when(!sub1.msb){ + sel := sub1.resized + result(1 downto 0) := 1 + } + when(!sub2.msb){ + sel := sub2.resized + result(1 downto 0) := 2 + } + when(!sub3.msb){ + sel := sub3.resized + result(1 downto 0) := 3 + } + shifter := sel |<< 2 + } + + when(!busy){ + counter := 0 + shifter := (U"1" @@ io.input.a @@ U"").resized + div1 := (U"1" @@ io.input.b).resized + div3 := (U"1" @@ io.input.b) +^ (((U"1" @@ io.input.b)) << 1) + busy := io.input.valid + } +} + + +object FpuDivTester extends App{ + import spinal.core.sim._ + + for(w <- List(16, 20)) { + val config = SimConfig + config.withFstWave + config.compile(new FpuDiv(w)).doSim(seed=2){dut => + dut.clockDomain.forkStimulus(10) + + + val (cmdDriver, cmdQueue) = StreamDriver.queue(dut.io.input, dut.clockDomain) + val rspQueue = mutable.Queue[FpuDivRsp => Unit]() + StreamMonitor(dut.io.output, dut.clockDomain)( rspQueue.dequeue()(_)) + StreamReadyRandomizer(dut.io.output, dut.clockDomain) + + def test(a : Int, b : Int): Unit ={ + cmdQueue +={p => + p.a #= a + p.b #= b + } + rspQueue += {p => + val x = (a | (1 << dut.mantissaWidth)).toLong + val y = (b | (1 << dut.mantissaWidth)).toLong + val result = (x << dut.mantissaWidth+2) / y + val remain = (x << dut.mantissaWidth+2) % y + + assert(p.result.toLong == result, f"$x%x/$y%x=${p.result.toLong}%x instead of $result%x") + assert(p.remain.toLong == remain, f"$x%x %% $y%x=${p.remain.toLong}%x instead of $remain%x") + } + } + + val s = dut.mantissaWidth-16 + val f = (1 << dut.mantissaWidth)-1 + test(0xE000 << s, 0x8000 << s) + test(0xC000 << s, 0x4000 << s) + test(0xC835 << s, 0x4742 << s) + test(0,0) + test(0,f) + test(f,0) + test(f,f) + + for(i <- 0 until 10000){ + test(Random.nextInt(1 << dut.mantissaWidth), Random.nextInt(1 << 
dut.mantissaWidth)) + } + + waitUntil(rspQueue.isEmpty) + + dut.clockDomain.waitSampling(100) + + } + } +} + +object FpuDivTester2 extends App{ + val mantissaWidth = 52 + val a = BigInt(0xfffffff810000l) + val b = BigInt(0x0000000000FF0l) + val x = (a | (1l << mantissaWidth)) + val y = (b | (1l << mantissaWidth)) + val result = (x << mantissaWidth+2) / y + val remain = (x << mantissaWidth+2) % y + println("done") + +}
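
// Editor's note: illustrative software model added for this write-up, not part of the original diff.
// It mirrors the radix-4 restoring division performed by the FpuDiv state machine above: each cycle
// the partial remainder is compared against 1x/2x/3x of the divisor (sub1/sub2/sub3), two quotient
// bits are retired, then the remainder is shifted left by two (subtract-then-shift, as in the RTL).
// Names and the sample operands are illustrative; the reference results match the expectation used
// in FpuDivTester.
object FpuDivSoftwareModel extends App {
  def divide(x: BigInt, y: BigInt, steps: Int): (BigInt, BigInt) = {
    var rem = x
    var quo = BigInt(0)
    for (_ <- 0 until steps) {
      val q = if (rem >= 3 * y) 3 else if (rem >= 2 * y) 2 else if (rem >= y) 1 else 0
      quo = (quo << 2) | q
      rem = (rem - q * y) << 2
    }
    (quo, rem >> 2) // undo the last shift, like "shifter >> 2" on the RTL output
  }

  val w = 16                                  // mantissaWidth of the 16-bit configuration tested above
  val x = BigInt(0xC835) | (BigInt(1) << w)   // implicit leading one, as in the testbench
  val y = BigInt(0x4742) | (BigInt(1) << w)
  val (q, r) = divide(x, y, steps = (w + 2 + 2) / 2)
  assert(q == (x << (w + 2)) / y && r == (x << (w + 2)) % y)
  println(s"q=0x${q.toString(16)} r=0x${r.toString(16)}")
}
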
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/ip/fpu/FpuSqrt.scala b/VexRiscv/src/main/scala/vexriscv/ip/fpu/FpuSqrt.scala new file mode 100644 index 0000000..0f80905 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/ip/fpu/FpuSqrt.scala @@ -0,0 +1,116 @@ +package vexriscv.ip.fpu + +import spinal.core._ +import spinal.lib._ +import spinal.lib.sim.{StreamDriver, StreamMonitor, StreamReadyRandomizer} + +import scala.collection.mutable +import scala.util.Random + +case class FpuSqrtCmd(mantissaWidth : Int) extends Bundle{ + val a = UInt(mantissaWidth+2 bits) +} + +case class FpuSqrtRsp(mantissaWidth : Int) extends Bundle{ + val result = UInt(mantissaWidth+1 bits) + val remain = UInt(mantissaWidth+5 bits) +} + +case class FpuSqrt(val mantissaWidth : Int) extends Component { + val io = new Bundle{ + val input = slave Stream(FpuSqrtCmd(mantissaWidth)) + val output = master Stream(FpuSqrtRsp(mantissaWidth)) + } + + val iterations = mantissaWidth+2 + val counter = Reg(UInt(log2Up(iterations ) bits)) + val busy = RegInit(False) clearWhen(io.output.fire) + val done = RegInit(False) setWhen(busy && counter === iterations-1) clearWhen(io.output.fire) + + val a = Reg(UInt(mantissaWidth+5 bits)) + val x = Reg(UInt(mantissaWidth bits)) + val q = Reg(UInt(mantissaWidth+1 bits)) + val t = a-(q @@ U"01") + + + io.output.valid := done + io.output.result := (q << 0).resized + io.output.remain := a + io.input.ready := !busy + + when(!done){ + counter := counter + 1 + val sel = CombInit(a) + when(!t.msb){ + sel := t.resized + } + q := (q @@ !t.msb).resized + a := (sel @@ x(widthOf(x)-2,2 bits)).resized + x := x |<< 2 + } + + when(!busy){ + q := 0 + a := io.input.a(widthOf(io.input.a)-2,2 bits).resized + x := (io.input.a).resized + counter := 0 + when(io.input.valid){ + busy := True + } + } +} + + +object FpuSqrtTester extends App{ + import spinal.core.sim._ + + for(w <- List(16)) { + val config = SimConfig + config.withFstWave + config.compile(new FpuSqrt(w)).doSim(seed=2){dut => + dut.clockDomain.forkStimulus(10) + + + val (cmdDriver, cmdQueue) = StreamDriver.queue(dut.io.input, dut.clockDomain) + val rspQueue = mutable.Queue[FpuSqrtRsp => Unit]() + StreamMonitor(dut.io.output, dut.clockDomain)( rspQueue.dequeue()(_)) + StreamReadyRandomizer(dut.io.output, dut.clockDomain) + + def test(a : Int): Unit ={ + cmdQueue +={p => + p.a #= a + } + rspQueue += {p => +// val x = (a * (1l << dut.mantissaWidth)).toLong +// val result = Math.sqrt(x).toLong/(1 << dut.mantissaWidth/2) +// val remain = a-x*x + val x = a.toDouble / (1 << dut.mantissaWidth) + val result = (Math.sqrt(x)*(1 << dut.mantissaWidth+1)).toLong + val filtred = result % (1 << dut.mantissaWidth+1) +// val remain = (a-(result*result)).toLong + assert(p.result.toLong == filtred, f"$a%x=${p.result.toLong}%x instead of $filtred%x") +// assert(p.remain.toLong == remain, f"$a%x=${p.remain.toLong}%x instead of $remain%x") + } + } + + val s = dut.mantissaWidth-16 + val f = (1 << dut.mantissaWidth)-1 +// test(121) + test(0x20000) + test(0x18000) +// test(0,0) +// test(0,f) +// test(f,0) +// test(f,f) + + for(i <- 0 until 10000){ + test(Random.nextInt(3 << dut.mantissaWidth) + (1 << dut.mantissaWidth)) + } + + waitUntil(rspQueue.isEmpty) + + dut.clockDomain.waitSampling(100) + + } + } +}
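
// Editor's note: illustrative software model added for this write-up, not part of the original diff.
// FpuSqrt above implements the classic digit-by-digit (two bits per cycle) square root: each cycle it
// tries to subtract "q @@ U\"01\"" (i.e. 4*q + 1) from the partial remainder, appends the success bit
// to the root q, and shifts in the next two radicand bits from x. The integer model below applies the
// same recurrence; names and the sample value are illustrative.
object FpuSqrtSoftwareModel extends App {
  def isqrt(n: BigInt): (BigInt, BigInt) = {
    val pairs = (n.bitLength + 1) / 2
    var rem  = BigInt(0)
    var root = BigInt(0)
    for (i <- (pairs - 1) to 0 by -1) {
      rem = (rem << 2) | ((n >> (2 * i)) & 3) // bring down the next two radicand bits
      val trial = (root << 2) | 1             // corresponds to the FSM's t = a - (q @@ "01")
      if (rem >= trial) { rem -= trial; root = (root << 1) | 1 }
      else root = root << 1
    }
    (root, rem) // root = floor(sqrt(n)), rem = n - root*root
  }

  val n = BigInt("20000", 16)                 // same value as the directed test above
  val (root, rem) = isqrt(n)
  assert(root * root + rem == n && rem <= 2 * root)
  println(s"isqrt(0x${n.toString(16)}) = 0x${root.toString(16)}, remainder 0x${rem.toString(16)}")
}
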
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/ip/fpu/Interface.scala b/VexRiscv/src/main/scala/vexriscv/ip/fpu/Interface.scala new file mode 100644 index 0000000..9338c35 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/ip/fpu/Interface.scala @@ -0,0 +1,186 @@ +package vexriscv.ip.fpu + +import spinal.core._ +import spinal.lib._ + + +object Fpu{ + + object Function{ + val MUL = 0 + val ADD = 1 + } + +} + + +case class FpuFloatDecoded() extends Bundle{ + val isNan = Bool() + val isNormal = Bool() + val isSubnormal = Bool() + val isZero = Bool() + val isInfinity = Bool() + val isQuiet = Bool() +} + +object FpuFloat{ + val ZERO = 0 + val INFINITY = 1 + val NAN = 2 + val NAN_CANONICAL_BIT = 2 +} + +case class FpuFloat(exponentSize: Int, + mantissaSize: Int) extends Bundle { + val mantissa = UInt(mantissaSize bits) + val exponent = UInt(exponentSize bits) + val sign = Bool() + val special = Bool() + + def withInvertSign : FpuFloat ={ + val ret = FpuFloat(exponentSize,mantissaSize) + ret.sign := !sign + ret.exponent := exponent + ret.mantissa := mantissa + ret + } + + def isNormal = !special + def isZero = special && exponent(1 downto 0) === FpuFloat.ZERO + def isInfinity = special && exponent(1 downto 0) === FpuFloat.INFINITY + def isNan = special && exponent(1 downto 0) === FpuFloat.NAN + def isQuiet = mantissa.msb + def isNanSignaling = special && exponent(1 downto 0) === FpuFloat.NAN && !isQuiet + def isCanonical = exponent(FpuFloat.NAN_CANONICAL_BIT) + + def setNormal = { special := False } + def setZero = { special := True; exponent(1 downto 0) := FpuFloat.ZERO } + def setInfinity = { special := True; exponent(1 downto 0) := FpuFloat.INFINITY } + def setNan = { special := True; exponent(1 downto 0) := FpuFloat.NAN; exponent(FpuFloat.NAN_CANONICAL_BIT) := False} + def setNanQuiet = { special := True; exponent(1 downto 0) := FpuFloat.NAN; exponent(FpuFloat.NAN_CANONICAL_BIT) := True; mantissa.msb := True; } + + def decode() = { + val ret = FpuFloatDecoded() + ret.isZero := isZero + ret.isNormal := isNormal + ret.isInfinity := isInfinity + ret.isNan := isNan + ret.isQuiet := mantissa.msb + ret + } + + def decodeIeee754() = { + val ret = FpuFloatDecoded() + val expZero = exponent === 0 + val expOne = exponent === exponent.maxValue + val manZero = mantissa === 0 + ret.isZero := expZero && manZero + ret.isSubnormal := expZero && !manZero + ret.isNormal := !expOne && !expZero + ret.isInfinity := expOne && manZero + ret.isNan := expOne && !manZero + ret.isQuiet := mantissa.msb + ret + } +} + +object FpuOpcode extends SpinalEnum{ + val LOAD, STORE, MUL, ADD, FMA, I2F, F2I, CMP, DIV, SQRT, MIN_MAX, SGNJ, FMV_X_W, FMV_W_X, FCLASS, FCVT_X_X = newElement() +} + +object FpuFormat extends SpinalEnum{ + val FLOAT, DOUBLE = newElement() +} + +object FpuRoundMode extends SpinalEnum(){ + val RNE, RTZ, RDN, RUP, RMM = newElement() + defaultEncoding = SpinalEnumEncoding("opt")( + RNE -> 0, + RTZ -> 1, + RDN -> 2, + RUP -> 3, + RMM -> 4 + ) +} +object FpuRoundModeInstr extends SpinalEnum(){ + val RNE, RTZ, RDN, RUP, RMM, DYN = newElement() + defaultEncoding = SpinalEnumEncoding("opt")( + RNE -> 0, + RTZ -> 1, + RDN -> 2, + RUP -> 3, + RMM -> 4, + DYN -> 7 + ) +} + + +case class FpuParameter( withDouble : Boolean, + asyncRegFile : Boolean = false, + mulWidthA : Int = 18, + mulWidthB : Int = 18, + schedulerM2sPipe : Boolean = false, + sim : Boolean = false, + withAdd : Boolean = true, + withMul : Boolean = true, + withDivSqrt : Boolean = false, + withDiv : Boolean = true, + 
withSqrt : Boolean = true,
+                         withShortPipMisc : Boolean = true){
+
+  val internalMantissaSize = if(withDouble) 52 else 23
+  val storeLoadType = HardType(Bits(if(withDouble) 64 bits else 32 bits))
+  val internalExponentSize = (if(withDouble) 11 else 8) + 1
+  val internalFloating = HardType(FpuFloat(exponentSize = internalExponentSize, mantissaSize = internalMantissaSize))
+  val writeFloating = HardType(FpuFloat(exponentSize = internalExponentSize, mantissaSize = internalMantissaSize+1))
+
+  val rfAddress = HardType(UInt(5 bits))
+
+  val Opcode = FpuOpcode
+  val Format = FpuFormat
+  val argWidth = 2
+  val Arg = HardType(Bits(2 bits))
+}
+
+case class FpuFlags() extends Bundle{
+  val NX, UF, OF, DZ, NV = Bool()
+}
+
+case class FpuCompletion() extends Bundle{
+  val flags = FpuFlags()
+  val written = Bool() //Used for verification purposes
+}
+
+case class FpuCmd(p : FpuParameter) extends Bundle{
+  val opcode = p.Opcode()
+  val arg = Bits(2 bits)
+  val rs1, rs2, rs3 = p.rfAddress()
+  val rd = p.rfAddress()
+  val format = p.Format()
+  val roundMode = FpuRoundMode()
+}
+
+case class FpuCommit(p : FpuParameter) extends Bundle{
+  val opcode = FpuOpcode()
+  val rd = UInt(5 bits)
+  val write = Bool()
+  val value = p.storeLoadType() // IEEE 754
+}
+
+case class FpuRsp(p : FpuParameter) extends Bundle{
+  val value = p.storeLoadType() // IEEE754 store || Integer
+  val NV, NX = Bool()
+}
+
+case class FpuPort(p : FpuParameter) extends Bundle with IMasterSlave {
+  val cmd = Stream(FpuCmd(p))
+  val commit = Stream(FpuCommit(p))
+  val rsp = Stream(FpuRsp(p))
+  val completion = Flow(FpuCompletion())
+
+  override def asMaster(): Unit = {
+    master(cmd, commit)
+    slave(rsp)
+    in(completion)
+  }
+}
diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/AesPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/AesPlugin.scala
new file mode 100644
index 0000000..0d4556a
--- /dev/null
+++ b/VexRiscv/src/main/scala/vexriscv/plugin/AesPlugin.scala
@@ -0,0 +1,329 @@
+package vexriscv.plugin
+
+import spinal.core._
+import spinal.lib._
+import vexriscv.{DecoderService, Stageable, VexRiscv}
+
+/**
+ * The AesPlugin allows reducing the instruction count of each AES round by providing the following instructions :
+ * 1) aes_enc_round(rs1, rs2, sel).      rd = rs1 ^ quad_mul(sel, sbox(byte_sel(rs2, sel)))
+ * 2) aes_enc_round_last(rs1, rs2, sel). rd = rs1 ^ quad_sbox(byte_sel(rs2, sel))
+ * 3) aes_dec_round(rs1, rs2, sel).      rd = rs1 ^ quad_inv_sbox(quad_mul(sel,byte_sel(rs2, sel)))
+ * 4) aes_dec_round_last(rs1, rs2, sel). rd = rs1 ^ quad_inv_sbox(byte_sel(rs2, sel))
+ *
+ * Here is what those inner functions mean:
+ * - sbox applies the sbox transformation to the 'sel' byte of the 32 bits word
+ * - quad_mul multiplies (in the Galois field) each byte of the 32 bits word by a constant (which depends on sel)
+ * - quad_inv_sbox applies the inverse sbox transformation to each byte of the 32 bits word
+ *
+ * You can find a complete usage example of those instructions in aes_cusom.h, in vexriscv_aes_encrypt and
+ * vexriscv_aes_decrypt. Those functions are made to work on little endian, as in the linux kernel default AES
+ * implementation, but unlike the libressl, libopenssl and dropbear ones (swapping the bytes of the expanded key can fix that).
+ *
+ * This plugin implements the processing using a single 32_bits * 512_words rom to fetch the sbox/inv_sbox/multiplication
+ * results already combined. This rom is formatted as follows :
+ *
+ * From word 0x000 to 0x0FF, it is formatted as follows : (note: multiplications are in the Galois field)
+ * [ 7 :  0] : SBox[word_address & 0xFF] * 1
+ * [15 :  8] : SBox[word_address & 0xFF] * 2
+ * [23 : 16] : SBox[word_address & 0xFF] * 3
+ * [31 : 24] : inverse SBox[word_address & 0xFF] * 1 (Used for the last round of the decryption)
+ *
+ * From word 0x100 to 0x1FF, it is formatted as follows :
+ * [ 7 :  0] : inverse SBox[word_address & 0xFF] * 14
+ * [15 :  8] : inverse SBox[word_address & 0xFF] * 9
+ * [23 : 16] : inverse SBox[word_address & 0xFF] * 13
+ * [31 : 24] : inverse SBox[word_address & 0xFF] * 11
+ *
+ * So, on each instruction, the following is done (in order) :
+ * 1) Select the 'sel' byte of RS2
+ * 2) Read the rom at an address which depends on the selected RS2 byte and the instruction
+ * 3) Permute the rom read data depending on the instruction and the 'sel' argument
+ * 4) Xor the result with RS1 and return that as the instruction result
+ *
+ * The instructions are encoded by default as follows :
+ * --SS-LDXXXXXYYYYY000ZZZZZ0001011
+ *
+ * Where :
+ * - XXXXX is the register file source 2 (RS2)
+ * - YYYYY is the register file source 1 (RS1)
+ * - ZZZZZ is the register file destination
+ * - D=1 means decrypt, D=0 means encrypt
+ * - L=1 means last round, L=0 means full round
+ * - SS specifies which byte of RS2 should be used for the processing
+ *
+ * In practice, aes-256-cbc performance should improve by a factor of 4. See the following libopenssl results
+ * from a SoC running linux at 100 MHz :
+ * type            16 bytes    64 bytes    256 bytes   1024 bytes   8192 bytes   16384 bytes
+ * aes-256-cbc SW  492.58k     700.22k     796.41k     831.49k      830.09k      832.81k
+ * aes-256-cbc HW  1781.52k    2834.07k    3323.07k    3486.72k     3465.22k     3440.10k
+ */
+
+case class AesPlugin(encoding : MaskedLiteral = M"-----------------000-----0001011") extends Plugin[VexRiscv]{
+
+  object IS_AES extends Stageable(Bool)
+  object CALC extends Stageable(Bits(32 bits))
+
+  val mapping = new {
+    def DECRYPT = 25 // 0/1 => encrypt/decrypt
+    def LAST_ROUND = 26
+    def BYTE_SEL = 28 //Which byte should be used in RS2
+  }
+
+  //Callback to set up the plugin and ask for the different services
+  override def setup(pipeline: VexRiscv): Unit = {
+    import pipeline.config._
+
+    val decoderService = pipeline.service(classOf[DecoderService])
+
+    decoderService.addDefault(IS_AES, False)
+    decoderService.add(
+      key = encoding,
+      List(
+        IS_AES                   -> True,
+        REGFILE_WRITE_VALID      -> True,
+        BYPASSABLE_EXECUTE_STAGE -> False,
+        BYPASSABLE_MEMORY_STAGE  -> False, //Late result
+        RS1_USE                  -> True,
+        RS2_USE                  -> True
+      )
+    )
+  }
+
+  override def build(pipeline: VexRiscv): Unit = {
+    import pipeline._
+    import pipeline.config._
+
+
+    def BANK0 = (TE0, SBOX_INV).zipped.map((te0, inv) => (te0.toLong) | (inv.toLong << 24))
+    def BANK1 = TD0
+
+
+    val onExecute = execute plug new Area{
+      import execute._
+      val byteSel = input(INSTRUCTION)(mapping.BYTE_SEL, 2 bits).asUInt
+      val bankSel = input(INSTRUCTION)(mapping.DECRYPT) && !input(INSTRUCTION)(mapping.LAST_ROUND)
+      val romAddress = U(bankSel ## input(RS2).subdivideIn(8 bits).read(byteSel))
+    }
+
+    memory plug new Area{
+      import memory._
+
+      //Decode the rom data
+      val rom = new Area {
+        val storage = Mem(Bits(32 bits), 512) initBigInt((BANK0 ++ BANK1).map(BigInt(_)))
+
+        val data = storage.readSync(onExecute.romAddress, !arbitration.isStuck)
+        val bytes = data.subdivideIn(8 bits)
+
+        def VecUInt(l: Int*) = Vec(l.map(U(_, 2 bits)))
+        // remap will be used to decode the rom
+        val remap
= Vec( + VecUInt(2, 0, 0, 1), + VecUInt(0, 0, 0, 0), + VecUInt(3, 2, 1, 0), + VecUInt(3, 3, 3, 3) + ) + + val address = U(input(INSTRUCTION)(mapping.DECRYPT) ## input(INSTRUCTION)(mapping.LAST_ROUND)) + val output = remap(address) + } + + val wordDesuffle = new Area{ + val zero = B"0000" + val byteSel = input(INSTRUCTION)(mapping.BYTE_SEL, 2 bits).asUInt + val output = Vec(Bits(8 bits), 4) + + def remap(l : Int*) = Vec(l.map(rom.output(_))) + val sel = byteSel.mux( + 0 -> remap(3, 2, 1, 0), + 1 -> remap(0, 3, 2, 1), + 2 -> remap(1, 0, 3, 2), + 3 -> remap(2, 1, 0, 3) + ) + when(input(INSTRUCTION)(mapping.LAST_ROUND)){ + zero := B"1111" + zero(byteSel) := False + } + + //Finaly, mux the rom data + for(byteId <- 0 to 3){ + output(byteId) := rom.bytes(sel(byteId)) + when(zero(byteId)){ + output(byteId) := 0 + } + } + } + + val xored = wordDesuffle.output.asBits ^ input(RS1) + insert(CALC) := xored + } + + writeBack plug new Area { + import writeBack._ + + when(input(IS_AES)) { + output(REGFILE_WRITE_DATA) := input(CALC) + } + } + } + + // Encryption table which solve a single byte sbox + column mix. Used for all rounds + def TE0 = List( + 0xa5c663, 0x84f87c, 0x99ee77, 0x8df67b, + 0x0dfff2, 0xbdd66b, 0xb1de6f, 0x5491c5, + 0x506030, 0x030201, 0xa9ce67, 0x7d562b, + 0x19e7fe, 0x62b5d7, 0xe64dab, 0x9aec76, + 0x458fca, 0x9d1f82, 0x4089c9, 0x87fa7d, + 0x15effa, 0xebb259, 0xc98e47, 0x0bfbf0, + 0xec41ad, 0x67b3d4, 0xfd5fa2, 0xea45af, + 0xbf239c, 0xf753a4, 0x96e472, 0x5b9bc0, + 0xc275b7, 0x1ce1fd, 0xae3d93, 0x6a4c26, + 0x5a6c36, 0x417e3f, 0x02f5f7, 0x4f83cc, + 0x5c6834, 0xf451a5, 0x34d1e5, 0x08f9f1, + 0x93e271, 0x73abd8, 0x536231, 0x3f2a15, + 0x0c0804, 0x5295c7, 0x654623, 0x5e9dc3, + 0x283018, 0xa13796, 0x0f0a05, 0xb52f9a, + 0x090e07, 0x362412, 0x9b1b80, 0x3ddfe2, + 0x26cdeb, 0x694e27, 0xcd7fb2, 0x9fea75, + 0x1b1209, 0x9e1d83, 0x74582c, 0x2e341a, + 0x2d361b, 0xb2dc6e, 0xeeb45a, 0xfb5ba0, + 0xf6a452, 0x4d763b, 0x61b7d6, 0xce7db3, + 0x7b5229, 0x3edde3, 0x715e2f, 0x971384, + 0xf5a653, 0x68b9d1, 0x000000, 0x2cc1ed, + 0x604020, 0x1fe3fc, 0xc879b1, 0xedb65b, + 0xbed46a, 0x468dcb, 0xd967be, 0x4b7239, + 0xde944a, 0xd4984c, 0xe8b058, 0x4a85cf, + 0x6bbbd0, 0x2ac5ef, 0xe54faa, 0x16edfb, + 0xc58643, 0xd79a4d, 0x556633, 0x941185, + 0xcf8a45, 0x10e9f9, 0x060402, 0x81fe7f, + 0xf0a050, 0x44783c, 0xba259f, 0xe34ba8, + 0xf3a251, 0xfe5da3, 0xc08040, 0x8a058f, + 0xad3f92, 0xbc219d, 0x487038, 0x04f1f5, + 0xdf63bc, 0xc177b6, 0x75afda, 0x634221, + 0x302010, 0x1ae5ff, 0x0efdf3, 0x6dbfd2, + 0x4c81cd, 0x14180c, 0x352613, 0x2fc3ec, + 0xe1be5f, 0xa23597, 0xcc8844, 0x392e17, + 0x5793c4, 0xf255a7, 0x82fc7e, 0x477a3d, + 0xacc864, 0xe7ba5d, 0x2b3219, 0x95e673, + 0xa0c060, 0x981981, 0xd19e4f, 0x7fa3dc, + 0x664422, 0x7e542a, 0xab3b90, 0x830b88, + 0xca8c46, 0x29c7ee, 0xd36bb8, 0x3c2814, + 0x79a7de, 0xe2bc5e, 0x1d160b, 0x76addb, + 0x3bdbe0, 0x566432, 0x4e743a, 0x1e140a, + 0xdb9249, 0x0a0c06, 0x6c4824, 0xe4b85c, + 0x5d9fc2, 0x6ebdd3, 0xef43ac, 0xa6c462, + 0xa83991, 0xa43195, 0x37d3e4, 0x8bf279, + 0x32d5e7, 0x438bc8, 0x596e37, 0xb7da6d, + 0x8c018d, 0x64b1d5, 0xd29c4e, 0xe049a9, + 0xb4d86c, 0xfaac56, 0x07f3f4, 0x25cfea, + 0xafca65, 0x8ef47a, 0xe947ae, 0x181008, + 0xd56fba, 0x88f078, 0x6f4a25, 0x725c2e, + 0x24381c, 0xf157a6, 0xc773b4, 0x5197c6, + 0x23cbe8, 0x7ca1dd, 0x9ce874, 0x213e1f, + 0xdd964b, 0xdc61bd, 0x860d8b, 0x850f8a, + 0x90e070, 0x427c3e, 0xc471b5, 0xaacc66, + 0xd89048, 0x050603, 0x01f7f6, 0x121c0e, + 0xa3c261, 0x5f6a35, 0xf9ae57, 0xd069b9, + 0x911786, 0x5899c1, 0x273a1d, 0xb9279e, + 0x38d9e1, 0x13ebf8, 0xb32b98, 0x332211, + 
0xbbd269, 0x70a9d9, 0x89078e, 0xa73394, + 0xb62d9b, 0x223c1e, 0x921587, 0x20c9e9, + 0x4987ce, 0xffaa55, 0x785028, 0x7aa5df, + 0x8f038c, 0xf859a1, 0x800989, 0x171a0d, + 0xda65bf, 0x31d7e6, 0xc68442, 0xb8d068, + 0xc38241, 0xb02999, 0x775a2d, 0x111e0f, + 0xcb7bb0, 0xfca854, 0xd66dbb, 0x3a2c16 + ) + + + // Decryption table which solve a single byte sbox + column mix. Not used in the last round + def TD0 = List( + 0x50a7f451l, 0x5365417el, 0xc3a4171al, 0x965e273al, + 0xcb6bab3bl, 0xf1459d1fl, 0xab58faacl, 0x9303e34bl, + 0x55fa3020l, 0xf66d76adl, 0x9176cc88l, 0x254c02f5l, + 0xfcd7e54fl, 0xd7cb2ac5l, 0x80443526l, 0x8fa362b5l, + 0x495ab1del, 0x671bba25l, 0x980eea45l, 0xe1c0fe5dl, + 0x02752fc3l, 0x12f04c81l, 0xa397468dl, 0xc6f9d36bl, + 0xe75f8f03l, 0x959c9215l, 0xeb7a6dbfl, 0xda595295l, + 0x2d83bed4l, 0xd3217458l, 0x2969e049l, 0x44c8c98el, + 0x6a89c275l, 0x78798ef4l, 0x6b3e5899l, 0xdd71b927l, + 0xb64fe1bel, 0x17ad88f0l, 0x66ac20c9l, 0xb43ace7dl, + 0x184adf63l, 0x82311ae5l, 0x60335197l, 0x457f5362l, + 0xe07764b1l, 0x84ae6bbbl, 0x1ca081fel, 0x942b08f9l, + 0x58684870l, 0x19fd458fl, 0x876cde94l, 0xb7f87b52l, + 0x23d373abl, 0xe2024b72l, 0x578f1fe3l, 0x2aab5566l, + 0x0728ebb2l, 0x03c2b52fl, 0x9a7bc586l, 0xa50837d3l, + 0xf2872830l, 0xb2a5bf23l, 0xba6a0302l, 0x5c8216edl, + 0x2b1ccf8al, 0x92b479a7l, 0xf0f207f3l, 0xa1e2694el, + 0xcdf4da65l, 0xd5be0506l, 0x1f6234d1l, 0x8afea6c4l, + 0x9d532e34l, 0xa055f3a2l, 0x32e18a05l, 0x75ebf6a4l, + 0x39ec830bl, 0xaaef6040l, 0x069f715el, 0x51106ebdl, + 0xf98a213el, 0x3d06dd96l, 0xae053eddl, 0x46bde64dl, + 0xb58d5491l, 0x055dc471l, 0x6fd40604l, 0xff155060l, + 0x24fb9819l, 0x97e9bdd6l, 0xcc434089l, 0x779ed967l, + 0xbd42e8b0l, 0x888b8907l, 0x385b19e7l, 0xdbeec879l, + 0x470a7ca1l, 0xe90f427cl, 0xc91e84f8l, 0x00000000l, + 0x83868009l, 0x48ed2b32l, 0xac70111el, 0x4e725a6cl, + 0xfbff0efdl, 0x5638850fl, 0x1ed5ae3dl, 0x27392d36l, + 0x64d90f0al, 0x21a65c68l, 0xd1545b9bl, 0x3a2e3624l, + 0xb1670a0cl, 0x0fe75793l, 0xd296eeb4l, 0x9e919b1bl, + 0x4fc5c080l, 0xa220dc61l, 0x694b775al, 0x161a121cl, + 0x0aba93e2l, 0xe52aa0c0l, 0x43e0223cl, 0x1d171b12l, + 0x0b0d090el, 0xadc78bf2l, 0xb9a8b62dl, 0xc8a91e14l, + 0x8519f157l, 0x4c0775afl, 0xbbdd99eel, 0xfd607fa3l, + 0x9f2601f7l, 0xbcf5725cl, 0xc53b6644l, 0x347efb5bl, + 0x7629438bl, 0xdcc623cbl, 0x68fcedb6l, 0x63f1e4b8l, + 0xcadc31d7l, 0x10856342l, 0x40229713l, 0x2011c684l, + 0x7d244a85l, 0xf83dbbd2l, 0x1132f9ael, 0x6da129c7l, + 0x4b2f9e1dl, 0xf330b2dcl, 0xec52860dl, 0xd0e3c177l, + 0x6c16b32bl, 0x99b970a9l, 0xfa489411l, 0x2264e947l, + 0xc48cfca8l, 0x1a3ff0a0l, 0xd82c7d56l, 0xef903322l, + 0xc74e4987l, 0xc1d138d9l, 0xfea2ca8cl, 0x360bd498l, + 0xcf81f5a6l, 0x28de7aa5l, 0x268eb7dal, 0xa4bfad3fl, + 0xe49d3a2cl, 0x0d927850l, 0x9bcc5f6al, 0x62467e54l, + 0xc2138df6l, 0xe8b8d890l, 0x5ef7392el, 0xf5afc382l, + 0xbe805d9fl, 0x7c93d069l, 0xa92dd56fl, 0xb31225cfl, + 0x3b99acc8l, 0xa77d1810l, 0x6e639ce8l, 0x7bbb3bdbl, + 0x097826cdl, 0xf418596el, 0x01b79aecl, 0xa89a4f83l, + 0x656e95e6l, 0x7ee6ffaal, 0x08cfbc21l, 0xe6e815efl, + 0xd99be7bal, 0xce366f4al, 0xd4099feal, 0xd67cb029l, + 0xafb2a431l, 0x31233f2al, 0x3094a5c6l, 0xc066a235l, + 0x37bc4e74l, 0xa6ca82fcl, 0xb0d090e0l, 0x15d8a733l, + 0x4a9804f1l, 0xf7daec41l, 0x0e50cd7fl, 0x2ff69117l, + 0x8dd64d76l, 0x4db0ef43l, 0x544daaccl, 0xdf0496e4l, + 0xe3b5d19el, 0x1b886a4cl, 0xb81f2cc1l, 0x7f516546l, + 0x04ea5e9dl, 0x5d358c01l, 0x737487fal, 0x2e410bfbl, + 0x5a1d67b3l, 0x52d2db92l, 0x335610e9l, 0x1347d66dl, + 0x8c61d79al, 0x7a0ca137l, 0x8e14f859l, 0x893c13ebl, + 0xee27a9cel, 0x35c961b7l, 0xede51ce1l, 0x3cb1477al, + 
0x59dfd29cl, 0x3f73f255l, 0x79ce1418l, 0xbf37c773l, + 0xeacdf753l, 0x5baafd5fl, 0x146f3ddfl, 0x86db4478l, + 0x81f3afcal, 0x3ec468b9l, 0x2c342438l, 0x5f40a3c2l, + 0x72c31d16l, 0x0c25e2bcl, 0x8b493c28l, 0x41950dffl, + 0x7101a839l, 0xdeb30c08l, 0x9ce4b4d8l, 0x90c15664l, + 0x6184cb7bl, 0x70b632d5l, 0x745c6c48l, 0x4257b8d0l + ) + + // Last round decryption sbox + def SBOX_INV = List( + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + ) +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/BranchPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/BranchPlugin.scala new file mode 100644 index 0000000..24d42fa --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/BranchPlugin.scala @@ -0,0 +1,386 @@ +package vexriscv.plugin + +import vexriscv.Riscv._ +import vexriscv._ +import spinal.core._ +import spinal.lib._ + +trait BranchPrediction +object NONE extends BranchPrediction +object STATIC extends BranchPrediction +object DYNAMIC extends BranchPrediction +object DYNAMIC_TARGET extends BranchPrediction + +object BranchCtrlEnum extends SpinalEnum(binarySequential){ + val INC,B,JAL,JALR = newElement() +} +object BRANCH_CTRL extends Stageable(BranchCtrlEnum()) + + +case class DecodePredictionCmd() extends Bundle { + val hadBranch = Bool +} +case class DecodePredictionRsp(stage : Stage) extends Bundle { + val wasWrong = Bool +} +case class DecodePredictionBus(stage : Stage) extends Bundle { + val cmd = DecodePredictionCmd() + val rsp = DecodePredictionRsp(stage) +} + +case class FetchPredictionCmd() extends Bundle{ + val hadBranch = Bool + val targetPc = UInt(32 bits) +} +case class FetchPredictionRsp() extends Bundle{ + val wasRight = Bool + val finalPc = UInt(32 bits) + val sourceLastWord = UInt(32 bits) +} +case class FetchPredictionBus(stage : Stage) extends Bundle { + val cmd = FetchPredictionCmd() + val rsp = FetchPredictionRsp() +} + + +trait PredictionInterface{ + def askFetchPrediction() : FetchPredictionBus + def askDecodePrediction() : DecodePredictionBus + def inDebugNoFetch() : Unit +} + + + 
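
// Editor's note: usage sketch added for context, not part of the original diff. The BranchPrediction
// objects and the Fetch/DecodePredictionBus above are consumed as follows: a fetcher plugin configured
// for STATIC/DYNAMIC prediction calls askDecodePrediction(), one configured for DYNAMIC_TARGET calls
// askFetchPrediction(), and BranchPlugin.build() (further down) then picks buildDecodePrediction,
// buildFetchPrediction or buildWithoutPrediction accordingly. A hypothetical plugin-list excerpt,
// assuming the usual VexRiscv configuration style and an instruction-bus plugin exposing a
// `prediction` parameter:
//
//   plugins ++= List(
//     new IBusSimplePlugin(resetVector = 0x80000000l, prediction = DYNAMIC_TARGET /*, ... */),
//     new BranchPlugin(
//       earlyBranch = false,           // resolve branches in the memory stage (see branchStage below)
//       catchAddressMisaligned = true  // drive branchExceptionPort on misaligned branch targets
//     )
//   )
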
+class BranchPlugin(earlyBranch : Boolean, + catchAddressMisaligned : Boolean = false, + fenceiGenAsAJump : Boolean = false, + fenceiGenAsANop : Boolean = false, + decodeBranchSrc2 : Boolean = false) extends Plugin[VexRiscv] with PredictionInterface{ + + + def catchAddressMisalignedForReal = catchAddressMisaligned && !pipeline.config.withRvc + lazy val branchStage = if(earlyBranch) pipeline.execute else pipeline.memory + + object BRANCH_CALC extends Stageable(UInt(32 bits)) + object BRANCH_DO extends Stageable(Bool) + object BRANCH_COND_RESULT extends Stageable(Bool) + object IS_FENCEI extends Stageable(Bool) + + var jumpInterface : Flow[UInt] = null + var predictionExceptionPort : Flow[ExceptionCause] = null + var branchExceptionPort : Flow[ExceptionCause] = null + var inDebugNoFetchFlag : Bool = null + + + var decodePrediction : DecodePredictionBus = null + var fetchPrediction : FetchPredictionBus = null + + + override def askFetchPrediction() = { + fetchPrediction = FetchPredictionBus(branchStage) + fetchPrediction + } + + override def askDecodePrediction() = { + decodePrediction = DecodePredictionBus(branchStage) + decodePrediction + } + + + override def inDebugNoFetch(): Unit = inDebugNoFetchFlag := True + + def hasHazardOnBranch = if(earlyBranch) pipeline.service(classOf[HazardService]).hazardOnExecuteRS else False + + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + import IntAluPlugin._ + + assert(earlyBranch || withMemoryStage, "earlyBranch must be true when memory stage is disabled!") + + val bActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.RS, + SRC_USE_SUB_LESS -> True, + RS1_USE -> True, + RS2_USE -> True, + HAS_SIDE_EFFECT -> True + ) + + val jActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.PC_INCREMENT, + SRC2_CTRL -> Src2CtrlEnum.PC, + SRC_USE_SUB_LESS -> False, + REGFILE_WRITE_VALID -> True, + HAS_SIDE_EFFECT -> True + ) + + val decoderService = pipeline.service(classOf[DecoderService]) + + + decoderService.addDefault(BRANCH_CTRL, BranchCtrlEnum.INC) + decoderService.add(List( + JAL(true) -> (jActions ++ List(BRANCH_CTRL -> BranchCtrlEnum.JAL, ALU_CTRL -> AluCtrlEnum.ADD_SUB)), + JALR -> (jActions ++ List(BRANCH_CTRL -> BranchCtrlEnum.JALR, ALU_CTRL -> AluCtrlEnum.ADD_SUB, RS1_USE -> True)), + BEQ(true) -> (bActions ++ List(BRANCH_CTRL -> BranchCtrlEnum.B)), + BNE(true) -> (bActions ++ List(BRANCH_CTRL -> BranchCtrlEnum.B)), + BLT(true) -> (bActions ++ List(BRANCH_CTRL -> BranchCtrlEnum.B, SRC_LESS_UNSIGNED -> False)), + BGE(true) -> (bActions ++ List(BRANCH_CTRL -> BranchCtrlEnum.B, SRC_LESS_UNSIGNED -> False)), + BLTU(true) -> (bActions ++ List(BRANCH_CTRL -> BranchCtrlEnum.B, SRC_LESS_UNSIGNED -> True)), + BGEU(true) -> (bActions ++ List(BRANCH_CTRL -> BranchCtrlEnum.B, SRC_LESS_UNSIGNED -> True)) + )) + + if(fenceiGenAsAJump) { + decoderService.addDefault(IS_FENCEI, False) + decoderService.add(List( + FENCEI -> (List(IS_FENCEI -> True,HAS_SIDE_EFFECT -> True, BRANCH_CTRL -> BranchCtrlEnum.JAL)) + )) + } + + if(fenceiGenAsANop){ + decoderService.add(List(FENCEI -> List())) + } + + val pcManagerService = pipeline.service(classOf[JumpService]) + + //Priority -1, as DYNAMIC_TARGET misspredicted on non branch instruction should lose against other instructions + //legitim branches, as MRET for instance + jumpInterface = pcManagerService.createJumpInterface(branchStage, priority = -10) + + + if (catchAddressMisalignedForReal) { + val 
exceptionService = pipeline.service(classOf[ExceptionService]) + branchExceptionPort = exceptionService.newExceptionPort(branchStage) + } + inDebugNoFetchFlag = False.setCompositeName(this, "inDebugNoFetchFlag") + } + + override def build(pipeline: VexRiscv): Unit = { + (fetchPrediction,decodePrediction) match { + case (null, null) => buildWithoutPrediction(pipeline) + case (_ , null) => buildFetchPrediction(pipeline) + case (null, _) => buildDecodePrediction(pipeline) + } + if(fenceiGenAsAJump) { + import pipeline._ + import pipeline.config._ + when(decode.input(IS_FENCEI)) { + decode.output(INSTRUCTION)(12) := False + decode.output(INSTRUCTION)(22) := True + } + execute.arbitration.haltByOther setWhen(execute.arbitration.isValid && execute.input(IS_FENCEI) && stagesFromExecute.tail.map(_.arbitration.isValid).asBits.orR) + } + } + + def buildWithoutPrediction(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + //Do branch calculations (conditions + target PC) + execute plug new Area { + import execute._ + + val less = input(SRC_LESS) + val eq = input(SRC1) === input(SRC2) + + insert(BRANCH_DO) := input(BRANCH_CTRL).mux( + BranchCtrlEnum.INC -> False, + BranchCtrlEnum.JAL -> True, + BranchCtrlEnum.JALR -> True, + BranchCtrlEnum.B -> input(INSTRUCTION)(14 downto 12).mux( + B"000" -> eq , + B"001" -> !eq , + M"1-1" -> !less, + default -> less + ) + ) + + val imm = IMM(input(INSTRUCTION)) + val branch_src1 = (input(BRANCH_CTRL) === BranchCtrlEnum.JALR) ? input(RS1).asUInt | input(PC) + val branch_src2 = input(BRANCH_CTRL).mux( + BranchCtrlEnum.JAL -> imm.j_sext, + BranchCtrlEnum.JALR -> imm.i_sext, + default -> imm.b_sext + ).asUInt + + val branchAdder = branch_src1 + branch_src2 + insert(BRANCH_CALC) := branchAdder(31 downto 1) @@ U"0" + } + + //Apply branchs (JAL,JALR, Bxx) + branchStage plug new Area { + import branchStage._ + jumpInterface.valid := arbitration.isValid && input(BRANCH_DO) && !hasHazardOnBranch + jumpInterface.payload := input(BRANCH_CALC) + arbitration.flushNext setWhen(jumpInterface.valid) + + if(catchAddressMisalignedForReal) { + branchExceptionPort.valid := arbitration.isValid && input(BRANCH_DO) && jumpInterface.payload(1) + branchExceptionPort.code := 0 + branchExceptionPort.badAddr := jumpInterface.payload + + if(branchStage == execute) branchExceptionPort.valid clearWhen(service(classOf[HazardService]).hazardOnExecuteRS) + } + } + } + + + def buildDecodePrediction(pipeline: VexRiscv): Unit = { + object PREDICTION_HAD_BRANCHED extends Stageable(Bool) + + import pipeline._ + import pipeline.config._ + + + decode plug new Area { + import decode._ + insert(PREDICTION_HAD_BRANCHED) := (if(fenceiGenAsAJump) decodePrediction.cmd.hadBranch && !decode.input(IS_FENCEI) else decodePrediction.cmd.hadBranch) + } + + //Do real branch calculation + execute plug new Area { + import execute._ + + val less = input(SRC_LESS) + val eq = input(SRC1) === input(SRC2) + + insert(BRANCH_COND_RESULT) := input(BRANCH_CTRL).mux( + BranchCtrlEnum.INC -> False, + BranchCtrlEnum.JAL -> True, + BranchCtrlEnum.JALR -> True, + BranchCtrlEnum.B -> input(INSTRUCTION)(14 downto 12).mux( + B"000" -> eq , + B"001" -> !eq , + M"1-1" -> !less, + default -> less + ) + ) + + val imm = IMM(input(INSTRUCTION)) + val missAlignedTarget = if(pipeline.config.withRvc) False else (input(BRANCH_COND_RESULT) && input(BRANCH_CTRL).mux( + BranchCtrlEnum.JALR -> (imm.i_sext(1) ^ input(RS1)(1)), + BranchCtrlEnum.JAL -> imm.j_sext(1), + default -> imm.b_sext(1) + )) + + insert(BRANCH_DO) := 
input(PREDICTION_HAD_BRANCHED) =/= input(BRANCH_COND_RESULT) || missAlignedTarget + + //Calculation of the branch target / correction + val branch_src1,branch_src2 = UInt(32 bits) + switch(input(BRANCH_CTRL)){ + is(BranchCtrlEnum.JALR){ + branch_src1 := input(RS1).asUInt + branch_src2 := imm.i_sext.asUInt + } + default{ + branch_src1 := input(PC) + branch_src2 := ((input(BRANCH_CTRL) === BranchCtrlEnum.JAL) ? imm.j_sext | imm.b_sext).asUInt + when(input(PREDICTION_HAD_BRANCHED)){ //Assume the predictor never predict missaligned stuff, this avoid the need to know if the instruction should branch or not + branch_src2 := (if(pipeline.config.withRvc) Mux(input(IS_RVC), B(2), B(4)) else B(4)).asUInt.resized + } + } + } + val branchAdder = branch_src1 + branch_src2 + insert(BRANCH_CALC) := branchAdder(31 downto 1) @@ U"0" + } + + + // branch JALR or JAL/Bxx prediction miss corrections + val branchStage = if(earlyBranch) execute else memory + branchStage plug new Area { + import branchStage._ + jumpInterface.valid := arbitration.isValid && input(BRANCH_DO) && !hasHazardOnBranch + jumpInterface.payload := input(BRANCH_CALC) + arbitration.flushNext setWhen(jumpInterface.valid) + + if(catchAddressMisalignedForReal) { + val unalignedJump = input(BRANCH_DO) && input(BRANCH_CALC)(1) + branchExceptionPort.valid := arbitration.isValid && unalignedJump + branchExceptionPort.code := 0 + branchExceptionPort.badAddr := input(BRANCH_CALC) //pipeline.stages(pipeline.indexOf(branchStage)-1).input + + if(branchStage == execute) branchExceptionPort.valid clearWhen(service(classOf[HazardService]).hazardOnExecuteRS) + } + } + + decodePrediction.rsp.wasWrong := jumpInterface.valid + } + + + + + + def buildFetchPrediction(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + + //Do branch calculations (conditions + target PC) + object NEXT_PC extends Stageable(UInt(32 bits)) + object TARGET_MISSMATCH extends Stageable(Bool) + object BRANCH_SRC2 extends Stageable(UInt(32 bits)) + val branchSrc2Stage = if(decodeBranchSrc2) decode else execute + execute plug new Area { + import execute._ + + val less = input(SRC_LESS) + val eq = input(SRC1) === input(SRC2) + + insert(BRANCH_DO) := input(BRANCH_CTRL).mux( + BranchCtrlEnum.INC -> False, + BranchCtrlEnum.JAL -> True, + BranchCtrlEnum.JALR -> True, + BranchCtrlEnum.B -> input(INSTRUCTION)(14 downto 12).mux( + B"000" -> eq , + B"001" -> !eq , + M"1-1" -> !less, + default -> less + ) + ) + + val branch_src1 = (input(BRANCH_CTRL) === BranchCtrlEnum.JALR) ? input(RS1).asUInt | input(PC) + + val imm = IMM(branchSrc2Stage.input(INSTRUCTION)) + branchSrc2Stage.insert(BRANCH_SRC2) := branchSrc2Stage.input(BRANCH_CTRL).mux( + BranchCtrlEnum.JAL -> imm.j_sext, + BranchCtrlEnum.JALR -> imm.i_sext, + default -> imm.b_sext + ).asUInt + + val branchAdder = branch_src1 + input(BRANCH_SRC2) + insert(BRANCH_CALC) := branchAdder(31 downto 1) @@ U"0" + insert(NEXT_PC) := input(PC) + (if(pipeline.config.withRvc) ((input(IS_RVC)) ? U(2) | U(4)) else 4) + insert(TARGET_MISSMATCH) := decode.input(PC) =/= input(BRANCH_CALC) + } + + //Apply branchs (JAL,JALR, Bxx) + val branchStage = if(earlyBranch) execute else memory + branchStage plug new Area { + import branchStage._ + + val predictionMissmatch = fetchPrediction.cmd.hadBranch =/= input(BRANCH_DO) || (input(BRANCH_DO) && input(TARGET_MISSMATCH)) + when(inDebugNoFetchFlag) { predictionMissmatch := input(BRANCH_DO)} + fetchPrediction.rsp.wasRight := ! 
predictionMissmatch + fetchPrediction.rsp.finalPc := input(BRANCH_CALC) + fetchPrediction.rsp.sourceLastWord := { + if(pipeline.config.withRvc) + ((!input(IS_RVC) && input(PC)(1)) ? input(NEXT_PC) | input(PC)) + else + input(PC) + } + + jumpInterface.valid := arbitration.isValid && predictionMissmatch && !hasHazardOnBranch + jumpInterface.payload := (input(BRANCH_DO) ? input(BRANCH_CALC) | input(NEXT_PC)) + arbitration.flushNext setWhen(jumpInterface.valid) + + + if(catchAddressMisalignedForReal) { + branchExceptionPort.valid := arbitration.isValid && input(BRANCH_DO) && input(BRANCH_CALC)(1) + branchExceptionPort.code := 0 + branchExceptionPort.badAddr := input(BRANCH_CALC) + + if(branchStage == execute) branchExceptionPort.valid clearWhen(service(classOf[HazardService]).hazardOnExecuteRS) + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/CfuPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/CfuPlugin.scala new file mode 100644 index 0000000..d343640 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/CfuPlugin.scala @@ -0,0 +1,357 @@ +package vexriscv.plugin + +import vexriscv.{DecoderService, ExceptionCause, ExceptionService, Stage, Stageable, VexRiscv} +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.bmb.WeakConnector +import spinal.lib.bus.misc.{AddressMapping, DefaultMapping} +import vexriscv.Riscv.IMM + +case class CfuPluginParameter( + CFU_VERSION : Int, + CFU_INTERFACE_ID_W : Int, + CFU_FUNCTION_ID_W : Int, + CFU_REORDER_ID_W : Int, + CFU_REQ_RESP_ID_W : Int, + CFU_INPUTS : Int, + CFU_INPUT_DATA_W : Int, + CFU_OUTPUTS : Int, + CFU_OUTPUT_DATA_W : Int, + CFU_FLOW_REQ_READY_ALWAYS : Boolean, + CFU_FLOW_RESP_READY_ALWAYS : Boolean) + +case class CfuBusParameter(CFU_VERSION : Int = 0, + CFU_INTERFACE_ID_W : Int = 0, + CFU_FUNCTION_ID_W : Int, + CFU_CFU_ID_W : Int = 0, + CFU_REORDER_ID_W : Int = 0, + CFU_REQ_RESP_ID_W : Int = 0, + CFU_STATE_INDEX_NUM : Int = 0, + CFU_INPUTS : Int, + CFU_INPUT_DATA_W : Int, + CFU_OUTPUTS : Int, + CFU_OUTPUT_DATA_W : Int, + CFU_FLOW_REQ_READY_ALWAYS : Boolean, + CFU_FLOW_RESP_READY_ALWAYS : Boolean, + CFU_WITH_STATUS : Boolean = false, + CFU_RAW_INSN_W : Int = 0) + +case class CfuCmd( p : CfuBusParameter ) extends Bundle{ + val function_id = UInt(p.CFU_FUNCTION_ID_W bits) + val reorder_id = UInt(p.CFU_REORDER_ID_W bits) + val request_id = UInt(p.CFU_REQ_RESP_ID_W bits) + val inputs = Vec(Bits(p.CFU_INPUT_DATA_W bits), p.CFU_INPUTS) + val state_index = UInt(log2Up(p.CFU_STATE_INDEX_NUM) bits) + val cfu_index = UInt(p.CFU_CFU_ID_W bits) + val raw_insn = Bits(p.CFU_RAW_INSN_W bits) + def weakAssignFrom(m : CfuCmd): Unit ={ + def s = this + WeakConnector(m, s, m.function_id, s.function_id, defaultValue = null, allowUpSize = false, allowDownSize = true , allowDrop = true) + WeakConnector(m, s, m.reorder_id, s.reorder_id, defaultValue = null, allowUpSize = false , allowDownSize = false, allowDrop = false) + WeakConnector(m, s, m.request_id, s.request_id, defaultValue = null, allowUpSize = false, allowDownSize = false, allowDrop = false) + s.inputs := m.inputs + } +} + +case class CfuRsp(p : CfuBusParameter) extends Bundle{ + val response_id = UInt(p.CFU_REQ_RESP_ID_W bits) + val outputs = Vec(Bits(p.CFU_OUTPUT_DATA_W bits), p.CFU_OUTPUTS) + val status = p.CFU_WITH_STATUS generate Bits(3 bits) + + def weakAssignFrom(m : CfuRsp): Unit ={ + def s = this + s.response_id := m.response_id + s.outputs := m.outputs + } +} + +case class CfuBus(p : CfuBusParameter) extends Bundle with IMasterSlave{ + val cmd = 
Stream(CfuCmd(p)) + val rsp = Stream(CfuRsp(p)) + + def <<(m : CfuBus) : Unit = { + val s = this + s.cmd.arbitrationFrom(m.cmd) + m.rsp.arbitrationFrom(s.rsp) + + s.cmd.weakAssignFrom(m.cmd) + m.rsp.weakAssignFrom(s.rsp) + } + + override def asMaster(): Unit = { + master(cmd) + slave(rsp) + } +} + +object CfuPlugin{ + object Input2Kind extends SpinalEnum{ + val RS, IMM_I = newElement() + } +} + +case class CfuPluginEncoding(instruction : MaskedLiteral, + functionId : List[Range], + input2Kind : CfuPlugin.Input2Kind.E){ + val functionIdWidth = functionId.map(_.size).sum +} + +class CfuPlugin(val stageCount : Int, + val allowZeroLatency : Boolean, + val busParameter : CfuBusParameter, + val encodings : List[CfuPluginEncoding] = null, + val stateAndIndexCsrOffset : Int = 0xBC0, + val statusCsrOffset : Int = 0x801, + val withEnable : Boolean = true) extends Plugin[VexRiscv]{ + def p = busParameter + + assert(p.CFU_INPUTS <= 2) + assert(p.CFU_OUTPUTS == 1) +// assert(p.CFU_FUNCTION_ID_W == 3) + + var bus : CfuBus = null + + lazy val forkStage = pipeline.execute + lazy val joinStage = pipeline.stages(Math.min(pipeline.stages.length - 1, pipeline.indexOf(forkStage) + stageCount)) + + + val CFU_ENABLE = new Stageable(Bool()).setCompositeName(this, "CFU_ENABLE") + val CFU_IN_FLIGHT = new Stageable(Bool()).setCompositeName(this, "CFU_IN_FLIGHT") + val CFU_ENCODING = new Stageable(UInt(log2Up(encodings.size) bits)).setCompositeName(this, "CFU_ENCODING") + val CFU_INPUT_2_KIND = new Stageable(CfuPlugin.Input2Kind()).setCompositeName(this, "CFU_INPUT_2_KIND") + + override def setup(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + bus = master(CfuBus(p)) + + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(CFU_ENABLE, False) + + for((encoding, id) <- encodings.zipWithIndex){ + var actions = List( + CFU_ENABLE -> True, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> Bool(stageCount == 0), + BYPASSABLE_MEMORY_STAGE -> Bool(stageCount <= 1), + RS1_USE -> True, + CFU_ENCODING -> U(id), + CFU_INPUT_2_KIND -> encoding.input2Kind() + ) + + encoding.input2Kind match { + case CfuPlugin.Input2Kind.RS => + actions :+= RS2_USE -> True + case CfuPlugin.Input2Kind.IMM_I => + } + + decoderService.add( + key = encoding.instruction, + values = actions + ) + } + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + val csr = pipeline plug new Area{ + val factory = pipeline.service(classOf[CsrInterface]) + val en = withEnable generate (Reg(Bool()) init(False)) + if(withEnable) factory.rw(stateAndIndexCsrOffset, 31, en) + + val stateId = Reg(UInt(log2Up(p.CFU_STATE_INDEX_NUM) bits)) init(0) + if(p.CFU_STATE_INDEX_NUM > 1) { + assert(stateAndIndexCsrOffset != -1, "CfuPlugin stateCsrIndex need to be set in the parameters") + factory.rw(stateAndIndexCsrOffset, 16, stateId) + } + + val cfuIndex = Reg(UInt(p.CFU_CFU_ID_W bits)) init(0) + if(p.CFU_CFU_ID_W != 0){ + factory.rw(stateAndIndexCsrOffset, 0, cfuIndex) + } + val status = p.CFU_WITH_STATUS generate new Area{ + val CU, OP, FI, OF, SI, CI = RegInit(False) + val flags = List(CU, OP, FI, OF, SI, CI).reverse + factory.rw(statusCsrOffset, flags.zipWithIndex.map(_.swap) :_*) + factory.duringWrite(statusCsrOffset){ + decode.arbitration.haltByOther := True //Handle CSRW to decode + } + } + } + + if(withEnable) when(decode.input(CFU_ENABLE) && !csr.en){ + pipeline.service(classOf[DecoderService]).forceIllegal() + } + + forkStage plug new Area{ 
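+      // Command (fork) side of the CFU transaction, summarised:
+      //  - the instruction is halted here while a later pipeline stage still holds a valid instruction
+      //    flagged HAS_SIDE_EFFECT (hazard), and while the CFU has not yet accepted the command
+      //  - `hold` keeps bus.cmd.valid asserted until bus.cmd.ready, while `fired` makes sure the command
+      //    is sent only once even if this stage stays stuck afterwards
+      //  - the function id is rebuilt from the instruction bit ranges declared by the matching
+      //    CfuPluginEncoding, and the second operand is either RS2 or an immediate, per CFU_INPUT_2_KIND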
+ import forkStage._ + val hazard = stages.dropWhile(_ != forkStage).tail.map(s => s.arbitration.isValid && s.input(HAS_SIDE_EFFECT)).orR + val scheduleWish = arbitration.isValid && input(CFU_ENABLE) + val schedule = scheduleWish && !hazard + arbitration.haltItself setWhen(scheduleWish && hazard) + + val hold = RegInit(False) setWhen(schedule) clearWhen(bus.cmd.ready) + val fired = RegInit(False) setWhen(bus.cmd.fire) clearWhen(!arbitration.isStuck) + insert(CFU_IN_FLIGHT) := schedule || hold || fired + + bus.cmd.valid := (schedule || hold) && !fired + arbitration.haltItself setWhen(bus.cmd.valid && !bus.cmd.ready) + +// bus.cmd.function_id := U(input(INSTRUCTION)(14 downto 12)).resized + val functionIdFromInstructinoWidth = encodings.map(_.functionIdWidth).max + val functionsIds = encodings.map(e => U(Cat(e.functionId.map(r => input(INSTRUCTION)(r))), functionIdFromInstructinoWidth bits)) + bus.cmd.cfu_index := csr.cfuIndex + bus.cmd.state_index := csr.stateId + bus.cmd.function_id := functionsIds.read(input(CFU_ENCODING)) + bus.cmd.reorder_id := 0 + bus.cmd.request_id := 0 + bus.cmd.raw_insn := input(INSTRUCTION).resized + if(p.CFU_INPUTS >= 1) bus.cmd.inputs(0) := input(RS1) + if(p.CFU_INPUTS >= 2) bus.cmd.inputs(1) := input(CFU_INPUT_2_KIND).mux( + CfuPlugin.Input2Kind.RS -> input(RS2), + CfuPlugin.Input2Kind.IMM_I -> IMM(input(INSTRUCTION)).h_sext + ) + } + + joinStage plug new Area{ + import joinStage._ + + //If the CFU interface can produce a result combinatorialy and the fork stage isn't the same than the join stage + //Then it is required to add a buffer on rsp to not propagate the fork stage ready := False in the CPU pipeline. + val rsp = if(p.CFU_FLOW_RESP_READY_ALWAYS){ + bus.rsp.toFlow.toStream.queueLowLatency( + size = stageCount + 1, + latency = 0 + ) + } else if(forkStage != joinStage && allowZeroLatency) { + bus.rsp.s2mPipe() + } else { + bus.rsp.combStage() + } + + rsp.ready := False + when(input(CFU_IN_FLIGHT)){ + arbitration.haltItself setWhen(!rsp.valid) + rsp.ready := !arbitration.isStuckByOthers + output(REGFILE_WRITE_DATA) := rsp.outputs(0) + if(p.CFU_WITH_STATUS) when(arbitration.isFiring){ + switch(rsp.status) { + for (i <- 1 to 6) is(i) { + csr.status.flags(i-1) := True + } + } + } + } + } + + pipeline.stages.drop(1).foreach(s => s.output(CFU_IN_FLIGHT) clearWhen(s.arbitration.isStuck)) + addPrePopTask(() => stages.dropWhile(_ != memory).reverse.dropWhile(_ != joinStage).foreach(s => s.input(CFU_IN_FLIGHT).init(False))) + } +} + + +object CfuTest{ + +// stageCount = 0, +// allowZeroLatency = true, + def getCfuParameter() = CfuBusParameter( + CFU_VERSION = 0, + CFU_INTERFACE_ID_W = 0, + CFU_FUNCTION_ID_W = 3, + CFU_REORDER_ID_W = 0, + CFU_REQ_RESP_ID_W = 0, + CFU_INPUTS = 2, + CFU_INPUT_DATA_W = 32, + CFU_OUTPUTS = 1, + CFU_OUTPUT_DATA_W = 32, + CFU_FLOW_REQ_READY_ALWAYS = false, + CFU_FLOW_RESP_READY_ALWAYS = false + ) +} +case class CfuTest() extends Component{ + val io = new Bundle { + val bus = slave(CfuBus(CfuTest.getCfuParameter())) + } + io.bus.rsp.arbitrationFrom(io.bus.cmd) + io.bus.rsp.response_id := io.bus.cmd.request_id + io.bus.rsp.outputs(0) := ~(io.bus.cmd.inputs(0) & io.bus.cmd.inputs(1)) +} + + +case class CfuBb(p : CfuBusParameter) extends BlackBox{ + val io = new Bundle { + val clk, reset = in Bool() + val bus = slave(CfuBus(p)) + } + + mapCurrentClockDomain(io.clk, io.reset) +} + +//case class CfuGray(p : CfuBusParameter) extends BlackBox{ +// val req_function_id = in Bits(p.CFU_FUNCTION_ID_W) +// val req_data = in Bits(p.CFU_REQ_INPUTS) +// 
val resp_data = in Bits(p.CFU_FUNCTION_ID_W) +// input `CFU_FUNCTION_ID req_function_id, +// input [CFU_REQ_INPUTS-1:0]`CFU_REQ_DATA req_data, +// output [CFU_RESP_OUTPUTS-1:0]`CFU_RESP_DATA resp_data +// io.bus.rsp.arbitrationFrom(io.bus.cmd) +// io.bus.rsp.response_ok := True +// io.bus.rsp.response_id := io.bus.cmd.request_id +// io.bus.rsp.outputs(0) := ~(io.bus.cmd.inputs(0) & io.bus.cmd.inputs(1)) +//} + + +case class CfuDecoder(p : CfuBusParameter, + mappings : Seq[AddressMapping], + pendingMax : Int = 3) extends Component{ + val io = new Bundle { + val input = slave(CfuBus(p)) + val outputs = Vec(master(CfuBus(p)), mappings.size) + } + val hasDefault = mappings.contains(DefaultMapping) + val logic = if(hasDefault && mappings.size == 1){ + io.outputs(0) << io.input + } else new Area { + val hits = Vec(Bool, mappings.size) + for (portId <- 0 until mappings.length) yield { + val slaveBus = io.outputs(portId) + val memorySpace = mappings(portId) + val hit = hits(portId) + hit := (memorySpace match { + case DefaultMapping => !hits.filterNot(_ == hit).orR + case _ => memorySpace.hit(io.input.cmd.function_id) + }) + slaveBus.cmd.valid := io.input.cmd.valid && hit + slaveBus.cmd.payload := io.input.cmd.payload.resized + } + val noHit = if (!hasDefault) !hits.orR else False + io.input.cmd.ready := (hits, io.outputs).zipped.map(_ && _.cmd.ready).orR || noHit + + val rspPendingCounter = Reg(UInt(log2Up(pendingMax + 1) bits)) init(0) + rspPendingCounter := rspPendingCounter + U(io.input.cmd.fire) - U(io.input.rsp.fire) + val rspHits = RegNextWhen(hits, io.input.cmd.fire) + val rspPending = rspPendingCounter =/= 0 + val rspNoHitValid = if (!hasDefault) !rspHits.orR else False + val rspNoHit = !hasDefault generate new Area{ + val doIt = RegInit(False) clearWhen(io.input.rsp.fire) setWhen(io.input.cmd.fire && noHit) + val response_id = RegNextWhen(io.input.cmd.request_id, io.input.cmd.fire) + } + + io.input.rsp.valid := io.outputs.map(_.rsp.valid).orR || (rspPending && rspNoHitValid) + io.input.rsp.payload := io.outputs.map(_.rsp.payload).read(OHToUInt(rspHits)) + if(!hasDefault) when(rspNoHit.doIt) { + io.input.rsp.valid := True + io.input.rsp.response_id := rspNoHit.response_id + } + for(output <- io.outputs) output.rsp.ready := io.input.rsp.ready + + val cmdWait = (rspPending && (hits =/= rspHits || rspNoHitValid)) || rspPendingCounter === pendingMax + when(cmdWait) { + io.input.cmd.ready := False + io.outputs.foreach(_.cmd.valid := False) + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/CsrPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/CsrPlugin.scala new file mode 100644 index 0000000..7731a41 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/CsrPlugin.scala @@ -0,0 +1,1335 @@ +package vexriscv.plugin + +import spinal.core._ +import spinal.lib._ +import vexriscv._ +import vexriscv.Riscv._ +import vexriscv.plugin.IntAluPlugin.{ALU_BITWISE_CTRL, ALU_CTRL, AluBitwiseCtrlEnum, AluCtrlEnum} + +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable +import spinal.core.sim._ + +/** + * Created by spinalvm on 21.03.17. 
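+ *
+ * CsrPlugin (defined below) implements the machine-mode CSR file, the optional supervisor/user modes,
+ * interrupt and exception trap entry, MRET/SRET handling and the CSRRW/CSRRS/CSRRC(I) instruction family.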
+ */ + +trait CsrAccess{ + def canWrite : Boolean = false + def canRead : Boolean = false +} +object CsrAccess { + object WRITE_ONLY extends CsrAccess{ + override def canWrite : Boolean = true + } + object READ_ONLY extends CsrAccess{ + override def canRead : Boolean = true + } + object READ_WRITE extends CsrAccess{ + override def canWrite : Boolean = true + override def canRead : Boolean = true + } + object NONE extends CsrAccess +} + + + +case class ExceptionPortInfo(port : Flow[ExceptionCause],stage : Stage, priority : Int, codeWidth : Int) +case class CsrPluginConfig( + catchIllegalAccess : Boolean, + mvendorid : BigInt, + marchid : BigInt, + mimpid : BigInt, + mhartid : BigInt, + misaExtensionsInit : Int, + misaAccess : CsrAccess, + mtvecAccess : CsrAccess, + mtvecInit : BigInt, + mepcAccess : CsrAccess, + mscratchGen : Boolean, + mcauseAccess : CsrAccess, + mbadaddrAccess : CsrAccess, + mcycleAccess : CsrAccess, + minstretAccess : CsrAccess, + ucycleAccess : CsrAccess, + uinstretAccess : CsrAccess = CsrAccess.NONE, + wfiGenAsWait : Boolean, + ecallGen : Boolean, + xtvecModeGen : Boolean = false, + noCsrAlu : Boolean = false, + wfiGenAsNop : Boolean = false, + ebreakGen : Boolean = false, + userGen : Boolean = false, + supervisorGen : Boolean = false, + sscratchGen : Boolean = false, + stvecAccess : CsrAccess = CsrAccess.NONE, + sepcAccess : CsrAccess = CsrAccess.NONE, + scauseAccess : CsrAccess = CsrAccess.NONE, + sbadaddrAccess : CsrAccess = CsrAccess.NONE, + scycleAccess : CsrAccess = CsrAccess.NONE, + sinstretAccess : CsrAccess = CsrAccess.NONE, + satpAccess : CsrAccess = CsrAccess.NONE, + utimeAccess :CsrAccess = CsrAccess.NONE, + medelegAccess : CsrAccess = CsrAccess.NONE, + midelegAccess : CsrAccess = CsrAccess.NONE, + withExternalMhartid : Boolean = false, + mhartidWidth : Int = 0, + pipelineCsrRead : Boolean = false, + pipelinedInterrupt : Boolean = true, + csrOhDecoder : Boolean = true, + deterministicInteruptionEntry : Boolean = false, //Only used for simulatation purposes + wfiOutput : Boolean = false + ){ + assert(!ucycleAccess.canWrite) + def privilegeGen = userGen || supervisorGen + def noException = this.copy(ecallGen = false, ebreakGen = false, catchIllegalAccess = false) + def noExceptionButEcall = this.copy(ecallGen = true, ebreakGen = false, catchIllegalAccess = false) +} + +object CsrPluginConfig{ + def all : CsrPluginConfig = all(0x00000020l) + def small : CsrPluginConfig = small(0x00000020l) + def smallest : CsrPluginConfig = smallest(0x00000020l) + + def openSbi(mhartid : Int, misa : Int) = CsrPluginConfig( + catchIllegalAccess = true, + mvendorid = 0, + marchid = 0, + mimpid = 0, + mhartid = mhartid, + misaExtensionsInit = misa, + misaAccess = CsrAccess.READ_ONLY, + mtvecAccess = CsrAccess.READ_WRITE, //Could have been WRITE_ONLY :( + mtvecInit = null, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = true, + mcauseAccess = CsrAccess.READ_ONLY, + mbadaddrAccess = CsrAccess.READ_ONLY, + mcycleAccess = CsrAccess.NONE, + minstretAccess = CsrAccess.NONE, + ucycleAccess = CsrAccess.NONE, + wfiGenAsWait = true, + ecallGen = true, + xtvecModeGen = false, + noCsrAlu = false, + wfiGenAsNop = false, + ebreakGen = true, + userGen = true, + supervisorGen = true, + sscratchGen = true, + stvecAccess = CsrAccess.READ_WRITE, + sepcAccess = CsrAccess.READ_WRITE, + scauseAccess = CsrAccess.READ_WRITE, + sbadaddrAccess = CsrAccess.READ_WRITE, + scycleAccess = CsrAccess.NONE, + sinstretAccess = CsrAccess.NONE, + satpAccess = CsrAccess.NONE, + medelegAccess = 
CsrAccess.READ_WRITE, //Could have been WRITE_ONLY :( + midelegAccess = CsrAccess.READ_WRITE, //Could have been WRITE_ONLY :( + pipelineCsrRead = false, + deterministicInteruptionEntry = false + ) + + def linuxMinimal(mtVecInit : BigInt) = CsrPluginConfig( + catchIllegalAccess = true, + mvendorid = 1, + marchid = 2, + mimpid = 3, + mhartid = 0, + misaExtensionsInit = 0, //TODO + misaAccess = CsrAccess.NONE, //Read required by some regressions + mtvecAccess = CsrAccess.WRITE_ONLY, //Read required by some regressions + mtvecInit = mtVecInit, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = true, + mcauseAccess = CsrAccess.READ_ONLY, + mbadaddrAccess = CsrAccess.READ_ONLY, + mcycleAccess = CsrAccess.NONE, + minstretAccess = CsrAccess.NONE, + ucycleAccess = CsrAccess.NONE, + uinstretAccess = CsrAccess.NONE, + wfiGenAsWait = true, + ecallGen = true, + xtvecModeGen = false, + noCsrAlu = false, + wfiGenAsNop = false, + ebreakGen = true, + userGen = true, + supervisorGen = true, + sscratchGen = true, + stvecAccess = CsrAccess.READ_WRITE, + sepcAccess = CsrAccess.READ_WRITE, + scauseAccess = CsrAccess.READ_WRITE, + sbadaddrAccess = CsrAccess.READ_WRITE, + scycleAccess = CsrAccess.NONE, + sinstretAccess = CsrAccess.NONE, + satpAccess = CsrAccess.NONE, //Implemented into the MMU plugin + medelegAccess = CsrAccess.WRITE_ONLY, + midelegAccess = CsrAccess.WRITE_ONLY, + pipelineCsrRead = false, + deterministicInteruptionEntry = false + ) + + + def linuxFull(mtVecInit : BigInt) = CsrPluginConfig( + catchIllegalAccess = true, + mvendorid = 1, + marchid = 2, + mimpid = 3, + mhartid = 0, + misaExtensionsInit = 0, //TODO + misaAccess = CsrAccess.READ_WRITE, + mtvecAccess = CsrAccess.READ_WRITE, + mtvecInit = mtVecInit, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = true, + mcauseAccess = CsrAccess.READ_WRITE, + mbadaddrAccess = CsrAccess.READ_WRITE, + mcycleAccess = CsrAccess.READ_WRITE, + minstretAccess = CsrAccess.READ_WRITE, + ucycleAccess = CsrAccess.READ_ONLY, + uinstretAccess = CsrAccess.READ_ONLY, + wfiGenAsWait = true, + ecallGen = true, + xtvecModeGen = false, + noCsrAlu = false, + wfiGenAsNop = false, + ebreakGen = false, + userGen = true, + supervisorGen = true, + sscratchGen = true, + stvecAccess = CsrAccess.READ_WRITE, + sepcAccess = CsrAccess.READ_WRITE, + scauseAccess = CsrAccess.READ_WRITE, + sbadaddrAccess = CsrAccess.READ_WRITE, + scycleAccess = CsrAccess.READ_WRITE, + sinstretAccess = CsrAccess.READ_WRITE, + satpAccess = CsrAccess.NONE, //Implemented into the MMU plugin + medelegAccess = CsrAccess.READ_WRITE, + midelegAccess = CsrAccess.READ_WRITE, + pipelineCsrRead = false, + deterministicInteruptionEntry = false + ) + + def all(mtvecInit : BigInt) : CsrPluginConfig = CsrPluginConfig( + catchIllegalAccess = true, + mvendorid = 11, + marchid = 22, + mimpid = 33, + mhartid = 0, + misaExtensionsInit = 66, + misaAccess = CsrAccess.READ_WRITE, + mtvecAccess = CsrAccess.READ_WRITE, + mtvecInit = mtvecInit, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = true, + mcauseAccess = CsrAccess.READ_WRITE, + mbadaddrAccess = CsrAccess.READ_WRITE, + mcycleAccess = CsrAccess.READ_WRITE, + minstretAccess = CsrAccess.READ_WRITE, + ecallGen = true, + wfiGenAsWait = true, + ucycleAccess = CsrAccess.READ_ONLY, + uinstretAccess = CsrAccess.READ_ONLY + ) + + def all2(mtvecInit : BigInt) : CsrPluginConfig = CsrPluginConfig( + catchIllegalAccess = true, + mvendorid = 11, + marchid = 22, + mimpid = 33, + mhartid = 0, + misaExtensionsInit = 66, + misaAccess = CsrAccess.READ_WRITE, + mtvecAccess = 
CsrAccess.READ_WRITE, + mtvecInit = mtvecInit, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = true, + mcauseAccess = CsrAccess.READ_WRITE, + mbadaddrAccess = CsrAccess.READ_WRITE, + mcycleAccess = CsrAccess.READ_WRITE, + minstretAccess = CsrAccess.READ_WRITE, + ecallGen = true, + wfiGenAsWait = true, + ucycleAccess = CsrAccess.READ_ONLY, + uinstretAccess = CsrAccess.READ_ONLY, + supervisorGen = true, + sscratchGen = true, + stvecAccess = CsrAccess.READ_WRITE, + sepcAccess = CsrAccess.READ_WRITE, + scauseAccess = CsrAccess.READ_WRITE, + sbadaddrAccess = CsrAccess.READ_WRITE, + scycleAccess = CsrAccess.READ_WRITE, + sinstretAccess = CsrAccess.READ_WRITE, + satpAccess = CsrAccess.READ_WRITE, + medelegAccess = CsrAccess.READ_WRITE, + midelegAccess = CsrAccess.READ_WRITE + ) + + def small(mtvecInit : BigInt) = CsrPluginConfig( + catchIllegalAccess = false, + mvendorid = null, + marchid = null, + mimpid = null, + mhartid = null, + misaExtensionsInit = 66, + misaAccess = CsrAccess.NONE, + mtvecAccess = CsrAccess.NONE, + mtvecInit = mtvecInit, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = false, + mcauseAccess = CsrAccess.READ_ONLY, + mbadaddrAccess = CsrAccess.READ_ONLY, + mcycleAccess = CsrAccess.NONE, + minstretAccess = CsrAccess.NONE, + ecallGen = false, + wfiGenAsWait = false, + ucycleAccess = CsrAccess.NONE, + uinstretAccess = CsrAccess.NONE + ) + + def smallest(mtvecInit : BigInt) = CsrPluginConfig( + catchIllegalAccess = false, + mvendorid = null, + marchid = null, + mimpid = null, + mhartid = null, + misaExtensionsInit = 66, + misaAccess = CsrAccess.NONE, + mtvecAccess = CsrAccess.NONE, + mtvecInit = mtvecInit, + mepcAccess = CsrAccess.NONE, + mscratchGen = false, + mcauseAccess = CsrAccess.READ_ONLY, + mbadaddrAccess = CsrAccess.NONE, + mcycleAccess = CsrAccess.NONE, + minstretAccess = CsrAccess.NONE, + ecallGen = false, + wfiGenAsWait = false, + ucycleAccess = CsrAccess.NONE, + uinstretAccess = CsrAccess.NONE + ) + + def secure(mtvecInit : BigInt) = CsrPluginConfig( + catchIllegalAccess = true, + mvendorid = 1, + marchid = 2, + mimpid = 3, + mhartid = 0, + misaExtensionsInit = 0x101064, // RV32GCFMU + misaAccess = CsrAccess.READ_WRITE, + mtvecAccess = CsrAccess.READ_WRITE, + mtvecInit = mtvecInit, + mepcAccess = CsrAccess.READ_WRITE, + mscratchGen = true, + mcauseAccess = CsrAccess.READ_WRITE, + mbadaddrAccess = CsrAccess.READ_WRITE, + mcycleAccess = CsrAccess.READ_WRITE, + minstretAccess = CsrAccess.READ_WRITE, + ucycleAccess = CsrAccess.READ_ONLY, + uinstretAccess = CsrAccess.READ_ONLY, + wfiGenAsWait = true, + ecallGen = true, + userGen = true, + medelegAccess = CsrAccess.READ_WRITE, + midelegAccess = CsrAccess.READ_WRITE + ) + +} +case class CsrWrite(that : Data, bitOffset : Int) +case class CsrRead(that : Data , bitOffset : Int) +case class CsrReadToWriteOverride(that : Data, bitOffset : Int) //Used for special cases, as MIP where there shadow stuff +case class CsrOnWrite(doThat :() => Unit) +case class CsrDuringWrite(doThat :() => Unit) +case class CsrDuringRead(doThat :() => Unit) +case class CsrDuring(doThat :() => Unit) +case class CsrOnRead(doThat : () => Unit) + + +case class CsrMapping() extends Area with CsrInterface { + val mapping = mutable.LinkedHashMap[Int,ArrayBuffer[Any]]() + val always = ArrayBuffer[Any]() + val readDataSignal, readDataInit, writeDataSignal = Bits(32 bits) + val allowCsrSignal = False + val hazardFree = Bool() + + readDataSignal := readDataInit + def addMappingAt(address : Int,that : Any) = mapping.getOrElseUpdate(address,new 
ArrayBuffer[Any]) += that + override def r(csrAddress : Int, bitOffset : Int, that : Data): Unit = addMappingAt(csrAddress, CsrRead(that,bitOffset)) + override def w(csrAddress : Int, bitOffset : Int, that : Data): Unit = addMappingAt(csrAddress, CsrWrite(that,bitOffset)) + override def r2w(csrAddress : Int, bitOffset : Int, that : Data): Unit = addMappingAt(csrAddress, CsrReadToWriteOverride(that,bitOffset)) + override def onWrite(csrAddress: Int)(body: => Unit): Unit = addMappingAt(csrAddress, CsrOnWrite(() => body)) + override def duringWrite(csrAddress: Int)(body: => Unit): Unit = addMappingAt(csrAddress, CsrDuringWrite(() => body)) + override def duringRead(csrAddress: Int)(body: => Unit): Unit = addMappingAt(csrAddress, CsrDuringRead(() => body)) + override def during(csrAddress: Int)(body: => Unit): Unit = addMappingAt(csrAddress, CsrDuring(() => body)) + override def onRead(csrAddress: Int)(body: => Unit): Unit = addMappingAt(csrAddress, CsrOnRead(() => {body})) + override def duringAny(): Bool = ??? + override def duringAnyRead(body: => Unit) : Unit = always += CsrDuringRead(() => body) + override def duringAnyWrite(body: => Unit) : Unit = always += CsrDuringWrite(() => body) + override def onAnyRead(body: => Unit) : Unit = always += CsrOnRead(() => body) + override def onAnyWrite(body: => Unit) : Unit = always += CsrOnWrite(() => body) + override def readData() = readDataSignal + override def writeData() = writeDataSignal + override def allowCsr() = allowCsrSignal := True + override def isHazardFree() = hazardFree +} + + +trait CsrInterface{ + def onWrite(csrAddress : Int)(doThat : => Unit) : Unit + def onRead(csrAddress : Int)(doThat : => Unit) : Unit + def duringWrite(csrAddress: Int)(body: => Unit): Unit + def duringRead(csrAddress: Int)(body: => Unit): Unit + def during(csrAddress: Int)(body: => Unit): Unit + def duringAny(): Bool + def r(csrAddress : Int, bitOffset : Int, that : Data): Unit + def w(csrAddress : Int, bitOffset : Int, that : Data): Unit + def rw(csrAddress : Int, bitOffset : Int,that : Data): Unit ={ + r(csrAddress,bitOffset,that) + w(csrAddress,bitOffset,that) + } + def duringAnyRead(body: => Unit) : Unit //Called all the durration of a Csr write instruction in the execute stage + def duringAnyWrite(body: => Unit) : Unit //same than above for read + def onAnyRead(body: => Unit) : Unit + def onAnyWrite(body: => Unit) : Unit + def allowCsr() : Unit //In case your csr do not use the regular API with csrAddress but is implemented using "side channels", you can call that if the current csr is implemented + def isHazardFree() : Bool // You should not have any side effect nor use readData() until this return True + + def r2w(csrAddress : Int, bitOffset : Int,that : Data): Unit + + def rw(csrAddress : Int, thats : (Int, Data)*) : Unit = for(that <- thats) rw(csrAddress,that._1, that._2) + def w(csrAddress : Int, thats : (Int, Data)*) : Unit = for(that <- thats) w(csrAddress,that._1, that._2) + def r(csrAddress : Int, thats : (Int, Data)*) : Unit = for(that <- thats) r(csrAddress,that._1, that._2) + def rw[T <: Data](csrAddress : Int, that : T): Unit = rw(csrAddress,0,that) + def w[T <: Data](csrAddress : Int, that : T): Unit = w(csrAddress,0,that) + def r [T <: Data](csrAddress : Int, that : T): Unit = r(csrAddress,0,that) + def isWriting(csrAddress : Int) : Bool = { + val ret = False + onWrite(csrAddress){ + ret := True + } + ret + } + + def isReading(csrAddress : Int) : Bool = { + val ret = False + onRead(csrAddress){ + ret := True + } + ret + } + + def readData() 
: Bits //Return the 32 bits internal signal of the CsrPlugin for you to override (if you want) + def writeData() : Bits //Return the 32 bits value that the CsrPlugin want to write in the CSR (depend on readData combinatorialy) +} + + +trait IContextSwitching{ + def isContextSwitching : Bool +} +trait IWake{ + def askWake() : Unit +} + +class CsrPlugin(val config: CsrPluginConfig) extends Plugin[VexRiscv] with ExceptionService with PrivilegeService with InterruptionInhibitor with ExceptionInhibitor with IContextSwitching with CsrInterface with IWake{ + import config._ + import CsrAccess._ + + assert(!(wfiGenAsNop && wfiGenAsWait)) + + def xlen = 32 + + //Mannage ExceptionService calls + val exceptionPortsInfos = ArrayBuffer[ExceptionPortInfo]() + override def newExceptionPort(stage : Stage, priority : Int = 0, codeWidth : Int = 4) = { + val interface = Flow(ExceptionCause(codeWidth)) + exceptionPortsInfos += ExceptionPortInfo(interface,stage,priority,codeWidth) + interface + } + + + + var exceptionPendings : Vec[Bool] = null + override def isExceptionPending(stage : Stage): Bool = exceptionPendings(pipeline.stages.indexOf(stage)) + + var redoInterface : Flow[UInt] = null + var jumpInterface : Flow[UInt] = null + var timerInterrupt, externalInterrupt, softwareInterrupt : Bool = null + var externalInterruptS : Bool = null + var forceMachineWire : Bool = null + var privilege : UInt = null + var selfException : Flow[ExceptionCause] = null + var contextSwitching : Bool = null + var thirdPartyWake : Bool = null + var inWfi : Bool = null + var externalMhartId : UInt = null + var utime : UInt = null + + override def askWake(): Unit = thirdPartyWake := True + + override def isContextSwitching = contextSwitching + + object EnvCtrlEnum extends SpinalEnum(binarySequential){ + val NONE, XRET = newElement() + val WFI = if(wfiGenAsWait) newElement() else null + val ECALL = if(ecallGen) newElement() else null + val EBREAK = if(ebreakGen) newElement() else null + } + + object ENV_CTRL extends Stageable(EnvCtrlEnum()) + object IS_CSR extends Stageable(Bool) + object IS_SFENCE_VMA extends Stageable(Bool) + object CSR_WRITE_OPCODE extends Stageable(Bool) + object CSR_READ_OPCODE extends Stageable(Bool) + object PIPELINED_CSR_READ extends Stageable(Bits(32 bits)) + + var allowInterrupts : Bool = null + var allowException : Bool = null + var allowEbreakException : Bool = null + + var csrMapping : CsrMapping = null + + //Print CSR mapping + def printCsr() { + for ((address, things) <- csrMapping.mapping) { + println("0x" + address.toHexString + " => ") + for (thing <- things) { + println(" - " + thing) + } + } + } + + + //Interruption and exception data model + case class Delegator(var enable : Bool, privilege : Int) + case class InterruptSpec(var cond : Bool, id : Int, privilege : Int, delegators : List[Delegator]) + case class ExceptionSpec(id : Int, delegators : List[Delegator]) + var interruptSpecs = ArrayBuffer[InterruptSpec]() + var exceptionSpecs = ArrayBuffer[ExceptionSpec]() + + def addInterrupt(cond : Bool, id : Int, privilege : Int, delegators : List[Delegator]): Unit = { + interruptSpecs += InterruptSpec(cond, id, privilege, delegators) + } + + override def r(csrAddress: Int, bitOffset: Int, that: Data): Unit = csrMapping.r(csrAddress, bitOffset, that) + override def w(csrAddress: Int, bitOffset: Int, that: Data): Unit = csrMapping.w(csrAddress, bitOffset, that) + override def r2w(csrAddress: Int, bitOffset: Int, that: Data): Unit = csrMapping.r2w(csrAddress, bitOffset, that) + override def 
onWrite(csrAddress: Int)(body: => Unit): Unit = csrMapping.onWrite(csrAddress)(body) + override def duringWrite(csrAddress: Int)(body: => Unit): Unit = csrMapping.duringWrite(csrAddress)(body) + override def onRead(csrAddress: Int)(body: => Unit): Unit = csrMapping.onRead(csrAddress)(body) + override def duringRead(csrAddress: Int)(body: => Unit): Unit = csrMapping.duringRead(csrAddress)(body) + override def during(csrAddress: Int)(body: => Unit): Unit = csrMapping.during(csrAddress)(body) + override def duringAny(): Bool = pipeline.execute.arbitration.isValid && pipeline.execute.input(IS_CSR) + override def duringAnyRead(body: => Unit) = csrMapping.duringAnyRead(body) + override def duringAnyWrite(body: => Unit) = csrMapping.duringAnyWrite(body) + override def onAnyRead(body: => Unit) = csrMapping.onAnyRead(body) + override def onAnyWrite(body: => Unit) = csrMapping.onAnyWrite(body) + override def allowCsr() = csrMapping.allowCsr() + override def readData() = csrMapping.readData() + override def writeData() = csrMapping.writeData() + override def isHazardFree() = csrMapping.isHazardFree() + + override def setup(pipeline: VexRiscv): Unit = { + import pipeline.config._ + + if(!config.ebreakGen) { + SpinalWarning("This VexRiscv configuration is set without software ebreak instruction support. Some software may rely on it (ex: Rust). (This isn't related to JTAG ebreak)") + } + + csrMapping = new CsrMapping() + + inWfi = False.addTag(Verilator.public) + + thirdPartyWake = False + + val defaultEnv = List[(Stageable[_ <: BaseType],Any)]( + ) + + val defaultCsrActions = List[(Stageable[_ <: BaseType],Any)]( + IS_CSR -> True, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> False, + BYPASSABLE_MEMORY_STAGE -> True + ) ++ (if(catchIllegalAccess) List(HAS_SIDE_EFFECT -> True) else Nil) + + val nonImmediatActions = defaultCsrActions ++ List( + SRC1_CTRL -> Src1CtrlEnum.RS, + RS1_USE -> True + ) + + val immediatActions = defaultCsrActions ++ List( + SRC1_CTRL -> Src1CtrlEnum.URS1 + ) + + val decoderService = pipeline.service(classOf[DecoderService]) + + decoderService.addDefault(ENV_CTRL, EnvCtrlEnum.NONE) + decoderService.addDefault(IS_CSR, False) + decoderService.add(List( + CSRRW -> nonImmediatActions, + CSRRS -> nonImmediatActions, + CSRRC -> nonImmediatActions, + CSRRWI -> immediatActions, + CSRRSI -> immediatActions, + CSRRCI -> immediatActions, + MRET -> (defaultEnv ++ List(ENV_CTRL -> EnvCtrlEnum.XRET, HAS_SIDE_EFFECT -> True)), + SRET -> (defaultEnv ++ List(ENV_CTRL -> EnvCtrlEnum.XRET, HAS_SIDE_EFFECT -> True)) + )) + if(wfiGenAsWait) decoderService.add(WFI, defaultEnv ++ List(ENV_CTRL -> EnvCtrlEnum.WFI)) + if(wfiGenAsNop) decoderService.add(WFI, Nil) + if(ecallGen) decoderService.add(ECALL, defaultEnv ++ List(ENV_CTRL -> EnvCtrlEnum.ECALL, HAS_SIDE_EFFECT -> True)) + if(ebreakGen) decoderService.add(EBREAK, defaultEnv ++ List(ENV_CTRL -> EnvCtrlEnum.EBREAK, HAS_SIDE_EFFECT -> True)) + + val pcManagerService = pipeline.service(classOf[JumpService]) + jumpInterface = pcManagerService.createJumpInterface(pipeline.stages.last) + jumpInterface.valid := False + jumpInterface.payload.assignDontCare() + + + if(supervisorGen) { + redoInterface = pcManagerService.createJumpInterface(pipeline.execute, -20) //Should lose against dynamic_target branch prediction correction + } + + exceptionPendings = Vec(Bool, pipeline.stages.length) + timerInterrupt = in Bool() setName("timerInterrupt") + externalInterrupt = in Bool() setName("externalInterrupt") + softwareInterrupt = in Bool() 
setName("softwareInterrupt") default(False) + if(supervisorGen){ +// timerInterruptS = in Bool() setName("timerInterruptS") + externalInterruptS = in Bool() setName("externalInterruptS") + } + contextSwitching = Bool().setName("contextSwitching") + + privilege = UInt(2 bits).setName("CsrPlugin_privilege") + forceMachineWire = False + + if(catchIllegalAccess || ecallGen || ebreakGen) + selfException = newExceptionPort(pipeline.execute) + + allowInterrupts = True + allowException = True + allowEbreakException = True + + for (i <- interruptSpecs) i.cond = i.cond.pull() + + + pipeline.update(MPP, UInt(2 bits)) + + if(withExternalMhartid) externalMhartId = in UInt(mhartidWidth bits) + if(utimeAccess != CsrAccess.NONE) utime = in UInt(64 bits) setName("utime") + + if(supervisorGen) { + decoderService.addDefault(IS_SFENCE_VMA, False) + decoderService.add(SFENCE_VMA, List(IS_SFENCE_VMA -> True)) + } + } + + def inhibateInterrupts() : Unit = allowInterrupts := False + def inhibateException() : Unit = allowException := False + def inhibateEbreakException() : Unit = allowEbreakException := False + + override def isUser() : Bool = privilege === 0 + override def isSupervisor(): Bool = privilege === 1 + override def isMachine(): Bool = privilege === 3 + override def forceMachine(): Unit = forceMachineWire := True + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + val fetcher = service(classOf[IBusFetcher]) + val trapCodeWidth = log2Up((List(16) ++ interruptSpecs.map(_.id + 1) ++ exceptionPortsInfos.map(p => 1 << widthOf(p.port.code))).max) + + //Define CSR mapping utilities + implicit class CsrAccessPimper(csrAccess : CsrAccess){ + def apply(csrAddress : Int, thats : (Int, Data)*) : Unit = { + if(csrAccess == `WRITE_ONLY` || csrAccess == `READ_WRITE`) for(that <- thats) csrMapping.w(csrAddress,that._1, that._2) + if(csrAccess == `READ_ONLY` || csrAccess == `READ_WRITE`) for(that <- thats) csrMapping.r(csrAddress,that._1, that._2) + } + def apply(csrAddress : Int, that : Data) : Unit = { + if(csrAccess == `WRITE_ONLY` || csrAccess == `READ_WRITE`) csrMapping.w(csrAddress, 0, that) + if(csrAccess == `READ_ONLY` || csrAccess == `READ_WRITE`) csrMapping.r(csrAddress, 0, that) + } + } + + + case class Xtvec() extends Bundle { + val mode = Bits(2 bits) + val base = UInt(xlen-2 bits) + } + + val privilegeReg = privilegeGen generate RegInit(U"11") + privilege := (if(privilegeGen) privilegeReg else U"11") + + when(forceMachineWire) { privilege := 3 } + + val machineCsr = pipeline plug new Area{ + //Define CSR registers + // Status => MXR, SUM, TVM, TW, TSE ? 
+ val misa = new Area{ + val base = Reg(UInt(2 bits)) init(U"01") allowUnsetRegToAvoidLatch + val extensions = Reg(Bits(26 bits)) init(misaExtensionsInit) allowUnsetRegToAvoidLatch + } + + val mtvec = Reg(Xtvec()).allowUnsetRegToAvoidLatch + + if(mtvecInit != null) mtvec.mode init(mtvecInit & 0x3) + if(mtvecInit != null) mtvec.base init(mtvecInit / 4) + val mepc = Reg(UInt(xlen bits)) + val mstatus = new Area{ + val MIE, MPIE = RegInit(False) + val MPP = RegInit(U"11") + } + val mip = new Area{ + val MEIP = RegNext(externalInterrupt) + val MTIP = RegNext(timerInterrupt) + val MSIP = RegNext(softwareInterrupt) + } + val mie = new Area{ + val MEIE, MTIE, MSIE = RegInit(False) + } + val mscratch = if(mscratchGen) Reg(Bits(xlen bits)) else null + val mcause = new Area{ + val interrupt = Reg(Bool) + val exceptionCode = Reg(UInt(trapCodeWidth bits)) + } + val mtval = Reg(UInt(xlen bits)) + val mcycle = Reg(UInt(64 bits)) init(0) + val minstret = Reg(UInt(64 bits)) init(0) + + + val medeleg = supervisorGen generate new Area { + val IAM, IAF, II, LAM, LAF, SAM, SAF, EU, ES, IPF, LPF, SPF = RegInit(False) + val mapping = mutable.LinkedHashMap(0 -> IAM, 1 -> IAF, 2 -> II, 4 -> LAM, 5 -> LAF, 6 -> SAM, 7 -> SAF, 8 -> EU, 9 -> ES, 12 -> IPF, 13 -> LPF, 15 -> SPF) + } + val mideleg = supervisorGen generate new Area { + val ST, SE, SS = RegInit(False) + } + + if(mvendorid != null) READ_ONLY(CSR.MVENDORID, U(mvendorid)) + if(marchid != null) READ_ONLY(CSR.MARCHID , U(marchid )) + if(mimpid != null) READ_ONLY(CSR.MIMPID , U(mimpid )) + if(mhartid != null && !withExternalMhartid) READ_ONLY(CSR.MHARTID , U(mhartid )) + if(withExternalMhartid) READ_ONLY(CSR.MHARTID , externalMhartId) + misaAccess(CSR.MISA, xlen-2 -> misa.base , 0 -> misa.extensions) + + //Machine CSR + READ_WRITE(CSR.MSTATUS, 7 -> mstatus.MPIE, 3 -> mstatus.MIE) + READ_ONLY(CSR.MIP, 11 -> mip.MEIP, 7 -> mip.MTIP) + READ_WRITE(CSR.MIP, 3 -> mip.MSIP) + READ_WRITE(CSR.MIE, 11 -> mie.MEIE, 7 -> mie.MTIE, 3 -> mie.MSIE) + + r(CSR.MSTATUS, 11 -> mstatus.MPP) + onWrite(CSR.MSTATUS){ + switch(writeData()(12 downto 11)){ + is(3){ mstatus.MPP := 3 } + if(supervisorGen) is(1){ mstatus.MPP := 1 } + if(userGen) is(0){ mstatus.MPP := 0 } + } + } + + mtvecAccess(CSR.MTVEC, 2 -> mtvec.base, 0 -> mtvec.mode) + mepcAccess(CSR.MEPC, mepc) + if(mscratchGen) READ_WRITE(CSR.MSCRATCH, mscratch) + mcauseAccess(CSR.MCAUSE, xlen-1 -> mcause.interrupt, 0 -> mcause.exceptionCode) + mbadaddrAccess(CSR.MBADADDR, mtval) + mcycleAccess(CSR.MCYCLE, mcycle(31 downto 0)) + mcycleAccess(CSR.MCYCLEH, mcycle(63 downto 32)) + minstretAccess(CSR.MINSTRET, minstret(31 downto 0)) + minstretAccess(CSR.MINSTRETH, minstret(63 downto 32)) + + if(supervisorGen) { + for((id, enable) <- medeleg.mapping) medelegAccess(CSR.MEDELEG, id -> enable) + midelegAccess(CSR.MIDELEG, 9 -> mideleg.SE, 5 -> mideleg.ST, 1 -> mideleg.SS) + } + + //User CSR + ucycleAccess(CSR.UCYCLE, mcycle(31 downto 0)) + ucycleAccess(CSR.UCYCLEH, mcycle(63 downto 32)) + uinstretAccess(CSR.UINSTRET, minstret(31 downto 0)) + uinstretAccess(CSR.UINSTRETH, minstret(63 downto 32)) + + if(utimeAccess != CsrAccess.NONE) { + utimeAccess(CSR.UTIME, utime(31 downto 0)) + utimeAccess(CSR.UTIMEH, utime(63 downto 32)) + } + + pipeline(MPP) := mstatus.MPP + } + + val supervisorCsr = ifGen(supervisorGen) { + pipeline plug new Area { + val sstatus = new Area { + val SIE, SPIE = RegInit(False) + val SPP = RegInit(U"1") + } + + val sip = new Area { + val SEIP_SOFT = RegInit(False) + val SEIP_INPUT = RegNext(externalInterruptS) + val 
SEIP_OR = SEIP_SOFT || SEIP_INPUT + val STIP = RegInit(False) + val SSIP = RegInit(False) + } + val sie = new Area { + val SEIE, STIE, SSIE = RegInit(False) + } + val stvec = Reg(Xtvec()).allowUnsetRegToAvoidLatch + val sscratch = if (sscratchGen) Reg(Bits(xlen bits)) else null + + val scause = new Area { + val interrupt = Reg(Bool) + val exceptionCode = Reg(UInt(trapCodeWidth bits)) + } + val stval = Reg(UInt(xlen bits)) + val sepc = Reg(UInt(xlen bits)) + val satp = new Area { + val PPN = Reg(Bits(22 bits)) + val ASID = Reg(Bits(9 bits)) + val MODE = Reg(Bits(1 bits)) + } + + //Supervisor CSR + for(offset <- List(CSR.MSTATUS, CSR.SSTATUS)) READ_WRITE(offset,8 -> sstatus.SPP, 5 -> sstatus.SPIE, 1 -> sstatus.SIE) + for(offset <- List(CSR.MIP, CSR.SIP)) { + READ_WRITE(offset, 5 -> sip.STIP, 1 -> sip.SSIP) + READ_ONLY(offset, 9 -> sip.SEIP_OR) + WRITE_ONLY(offset, 9 -> sip.SEIP_SOFT) + r2w(offset, 9, sip.SEIP_SOFT) + } + + for(offset <- List(CSR.MIE, CSR.SIE)) READ_WRITE(offset, 9 -> sie.SEIE, 5 -> sie.STIE, 1 -> sie.SSIE) + + + stvecAccess(CSR.STVEC, 2 -> stvec.base, 0 -> stvec.mode) + sepcAccess(CSR.SEPC, sepc) + if(sscratchGen) READ_WRITE(CSR.SSCRATCH, sscratch) + scauseAccess(CSR.SCAUSE, xlen-1 -> scause.interrupt, 0 -> scause.exceptionCode) + sbadaddrAccess(CSR.SBADADDR, stval) + satpAccess(CSR.SATP, 31 -> satp.MODE, 22 -> satp.ASID, 0 -> satp.PPN) + + + val rescheduleLogic = supervisorGen generate new Area { + redoInterface.valid := False + redoInterface.payload := decode.input(PC) + + val rescheduleNext = False + when(execute.arbitration.isValid && execute.input(IS_SFENCE_VMA)) { rescheduleNext := True } + duringWrite(CSR.SATP) { rescheduleNext := True } + + when(rescheduleNext){ + redoInterface.valid := True + execute.arbitration.flushNext := True + decode.arbitration.haltByOther := True + } + } + } + } + + + + pipeline plug new Area{ + import machineCsr._ + import supervisorCsr._ + + val lastStage = pipeline.stages.last + val beforeLastStage = pipeline.stages(pipeline.stages.size-2) + val stagesFromExecute = pipeline.stages.dropWhile(_ != execute) + + //Manage counters + mcycle := mcycle + 1 + when(lastStage.arbitration.isFiring) { + minstret := minstret + 1 + } + + + if(supervisorGen) { + addInterrupt(sip.STIP && sie.STIE, id = 5, privilege = 1, delegators = List(Delegator(mideleg.ST, 3))) + addInterrupt(sip.SSIP && sie.SSIE, id = 1, privilege = 1, delegators = List(Delegator(mideleg.SS, 3))) + addInterrupt(sip.SEIP_OR && sie.SEIE, id = 9, privilege = 1, delegators = List(Delegator(mideleg.SE, 3))) + + for((id, enable) <- medeleg.mapping) exceptionSpecs += ExceptionSpec(id, List(Delegator(enable, 3))) + } + + addInterrupt(mip.MTIP && mie.MTIE, id = 7, privilege = 3, delegators = Nil) + addInterrupt(mip.MSIP && mie.MSIE, id = 3, privilege = 3, delegators = Nil) + addInterrupt(mip.MEIP && mie.MEIE, id = 11, privilege = 3, delegators = Nil) + + + val mepcCaptureStage = if(exceptionPortsInfos.nonEmpty) lastStage else decode + + + //Aggregate all exception port and remove required instructions + val exceptionPortCtrl = exceptionPortsInfos.nonEmpty generate new Area{ + val codeWidth = exceptionPortsInfos.map(_.codeWidth).max + val firstStageIndexWithExceptionPort = exceptionPortsInfos.map(i => indexOf(i.stage)).min + val exceptionValids = Vec(stages.map(s => Bool().setPartialName(s.getName()))) + val exceptionValidsRegs = Vec(stages.map(s => Reg(Bool).init(False).setPartialName(s.getName()))).allowUnsetRegToAvoidLatch + val exceptionContext = Reg(ExceptionCause(codeWidth)) + val 
exceptionTargetPrivilegeUncapped = U"11" + + switch(exceptionContext.code){ + for(s <- exceptionSpecs){ + is(s.id){ + var exceptionPrivilegs = if (supervisorGen) List(1, 3) else List(3) + while(exceptionPrivilegs.length != 1){ + val p = exceptionPrivilegs.head + if (exceptionPrivilegs.tail.forall(e => s.delegators.exists(_.privilege == e))) { + val delegUpOn = s.delegators.filter(_.privilege > p).map(_.enable).fold(True)(_ && _) + val delegDownOff = !s.delegators.filter(_.privilege <= p).map(_.enable).orR + when(delegUpOn && delegDownOff) { + exceptionTargetPrivilegeUncapped := p + } + } + exceptionPrivilegs = exceptionPrivilegs.tail + } + } + } + } + val exceptionTargetPrivilege = privilege.max(exceptionTargetPrivilegeUncapped) + + val groupedByStage = exceptionPortsInfos.map(_.stage).distinct.map(s => { + val stagePortsInfos = exceptionPortsInfos.filter(_.stage == s).sortWith(_.priority > _.priority) + val stagePort = stagePortsInfos.length match{ + case 1 => { + stagePortsInfos.head.port.translateWith(stagePortsInfos.head.port.payload.resizeCode(codeWidth)) + } + case _ => { + val groupedPort = Flow(ExceptionCause(codeWidth)) + val valids = stagePortsInfos.map(_.port.valid) + val codes = stagePortsInfos.map(_.port.payload.resizeCode(codeWidth)) + groupedPort.valid := valids.orR + groupedPort.payload := MuxOH(OHMasking.first(stagePortsInfos.map(_.port.valid).asBits), codes) + groupedPort + } + } + ExceptionPortInfo(stagePort,s,0, codeWidth) + }) + + val sortedByStage = groupedByStage.sortWith((a, b) => pipeline.indexOf(a.stage) < pipeline.indexOf(b.stage)) +// sortedByStage.zipWithIndex.foreach(e => e._1.port.setName(e._1.stage.getName() + "_exception_agregat")) + exceptionValids := exceptionValidsRegs + for(portInfo <- sortedByStage; port = portInfo.port ; stage = portInfo.stage; stageId = indexOf(portInfo.stage)) { + when(port.valid) { + stage.arbitration.flushNext := True + stage.arbitration.removeIt := True + exceptionValids(stageId) := True + exceptionContext := port.payload + } + } + + for(stageId <- firstStageIndexWithExceptionPort until stages.length; stage = stages(stageId) ){ + val previousStage = if(stageId == firstStageIndexWithExceptionPort) stage else stages(stageId-1) + when(!stage.arbitration.isStuck){ + exceptionValidsRegs(stageId) := (if(stageId != firstStageIndexWithExceptionPort) exceptionValids(stageId-1) && !previousStage.arbitration.isStuck else False) + }otherwise{ + if(stage != stages.last) + exceptionValidsRegs(stageId) := exceptionValids(stageId) + else + exceptionValidsRegs(stageId) := False + } + when(stage.arbitration.isFlushed){ + exceptionValids(stageId) := False + } + } + + when(exceptionValids.orR){ + fetcher.haltIt() + } + + //Avoid the PC register of the last stage to change durring an exception handleing (Used to fill Xepc) + stages.last.dontSample.getOrElseUpdate(PC, ArrayBuffer[Bool]()) += exceptionValids.last + exceptionPendings := exceptionValidsRegs + } + + + + + + //Process interrupt request, code and privilege + val interrupt = new Area { + val valid = if(pipelinedInterrupt) RegNext(False) init(False) else False + val code = if(pipelinedInterrupt) Reg(UInt(trapCodeWidth bits)) else UInt(trapCodeWidth bits).assignDontCare() + var privilegs = if (supervisorGen) List(1, 3) else List(3) + val targetPrivilege = if(pipelinedInterrupt) Reg(UInt(2 bits)) else UInt(2 bits).assignDontCare() + val privilegeAllowInterrupts = mutable.LinkedHashMap[Int, Bool]() + if (supervisorGen) privilegeAllowInterrupts += 1 -> ((sstatus.SIE && privilege === U"01") || 
privilege < U"01") + privilegeAllowInterrupts += 3 -> (mstatus.MIE || privilege < U"11") + while (privilegs.nonEmpty) { + val p = privilegs.head + when(privilegeAllowInterrupts(p)) { + for (i <- interruptSpecs + if i.privilege <= p //EX : Machine timer interrupt can't go into supervisor mode + if privilegs.tail.forall(e => i.delegators.exists(_.privilege == e))) { // EX : Supervisor timer need to have machine mode delegator + val delegUpOn = i.delegators.filter(_.privilege > p).map(_.enable).fold(True)(_ && _) + val delegDownOff = !i.delegators.filter(_.privilege <= p).map(_.enable).orR + when(i.cond && delegUpOn && delegDownOff) { + valid := True + code := i.id + targetPrivilege := p + } + } + } + privilegs = privilegs.tail + } + + code.addTag(Verilator.public) + } + + + + + val exception = if(exceptionPortCtrl != null) exceptionPortCtrl.exceptionValids.last && allowException else False + val lastStageWasWfi = if(wfiGenAsWait) RegNext(lastStage.arbitration.isFiring && lastStage.input(ENV_CTRL) === EnvCtrlEnum.WFI) init(False) else False + + + + //Used to make the pipeline empty softly (for interrupts) + val pipelineLiberator = new Area{ + val pcValids = Vec(RegInit(False), stagesFromExecute.length) + val active = interrupt.valid && allowInterrupts && decode.arbitration.isValid + when(active){ + decode.arbitration.haltByOther := True + for((stage, reg, previous) <- (stagesFromExecute, pcValids, True :: pcValids.toList).zipped){ + when(!stage.arbitration.isStuck){ + reg := previous + } + } + } + when(!active || decode.arbitration.isRemoved) { + pcValids.foreach(_ := False) + } + +// val pcValids = for(stage <- stagesFromExecute) yield RegInit(False) clearWhen(!started) setWhen(!stage.arbitration.isValid) + val done = CombInit(pcValids.last) + if(exceptionPortCtrl != null) done.clearWhen(exceptionPortCtrl.exceptionValidsRegs.tail.orR) + } + + //Interrupt/Exception entry logic + val interruptJump = Bool.addTag(Verilator.public) + interruptJump := interrupt.valid && pipelineLiberator.done && allowInterrupts + if(pipelinedInterrupt) interrupt.valid clearWhen(interruptJump) //avoid double fireing + + val hadException = RegNext(exception) init(False) addTag(Verilator.public) + pipelineLiberator.done.clearWhen(hadException) + + + val targetPrivilege = CombInit(interrupt.targetPrivilege) + if(exceptionPortCtrl != null) when(hadException) { + targetPrivilege := exceptionPortCtrl.exceptionTargetPrivilege + } + + val trapCause = CombInit(interrupt.code.resize(trapCodeWidth)) + if(exceptionPortCtrl != null) when( hadException){ + trapCause := exceptionPortCtrl.exceptionContext.code.resized + } + + val xtvec = Xtvec().assignDontCare() + switch(targetPrivilege){ + if(supervisorGen) is(1) { xtvec := supervisorCsr.stvec } + is(3){ xtvec := machineCsr.mtvec } + } + + when(hadException || interruptJump){ + fetcher.haltIt() //Avoid having the fetch confused by the incomming privilege switch + + jumpInterface.valid := True + jumpInterface.payload := (if(!xtvecModeGen) xtvec.base @@ U"00" else (xtvec.mode === 0 || hadException) ? 
(xtvec.base @@ U"00") | ((xtvec.base + trapCause) @@ U"00") ) + lastStage.arbitration.flushNext := True + + if(privilegeGen) privilegeReg := targetPrivilege + + switch(targetPrivilege){ + if(supervisorGen) is(1) { + sstatus.SIE := False + sstatus.SPIE := sstatus.SIE + sstatus.SPP := privilege(0 downto 0) + scause.interrupt := !hadException + scause.exceptionCode := trapCause + sepc := mepcCaptureStage.input(PC) + if (exceptionPortCtrl != null) when(hadException){ + stval := exceptionPortCtrl.exceptionContext.badAddr + } + } + + is(3){ + mstatus.MIE := False + mstatus.MPIE := mstatus.MIE + mstatus.MPP := privilege + mcause.interrupt := !hadException + mcause.exceptionCode := trapCause + mepc := mepcCaptureStage.input(PC) + if(exceptionPortCtrl != null) when(hadException){ + mtval := exceptionPortCtrl.exceptionContext.badAddr + } + } + } + } + + if(exceptionPortCtrl == null){ + if(mbadaddrAccess == CsrAccess.READ_ONLY) mtval := 0 + if(sbadaddrAccess == CsrAccess.READ_ONLY) stval := 0 + } + + lastStage plug new Area{ + import lastStage._ + + //Manage MRET / SRET instructions + when(arbitration.isValid && input(ENV_CTRL) === EnvCtrlEnum.XRET) { + fetcher.haltIt() + jumpInterface.valid := True + lastStage.arbitration.flushNext := True + switch(input(INSTRUCTION)(29 downto 28)){ + is(3){ + mstatus.MPP := U"00" + mstatus.MIE := mstatus.MPIE + mstatus.MPIE := True + jumpInterface.payload := mepc + if(privilegeGen) privilegeReg := mstatus.MPP + } + if(supervisorGen) is(1){ + sstatus.SPP := U"0" + sstatus.SIE := sstatus.SPIE + sstatus.SPIE := True + jumpInterface.payload := sepc + if(privilegeGen) privilegeReg := U"0" @@ sstatus.SPP + } + } + } + } + + + contextSwitching := jumpInterface.valid + + //CSR read/write instructions management + decode plug new Area{ + import decode._ + + val imm = IMM(input(INSTRUCTION)) + insert(CSR_WRITE_OPCODE) := ! 
( + (input(INSTRUCTION)(14 downto 13) === B"01" && input(INSTRUCTION)(rs1Range) === 0) + || (input(INSTRUCTION)(14 downto 13) === B"11" && imm.z === 0) + ) + insert(CSR_READ_OPCODE) := input(INSTRUCTION)(13 downto 7) =/= B"0100000" + } + + + execute plug new Area{ + import execute._ + //Manage WFI instructions + if(wfiOutput) out(inWfi) + val wfiWake = RegNext(interruptSpecs.map(_.cond).orR || thirdPartyWake) init(False) + if(wfiGenAsWait) when(arbitration.isValid && input(ENV_CTRL) === EnvCtrlEnum.WFI){ + inWfi := True + when(!wfiWake){ + arbitration.haltItself := True + } + } + } + + decode.arbitration.haltByOther setWhen(stagesFromExecute.map(s => s.arbitration.isValid && s.input(ENV_CTRL) === EnvCtrlEnum.XRET).asBits.orR) + + execute plug new Area { + import execute._ + def previousStage = decode + val blockedBySideEffects = stagesFromExecute.tail.map(s => s.arbitration.isValid).asBits().orR || pipeline.service(classOf[HazardService]).hazardOnExecuteRS// && s.input(HAS_SIDE_EFFECT) to improve be less pessimistic + + val illegalAccess = True + val illegalInstruction = False + if(selfException != null) { + selfException.valid := False + selfException.code.assignDontCare() + selfException.badAddr := input(INSTRUCTION).asUInt + if(catchIllegalAccess) when(illegalAccess || illegalInstruction){ + selfException.valid := True + selfException.code := 2 + } + } + + //Manage MRET / SRET instructions + when(arbitration.isValid && input(ENV_CTRL) === EnvCtrlEnum.XRET) { + when(input(INSTRUCTION)(29 downto 28).asUInt > privilege) { + illegalInstruction := True + } + } + + + //Manage ECALL instructions + if(ecallGen) when(arbitration.isValid && input(ENV_CTRL) === EnvCtrlEnum.ECALL){ + selfException.valid := True + switch(privilege) { + is(0) { selfException.code := 8 } + if(supervisorGen) is(1) { selfException.code := 9 } + default { selfException.code := 11 } + } + } + + + if(ebreakGen) when(arbitration.isValid && input(ENV_CTRL) === EnvCtrlEnum.EBREAK && allowEbreakException){ + selfException.valid := True + selfException.code := 3 + } + + + val imm = IMM(input(INSTRUCTION)) + def writeSrc = input(SRC1) + def readData = csrMapping.readDataSignal + def writeData = csrMapping.writeDataSignal + val writeInstruction = arbitration.isValid && input(IS_CSR) && input(CSR_WRITE_OPCODE) + val readInstruction = arbitration.isValid && input(IS_CSR) && input(CSR_READ_OPCODE) + val writeEnable = writeInstruction && !arbitration.isStuck + val readEnable = readInstruction && !arbitration.isStuck + csrMapping.hazardFree := !blockedBySideEffects + + val readToWriteData = CombInit(readData) + writeData := (if(noCsrAlu) writeSrc else input(INSTRUCTION)(13).mux( + False -> writeSrc, + True -> Mux(input(INSTRUCTION)(12), readToWriteData & ~writeSrc, readToWriteData | writeSrc) + )) + + when(arbitration.isValid && input(IS_CSR)) { + if(!pipelineCsrRead) output(REGFILE_WRITE_DATA) := readData + } + + when(arbitration.isValid && (input(IS_CSR) || (if(supervisorGen) input(IS_SFENCE_VMA) else False))) { + arbitration.haltItself setWhen(blockedBySideEffects) + } + + if(pipelineCsrRead){ + insert(PIPELINED_CSR_READ) := readData + when(memory.arbitration.isValid && memory.input(IS_CSR)) { + memory.output(REGFILE_WRITE_DATA) := memory.input(PIPELINED_CSR_READ) + } + } +// +// Component.current.rework{ +// when(arbitration.isFiring && input(IS_CSR)) { +// memory.input(REGFILE_WRITE_DATA).getDrivingReg := readData +// } +// } + + //Translation of the csrMapping into real logic + val csrAddress = input(INSTRUCTION)(csrRange) + 
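+      // csrMapping is turned into hardware inside an afterElaboration hook, so CSR entries registered
+      // later by other plugins are still taken into account. Two decoders can be generated:
+      //  - csrOhDecoder = false : a single switch() on csrAddress performs the access checks and the read mux
+      //  - csrOhDecoder = true  : one hit flag per mapped address is registered from the decode stage
+      //    (RegNextWhen(..., !execute.arbitration.isStuck)) and the read data is the OR-reduction of the
+      //    per-CSR masked read values
+      // In both cases, accessing a CSR whose privilege field (csrAddress bits 9 downto 8) is above the
+      // current privilege level is reported as an illegal access.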
Component.current.afterElaboration{ + def doJobs(jobs : ArrayBuffer[Any]): Unit ={ + val withWrite = jobs.exists(j => j.isInstanceOf[CsrWrite] || j.isInstanceOf[CsrOnWrite] || j.isInstanceOf[CsrDuringWrite]) + val withRead = jobs.exists(j => j.isInstanceOf[CsrRead] || j.isInstanceOf[CsrOnRead]) + if(withRead && withWrite) { + illegalAccess := False + } else { + if (withWrite) illegalAccess.clearWhen(input(CSR_WRITE_OPCODE)) + if (withRead) illegalAccess.clearWhen(input(CSR_READ_OPCODE)) + } + + + for (element <- jobs) element match { + case element : CsrDuringWrite => when(writeInstruction){element.doThat()} + case element : CsrDuringRead => when(readInstruction){element.doThat()} + case element : CsrDuring => {element.doThat()} + case _ => + } + when(writeEnable) { + for (element <- jobs) element match { + case element: CsrWrite => element.that.assignFromBits(writeData(element.bitOffset, element.that.getBitsWidth bits)) + case element: CsrOnWrite => element.doThat() + case _ => + } + } + + when(readEnable) { + for (element <- jobs) element match { + case element: CsrOnRead => + element.doThat() + case _ => + } + } + } + + def doJobsOverride(jobs : ArrayBuffer[Any]): Unit ={ + for (element <- jobs) element match { + case element: CsrReadToWriteOverride if element.that.getBitsWidth != 0 => readToWriteData(element.bitOffset, element.that.getBitsWidth bits) := element.that.asBits + case _ => + } + } + + csrOhDecoder match { + case false => { + csrMapping.readDataInit := 0 + switch(csrAddress) { + for ((address, jobs) <- csrMapping.mapping) { + is(address) { + doJobs(jobs) + for (element <- jobs) element match { + case element: CsrRead if element.that.getBitsWidth != 0 => csrMapping.readDataInit (element.bitOffset, element.that.getBitsWidth bits) := element.that.asBits + case _ => + } + } + } + } + switch(csrAddress) { + for ((address, jobs) <- csrMapping.mapping if jobs.exists(_.isInstanceOf[CsrReadToWriteOverride])) { + is(address) { + doJobsOverride(jobs) + } + } + } + } + case true => { + val oh = csrMapping.mapping.keys.toList.distinct.map(address => address -> RegNextWhen(decode.input(INSTRUCTION)(csrRange) === address, !execute.arbitration.isStuck).setCompositeName(this, "csr_" + address)).toMap + val readDatas = ArrayBuffer[Bits]() + for ((address, jobs) <- csrMapping.mapping) { + when(oh(address)){ + doJobs(jobs) + } + if(jobs.exists(_.isInstanceOf[CsrRead])) { + val masked = B(0, 32 bits) + when(oh(address)) (for (element <- jobs) element match { + case element: CsrRead if element.that.getBitsWidth != 0 => masked(element.bitOffset, element.that.getBitsWidth bits) := element.that.asBits + case _ => + }) + readDatas += masked + } + } + csrMapping.readDataInit := readDatas.reduceBalancedTree(_ | _) + for ((address, jobs) <- csrMapping.mapping) { + when(oh(address)){ + doJobsOverride(jobs) + } + } + } + } + + csrMapping.always.foreach { + case element : CsrDuringWrite => when(writeInstruction){element.doThat()} + case element : CsrDuringRead => when(readInstruction){element.doThat()} + case element : CsrOnWrite => when(writeEnable){element.doThat()} + case element : CsrOnRead => when(readEnable){element.doThat()} + } + + illegalAccess clearWhen(csrMapping.allowCsrSignal) + + when(privilege < csrAddress(9 downto 8).asUInt){ + illegalAccess := True + readInstruction := False + writeInstruction := False + } + illegalAccess clearWhen(!arbitration.isValid || !input(IS_CSR)) + } + } + } + } +} + + +class UserInterruptPlugin(interruptName : String, code : Int, privilege : Int = 3) extends 
Plugin[VexRiscv]{ + var interrupt, interruptEnable : Bool = null + override def setup(pipeline: VexRiscv): Unit = { + val csr = pipeline.service(classOf[CsrPlugin]) + interrupt = in.Bool().setName(interruptName) + val interruptPending = RegNext(interrupt) init(False) + val interruptEnable = RegInit(False).setName(interruptName + "_enable") + csr.addInterrupt(interruptPending && interruptEnable, code, privilege, Nil) + csr.r(csrAddress = CSR.MIP, bitOffset = code,interruptPending) + csr.rw(csrAddress = CSR.MIE, bitOffset = code, interruptEnable) + } + override def build(pipeline: VexRiscv): Unit = {} +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala new file mode 100644 index 0000000..80d4409 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/DBusCachedPlugin.scala @@ -0,0 +1,554 @@ +package vexriscv.plugin + +import vexriscv.ip._ +import vexriscv._ +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.amba4.axi.Axi4 + +import scala.collection.mutable.ArrayBuffer + + +class DAxiCachedPlugin(config : DataCacheConfig, memoryTranslatorPortConfig : Any = null) extends DBusCachedPlugin(config, memoryTranslatorPortConfig) { + var dAxi : Axi4 = null + + override def build(pipeline: VexRiscv): Unit = { + super.build(pipeline) + dBus.setAsDirectionLess() + dAxi = master(dBus.toAxi4Shared().toAxi4()).setName("dAxi") + dBus = null //For safety, as nobody should use it anymore :) + } +} + +trait DBusEncodingService { + def addLoadWordEncoding(key: MaskedLiteral): Unit + def addStoreWordEncoding(key: MaskedLiteral): Unit + def bypassStore(data : Bits) : Unit + def loadData() : Bits +} + +class DBusCachedPlugin(val config : DataCacheConfig, + memoryTranslatorPortConfig : Any = null, + dBusCmdMasterPipe : Boolean = false, + dBusCmdSlavePipe : Boolean = false, + dBusRspSlavePipe : Boolean = false, + relaxedMemoryTranslationRegister : Boolean = false, + csrInfo : Boolean = false) extends Plugin[VexRiscv] with DBusAccessService with DBusEncodingService with VexRiscvRegressionArg { + import config._ + assert(!(config.withExternalAmo && !dBusRspSlavePipe)) + assert(isPow2(cacheSize)) + assert(!(memoryTranslatorPortConfig != null && config.cacheSize/config.wayCount > 4096), "When the D$ is used with MMU, each way can't be bigger than a page (4096 bytes)") + + var dBus : DataCacheMemBus = null + var mmuBus : MemoryTranslatorBus = null + var exceptionBus : Flow[ExceptionCause] = null + var privilegeService : PrivilegeService = null + var redoBranch : Flow[UInt] = null + + @dontName var dBusAccess : DBusAccess = null + override def newDBusAccess(): DBusAccess = { + assert(dBusAccess == null) + dBusAccess = DBusAccess() + dBusAccess + } + + override def getVexRiscvRegressionArgs(): Seq[String] = { + var args = List[String]() + args :+= "DBUS=CACHED" + args :+= s"DBUS_LOAD_DATA_WIDTH=$memDataWidth" + args :+= s"DBUS_STORE_DATA_WIDTH=$cpuDataWidth" + if(withLrSc) args :+= "LRSC=yes" + if(withAmo) args :+= "AMO=yes" + if(config.withExclusive && config.withInvalidate) args ++= List("DBUS_EXCLUSIVE=yes", "DBUS_INVALIDATE=yes") + args + } + + + override def addLoadWordEncoding(key : MaskedLiteral): Unit = { + val decoderService = pipeline.service(classOf[DecoderService]) + val cfg = pipeline.config + import cfg._ + + decoderService.add( + key, + List( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC_USE_SUB_LESS -> False, + MEMORY_ENABLE -> True, + RS1_USE -> True, + IntAluPlugin.ALU_CTRL -> 
IntAluPlugin.AluCtrlEnum.ADD_SUB, + SRC2_CTRL -> Src2CtrlEnum.IMI, + // REGFILE_WRITE_VALID -> True, + // BYPASSABLE_EXECUTE_STAGE -> False, + // BYPASSABLE_MEMORY_STAGE -> False, + MEMORY_WR -> False, + HAS_SIDE_EFFECT -> True + ) + ) + + if(withLrSc) decoderService.add(key, Seq(MEMORY_LRSC -> False)) + if(withAmo) decoderService.add(key, Seq(MEMORY_AMO -> False)) + } + override def addStoreWordEncoding(key : MaskedLiteral): Unit = { + val decoderService = pipeline.service(classOf[DecoderService]) + val cfg = pipeline.config + import cfg._ + + decoderService.add( + key, + List( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC_USE_SUB_LESS -> False, + MEMORY_ENABLE -> True, + RS1_USE -> True, + IntAluPlugin.ALU_CTRL -> IntAluPlugin.AluCtrlEnum.ADD_SUB, + SRC2_CTRL -> Src2CtrlEnum.IMS, +// RS2_USE -> True, + MEMORY_WR -> True, + HAS_SIDE_EFFECT -> True + ) + ) + + if(withLrSc) decoderService.add(key, Seq(MEMORY_LRSC -> False)) + if(withAmo) decoderService.add(key, Seq(MEMORY_AMO -> False)) + } + + val bypassStoreList = ArrayBuffer[(Bool, Bits)]() + + override def bypassStore(data: Bits): Unit = { + val prefix = s"DBusBypass${bypassStoreList.size}" + bypassStoreList += ConditionalContext.isTrue().setName(prefix + "_cond") -> CombInit(data).setName(prefix + "_value") + assert(config.cpuDataWidth >= data.getWidth, "Data cache word width is too small for that") + } + + + override def loadData(): Bits = pipeline.stages.last.output(MEMORY_LOAD_DATA) + + object MEMORY_ENABLE extends Stageable(Bool) + object MEMORY_MANAGMENT extends Stageable(Bool) + object MEMORY_WR extends Stageable(Bool) + object MEMORY_LRSC extends Stageable(Bool) + object MEMORY_AMO extends Stageable(Bool) + object MEMORY_FENCE extends Stageable(Bool) + object MEMORY_FORCE_CONSTISTENCY extends Stageable(Bool) + object IS_DBUS_SHARING extends Stageable(Bool()) + object MEMORY_VIRTUAL_ADDRESS extends Stageable(UInt(32 bits)) + object MEMORY_STORE_DATA_RF extends Stageable(Bits(32 bits)) +// object MEMORY_STORE_DATA_CPU extends Stageable(Bits(config.cpuDataWidth bits)) + object MEMORY_LOAD_DATA extends Stageable(Bits(config.cpuDataWidth bits)) + + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + + dBus = master(DataCacheMemBus(this.config)).setName("dBus") + + val decoderService = pipeline.service(classOf[DecoderService]) + + val stdActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC_USE_SUB_LESS -> False, + MEMORY_ENABLE -> True, + RS1_USE -> True, + IntAluPlugin.ALU_CTRL -> IntAluPlugin.AluCtrlEnum.ADD_SUB + ) + + val loadActions = stdActions ++ List( + SRC2_CTRL -> Src2CtrlEnum.IMI, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> False, + BYPASSABLE_MEMORY_STAGE -> False, + MEMORY_WR -> False, + HAS_SIDE_EFFECT -> True + ) + + val storeActions = stdActions ++ List( + SRC2_CTRL -> Src2CtrlEnum.IMS, + RS2_USE -> True, + MEMORY_WR -> True, + HAS_SIDE_EFFECT -> True + ) + + decoderService.addDefault(MEMORY_ENABLE, False) + decoderService.add( + List(LB, LH, LW, LBU, LHU, LWU).map(_ -> loadActions) ++ + List(SB, SH, SW).map(_ -> storeActions) + ) + + if(withLrSc){ + List(LB, LH, LW, LBU, LHU, LWU, SB, SH, SW).foreach(e => + decoderService.add(e, Seq(MEMORY_LRSC -> False)) + ) + decoderService.add( + key = LR, + values = loadActions.filter(_._1 != SRC2_CTRL) ++ Seq( + SRC_ADD_ZERO -> True, + MEMORY_LRSC -> True + ) + ) + decoderService.add( + key = SC, + values = storeActions.filter(_._1 != SRC2_CTRL) ++ Seq( + SRC_ADD_ZERO -> True, + 
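The actions lists above are instances of the generic DecoderService pattern: an instruction key (a MaskedLiteral) maps to Stageable -> value pairs injected at decode. For illustration, a hypothetical plugin adding a made-up R-type instruction in the custom-0 opcode space with the same pattern (encoding and names are invented for the example):

import spinal.core._
import vexriscv._
import vexriscv.plugin._

class MyOpDemoPlugin extends Plugin[VexRiscv] {
  object IS_MYOP extends Stageable(Bool)

  override def setup(pipeline: VexRiscv): Unit = {
    import pipeline.config._
    val decoder = pipeline.service(classOf[DecoderService])
    decoder.addDefault(IS_MYOP, False)
    decoder.add(
      key = M"0000000----------000-----0001011", // invented custom-0 encoding
      values = List(
        IS_MYOP                  -> True,
        REGFILE_WRITE_VALID      -> True,
        BYPASSABLE_EXECUTE_STAGE -> True,
        BYPASSABLE_MEMORY_STAGE  -> True,
        RS1_USE                  -> True,
        RS2_USE                  -> True
      )
    )
  }

  override def build(pipeline: VexRiscv): Unit = {
    import pipeline._
    import pipeline.config._
    execute plug new Area {
      import execute._
      when(arbitration.isValid && input(IS_MYOP)) {
        // rd := rs1 + rs2, written back through the normal REGFILE_WRITE_DATA path
        output(REGFILE_WRITE_DATA) := (input(RS1).asUInt + input(RS2).asUInt).asBits
      }
    }
  }
}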
REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> False, + BYPASSABLE_MEMORY_STAGE -> False, + MEMORY_LRSC -> True + ) + ) + } + + if(withAmo){ + List(LB, LH, LW, LBU, LHU, LWU, SB, SH, SW).foreach(e => + decoderService.add(e, Seq(MEMORY_AMO -> False)) + ) + val amoActions = storeActions.filter(_._1 != SRC2_CTRL) ++ Seq( + SRC_ADD_ZERO -> True, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> False, + BYPASSABLE_MEMORY_STAGE -> False, + MEMORY_AMO -> True + ) + + for(i <- List(AMOSWAP, AMOADD, AMOXOR, AMOAND, AMOOR, AMOMIN, AMOMAX, AMOMINU, AMOMAXU)){ + decoderService.add(i, amoActions) + } + } + + if(withAmo && withLrSc){ + for(i <- List(AMOSWAP, AMOADD, AMOXOR, AMOAND, AMOOR, AMOMIN, AMOMAX, AMOMINU, AMOMAXU)){ + decoderService.add(i, List(MEMORY_LRSC -> False)) + } + for(i <- List(LR, SC)){ + decoderService.add(i, List(MEMORY_AMO -> False)) + } + } + + def MANAGEMENT = M"-------00000-----101-----0001111" + + decoderService.addDefault(MEMORY_MANAGMENT, False) + decoderService.add(MANAGEMENT, List( + MEMORY_MANAGMENT -> True, + RS1_USE -> True + )) + + withWriteResponse match { + case false => decoderService.add(FENCE, Nil) + case true => { + decoderService.addDefault(MEMORY_FENCE, False) + decoderService.add(FENCE, List(MEMORY_FENCE -> True)) + } + } + + mmuBus = pipeline.service(classOf[MemoryTranslator]).newTranslationPort(MemoryTranslatorPort.PRIORITY_DATA ,memoryTranslatorPortConfig) + redoBranch = pipeline.service(classOf[JumpService]).createJumpInterface(if(pipeline.writeBack != null) pipeline.writeBack else pipeline.memory) + + if(catchSomething) + exceptionBus = pipeline.service(classOf[ExceptionService]).newExceptionPort(if(pipeline.writeBack == null) pipeline.memory else pipeline.writeBack) + + if(pipeline.serviceExist(classOf[PrivilegeService])) + privilegeService = pipeline.service(classOf[PrivilegeService]) + + pipeline.update(DEBUG_BYPASS_CACHE, False) + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + val twoStageMmu = mmuBus.p.latency match { + case 0 => false + case 1 => true + } + + val cache = new DataCache( + this.config.copy( + mergeExecuteMemory = writeBack == null, + rfDataWidth = 32 + ), + mmuParameter = mmuBus.p + ) + + //Interconnect the plugin dBus with the cache dBus with some optional pipelining + def optionPipe[T](cond : Boolean, on : T)(f : T => T) : T = if(cond) f(on) else on + def cmdBuf = optionPipe(dBusCmdSlavePipe, cache.io.mem.cmd)(_.s2mPipe()) + dBus.cmd << optionPipe(dBusCmdMasterPipe, cmdBuf)(_.m2sPipe()) + cache.io.mem.rsp << (dBusRspSlavePipe match { + case false => dBus.rsp + case true if !withExternalAmo => dBus.rsp.m2sPipe() + case true if withExternalAmo => { + val rsp = Flow (DataCacheMemRsp(cache.p)) + rsp.valid := RegNext(dBus.rsp.valid) init(False) + rsp.exclusive := RegNext(dBus.rsp.exclusive) + rsp.error := RegNext(dBus.rsp.error) + rsp.last := RegNext(dBus.rsp.last) + rsp.aggregated := RegNext(dBus.rsp.aggregated) + rsp.data := RegNextWhen(dBus.rsp.data, dBus.rsp.valid && !cache.io.cpu.writeBack.keepMemRspData) + rsp + } + }) + + if(withInvalidate) { + cache.io.mem.inv << dBus.inv + cache.io.mem.ack >> dBus.ack + cache.io.mem.sync << dBus.sync + } + + pipeline plug new Area{ + //Memory bandwidth counter + val rspCounter = Reg(UInt(32 bits)) init(0) + when(dBus.rsp.valid){ + rspCounter := rspCounter + 1 + } + } + + decode plug new Area { + import decode._ + + when(mmuBus.busy && arbitration.isValid && input(MEMORY_ENABLE)) { + arbitration.haltItself := True + 
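The dBusCmdMasterPipe / dBusCmdSlavePipe / dBusRspSlavePipe options above only decide whether spinal.lib Stream pipelining stages are inserted between the cache and the external dBus. Shown in isolation (illustrative component, not the actual VexRiscv wiring):

import spinal.core._
import spinal.lib._

class StreamPipeDemo extends Component {
  val io = new Bundle {
    val push = slave(Stream(UInt(32 bits)))
    val pop  = master(Stream(UInt(32 bits)))
  }
  // s2mPipe() registers the ready (slave-to-master) path, m2sPipe() the valid/payload path;
  // chaining both cuts the combinational path in each direction at the cost of latency.
  io.pop << io.push.s2mPipe().m2sPipe()
}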
} + + + //Manage write to read hit ordering (ensure invalidation timings) + val fence = new Area { + insert(MEMORY_FORCE_CONSTISTENCY) := False + when(input(INSTRUCTION)(25)) { //RL + if (withLrSc) insert(MEMORY_FORCE_CONSTISTENCY) setWhen (input(MEMORY_LRSC)) + if (withAmo) insert(MEMORY_FORCE_CONSTISTENCY) setWhen (input(MEMORY_AMO)) + } + } + } + + execute plug new Area { + import execute._ + + val size = input(INSTRUCTION)(13 downto 12).asUInt + cache.io.cpu.execute.isValid := arbitration.isValid && input(MEMORY_ENABLE) + cache.io.cpu.execute.address := input(SRC_ADD).asUInt + cache.io.cpu.execute.args.wr := input(MEMORY_WR) + insert(MEMORY_STORE_DATA_RF) := size.mux( + U(0) -> input(RS2)( 7 downto 0) ## input(RS2)( 7 downto 0) ## input(RS2)(7 downto 0) ## input(RS2)(7 downto 0), + U(1) -> input(RS2)(15 downto 0) ## input(RS2)(15 downto 0), + default -> input(RS2)(31 downto 0) + ) + cache.io.cpu.execute.args.size := size.resized + + if(twoStageMmu) { + mmuBus.cmd(0).isValid := cache.io.cpu.execute.isValid + mmuBus.cmd(0).isStuck := arbitration.isStuck + mmuBus.cmd(0).virtualAddress := input(SRC_ADD).asUInt + mmuBus.cmd(0).bypassTranslation := False +// KeepAttribute(mmuBus.cmd(0)) +// KeepAttribute(mmuBus.cmd(1)) + } + + cache.io.cpu.flush.valid := arbitration.isValid && input(MEMORY_MANAGMENT) + cache.io.cpu.flush.singleLine := input(INSTRUCTION)(Riscv.rs1Range) =/= 0 + cache.io.cpu.flush.lineId := U(input(RS1) >> log2Up(bytePerLine)).resized + cache.io.cpu.execute.args.totalyConsistent := input(MEMORY_FORCE_CONSTISTENCY) + arbitration.haltItself setWhen(cache.io.cpu.flush.isStall || cache.io.cpu.execute.haltIt) + + if(withLrSc) { + cache.io.cpu.execute.args.isLrsc := False + when(input(MEMORY_LRSC)){ + cache.io.cpu.execute.args.isLrsc := True + } + } + + if(withAmo){ + cache.io.cpu.execute.isAmo := input(MEMORY_AMO) + cache.io.cpu.execute.amoCtrl.alu := input(INSTRUCTION)(31 downto 29) + cache.io.cpu.execute.amoCtrl.swap := input(INSTRUCTION)(27) + } + + + when(cache.io.cpu.execute.refilling && arbitration.isValid){ + arbitration.haltByOther := True + } + + if(relaxedMemoryTranslationRegister) { + insert(MEMORY_VIRTUAL_ADDRESS) := cache.io.cpu.execute.address + memory.input(MEMORY_VIRTUAL_ADDRESS) + if(writeBack != null) addPrePopTask( () => + KeepAttribute(memory.input(MEMORY_VIRTUAL_ADDRESS).getDrivingReg) + ) + } + } + + val mmuAndBufferStage = if(writeBack != null) memory else execute + mmuAndBufferStage plug new Area { + import mmuAndBufferStage._ + + cache.io.cpu.memory.isValid := arbitration.isValid && input(MEMORY_ENABLE) + cache.io.cpu.memory.isStuck := arbitration.isStuck + cache.io.cpu.memory.address := (if(relaxedMemoryTranslationRegister) input(MEMORY_VIRTUAL_ADDRESS) else if(mmuAndBufferStage == execute) cache.io.cpu.execute.address else U(input(REGFILE_WRITE_DATA))) + + mmuBus.cmd.last.isValid := cache.io.cpu.memory.isValid + mmuBus.cmd.last.isStuck := cache.io.cpu.memory.isStuck + mmuBus.cmd.last.virtualAddress := cache.io.cpu.memory.address + mmuBus.cmd.last.bypassTranslation := False + mmuBus.end := !arbitration.isStuck || arbitration.removeIt + cache.io.cpu.memory.mmuRsp := mmuBus.rsp + cache.io.cpu.memory.mmuRsp.isIoAccess setWhen(pipeline(DEBUG_BYPASS_CACHE) && !cache.io.cpu.memory.isWrite) + } + + val managementStage = stages.last + val mgs = managementStage plug new Area{ + import managementStage._ + cache.io.cpu.writeBack.isValid := arbitration.isValid && input(MEMORY_ENABLE) + cache.io.cpu.writeBack.isStuck := arbitration.isStuck + 
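For illustration, the MEMORY_STORE_DATA_RF mux in the execute area above replicates sub-word store data across the whole 32-bit word, so the downstream byte-lane masking decides which bytes are actually written. The same mux, lifted into a small helper:

import spinal.core._

object StoreDataReplicationDemo {
  // size = 0 byte, 1 halfword, otherwise word (instruction bits 13:12)
  def replicate(size: UInt, rs2: Bits): Bits = size.mux(
    U(0)    -> (rs2( 7 downto 0) ## rs2(7 downto 0) ## rs2(7 downto 0) ## rs2(7 downto 0)),
    U(1)    -> (rs2(15 downto 0) ## rs2(15 downto 0)),
    default -> rs2(31 downto 0)
  )
  // e.g. an SB of 0xAB presents 0xABABABAB, an SH of 0x1234 presents 0x12341234,
  // and an SW passes the word through unchanged.
}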
cache.io.cpu.writeBack.isFiring := arbitration.isFiring + cache.io.cpu.writeBack.isUser := (if(privilegeService != null) privilegeService.isUser() else False) + cache.io.cpu.writeBack.address := U(input(REGFILE_WRITE_DATA)) + cache.io.cpu.writeBack.storeData.subdivideIn(32 bits).foreach(_ := input(MEMORY_STORE_DATA_RF)) + afterElaboration(for((cond, value) <- bypassStoreList) when(cond){ + cache.io.cpu.writeBack.storeData.subdivideIn(widthOf(value) bits).foreach(_ := value) //Not optimal, but ok + }) + + val fence = if(withInvalidate) new Area { + cache.io.cpu.writeBack.fence := input(INSTRUCTION)(31 downto 20).as(FenceFlags()) + val aquire = False + if(withWriteResponse) when(input(MEMORY_ENABLE) && input(INSTRUCTION)(26)) { //AQ + if(withLrSc) when(input(MEMORY_LRSC)){ + aquire := True + } + if(withAmo) when(input(MEMORY_AMO)){ + aquire := True + } + } + + when(aquire){ + cache.io.cpu.writeBack.fence.forceAll() + } + + when(!input(MEMORY_FENCE) || !arbitration.isFiring){ + cache.io.cpu.writeBack.fence.clearAll() + } + + when(arbitration.isValid && (input(MEMORY_FENCE) || aquire)){ + mmuAndBufferStage.arbitration.haltByOther := True //Ensure that the fence affect the memory stage instruction by stoping it + } + } + + redoBranch.valid := False + redoBranch.payload := input(PC) + arbitration.flushIt setWhen(redoBranch.valid) + arbitration.flushNext setWhen(redoBranch.valid) + + if(catchSomething) { + exceptionBus.valid := False //cache.io.cpu.writeBack.mmuMiss || cache.io.cpu.writeBack.accessError || cache.io.cpu.writeBack.illegalAccess || cache.io.cpu.writeBack.unalignedAccess + exceptionBus.badAddr := U(input(REGFILE_WRITE_DATA)) + exceptionBus.code.assignDontCare() + } + + + when(arbitration.isValid && input(MEMORY_ENABLE)) { + if (catchAccessError) when(cache.io.cpu.writeBack.accessError) { + exceptionBus.valid := True + exceptionBus.code := (input(MEMORY_WR) ? U(7) | U(5)).resized + } + if(catchIllegal) when (cache.io.cpu.writeBack.mmuException) { + exceptionBus.valid := True + exceptionBus.code := (input(MEMORY_WR) ? U(15) | U(13)).resized + } + if (catchUnaligned) when(cache.io.cpu.writeBack.unalignedAccess) { + exceptionBus.valid := True + exceptionBus.code := (input(MEMORY_WR) ? U(6) | U(4)).resized + } + + when(cache.io.cpu.redo) { + redoBranch.valid := True + if(catchSomething) exceptionBus.valid := False + } + } + + arbitration.haltItself.setWhen(cache.io.cpu.writeBack.isValid && cache.io.cpu.writeBack.haltIt) + + val rspSplits = cache.io.cpu.writeBack.data.subdivideIn(8 bits) + val rspShifted = Bits(cpuDataWidth bits) + //Generate minimal mux to move from a wide aligned memory read to the register file shifter representation + for(i <- 0 until cpuDataWidth/8){ + val srcSize = 1 << (log2Up(cpuDataBytes) - log2Up(i+1)) + val srcZipped = rspSplits.zipWithIndex.filter{case (v, b) => b % (cpuDataBytes/srcSize) == i} + val src = srcZipped.map(_._1) + val range = cache.cpuWordToRfWordRange.high downto cache.cpuWordToRfWordRange.high+1-log2Up(srcSize) + val sel = cache.io.cpu.writeBack.address(range) +// println(s"$i $srcSize $range ${srcZipped.map(_._2).mkString(",")}") + rspShifted(i*8, 8 bits) := src.read(sel) + } + + val rspRf = CombInit(rspShifted(31 downto 0)) + if(withLrSc) when(input(MEMORY_LRSC) && input(MEMORY_WR)){ + rspRf := B(!cache.io.cpu.writeBack.exclusiveOk).resized + } + + val rspFormated = input(INSTRUCTION)(13 downto 12).mux( + 0 -> B((31 downto 8) -> (rspRf(7) && !input(INSTRUCTION)(14)),(7 downto 0) -> rspRf(7 downto 0)), + 1 -> B((31 downto 16) -> (rspRf(15) && ! 
input(INSTRUCTION)(14)),(15 downto 0) -> rspRf(15 downto 0)), + default -> rspRf //W + ) + + when(arbitration.isValid && input(MEMORY_ENABLE)) { + output(REGFILE_WRITE_DATA) := rspFormated + } + + insert(MEMORY_LOAD_DATA) := rspShifted + } + + //Share access to the dBus (used by self refilled MMU) + if(dBusAccess != null) pipeline plug new Area{ + dBusAccess.cmd.ready := False + val forceDatapath = False + when(dBusAccess.cmd.valid){ + decode.arbitration.haltByOther := True + val exceptionService = pipeline.service(classOf[ExceptionService]) + when(!stagesFromExecute.map(s => s.arbitration.isValid || exceptionService.isExceptionPending(s)).orR){ + when(!cache.io.cpu.execute.refilling) { + cache.io.cpu.execute.isValid := True + dBusAccess.cmd.ready := !execute.arbitration.isStuck + } + cache.io.cpu.execute.args.wr := False //dBusAccess.cmd.write +// execute.insert(MEMORY_STORE_DATA_RF) := dBusAccess.cmd.data //Not implemented + cache.io.cpu.execute.args.size := dBusAccess.cmd.size.resized + if(withLrSc) execute.input(MEMORY_LRSC) := False + if(withAmo) execute.input(MEMORY_AMO) := False + cache.io.cpu.execute.address := dBusAccess.cmd.address //Will only be 12 muxes + forceDatapath := True + } + } + execute.insert(IS_DBUS_SHARING) := dBusAccess.cmd.fire + mmuBus.cmd.last.bypassTranslation setWhen(mmuAndBufferStage.input(IS_DBUS_SHARING)) + if(twoStageMmu) mmuBus.cmd(0).bypassTranslation setWhen(execute.input(IS_DBUS_SHARING)) + + if(mmuAndBufferStage != execute) (cache.io.cpu.memory.isValid setWhen(mmuAndBufferStage.input(IS_DBUS_SHARING))) + cache.io.cpu.writeBack.isValid setWhen(managementStage.input(IS_DBUS_SHARING)) + dBusAccess.rsp.valid := managementStage.input(IS_DBUS_SHARING) && !cache.io.cpu.writeBack.isWrite && (cache.io.cpu.redo || !cache.io.cpu.writeBack.haltIt) + dBusAccess.rsp.data := mgs.rspRf + dBusAccess.rsp.error := cache.io.cpu.writeBack.unalignedAccess || cache.io.cpu.writeBack.accessError + dBusAccess.rsp.redo := cache.io.cpu.redo + component.addPrePopTask{() => + managementStage.input(IS_DBUS_SHARING).getDrivingReg clearWhen(dBusAccess.rsp.fire) + when(forceDatapath){ + execute.output(REGFILE_WRITE_DATA) := dBusAccess.cmd.address.asBits + } + if(mmuAndBufferStage != execute) mmuAndBufferStage.input(IS_DBUS_SHARING) init(False) + managementStage.input(IS_DBUS_SHARING) init(False) + when(dBusAccess.rsp.valid){ + managementStage.input(IS_DBUS_SHARING).getDrivingReg := False + } + } + } + + when(stages.last.arbitration.haltByOther){ + cache.io.cpu.writeBack.isValid := False + } + + if(csrInfo){ + val csr = service(classOf[CsrPlugin]) + csr.r(0xCC0, 0 -> U(cacheSize/wayCount), 20 -> U(bytePerLine)) + } + } +} + + diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala new file mode 100644 index 0000000..372cfcc --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/DBusSimplePlugin.scala @@ -0,0 +1,614 @@ +package vexriscv.plugin + +import vexriscv._ +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.amba3.ahblite.{AhbLite3Config, AhbLite3Master} +import spinal.lib.bus.amba4.axi._ +import spinal.lib.bus.avalon.{AvalonMM, AvalonMMConfig} +import spinal.lib.bus.bmb.{Bmb, BmbParameter} +import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig} +import spinal.lib.bus.simple._ +import vexriscv.ip.DataCacheMemCmd + +import scala.collection.mutable.ArrayBuffer + + +case class DBusSimpleCmd() extends Bundle{ + val wr = Bool + val address = UInt(32 bits) + val data = Bits(32 bit) 
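A worked example of the rspFormated load formatting in the management stage above (instruction bits 13:12 give the size, bit 14 selects the unsigned variants; the memory word is made up):

// Memory word 0x1234ABCD, word-aligned access, so byte lane 0 = 0xCD and rspRf(7) = 1:
//   LB  (bit 14 = 0) -> REGFILE_WRITE_DATA = 0xFFFFFFCD  (sign-extended)
//   LBU (bit 14 = 1) -> REGFILE_WRITE_DATA = 0x000000CD  (zero-extended)
//   LW               -> 0x1234ABCD passed through unchanged
// For an SC, rspRf is instead replaced by !exclusiveOk, i.e. 0 on success and 1 on failure.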
+ val size = UInt(2 bit) +} + +case class DBusSimpleRsp() extends Bundle with IMasterSlave{ + val ready = Bool + val error = Bool + val data = Bits(32 bit) + + override def asMaster(): Unit = { + out(ready,error,data) + } +} + + +object DBusSimpleBus{ + def getAxi4Config() = Axi4Config( + addressWidth = 32, + dataWidth = 32, + useId = false, + useRegion = false, + useBurst = false, + useLock = false, + useQos = false, + useLen = false, + useResp = true + ) + + def getAvalonConfig() = AvalonMMConfig.pipelined( + addressWidth = 32, + dataWidth = 32).copy( + useByteEnable = true, + useResponse = true, + maximumPendingReadTransactions = 1 + ) + + def getWishboneConfig() = WishboneConfig( + addressWidth = 30, + dataWidth = 32, + selWidth = 4, + useSTALL = false, + useLOCK = false, + useERR = true, + useRTY = false, + tgaWidth = 0, + tgcWidth = 0, + tgdWidth = 0, + useBTE = true, + useCTI = true + ) + + def getPipelinedMemoryBusConfig() = PipelinedMemoryBusConfig( + addressWidth = 32, + dataWidth = 32 + ) + + def getAhbLite3Config() = AhbLite3Config( + addressWidth = 32, + dataWidth = 32 + ) + def getBmbParameter() = BmbParameter( + addressWidth = 32, + dataWidth = 32, + lengthWidth = 2, + sourceWidth = 0, + contextWidth = 1, + alignment = BmbParameter.BurstAlignement.LENGTH + ) +} + +case class DBusSimpleBus(bigEndian : Boolean = false) extends Bundle with IMasterSlave{ + val cmd = Stream(DBusSimpleCmd()) + val rsp = DBusSimpleRsp() + + override def asMaster(): Unit = { + master(cmd) + slave(rsp) + } + + def cmdS2mPipe() : DBusSimpleBus = { + val s = DBusSimpleBus(bigEndian) + s.cmd << this.cmd.s2mPipe() + this.rsp := s.rsp + s + } + + def genMask(cmd : DBusSimpleCmd) = { + if(bigEndian) + cmd.size.mux( + U(0) -> B"1000", + U(1) -> B"1100", + default -> B"1111" + ) |>> cmd.address(1 downto 0) + else + cmd.size.mux( + U(0) -> B"0001", + U(1) -> B"0011", + default -> B"1111" + ) |<< cmd.address(1 downto 0) + } + + def toAxi4Shared(stageCmd : Boolean = false, pendingWritesMax : Int = 7): Axi4Shared = { + val axi = Axi4Shared(DBusSimpleBus.getAxi4Config()) + + val cmdPreFork = if (stageCmd) cmd.stage.stage().s2mPipe() else cmd + + val pendingWrites = CounterUpDown( + stateCount = pendingWritesMax + 1, + incWhen = cmdPreFork.fire && cmdPreFork.wr, + decWhen = axi.writeRsp.fire + ) + + val hazard = (pendingWrites =/= 0 && cmdPreFork.valid && !cmdPreFork.wr) || pendingWrites === pendingWritesMax + val (cmdFork, dataFork) = StreamFork2(cmdPreFork.haltWhen(hazard)) + axi.sharedCmd.arbitrationFrom(cmdFork) + axi.sharedCmd.write := cmdFork.wr + axi.sharedCmd.prot := "010" + axi.sharedCmd.cache := "1111" + axi.sharedCmd.size := cmdFork.size.resized + axi.sharedCmd.addr := cmdFork.address + + val dataStage = dataFork.throwWhen(!dataFork.wr) + axi.writeData.arbitrationFrom(dataStage) + axi.writeData.last := True + axi.writeData.data := dataStage.data + axi.writeData.strb := genMask(dataStage).resized + + + rsp.ready := axi.r.valid + rsp.error := !axi.r.isOKAY() + rsp.data := axi.r.data + + axi.r.ready := True + axi.b.ready := True + axi + } + + def toAxi4(stageCmd : Boolean = true) = this.toAxi4Shared(stageCmd).toAxi4() + + + + def toAvalon(stageCmd : Boolean = true): AvalonMM = { + val avalonConfig = DBusSimpleBus.getAvalonConfig() + val mm = AvalonMM(avalonConfig) + val cmdStage = if(stageCmd) cmd.stage else cmd + mm.read := cmdStage.valid && !cmdStage.wr + mm.write := cmdStage.valid && cmdStage.wr + mm.address := (cmdStage.address >> 2) @@ U"00" + mm.writeData := cmdStage.data(31 downto 0) + mm.byteEnable 
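For illustration, genMask above turns the access size into a 4-bit byte enable and shifts it by the low address bits (shift direction depending on endianness). Worked little-endian cases:

// size = 0 (SB), address % 4 == 1 : B"0001" |<< 1 = B"0010"  (byte lane 1)
// size = 1 (SH), address % 4 == 2 : B"0011" |<< 2 = B"1100"  (upper halfword)
// size = 2 (SW)                   : B"1111"                  (all lanes)
// The resulting mask feeds the strb / byteEnable / SEL / mask signals of the bus bridges in this file.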
:= genMask(cmdStage).resized + + + cmdStage.ready := mm.waitRequestn + rsp.ready :=mm.readDataValid + rsp.error := mm.response =/= AvalonMM.Response.OKAY + rsp.data := mm.readData + + mm + } + + def toWishbone(): Wishbone = { + val wishboneConfig = DBusSimpleBus.getWishboneConfig() + val bus = Wishbone(wishboneConfig) + val cmdStage = cmd.halfPipe() + + bus.ADR := cmdStage.address >> 2 + bus.CTI :=B"000" + bus.BTE := "00" + bus.SEL := genMask(cmdStage).resized + when(!cmdStage.wr) { + bus.SEL := "1111" + } + bus.WE := cmdStage.wr + bus.DAT_MOSI := cmdStage.data + + cmdStage.ready := cmdStage.valid && bus.ACK + bus.CYC := cmdStage.valid + bus.STB := cmdStage.valid + + rsp.ready := cmdStage.valid && !bus.WE && bus.ACK + rsp.data := bus.DAT_MISO + rsp.error := False //TODO + bus + } + + def toPipelinedMemoryBus() : PipelinedMemoryBus = { + val pipelinedMemoryBusConfig = DBusSimpleBus.getPipelinedMemoryBusConfig() + val bus = PipelinedMemoryBus(pipelinedMemoryBusConfig) + bus.cmd.valid := cmd.valid + bus.cmd.write := cmd.wr + bus.cmd.address := cmd.address.resized + bus.cmd.data := cmd.data + bus.cmd.mask := genMask(cmd) + cmd.ready := bus.cmd.ready + + rsp.ready := bus.rsp.valid + rsp.data := bus.rsp.data + + bus + } + + def toAhbLite3Master(avoidWriteToReadHazard : Boolean): AhbLite3Master = { + val bus = AhbLite3Master(DBusSimpleBus.getAhbLite3Config()) + bus.HADDR := this.cmd.address + bus.HWRITE := this.cmd.wr + bus.HSIZE := B(this.cmd.size, 3 bits) + bus.HBURST := 0 + bus.HPROT := "1111" + bus.HTRANS := this.cmd.valid ## B"0" + bus.HMASTLOCK := False + bus.HWDATA := RegNextWhen(this.cmd.data, bus.HREADY) + this.cmd.ready := bus.HREADY + + val pending = RegInit(False) clearWhen(bus.HREADY) setWhen(this.cmd.fire && !this.cmd.wr) + this.rsp.ready := bus.HREADY && pending + this.rsp.data := bus.HRDATA + this.rsp.error := bus.HRESP + + if(avoidWriteToReadHazard) { + val writeDataPhase = RegNextWhen(bus.HTRANS === 2 && bus.HWRITE, bus.HREADY) init (False) + val potentialHazard = this.cmd.valid && !this.cmd.wr && writeDataPhase + when(potentialHazard) { + bus.HTRANS := 0 + this.cmd.ready := False + } + } + bus + } + + def toBmb() : Bmb = { + val pipelinedMemoryBusConfig = DBusSimpleBus.getBmbParameter() + val bus = Bmb(pipelinedMemoryBusConfig) + + bus.cmd.valid := cmd.valid + bus.cmd.last := True + bus.cmd.context(0) := cmd.wr + bus.cmd.opcode := (cmd.wr ? 
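A hypothetical sketch of how these bridge methods are typically consumed from the component that instantiates the CPU: iterate the plugin list, find the DBusSimplePlugin (defined below) and bridge its dBus. Here cpu, io and the interconnect are placeholders for the surrounding design:

import spinal.lib.bus.wishbone.Wishbone
import vexriscv.plugin.DBusSimplePlugin

// Inside the SoC component that owns the VexRiscv instance `cpu`:
var dBusWB: Wishbone = null
for (plugin <- cpu.plugins) plugin match {
  case plugin: DBusSimplePlugin => dBusWB = plugin.dBus.toWishbone()
  case _ =>
}
// dBusWB can now be connected to the interconnect or exported, e.g. io.dBus <> dBusWB;
// the other to* conversions (toAvalon, toAxi4, toBmb, ...) are used the same way.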
B(Bmb.Cmd.Opcode.WRITE) | B(Bmb.Cmd.Opcode.READ)) + bus.cmd.address := cmd.address.resized + bus.cmd.data := cmd.data + bus.cmd.length := cmd.size.mux( + 0 -> U"00", + 1 -> U"01", + default -> U"11" + ) + bus.cmd.mask := genMask(cmd) + + cmd.ready := bus.cmd.ready + + rsp.ready := bus.rsp.valid && !bus.rsp.context(0) + rsp.data := bus.rsp.data + rsp.error := bus.rsp.isError + bus.rsp.ready := True + + bus + } +} + + +class DBusSimplePlugin(catchAddressMisaligned : Boolean = false, + catchAccessFault : Boolean = false, + earlyInjection : Boolean = false, /*, idempotentRegions : (UInt) => Bool = (x) => False*/ + emitCmdInMemoryStage : Boolean = false, + onlyLoadWords : Boolean = false, + withLrSc : Boolean = false, + val bigEndian : Boolean = false, + memoryTranslatorPortConfig : Any = null) extends Plugin[VexRiscv] with DBusAccessService { + + var dBus : DBusSimpleBus = null + assert(!(emitCmdInMemoryStage && earlyInjection)) + object MEMORY_ENABLE extends Stageable(Bool) + object MEMORY_READ_DATA extends Stageable(Bits(32 bits)) + object MEMORY_ADDRESS_LOW extends Stageable(UInt(2 bits)) + object ALIGNEMENT_FAULT extends Stageable(Bool) + object MMU_FAULT extends Stageable(Bool) + object MEMORY_ATOMIC extends Stageable(Bool) + object ATOMIC_HIT extends Stageable(Bool) + object MEMORY_STORE extends Stageable(Bool) + + var memoryExceptionPort : Flow[ExceptionCause] = null + var rspStage : Stage = null + var mmuBus : MemoryTranslatorBus = null + var redoBranch : Flow[UInt] = null + val catchSomething = catchAccessFault || catchAddressMisaligned || memoryTranslatorPortConfig != null + + @dontName var dBusAccess : DBusAccess = null + override def newDBusAccess(): DBusAccess = { + assert(dBusAccess == null) + dBusAccess = DBusAccess() + dBusAccess + } + + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + import pipeline._ + + val decoderService = pipeline.service(classOf[DecoderService]) + + val stdActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC_USE_SUB_LESS -> False, + MEMORY_ENABLE -> True, + RS1_USE -> True + ) ++ (if(catchAccessFault || catchAddressMisaligned) List(IntAluPlugin.ALU_CTRL -> IntAluPlugin.AluCtrlEnum.ADD_SUB) else Nil) //Used for access fault bad address in memory stage + + val loadActions = stdActions ++ List( + SRC2_CTRL -> Src2CtrlEnum.IMI, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> False, + BYPASSABLE_MEMORY_STAGE -> Bool(earlyInjection), + MEMORY_STORE -> False, + HAS_SIDE_EFFECT -> True + ) + + val storeActions = stdActions ++ List( + SRC2_CTRL -> Src2CtrlEnum.IMS, + RS2_USE -> True, + MEMORY_STORE -> True, + HAS_SIDE_EFFECT -> True + ) + + decoderService.addDefault(MEMORY_ENABLE, False) + decoderService.add( + (if(onlyLoadWords) List(LW) else List(LB, LH, LW, LBU, LHU, LWU)).map(_ -> loadActions) ++ + List(SB, SH, SW).map(_ -> storeActions) + ) + + + if(withLrSc){ + List(LB, LH, LW, LBU, LHU, LWU, SB, SH, SW).foreach(e => + decoderService.add(e, Seq(MEMORY_ATOMIC -> False)) + ) + decoderService.add( + key = LR, + values = loadActions.filter(_._1 != SRC2_CTRL) ++ Seq( + SRC_ADD_ZERO -> True, + MEMORY_ATOMIC -> True + ) + ) + + decoderService.add( + key = SC, + values = storeActions.filter(_._1 != SRC2_CTRL) ++ Seq( + SRC_ADD_ZERO -> True, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> False, + BYPASSABLE_MEMORY_STAGE -> False, + MEMORY_ATOMIC -> True + ) + ) + } + + decoderService.add(FENCE, Nil) + + rspStage = if(stages.last == execute) execute else 
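A hypothetical instantiation of the plugin declared above, spelling out the trap-related options (the parameter names come from the constructor; the values are just an example):

new DBusSimplePlugin(
  catchAddressMisaligned = true,  // report cause 4/6 on misaligned load/store
  catchAccessFault       = true,  // report cause 5 when the bus signals a load error
  earlyInjection         = false, // write the load result back in the last stage, not in memory
  withLrSc               = false
)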
(if(emitCmdInMemoryStage) writeBack else memory) + if(catchSomething) { + val exceptionService = pipeline.service(classOf[ExceptionService]) + memoryExceptionPort = exceptionService.newExceptionPort(rspStage) + } + + if(memoryTranslatorPortConfig != null) { + mmuBus = pipeline.service(classOf[MemoryTranslator]).newTranslationPort(MemoryTranslatorPort.PRIORITY_DATA, memoryTranslatorPortConfig) + redoBranch = pipeline.service(classOf[JumpService]).createJumpInterface(if(pipeline.memory != null) pipeline.memory else pipeline.execute) + } + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + object MMU_RSP extends Stageable(MemoryTranslatorRsp(mmuBus.p)) + + dBus = master(DBusSimpleBus(bigEndian)).setName("dBus") + + + decode plug new Area { + import decode._ + + if(mmuBus != null) when(mmuBus.busy && arbitration.isValid && input(MEMORY_ENABLE)) { + arbitration.haltItself := True + } + } + + //Emit dBus.cmd request + val cmdSent = if(rspStage == execute) RegInit(False) setWhen(dBus.cmd.fire) clearWhen(!execute.arbitration.isStuck) else False + val cmdStage = if(emitCmdInMemoryStage) memory else execute + cmdStage plug new Area{ + import cmdStage._ + val privilegeService = pipeline.serviceElse(classOf[PrivilegeService], PrivilegeServiceDefault()) + + + if (catchAddressMisaligned) + insert(ALIGNEMENT_FAULT) := (dBus.cmd.size === 2 && dBus.cmd.address(1 downto 0) =/= 0) || (dBus.cmd.size === 1 && dBus.cmd.address(0 downto 0) =/= 0) + else + insert(ALIGNEMENT_FAULT) := False + + + val skipCmd = False + skipCmd setWhen(input(ALIGNEMENT_FAULT)) + + dBus.cmd.valid := arbitration.isValid && input(MEMORY_ENABLE) && !arbitration.isStuckByOthers && !arbitration.isFlushed && !skipCmd && !cmdSent + dBus.cmd.wr := input(MEMORY_STORE) + dBus.cmd.size := input(INSTRUCTION)(13 downto 12).asUInt + dBus.cmd.payload.data := dBus.cmd.size.mux ( + U(0) -> input(RS2)(7 downto 0) ## input(RS2)(7 downto 0) ## input(RS2)(7 downto 0) ## input(RS2)(7 downto 0), + U(1) -> input(RS2)(15 downto 0) ## input(RS2)(15 downto 0), + default -> input(RS2)(31 downto 0) + ) + when(arbitration.isValid && input(MEMORY_ENABLE) && !dBus.cmd.ready && !skipCmd && !cmdSent){ + arbitration.haltItself := True + } + + insert(MEMORY_ADDRESS_LOW) := dBus.cmd.address(1 downto 0) + + //formal + val formalMask = dBus.genMask(dBus.cmd) + + insert(FORMAL_MEM_ADDR) := dBus.cmd.address & U"xFFFFFFFC" + insert(FORMAL_MEM_WMASK) := (dBus.cmd.valid && dBus.cmd.wr) ? formalMask | B"0000" + insert(FORMAL_MEM_RMASK) := (dBus.cmd.valid && !dBus.cmd.wr) ? 
formalMask | B"0000" + insert(FORMAL_MEM_WDATA) := dBus.cmd.payload.data + + val mmu = (mmuBus != null) generate new Area { + mmuBus.cmd.last.isValid := arbitration.isValid && input(MEMORY_ENABLE) + mmuBus.cmd.last.isStuck := arbitration.isStuck + mmuBus.cmd.last.virtualAddress := input(SRC_ADD).asUInt + mmuBus.cmd.last.bypassTranslation := False + mmuBus.end := !arbitration.isStuck || arbitration.isRemoved + dBus.cmd.address := mmuBus.rsp.physicalAddress + + //do not emit memory request if MMU refilling + insert(MMU_FAULT) := input(MMU_RSP).exception || (!input(MMU_RSP).allowWrite && input(MEMORY_STORE)) || (!input(MMU_RSP).allowRead && !input(MEMORY_STORE)) + skipCmd.setWhen(input(MMU_FAULT) || input(MMU_RSP).refilling) + + insert(MMU_RSP) := mmuBus.rsp + } + + val mmuLess = (mmuBus == null) generate new Area{ + dBus.cmd.address := input(SRC_ADD).asUInt + } + + + val atomic = withLrSc generate new Area{ + val reserved = RegInit(False) + insert(ATOMIC_HIT) := reserved + when(arbitration.isFiring && input(MEMORY_ENABLE) && (if(mmuBus != null) !input(MMU_FAULT) else True) && !skipCmd){ + reserved setWhen(input(MEMORY_ATOMIC)) + reserved clearWhen(input(MEMORY_STORE)) + } + when(input(MEMORY_STORE) && input(MEMORY_ATOMIC) && !input(ATOMIC_HIT)){ + skipCmd := True + } + } + } + + //Collect dBus.rsp read responses + rspStage plug new Area { + val s = rspStage; import s._ + + + insert(MEMORY_READ_DATA) := dBus.rsp.data + + arbitration.haltItself setWhen(arbitration.isValid && input(MEMORY_ENABLE) && !input(MEMORY_STORE) && (!dBus.rsp.ready || (if(rspStage == execute) !cmdSent else False))) + + if(catchSomething) { + memoryExceptionPort.valid := False + memoryExceptionPort.code.assignDontCare() + memoryExceptionPort.badAddr := input(REGFILE_WRITE_DATA).asUInt + + if(catchAccessFault) when(dBus.rsp.ready && dBus.rsp.error && !input(MEMORY_STORE)) { + memoryExceptionPort.valid := True + memoryExceptionPort.code := 5 + } + + if(catchAddressMisaligned) when(input(ALIGNEMENT_FAULT)){ + memoryExceptionPort.code := (input(MEMORY_STORE) ? U(6) | U(4)).resized + memoryExceptionPort.valid := True + } + + if(memoryTranslatorPortConfig != null) { + redoBranch.valid := False + redoBranch.payload := input(PC) + + when(input(MMU_RSP).refilling){ + redoBranch.valid := True + memoryExceptionPort.valid := False + } elsewhen(input(MMU_FAULT)) { + memoryExceptionPort.valid := True + memoryExceptionPort.code := (input(MEMORY_STORE) ? 
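The atomic area above implements LR/SC with a single reservation flag rather than an address-tagged monitor. A simplified sketch of the policy (signal names are illustrative):

val reserved = RegInit(False)          // one global reservation, no address tag
when(memoryAccessFiring) {             // ~ arbitration.isFiring && MEMORY_ENABLE && no MMU fault
  reserved setWhen(isAtomic)           // LR establishes the reservation
  reserved clearWhen(isStore)          // any store drops it (the later assignment wins)
}
val scSucceeds = reserved              // sampled as ATOMIC_HIT; a failing SC simply skips the bus write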
U(15) | U(13)).resized + } + + arbitration.flushIt setWhen(redoBranch.valid) + arbitration.flushNext setWhen(redoBranch.valid) + } + + when(!(arbitration.isValid && input(MEMORY_ENABLE) && (Bool(cmdStage != rspStage) || !arbitration.isStuckByOthers))){ + if(catchSomething) memoryExceptionPort.valid := False + if(memoryTranslatorPortConfig != null) redoBranch.valid := False + } + + } + } + + //Reformat read responses, REGFILE_WRITE_DATA overriding + val injectionStage = if(earlyInjection) memory else stages.last + injectionStage plug new Area { + import injectionStage._ + + + val rspShifted = MEMORY_READ_DATA() + rspShifted := input(MEMORY_READ_DATA) + if(bigEndian) + switch(input(MEMORY_ADDRESS_LOW)){ + is(1){rspShifted(31 downto 24) := input(MEMORY_READ_DATA)(23 downto 16)} + is(2){rspShifted(31 downto 16) := input(MEMORY_READ_DATA)(15 downto 0)} + is(3){rspShifted(31 downto 24) := input(MEMORY_READ_DATA)(7 downto 0)} + } + else + switch(input(MEMORY_ADDRESS_LOW)){ + is(1){rspShifted(7 downto 0) := input(MEMORY_READ_DATA)(15 downto 8)} + is(2){rspShifted(15 downto 0) := input(MEMORY_READ_DATA)(31 downto 16)} + is(3){rspShifted(7 downto 0) := input(MEMORY_READ_DATA)(31 downto 24)} + } + + val rspFormated = + if(bigEndian) + input(INSTRUCTION)(13 downto 12).mux( + 0 -> B((31 downto 8) -> (rspShifted(31) && !input(INSTRUCTION)(14)),(7 downto 0) -> rspShifted(31 downto 24)), + 1 -> B((31 downto 16) -> (rspShifted(31) && ! input(INSTRUCTION)(14)),(15 downto 0) -> rspShifted(31 downto 16)), + default -> rspShifted //W + ) + else + input(INSTRUCTION)(13 downto 12).mux( + 0 -> B((31 downto 8) -> (rspShifted(7) && !input(INSTRUCTION)(14)),(7 downto 0) -> rspShifted(7 downto 0)), + 1 -> B((31 downto 16) -> (rspShifted(15) && ! input(INSTRUCTION)(14)),(15 downto 0) -> rspShifted(15 downto 0)), + default -> rspShifted //W + ) + + when(arbitration.isValid && input(MEMORY_ENABLE)) { + output(REGFILE_WRITE_DATA) := (if(!onlyLoadWords) rspFormated else input(MEMORY_READ_DATA)) + if(withLrSc){ + when(input(MEMORY_ATOMIC) && input(MEMORY_STORE)){ + output(REGFILE_WRITE_DATA) := (!input(ATOMIC_HIT)).asBits.resized + } + } + } + +// if(!earlyInjection && !emitCmdInMemoryStage && config.withWriteBackStage) +// assert(!(arbitration.isValid && input(MEMORY_ENABLE) && !input(MEMORY_STORE) && arbitration.isStuck),"DBusSimplePlugin doesn't allow writeback stage stall when read happend") + + //formal + insert(FORMAL_MEM_RDATA) := input(MEMORY_READ_DATA) + } + + //Share access to the dBus (used by self refilled MMU) + val dBusSharing = (dBusAccess != null) generate new Area{ + val state = Reg(UInt(2 bits)) init(0) + dBusAccess.cmd.ready := False + dBusAccess.rsp.valid := False + dBusAccess.rsp.data := dBus.rsp.data + dBusAccess.rsp.error := dBus.rsp.error + dBusAccess.rsp.redo := False + + switch(state){ + is(0){ + when(dBusAccess.cmd.valid){ + decode.arbitration.haltItself := True + when(!stages.dropWhile(_ != execute).map(_.arbitration.isValid).orR){ + state := 1 + } + } + } + is(1){ + decode.arbitration.haltItself := True + dBus.cmd.valid := True + dBus.cmd.address := dBusAccess.cmd.address + dBus.cmd.wr := dBusAccess.cmd.write + dBus.cmd.data := dBusAccess.cmd.data + dBus.cmd.size := dBusAccess.cmd.size + when(dBus.cmd.ready){ + state := (dBusAccess.cmd.write ? 
U(0) | U(2)) + dBusAccess.cmd.ready := True + } + } + is(2){ + decode.arbitration.haltItself := True + when(dBus.rsp.ready){ + dBusAccess.rsp.valid := True + state := 0 + } + } + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/DebugPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/DebugPlugin.scala new file mode 100644 index 0000000..01c2acd --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/DebugPlugin.scala @@ -0,0 +1,364 @@ +package vexriscv.plugin + +import spinal.lib.com.jtag.{Jtag, JtagTapInstructionCtrl} +import spinal.lib.system.debugger.{JtagBridge, JtagBridgeNoTap, SystemDebugger, SystemDebuggerConfig, SystemDebuggerMemBus} +import vexriscv.plugin.IntAluPlugin.{ALU_CTRL, AluCtrlEnum} +import vexriscv._ +import vexriscv.ip._ +import spinal.core._ +import spinal.lib._ +import spinal.lib.blackbox.xilinx.s7.BSCANE2 +import spinal.lib.bus.amba3.apb.{Apb3, Apb3Config} +import spinal.lib.bus.avalon.{AvalonMM, AvalonMMConfig} +import spinal.lib.bus.bmb.{Bmb, BmbAccessCapabilities, BmbAccessParameter, BmbParameter} +import spinal.lib.bus.simple.PipelinedMemoryBus + +import scala.collection.mutable.ArrayBuffer + + +case class DebugExtensionCmd() extends Bundle{ + val wr = Bool + val address = UInt(8 bit) + val data = Bits(32 bit) +} +case class DebugExtensionRsp() extends Bundle{ + val data = Bits(32 bit) +} + +object DebugExtensionBus{ + def getBmbAccessParameter(source : BmbAccessCapabilities) = source.copy( + addressWidth = 8, + dataWidth = 32, + lengthWidthMax = 2, + alignment = BmbParameter.BurstAlignement.LENGTH + ) +} + +case class DebugExtensionBus() extends Bundle with IMasterSlave{ + val cmd = Stream(DebugExtensionCmd()) + val rsp = DebugExtensionRsp() //one cycle latency + + override def asMaster(): Unit = { + master(cmd) + in(rsp) + } + + def fromApb3(): Apb3 ={ + val apb = Apb3(Apb3Config( + addressWidth = 8, + dataWidth = 32, + useSlaveError = false + )) + + cmd.valid := apb.PSEL(0) && apb.PENABLE + cmd.wr := apb.PWRITE + cmd.address := apb.PADDR + cmd.data := apb.PWDATA + + apb.PREADY := cmd.ready + apb.PRDATA := rsp.data + + apb + } + + def fromAvalon(): AvalonMM ={ + val bus = AvalonMM(AvalonMMConfig.fixed(addressWidth = 8,dataWidth = 32, readLatency = 1)) + + cmd.valid := bus.read || bus.write + cmd.wr := bus.write + cmd.address := bus.address + cmd.data := bus.writeData + + bus.waitRequestn := cmd.ready + bus.readData := rsp.data + + bus + } + + def fromPipelinedMemoryBus(): PipelinedMemoryBus ={ + val bus = PipelinedMemoryBus(32, 32) + + cmd.arbitrationFrom(bus.cmd) + cmd.wr := bus.cmd.write + cmd.address := bus.cmd.address.resized + cmd.data := bus.cmd.data + + bus.rsp.valid := RegNext(cmd.fire) init(False) + bus.rsp.data := rsp.data + + bus + } + + def fromBmb(): Bmb ={ + val bus = Bmb(BmbParameter( + addressWidth = 8, + dataWidth = 32, + lengthWidth = 2, + sourceWidth = 0, + contextWidth = 0 + )) + + cmd.arbitrationFrom(bus.cmd) + cmd.wr := bus.cmd.isWrite + cmd.address := bus.cmd.address + cmd.data := bus.cmd.data + + bus.rsp.valid := RegNext(cmd.fire) init(False) + bus.rsp.data := rsp.data + bus.rsp.last := True + bus.rsp.setSuccess() + + bus + } + + def from(c : SystemDebuggerConfig) : SystemDebuggerMemBus = { + val mem = SystemDebuggerMemBus(c) + cmd.valid := mem.cmd.valid + cmd.wr := mem.cmd.wr + cmd.data := mem.cmd.data + cmd.address := mem.cmd.address.resized + mem.cmd.ready := cmd.ready + mem.rsp.valid := RegNext(cmd.fire).init(False) + mem.rsp.payload := rsp.data + mem + } + + def fromJtag(): Jtag ={ + val jtagConfig = 
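A hypothetical top-level sketch of the usual way this debug bus is exposed over JTAG, following the pattern of the VexRiscv SoC examples; io.jtag, resetCtrl and cpu are placeholders for the surrounding design:

import spinal.core._
import spinal.lib._
import spinal.lib.com.jtag.Jtag
import vexriscv.plugin.DebugPlugin

// In the SoC toplevel, with `val jtag = slave(Jtag())` declared in its io bundle:
for (plugin <- cpu.plugins) plugin match {
  case plugin: DebugPlugin => plugin.debugClockDomain {
    resetCtrl.coreReset setWhen(RegNext(plugin.io.resetOut)) // let the debugger reset the core
    io.jtag <> plugin.io.bus.fromJtag()
  }
  case _ =>
}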
SystemDebuggerConfig( + memAddressWidth = 32, + memDataWidth = 32, + remoteCmdWidth = 1 + ) + val jtagBridge = new JtagBridge(jtagConfig) + val debugger = new SystemDebugger(jtagConfig) + debugger.io.remote <> jtagBridge.io.remote + debugger.io.mem <> this.from(jtagConfig) + + jtagBridge.io.jtag + } + + def fromJtagInstructionCtrl(jtagClockDomain : ClockDomain, jtagHeaderIgnoreWidth : Int): JtagTapInstructionCtrl ={ + val jtagConfig = SystemDebuggerConfig( + memAddressWidth = 32, + memDataWidth = 32, + remoteCmdWidth = 1 + ) + val jtagBridge = new JtagBridgeNoTap(jtagConfig, jtagClockDomain, jtagHeaderIgnoreWidth) + val debugger = new SystemDebugger(jtagConfig) + debugger.io.remote <> jtagBridge.io.remote + debugger.io.mem <> this.from(jtagConfig) + + jtagBridge.io.ctrl + } + + def fromBscane2(usedId : Int, jtagHeaderIgnoreWidth : Int): Unit ={ + val jtagConfig = SystemDebuggerConfig() + + val bscane2 = BSCANE2(usedId) + val jtagClockDomain = ClockDomain(bscane2.TCK) + + val jtagBridge = new JtagBridgeNoTap(jtagConfig, jtagClockDomain, jtagHeaderIgnoreWidth) + jtagBridge.io.ctrl << bscane2.toJtagTapInstructionCtrl() + + val debugger = new SystemDebugger(jtagConfig) + debugger.io.remote <> jtagBridge.io.remote + debugger.io.mem <> this.from(debugger.io.mem.c) + } +} + +case class DebugExtensionIo() extends Bundle with IMasterSlave{ + val bus = DebugExtensionBus() + val resetOut = Bool + + override def asMaster(): Unit = { + master(bus) + in(resetOut) + } +} + +class DebugPlugin(var debugClockDomain : ClockDomain, hardwareBreakpointCount : Int = 0, BreakpointReadback : Boolean = false) extends Plugin[VexRiscv] { + + var io : DebugExtensionIo = null + val injectionAsks = ArrayBuffer[(Stage, Bool)]() + var injectionPort : Stream[Bits] = null + + + object IS_EBREAK extends Stageable(Bool) + object DO_EBREAK extends Stageable(Bool) + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + + io = slave(DebugExtensionIo()).setName("debug") + + val decoderService = pipeline.service(classOf[DecoderService]) + + decoderService.addDefault(IS_EBREAK, False) + decoderService.add(EBREAK,List(IS_EBREAK -> True)) + + injectionPort = pipeline.service(classOf[IBusFetcher]).getInjectionPort() + + if(pipeline.serviceExist(classOf[ReportService])){ + val report = pipeline.service(classOf[ReportService]) + report.add("debug" -> { + val e = new DebugReport() + e.hardwareBreakpointCount = hardwareBreakpointCount + e + }) + } + } + + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + val logic = debugClockDomain {pipeline plug new Area{ + val iBusFetcher = service(classOf[IBusFetcher]) + val firstCycle = RegNext(False) setWhen (io.bus.cmd.ready) + val secondCycle = RegNext(firstCycle) + val resetIt = RegInit(False) + val haltIt = RegInit(False) + val stepIt = RegInit(False) + + val isPipBusy = RegNext(stages.map(_.arbitration.isValid).orR || iBusFetcher.incoming()) + val godmode = RegInit(False) setWhen(haltIt && !isPipBusy) + val haltedByBreak = RegInit(False) + val debugUsed = RegInit(False) setWhen(io.bus.cmd.valid) addAttribute(Verilator.public) + val disableEbreak = RegInit(False) + + val allowEBreak = debugUsed && !disableEbreak + + val hardwareBreakpoints = Vec(Reg(new Bundle{ + val valid = Bool() + val pc = UInt(31 bits) + }), hardwareBreakpointCount) + hardwareBreakpoints.foreach(_.valid init(False)) + + val busReadDataReg = Reg(Bits(32 bit)) + when(stages.last.arbitration.isValid) { + busReadDataReg := 
stages.last.output(REGFILE_WRITE_DATA) + } + io.bus.cmd.ready := True + io.bus.rsp.data := busReadDataReg + when(!RegNext(io.bus.cmd.address(2))){ + io.bus.rsp.data(0) := resetIt + io.bus.rsp.data(1) := haltIt + io.bus.rsp.data(2) := isPipBusy + io.bus.rsp.data(3) := haltedByBreak + io.bus.rsp.data(4) := stepIt + } + if (BreakpointReadback) { + switch(RegNext(io.bus.cmd.address(7 downto 2))) { + for(i <- 0 until hardwareBreakpointCount){ + is(0x10 + i){ + io.bus.rsp.data(31 downto 1) := hardwareBreakpoints(i).pc.asBits + io.bus.rsp.data(0) := hardwareBreakpoints(i).valid + } + } + } + } + + + injectionPort.valid := False + injectionPort.payload := io.bus.cmd.data + + when(io.bus.cmd.valid) { + switch(io.bus.cmd.address(7 downto 2)) { + is(0x0) { + when(io.bus.cmd.wr) { + stepIt := io.bus.cmd.data(4) + resetIt setWhen (io.bus.cmd.data(16)) clearWhen (io.bus.cmd.data(24)) + haltIt setWhen (io.bus.cmd.data(17)) clearWhen (io.bus.cmd.data(25)) + haltedByBreak clearWhen (io.bus.cmd.data(25)) + godmode clearWhen(io.bus.cmd.data(25)) + disableEbreak setWhen (io.bus.cmd.data(18)) clearWhen (io.bus.cmd.data(26)) + } + } + is(0x1) { + when(io.bus.cmd.wr) { + injectionPort.valid := True + io.bus.cmd.ready := injectionPort.ready + } + } + for(i <- 0 until hardwareBreakpointCount){ + is(0x10 + i){ + when(io.bus.cmd.wr){ + hardwareBreakpoints(i).assignFromBits(io.bus.cmd.data) + } + } + } + } + } + + decode.insert(DO_EBREAK) := !haltIt && (decode.input(IS_EBREAK) || hardwareBreakpoints.map(hb => hb.valid && hb.pc === (decode.input(PC) >> 1)).foldLeft(False)(_ || _)) && allowEBreak + when(execute.arbitration.isValid && execute.input(DO_EBREAK)){ + execute.arbitration.haltByOther := True + busReadDataReg := execute.input(PC).asBits + when(stagesFromExecute.tail.map(_.arbitration.isValid).orR === False){ + iBusFetcher.haltIt() + execute.arbitration.flushIt := True + execute.arbitration.flushNext := True + haltIt := True + haltedByBreak := True + } + } + + when(haltIt) { + iBusFetcher.haltIt() + } + + when(stepIt && iBusFetcher.incoming()) { + iBusFetcher.haltIt() + when(decode.arbitration.isValid) { + haltIt := True + } + } + + //Avoid having two C instruction executed in a single step + if(pipeline.config.withRvc){ + val cleanStep = RegNext(stepIt && decode.arbitration.isFiring) init(False) + execute.arbitration.flushNext setWhen(cleanStep) + when(cleanStep){ + execute.arbitration.flushNext := True + iBusFetcher.forceNoDecode() + } + } + + io.resetOut := RegNext(resetIt) + + if(serviceExist(classOf[InterruptionInhibitor])) { + when(haltIt || stepIt) { + service(classOf[InterruptionInhibitor]).inhibateInterrupts() + } + } + + when(godmode) { + pipeline.plugins.foreach{ + case p : ExceptionInhibitor => p.inhibateException() + case _ => + } + pipeline.plugins.foreach{ + case p : PrivilegeService => p.forceMachine() + case _ => + } + pipeline.plugins.foreach{ + case p : PredictionInterface => p.inDebugNoFetch() + case _ => + } + if(pipeline.things.contains(DEBUG_BYPASS_CACHE)) pipeline(DEBUG_BYPASS_CACHE) := True + } + when(allowEBreak) { + pipeline.plugins.foreach { + case p: ExceptionInhibitor => p.inhibateEbreakException() + case _ => + } + } + + val wakeService = serviceElse(classOf[IWake], null) + if(wakeService != null) when(haltIt){ + wakeService.askWake() + } + }} + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/DecoderSimplePlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/DecoderSimplePlugin.scala new file mode 100644 index 0000000..a525b77 --- /dev/null +++ 
b/VexRiscv/src/main/scala/vexriscv/plugin/DecoderSimplePlugin.scala @@ -0,0 +1,402 @@ +package vexriscv.plugin + +import vexriscv._ +import spinal.core._ +import spinal.core.internals.Literal +import spinal.lib._ +import vexriscv.demo.GenFull + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + + +case class Masked(value : BigInt,care : BigInt){ + assert((value & ~care) == 0) + var isPrime = true + + def < (that: Masked) = value < that.value || value == that.value && ~care < ~that.care + + def intersects(x: Masked) = ((value ^ x.value) & care & x.care) == 0 + + def covers(x: Masked) = ((value ^ x.value) & care | (~x.care) & care) == 0 + + def setPrime(value : Boolean) = { + isPrime = value + this + } + + def mergeOneBitDifSmaller(x: Masked) = { + val bit = value - x.value + val ret = new Masked(value &~ bit, care & ~bit) + // ret.isPrime = isPrime || x.isPrime + isPrime = false + x.isPrime = false + ret + } + def isSimilarOneBitDifSmaller(x: Masked) = { + val diff = value - x.value + care == x.care && value > x.value && (diff & diff - 1) == 0 + } + + + def === (hard : Bits) : Bool = (hard & care) === (value & care) + + def toString(bitCount : Int) = (0 until bitCount).map(i => if(care.testBit(i)) (if(value.testBit(i)) "1" else "0") else "-").reverseIterator.reduce(_+_) +} + +class DecoderSimplePlugin(catchIllegalInstruction : Boolean = false, + throwIllegalInstruction : Boolean = false, + assertIllegalInstruction : Boolean = false, + forceLegalInstructionComputation : Boolean = false, + decoderIsolationBench : Boolean = false, + stupidDecoder : Boolean = false) extends Plugin[VexRiscv] with DecoderService { + override def add(encoding: Seq[(MaskedLiteral, Seq[(Stageable[_ <: BaseType], Any)])]): Unit = encoding.foreach(e => this.add(e._1,e._2)) + override def add(key: MaskedLiteral, values: Seq[(Stageable[_ <: BaseType], Any)]): Unit = { + val instructionModel = encodings.getOrElseUpdate(key,ArrayBuffer[(Stageable[_ <: BaseType], BaseType)]()) + values.map{case (a,b) => { + assert(!instructionModel.contains(a), s"Over specification of $a") + val value = b match { + case e: SpinalEnumElement[_] => e() + case e: BaseType => e + } + instructionModel += (a->value) + }} + } + + override def addDefault(key: Stageable[_ <: BaseType], value: Any): Unit = { + assert(!defaults.contains(key)) + defaults(key) = value match{ + case e : SpinalEnumElement[_] => e() + case e : BaseType => e + } + } + + def forceIllegal() : Unit = if(catchIllegalInstruction) pipeline.decode.input(pipeline.config.LEGAL_INSTRUCTION) := False + + val defaults = mutable.LinkedHashMap[Stageable[_ <: BaseType], BaseType]() + val encodings = mutable.LinkedHashMap[MaskedLiteral,ArrayBuffer[(Stageable[_ <: BaseType], BaseType)]]() + var decodeExceptionPort : Flow[ExceptionCause] = null + + + override def setup(pipeline: VexRiscv): Unit = { + if(!catchIllegalInstruction) { + SpinalWarning("This VexRiscv configuration is set without illegal instruction catch support. 
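A small worked example of the Masked helper above: value holds the fixed bits, care marks which bits matter, and the assert forbids set bits outside the care mask. Using 7-bit opcodes purely as an illustration:

val exact = Masked(value = BigInt("0010011", 2), care = BigInt("1111111", 2)) // "0010011"
val low4  = Masked(value = BigInt("0000011", 2), care = BigInt("0001111", 2)) // "---0011"

exact.toString(7)     // "0010011"
low4.toString(7)      // "---0011"
low4 covers exact     // true : every input matching "0010011" also matches "---0011"
exact covers low4     // false
exact intersects low4 // true : they agree on every bit both of them care about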
Some software may rely on it (ex: Rust)") + } + if(catchIllegalInstruction) { + val exceptionService = pipeline.plugins.filter(_.isInstanceOf[ExceptionService]).head.asInstanceOf[ExceptionService] + decodeExceptionPort = exceptionService.newExceptionPort(pipeline.decode).setName("decodeExceptionPort") + } + } + + val detectLegalInstructions = catchIllegalInstruction || throwIllegalInstruction || forceLegalInstructionComputation || assertIllegalInstruction + + object ASSERT_ERROR extends Stageable(Bool) + + override def build(pipeline: VexRiscv): Unit = { + import pipeline.config._ + import pipeline.decode._ + + val stageables = (encodings.flatMap(_._2.map(_._1)) ++ defaults.map(_._1)).toList.distinct + + + if(stupidDecoder){ + if (detectLegalInstructions) insert(LEGAL_INSTRUCTION) := False + for(stageable <- stageables){ + if(defaults.contains(stageable)){ + insert(stageable).assignFrom(defaults(stageable)) + } else { + insert(stageable).assignDontCare() + } + } + for((key, tasks) <- encodings){ + when(input(INSTRUCTION) === key){ + if (detectLegalInstructions) insert(LEGAL_INSTRUCTION) := True + for((stageable, value) <- tasks){ + insert(stageable).assignFrom(value) + } + } + } + } else { + var offset = 0 + var defaultValue, defaultCare = BigInt(0) + val offsetOf = mutable.LinkedHashMap[Stageable[_ <: BaseType], Int]() + + //Build defaults value and field offset map + stageables.foreach(e => { + defaults.get(e) match { + case Some(value) => { + value.head.source match { + case literal: EnumLiteral[_] => literal.fixEncoding(e.dataType.asInstanceOf[SpinalEnumCraft[_]].getEncoding) + case _ => + } + defaultValue += value.head.source.asInstanceOf[Literal].getValue << offset + defaultCare += ((BigInt(1) << e.dataType.getBitsWidth) - 1) << offset + + } + case _ => + } + offsetOf(e) = offset + offset += e.dataType.getBitsWidth + }) + + //Build spec + val spec = encodings.map { case (key, values) => + var decodedValue = defaultValue + var decodedCare = defaultCare + for ((e, literal) <- values) { + literal.head.source match { + case literal: EnumLiteral[_] => literal.fixEncoding(e.dataType.asInstanceOf[SpinalEnumCraft[_]].getEncoding) + case _ => + } + val offset = offsetOf(e) + decodedValue |= literal.head.source.asInstanceOf[Literal].getValue << offset + decodedCare |= ((BigInt(1) << e.dataType.getBitsWidth) - 1) << offset + } + (Masked(key.value, key.careAbout), Masked(decodedValue, decodedCare)) + } + + + // logic implementation + val decodedBits = Bits(stageables.foldLeft(0)(_ + _.dataType.getBitsWidth) bits) + decodedBits := Symplify(input(INSTRUCTION), spec, decodedBits.getWidth) + if (detectLegalInstructions) insert(LEGAL_INSTRUCTION) := Symplify.logicOf(input(INSTRUCTION), SymplifyBit.getPrimeImplicantsByTrueAndDontCare(spec.unzip._1.toSeq, Nil, 32)) + if (throwIllegalInstruction) { + input(LEGAL_INSTRUCTION) //Fill the request for later (prePopTask) + Component.current.addPrePopTask(() => arbitration.isValid clearWhen(!input(LEGAL_INSTRUCTION))) + } + if(assertIllegalInstruction){ + val reg = RegInit(False) setWhen(arbitration.isValid) clearWhen(arbitration.isRemoved || !arbitration.isStuck) + insert(ASSERT_ERROR) := arbitration.isValid || reg + } + + if(decoderIsolationBench){ + KeepAttribute(RegNext(KeepAttribute(RegNext(decodedBits.removeAssignments().asInput())))) + out(Bits(32 bits)).setName("instruction") := KeepAttribute(RegNext(KeepAttribute(RegNext(input(INSTRUCTION))))) + } + + //Unpack decodedBits and insert fields in the pipeline + offset = 0 + stageables.foreach(e => { + 
insert(e).assignFromBits(decodedBits(offset, e.dataType.getBitsWidth bits)) + // insert(e).assignFromBits(RegNext(decodedBits(offset, e.dataType.getBitsWidth bits))) + offset += e.dataType.getBitsWidth + }) + } + + if(catchIllegalInstruction){ + decodeExceptionPort.valid := arbitration.isValid && !input(LEGAL_INSTRUCTION) // ?? HalitIt to alow decoder stage to wait valid data from 2 stages cache cache ?? + decodeExceptionPort.code := 2 + decodeExceptionPort.badAddr := input(INSTRUCTION).asUInt + } + if(assertIllegalInstruction){ + pipeline.stages.tail.foreach(s => s.output(ASSERT_ERROR) clearWhen(s.arbitration.isRemoved)) + assert(!pipeline.stages.last.output(ASSERT_ERROR)) + } + } + + def bench(toplevel : VexRiscv): Unit ={ + toplevel.rework{ + import toplevel.config._ + toplevel.getAllIo.toList.foreach{io => + if(io.isInput) { io.assignDontCare()} + io.setAsDirectionLess() + } + toplevel.decode.input(INSTRUCTION).removeAssignments() + toplevel.decode.input(INSTRUCTION) := Delay((in Bits(32 bits)).setName("instruction"),2) + val stageables = encodings.flatMap(_._2.map(_._1)).toSet + stageables.foreach(e => out(RegNext(RegNext(toplevel.decode.insert(e)).setName(e.getName())))) + if(catchIllegalInstruction) out(RegNext(RegNext(toplevel.decode.insert(LEGAL_INSTRUCTION)).setName(LEGAL_INSTRUCTION.getName()))) + // toplevel.getAdditionalNodesRoot.clear() + } + } +} + +object DecodingBench extends App{ + SpinalVerilog{ + val top = GenFull.cpu() + top.service(classOf[DecoderSimplePlugin]).bench(top) + top + } +} + + +object Symplify{ + val cache = mutable.LinkedHashMap[Bits,mutable.LinkedHashMap[Masked,Bool]]() + def getCache(addr : Bits) = cache.getOrElseUpdate(addr,mutable.LinkedHashMap[Masked,Bool]()) + + //Generate terms logic for the given input + def logicOf(input : Bits,terms : Seq[Masked]) = terms.map(t => getCache(input).getOrElseUpdate(t,t === input)).asBits.orR + + //Decode 'input' b using an mapping[key, decoding] specification + def apply(input: Bits, mapping: Iterable[(Masked, Masked)],resultWidth : Int) : Bits = { + val addrWidth = widthOf(input) + (for(bitId <- 0 until resultWidth) yield{ + val trueTerm = mapping.filter { case (k,t) => (t.care.testBit(bitId) && t.value.testBit(bitId))}.map(_._1) + val falseTerm = mapping.filter { case (k,t) => (t.care.testBit(bitId) && !t.value.testBit(bitId))}.map(_._1) + val symplifiedTerms = SymplifyBit.getPrimeImplicantsByTrueAndFalse(trueTerm.toSeq, falseTerm.toSeq, addrWidth) + logicOf(input, symplifiedTerms) + }).asBits + } +} + +object SymplifyBit{ + + //Return a new term with only one bit difference with 'term' and not included in falseTerms. above => 0 to 1 dif, else 1 to 0 diff + def genImplicitDontCare(falseTerms: Seq[Masked], term: Masked, bits: Int, above: Boolean): Masked = { + for (i <- 0 until bits; if term.care.testBit(i)) { + var t: Masked = null + if(above) { + if (!term.value.testBit(i)) + t = Masked(term.value.setBit(i), term.care) + } else { + if (term.value.testBit(i)) + t = Masked(term.value.clearBit(i), term.care) + } + if (t != null && !falseTerms.exists(_.intersects(t))) { + t.isPrime = false + return t + } + } + null + } + + //Return primes implicants for the trueTerms, falseTerms spec. 
Default value is don't care + def getPrimeImplicantsByTrueAndFalse(trueTerms: Seq[Masked], falseTerms: Seq[Masked], inputWidth : Int): Seq[Masked] = { + val primes = mutable.LinkedHashSet[Masked]() + trueTerms.foreach(_.isPrime = true) + falseTerms.foreach(_.isPrime = true) + val trueTermByCareCount = (inputWidth to 0 by -1).map(b => trueTerms.filter(b == _.care.bitCount)) + //table[Vector[HashSet[Masked]]](careCount)(bitSetCount) + val table = trueTermByCareCount.map(c => (0 to inputWidth).map(b => collection.mutable.Set(c.filter(b == _.value.bitCount): _*))) + for (i <- 0 to inputWidth) { + //Expends explicit terms + for (j <- 0 until inputWidth - i){ + for(term <- table(i)(j)){ + table(i+1)(j) ++= table(i)(j+1).withFilter(_.isSimilarOneBitDifSmaller(term)).map(_.mergeOneBitDifSmaller(term)) + } + } + //Expends implicit don't care terms + for (j <- 0 until inputWidth-i) { + for (prime <- table(i)(j).withFilter(_.isPrime)) { + val dc = genImplicitDontCare(falseTerms, prime, inputWidth, true) + if (dc != null) + table(i+1)(j) += dc mergeOneBitDifSmaller prime + } + for (prime <- table(i)(j+1).withFilter(_.isPrime)) { + val dc = genImplicitDontCare(falseTerms, prime, inputWidth, false) + if (dc != null) + table(i+1)(j) += prime mergeOneBitDifSmaller dc + } + } + for (r <- table(i)) + for (p <- r; if p.isPrime) + primes += p + } + + def optimise() { + val duplicateds = primes.filter(prime => verifyTrueFalse(primes.filterNot(_ == prime), trueTerms, falseTerms)) + if(duplicateds.nonEmpty) { + primes -= duplicateds.maxBy(_.care.bitCount) + optimise() + } + } + + optimise() + + verifyTrueFalse(primes, trueTerms, falseTerms) + var duplication = 0 + for(prime <- primes){ + if(verifyTrueFalse(primes.filterNot(_ == prime), trueTerms, falseTerms)){ + duplication += 1 + } + } + if(duplication != 0){ + PendingError(s"Duplicated primes : $duplication") + } + primes.toSeq + } + + //Verify that the 'terms' doesn't violate the trueTerms ++ falseTerms spec + def verifyTrueFalse(terms : Iterable[Masked], trueTerms : Seq[Masked], falseTerms : Seq[Masked]): Boolean ={ + return (trueTerms.forall(trueTerm => terms.exists(_ covers trueTerm))) && (falseTerms.forall(falseTerm => !terms.exists(_ covers falseTerm))) + } + + def checkTrue(terms : Iterable[Masked], trueTerms : Seq[Masked]): Boolean ={ + return trueTerms.forall(trueTerm => terms.exists(_ covers trueTerm)) + } + + + def getPrimeImplicantsByTrue(trueTerms: Seq[Masked], inputWidth : Int) : Seq[Masked] = getPrimeImplicantsByTrueAndDontCare(trueTerms, Nil, inputWidth) + + // Return primes implicants for the trueTerms, default value is False. 
+ // You can insert don't care values by adding non-prime implicants in the trueTerms + // Will simplify the trueTerms from the most constrained ones to the least constrained ones + def getPrimeImplicantsByTrueAndDontCare(trueTerms: Seq[Masked],dontCareTerms: Seq[Masked], inputWidth : Int): Seq[Masked] = { + val primes = mutable.LinkedHashSet[Masked]() + trueTerms.foreach(_.isPrime = true) + dontCareTerms.foreach(_.isPrime = false) + val termsByCareCount = (inputWidth to 0 by -1).map(b => (trueTerms ++ dontCareTerms).filter(b == _.care.bitCount)) + //table[Vector[HashSet[Masked]]](careCount)(bitSetCount) + val table = termsByCareCount.map(c => (0 to inputWidth).map(b => collection.mutable.Set(c.filter(m => b == m.value.bitCount): _*))) + for (i <- 0 to inputWidth) { + for (j <- 0 until inputWidth - i){ + for(term <- table(i)(j)){ + table(i+1)(j) ++= table(i)(j+1).withFilter(_.isSimilarOneBitDifSmaller(term)).map(_.mergeOneBitDifSmaller(term)) + } + } + for (r <- table(i)) + for (p <- r; if p.isPrime) + primes += p + } + + + def optimise() { + val duplicateds = primes.filter(prime => checkTrue(primes.filterNot(_ == prime), trueTerms)) + if(duplicateds.nonEmpty) { + primes -= duplicateds.maxBy(_.care.bitCount) + optimise() + } + } + + optimise() + + + var duplication = 0 + for(prime <- primes){ + if(checkTrue(primes.filterNot(_ == prime), trueTerms)){ + duplication += 1 + } + } + if(duplication != 0){ + PendingError(s"Duplicated primes : $duplication") + } + primes.toSeq + } + + def main(args: Array[String]) { + { + // val default = Masked(0, 0xF) + // val primeImplicants = List(4, 8, 10, 11, 12, 15).map(v => Masked(v, 0xF)) + // val dcImplicants = List(9, 14).map(v => Masked(v, 0xF).setPrime(false)) + // val reducedPrimeImplicants = getPrimeImplicantsByTrueAndDontCare(primeImplicants, dcImplicants, 4) + // println("UUT") + // println(reducedPrimeImplicants.map(_.toString(4)).mkString("\n")) + // println("REF") + // println("-100\n10--\n1--0\n1-1-") + } + + { + val primeImplicants = List(0).map(v => Masked(v, 0xF)) + val dcImplicants = (1 to 15).map(v => Masked(v, 0xF)) + val reducedPrimeImplicants = getPrimeImplicantsByTrueAndDontCare(primeImplicants, dcImplicants, 4) + println("UUT") + println(reducedPrimeImplicants.map(_.toString(4)).mkString("\n")) + } + { + val trueTerms = List(0, 15).map(v => Masked(v, 0xF)) + val falseTerms = List(3).map(v => Masked(v, 0xF)) + val primes = getPrimeImplicantsByTrueAndFalse(trueTerms, falseTerms, 4) + println(primes.map(_.toString(4)).mkString("\n")) + } + } +}
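The two objects above implement a Quine-McCluskey style minimization over (value, care) bit-mask terms: getPrimeImplicantsByTrueAndFalse grows implicants only while they never intersect a false term, and verifyTrueFalse checks the result with the covers relation. As a reading aid, here is a minimal, self-contained plain-Scala sketch of that term encoding; the names mirror the repository's Masked class, but this is an illustrative re-implementation under my own assumptions, not the actual class (which also carries state such as isPrime).

// Illustrative sketch only (plain Scala, no SpinalHDL). Not the repository's Masked class.
case class MaskedSketch(value: BigInt, care: BigInt) {
  // 'this' covers 'that' when every bit 'this' cares about is also fixed, with the
  // same polarity, in 'that', i.e. 'that' matches a subset of the inputs 'this' matches.
  def covers(that: MaskedSketch): Boolean =
    (care & that.care) == care && ((value ^ that.value) & care) == 0

  // Two terms intersect when no commonly cared-about bit requires opposite values,
  // i.e. at least one concrete input word matches both terms.
  def intersects(that: MaskedSketch): Boolean =
    ((value ^ that.value) & care & that.care) == 0

  // Render like SymplifyBit.main does: '-' for don't-care bits, '0'/'1' for cared bits.
  def toBitString(width: Int): String =
    (width - 1 to 0 by -1).map(i =>
      if (!care.testBit(i)) '-' else if (value.testBit(i)) '1' else '0').mkString
}

object MaskedSketchDemo extends App {
  val broad = MaskedSketch(value = 0x4, care = 0x7)  // matches ?100
  val exact = MaskedSketch(value = 0x4, care = 0xF)  // matches 0100 only
  println(broad.toBitString(4))                      // -100
  println(broad covers exact)                        // true
  println(broad intersects MaskedSketch(0x3, 0xF))   // false: 0011 can never match ?100
}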
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/DivPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/DivPlugin.scala new file mode 100644 index 0000000..c20dcb3 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/DivPlugin.scala @@ -0,0 +1,75 @@ +package vexriscv.plugin + +import vexriscv.{VexRiscv, _} +import spinal.core._ + +// DivPlugin was by the past a standalone plugin, but now it use the MulDivIterativePlugin implementation +class DivPlugin extends MulDivIterativePlugin(genMul = false, genDiv = true, mulUnrollFactor = 1, divUnrollFactor = 1) + +//import spinal.lib.math.MixedDivider +// +//class DivPlugin extends Plugin[VexRiscv]{ +// object IS_DIV extends Stageable(Bool) +// +// override def setup(pipeline: VexRiscv): Unit = { +// import Riscv._ +// import pipeline.config._ +// +// val actions = List[(Stageable[_ <: BaseType],Any)]( +// SRC1_CTRL -> Src1CtrlEnum.RS, +// SRC2_CTRL -> Src2CtrlEnum.RS, +// REGFILE_WRITE_VALID -> True, +// BYPASSABLE_EXECUTE_STAGE -> False, +// BYPASSABLE_MEMORY_STAGE -> True, +// RS1_USE -> True, +// RS2_USE -> True, +// IS_DIV -> True +// ) +// +// val decoderService = pipeline.service(classOf[DecoderService]) +// decoderService.addDefault(IS_DIV, False) +// decoderService.add(List( +// DIVX -> actions +// )) +// +// } +// +// override def build(pipeline: VexRiscv): Unit = { +// import pipeline._ +// import pipeline.config._ +// +// val divider = new MixedDivider(32, 32, true) //cmd >-> rsp +// +// //Send request to the divider component +// execute plug new Area { +// import execute._ +// +// divider.io.cmd.valid := False +// divider.io.cmd.numerator := input(SRC1) +// divider.io.cmd.denominator := input(SRC2) +// divider.io.cmd.signed := !input(INSTRUCTION)(12) +// +// when(arbitration.isValid && input(IS_DIV)) { +// divider.io.cmd.valid := !arbitration.isStuckByOthers && !arbitration.removeIt +// arbitration.haltItself := memory.arbitration.isValid && memory.input(IS_DIV) +// } +// } +// +// //Collect response from the divider component, REGFILE_WRITE_DATA overriding +// memory plug new Area{ +// import memory._ +// +// divider.io.flush := memory.arbitration.removeIt +// divider.io.rsp.ready := !arbitration.isStuckByOthers +// +// when(arbitration.isValid && input(IS_DIV)) { +// arbitration.haltItself := !divider.io.rsp.valid +// +// output(REGFILE_WRITE_DATA) := Mux(input(INSTRUCTION)(13), divider.io.rsp.remainder, divider.io.rsp.quotient).asBits +// } +// +// +// divider.io.rsp.payload.error.allowPruning +// } +// } +// } diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/DummyFencePlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/DummyFencePlugin.scala new file mode 100644 index 0000000..7efbaac --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/DummyFencePlugin.scala @@ -0,0 +1,22 @@ +package vexriscv.plugin + +import spinal.core._ +import vexriscv.{VexRiscv, _} + +class DummyFencePlugin extends Plugin[VexRiscv]{ + + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.add(FENCE_I, Nil) + decoderService.add(FENCE, Nil) + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + //Dummy + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/ExternalInterruptArrayPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/ExternalInterruptArrayPlugin.scala new file mode 100644 index 0000000..43d32f0 --- /dev/null +++ 
b/VexRiscv/src/main/scala/vexriscv/plugin/ExternalInterruptArrayPlugin.scala @@ -0,0 +1,30 @@ +package vexriscv.plugin + +import spinal.core._ +import vexriscv.VexRiscv + +class ExternalInterruptArrayPlugin(arrayWidth : Int = 32, + machineMaskCsrId : Int = 0xBC0, + machinePendingsCsrId : Int = 0xFC0, + supervisorMaskCsrId : Int = 0x9C0, + supervisorPendingsCsrId : Int = 0xDC0) extends Plugin[VexRiscv]{ + var externalInterruptArray : Bits = null + + override def setup(pipeline: VexRiscv): Unit = { + externalInterruptArray = in(Bits(arrayWidth bits)).setName("externalInterruptArray") + } + + override def build(pipeline: VexRiscv): Unit = { + val csr = pipeline.service(classOf[CsrPlugin]) + val externalInterruptArrayBuffer = RegNext(externalInterruptArray) + def gen(maskCsrId : Int, pendingsCsrId : Int, interruptPin : Bool) = new Area { + val mask = Reg(Bits(arrayWidth bits)) init(0) + val pendings = mask & externalInterruptArrayBuffer + interruptPin.setAsDirectionLess() := pendings.orR + csr.rw(maskCsrId, mask) + csr.r(pendingsCsrId, pendings) + } + gen(machineMaskCsrId, machinePendingsCsrId, csr.externalInterrupt) + if(csr.config.supervisorGen) gen(supervisorMaskCsrId, supervisorPendingsCsrId, csr.externalInterruptS) + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/Fetcher.scala b/VexRiscv/src/main/scala/vexriscv/plugin/Fetcher.scala new file mode 100644 index 0000000..14450a1 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/Fetcher.scala @@ -0,0 +1,637 @@ +package vexriscv.plugin + +import vexriscv._ +import spinal.core._ +import spinal.lib._ +import vexriscv.Riscv.IMM +import StreamVexPimper._ +import scala.collection.mutable.ArrayBuffer + + +//TODO val killLastStage = jump.pcLoad.valid || decode.arbitration.isRemoved +// DBUSSimple check memory halt execute optimization + +abstract class IBusFetcherImpl(val resetVector : BigInt, + val keepPcPlus4 : Boolean, + val decodePcGen : Boolean, + val compressedGen : Boolean, + val cmdToRspStageCount : Int, + val allowPcRegReusedForSecondStage : Boolean, + val injectorReadyCutGen : Boolean, + val prediction : BranchPrediction, + val historyRamSizeLog2 : Int, + val injectorStage : Boolean, + val relaxPredictorAddress : Boolean, + val fetchRedoGen : Boolean, + val predictionBuffer : Boolean = true) extends Plugin[VexRiscv] with JumpService with IBusFetcher{ + var prefetchExceptionPort : Flow[ExceptionCause] = null + var decodePrediction : DecodePredictionBus = null + var fetchPrediction : FetchPredictionBus = null + var dynamicTargetFailureCorrection : Flow[UInt] = null + var externalResetVector : UInt = null + assert(cmdToRspStageCount >= 1) +// assert(!(cmdToRspStageCount == 1 && !injectorStage)) + assert(!(compressedGen && !decodePcGen)) + var fetcherHalt : Bool = null + var forceNoDecodeCond : Bool = null + var pcValids : Vec[Bool] = null + def pcValid(stage : Stage) = pcValids(pipeline.indexOf(stage)) + var incomingInstruction : Bool = null + override def incoming() = incomingInstruction + + + override def withRvc(): Boolean = compressedGen + + var injectionPort : Stream[Bits] = null + override def getInjectionPort() = { + injectionPort = Stream(Bits(32 bits)) + injectionPort + } + def pcRegReusedForSecondStage = allowPcRegReusedForSecondStage && prediction != DYNAMIC_TARGET //TODO might not be required for DYNAMIC_TARGET + var predictionJumpInterface : Flow[UInt] = null + + override def haltIt(): Unit = fetcherHalt := True + override def forceNoDecode(): Unit = forceNoDecodeCond := True + case class JumpInfo(interface : 
Flow[UInt], stage: Stage, priority : Int) + val jumpInfos = ArrayBuffer[JumpInfo]() + override def createJumpInterface(stage: Stage, priority : Int = 0): Flow[UInt] = { + assert(stage != null) + val interface = Flow(UInt(32 bits)) + jumpInfos += JumpInfo(interface,stage, priority) + interface + } + + +// var decodeExceptionPort : Flow[ExceptionCause] = null + override def setup(pipeline: VexRiscv): Unit = { + fetcherHalt = False + forceNoDecodeCond = False + incomingInstruction = False + if(resetVector == null) externalResetVector = in(UInt(32 bits).setName("externalResetVector")) + + prediction match { + case NONE => + case STATIC | DYNAMIC => { + predictionJumpInterface = createJumpInterface(pipeline.decode) + decodePrediction = pipeline.service(classOf[PredictionInterface]).askDecodePrediction() + } + case DYNAMIC_TARGET => { + fetchPrediction = pipeline.service(classOf[PredictionInterface]).askFetchPrediction() + } + } + + pcValids = Vec(Bool, pipeline.stages.size) + } + + object IBUS_RSP + object DECOMPRESSOR + object INJECTOR_M2S + + def isDrivingDecode(s : Any): Boolean = { + if(injectorStage) return s == INJECTOR_M2S + s == IBUS_RSP || s == DECOMPRESSOR + } + + + + class FetchArea(pipeline : VexRiscv) extends Area { + import pipeline._ + import pipeline.config._ + val externalFlush = stages.map(_.arbitration.flushNext).orR + + def getFlushAt(s : Any, lastCond : Boolean = true): Bool = { + if(isDrivingDecode(s) && lastCond) pipeline.decode.arbitration.isRemoved else externalFlush + } + + //Arbitrate jump requests into pcLoad + val jump = new Area { + val sortedByStage = jumpInfos.sortWith((a, b) => { + (pipeline.indexOf(a.stage) > pipeline.indexOf(b.stage)) || + (pipeline.indexOf(a.stage) == pipeline.indexOf(b.stage) && a.priority > b.priority) + }) + val valids = sortedByStage.map(_.interface.valid) + val pcs = sortedByStage.map(_.interface.payload) + + val pcLoad = Flow(UInt(32 bits)) + pcLoad.valid := jumpInfos.map(_.interface.valid).orR + pcLoad.payload := MuxOH(OHMasking.first(valids.asBits), pcs) + } + + + + //The fetchPC pcReg can also be use for the second stage of the fetch + //When the fetcherHalt is set and the pipeline isn't stalled,, the pc is propagated to to the pcReg, which allow + //using the pc pipeline to get the next PC value for interrupts + val fetchPc = new Area{ + //PC calculation without Jump + val output = Stream(UInt(32 bits)) + val pcReg = Reg(UInt(32 bits)) init(if(resetVector != null) resetVector else externalResetVector) addAttribute(Verilator.public) + val correction = False + val correctionReg = RegInit(False) setWhen(correction) clearWhen(output.fire) + val corrected = correction || correctionReg + val pcRegPropagate = False + val booted = RegNext(True) init (False) + val inc = RegInit(False) clearWhen(correction || pcRegPropagate) setWhen(output.fire) clearWhen(!output.valid && output.ready) + val pc = pcReg + (inc ## B"00").asUInt + val predictionPcLoad = ifGen(prediction == DYNAMIC_TARGET) (Flow(UInt(32 bits))) + val redo = (fetchRedoGen || prediction == DYNAMIC_TARGET) generate Flow(UInt(32 bits)) + val flushed = False + + if(compressedGen) when(inc) { + pc(1) := False + } + + if(predictionPcLoad != null) { + when(predictionPcLoad.valid) { + correction := True + pc := predictionPcLoad.payload + } + } + if(redo != null) when(redo.valid){ + correction := True + pc := redo.payload + flushed := True + } + when(jump.pcLoad.valid) { + correction := True + pc := jump.pcLoad.payload + flushed := True + } + + when(booted && (output.ready || correction || 
pcRegPropagate)){ + pcReg := pc + } + + pc(0) := False + if(!compressedGen) pc(1) := False + + output.valid := !fetcherHalt && booted + output.payload := pc + } + + val decodePc = ifGen(decodePcGen)(new Area { + //PC calculation without Jump + val flushed = False + val pcReg = Reg(UInt(32 bits)) init(if(resetVector != null) resetVector else externalResetVector) addAttribute(Verilator.public) + val pcPlus = if(compressedGen) + pcReg + ((decode.input(IS_RVC)) ? U(2) | U(4)) + else + pcReg + 4 + + if (keepPcPlus4) KeepAttribute(pcPlus) + val injectedDecode = False + when(decode.arbitration.isFiring && !injectedDecode) { + pcReg := pcPlus + } + + val predictionPcLoad = ifGen(prediction == DYNAMIC_TARGET) (Flow(UInt(32 bits))) + if(prediction == DYNAMIC_TARGET) { + when(predictionPcLoad.valid && !forceNoDecodeCond) { + pcReg := predictionPcLoad.payload + } + } + + //application of the selected jump request + when(jump.pcLoad.valid && (!decode.arbitration.isStuck || decode.arbitration.isRemoved)) { + pcReg := jump.pcLoad.payload + flushed := True + } + }) + + + case class FetchRsp() extends Bundle { + val pc = UInt(32 bits) + val rsp = IBusSimpleRsp() + val isRvc = Bool() + } + + + val iBusRsp = new Area { + val redoFetch = False + val stages = Array.fill(cmdToRspStageCount + 1)(new Bundle { + val input = Stream(UInt(32 bits)) + val output = Stream(UInt(32 bits)) + val halt = Bool() + }) + + stages(0).input << fetchPc.output + for(s <- stages) { + s.halt := False + s.output << s.input.haltWhen(s.halt) + } + + if(fetchPc.redo != null) { + fetchPc.redo.valid := redoFetch + fetchPc.redo.payload := stages.last.input.payload + } + + val flush = (if(isDrivingDecode(IBUS_RSP)) pipeline.decode.arbitration.isRemoved || decode.arbitration.flushNext && !decode.arbitration.isStuck else externalFlush) || redoFetch + for((s,sNext) <- (stages, stages.tail).zipped) { + val sFlushed = if(s != stages.head) flush else False + val sNextFlushed = flush + if(s == stages.head && pcRegReusedForSecondStage) { + sNext.input.arbitrationFrom(s.output.toEvent().m2sPipeWithFlush(sNextFlushed, false, collapsBubble = false, flushInput = sFlushed)) + sNext.input.payload := fetchPc.pcReg + fetchPc.pcRegPropagate setWhen(sNext.input.ready) + } else { + sNext.input << s.output.m2sPipeWithFlush(sNextFlushed, false, collapsBubble = false, flushInput = sFlushed) + } + } + + val readyForError = True + val output = Stream(FetchRsp()) + incomingInstruction setWhen(stages.tail.map(_.input.valid).reduce(_ || _)) + } + + val decompressor = ifGen(decodePcGen)(new Area{ + val input = iBusRsp.output.clearValidWhen(iBusRsp.redoFetch) + val output = Stream(FetchRsp()) + val flush = getFlushAt(DECOMPRESSOR) + val flushNext = if(isDrivingDecode(DECOMPRESSOR)) decode.arbitration.flushNext else False + val consumeCurrent = if(isDrivingDecode(DECOMPRESSOR)) flushNext && output.ready else False + + val bufferValid = RegInit(False) + val bufferData = Reg(Bits(16 bits)) + + val isInputLowRvc = input.rsp.inst(1 downto 0) =/= 3 + val isInputHighRvc = input.rsp.inst(17 downto 16) =/= 3 + val throw2BytesReg = RegInit(False) + val throw2Bytes = throw2BytesReg || input.pc(1) + val unaligned = throw2Bytes || bufferValid + def aligned = !unaligned + + //Latch and patches are there to ensure that the decoded instruction do not mutate while being halted and unscheduled to ensure FpuPlugin cmd fork from consistancy + val bufferValidLatch = RegNextWhen(bufferValid, input.valid) + val throw2BytesLatch = RegNextWhen(throw2Bytes, input.valid) + val bufferValidPatched 
= input.valid ? bufferValid | bufferValidLatch + val throw2BytesPatched = input.valid ? throw2Bytes | throw2BytesLatch + + val raw = Mux( + sel = bufferValidPatched, + whenTrue = input.rsp.inst(15 downto 0) ## bufferData, + whenFalse = input.rsp.inst(31 downto 16) ## (throw2BytesPatched ? input.rsp.inst(31 downto 16) | input.rsp.inst(15 downto 0)) + ) + val isRvc = raw(1 downto 0) =/= 3 + val decompressed = RvcDecompressor(raw(15 downto 0), pipeline.config.withRvf, pipeline.config.withRvd) + output.valid := input.valid && !(throw2Bytes && !bufferValid && !isInputHighRvc) + output.pc := input.pc + output.isRvc := isRvc + output.rsp.inst := isRvc ? decompressed | raw + input.ready := output.ready && (!iBusRsp.stages.last.input.valid || flushNext || (!(bufferValid && isInputHighRvc) && !(aligned && isInputLowRvc && isInputHighRvc))) + + when(output.fire){ + throw2BytesReg := (aligned && isInputLowRvc && isInputHighRvc) || (bufferValid && isInputHighRvc) + } + val bufferFill = (aligned && isInputLowRvc && !isInputHighRvc) || (bufferValid && !isInputHighRvc) || (throw2Bytes && !isRvc && !isInputHighRvc) + when(output.ready && input.valid){ + bufferValid := False + } + when(output.ready && input.valid){ + bufferData := input.rsp.inst(31 downto 16) + bufferValid setWhen(bufferFill) + } + + when(flush || consumeCurrent){ + throw2BytesReg := False + bufferValid := False + } + + if(fetchPc.redo != null) { + fetchPc.redo.payload(1) setWhen(throw2BytesReg) + } + }) + + + def condApply[T](that : T, cond : Boolean)(func : (T) => T) = if(cond)func(that) else that + val injector = new Area { + val inputBeforeStage = condApply(if (decodePcGen) decompressor.output else iBusRsp.output, injectorReadyCutGen)(_.s2mPipe(externalFlush)) + if (injectorReadyCutGen) { + iBusRsp.readyForError.clearWhen(inputBeforeStage.valid) //Can't emit error if there is a instruction pending in the s2mPipe + incomingInstruction setWhen (inputBeforeStage.valid) + } + val decodeInput = (if (injectorStage) { + val flushStage = getFlushAt(INJECTOR_M2S) + val decodeInput = inputBeforeStage.m2sPipeWithFlush(flushStage, false, collapsBubble = false, flushInput = externalFlush) + decode.insert(INSTRUCTION_ANTICIPATED) := Mux(decode.arbitration.isStuck, decode.input(INSTRUCTION), inputBeforeStage.rsp.inst) + iBusRsp.readyForError.clearWhen(decodeInput.valid) //Can't emit error when there is a instruction pending in the injector stage buffer + incomingInstruction setWhen (decodeInput.valid) + decodeInput + } else { + inputBeforeStage + }) + + if(!decodePcGen) iBusRsp.readyForError.clearWhen(!pcValid(decode)) //Need to wait a valid PC on the decode stage, as it is use to fill CSR xEPC + + + def pcUpdatedGen(input : Bool, stucks : Seq[Bool], relaxedInput : Boolean, flush : Bool) : Seq[Bool] = { + stucks.scanLeft(input)((i, stuck) => { + val reg = RegInit(False) + if(!relaxedInput) when(flush) { + reg := False + } + when(!stuck) { + reg := i + } + if(relaxedInput || i != input) when(flush) { + reg := False + } + reg + }).tail + } + + val stagesFromExecute = stages.dropWhile(_ != execute).toList + val nextPcCalc = if (decodePcGen) new Area{ + val valids = pcUpdatedGen(True, False :: stagesFromExecute.map(_.arbitration.isStuck), true, decodePc.flushed) + pcValids := Vec(valids.takeRight(stages.size)) + } else new Area{ + val valids = pcUpdatedGen(True, iBusRsp.stages.tail.map(!_.input.ready) ++ (if (injectorStage) List(!decodeInput.ready) else Nil) ++ stagesFromExecute.map(_.arbitration.isStuck), false, fetchPc.flushed) + pcValids := 
Vec(valids.takeRight(stages.size)) + } + + decodeInput.ready := !decode.arbitration.isStuck + decode.arbitration.isValid := decodeInput.valid + decode.insert(PC) := (if (decodePcGen) decodePc.pcReg else decodeInput.pc) + decode.insert(INSTRUCTION) := decodeInput.rsp.inst + if (compressedGen) decode.insert(IS_RVC) := decodeInput.isRvc + + if (injectionPort != null) { + Component.current.addPrePopTask(() => { + val state = RegInit(U"000") + + injectionPort.ready := False + if(decodePcGen){ + decodePc.injectedDecode setWhen(state =/= 0) + } + switch(state) { + is(0) { //request pipelining + when(injectionPort.valid) { + state := 1 + } + } + is(1) { //Give time to propagate the payload + state := 2 + } + is(2){ //read regfile delay + decode.arbitration.isValid := True + decode.arbitration.haltItself := True + state := 3 + } + is(3){ //Do instruction + decode.arbitration.isValid := True + when(!decode.arbitration.isStuck) { + state := 4 + } + } + is(4){ //request pipelining + injectionPort.ready := True + state := 0 + } + } + + //Check if the decode instruction is driven by a register + val instructionDriver = try { + decode.input(INSTRUCTION).getDrivingReg + } catch { + case _: Throwable => null + } + if (instructionDriver != null) { //If yes => + //Insert the instruction by writing the "fetch to decode instruction register", + // Work even if it need to cross some hierarchy (caches) + instructionDriver.component.rework { + when(state.pull() =/= 0) { + instructionDriver := injectionPort.payload.pull() + } + } + } else { + //Insert the instruction via a mux in the decode stage + when(state =/= 0) { + decode.input(INSTRUCTION) := RegNext(injectionPort.payload) + } + } + }) + } + + Component.current.addPrePopTask(() => { + decode.arbitration.isValid clearWhen(forceNoDecodeCond) + }) + + //Formal verification signals generation, miss prediction stuff ? + val formal = new Area { + val raw = if(compressedGen) decompressor.raw else inputBeforeStage.rsp.inst + val rawInDecode = Delay(raw, if(injectorStage) 1 else 0, when = decodeInput.ready) + decode.insert(FORMAL_INSTRUCTION) := rawInDecode + + decode.insert(FORMAL_PC_NEXT) := (if (compressedGen) + decode.input(PC) + ((decode.input(IS_RVC)) ? 
U(2) | U(4)) + else + decode.input(PC) + 4) + + if(decodePc != null && decodePc.predictionPcLoad != null){ + when(decodePc.predictionPcLoad.valid){ + decode.insert(FORMAL_PC_NEXT) := decodePc.predictionPcLoad.payload + } + } + + jumpInfos.foreach(info => { + when(info.interface.valid) { + info.stage.output(FORMAL_PC_NEXT) := info.interface.payload + } + }) + } + } + + def stage1ToInjectorPipe[T <: Data](input : T): (T, T, T) ={ + val iBusRspContext = iBusRsp.stages.drop(1).dropRight(1).foldLeft(input)((data,stage) => RegNextWhen(data, stage.output.ready)) + + val iBusRspContextOutput = cloneOf(input) + iBusRspContextOutput := iBusRspContext + val injectorContext = Delay(iBusRspContextOutput, cycleCount=if(injectorStage) 1 else 0, when=injector.decodeInput.ready) + val injectorContextWire = cloneOf(input) //Allow combinatorial override + injectorContextWire := injectorContext + (iBusRspContext, iBusRspContextOutput, injectorContextWire) + } + + val predictor = prediction match { + case NONE => + case STATIC | DYNAMIC => { + def historyWidth = 2 + val dynamic = ifGen(prediction == DYNAMIC) (new Area { + case class BranchPredictorLine() extends Bundle{ + val history = SInt(historyWidth bits) + } + + val historyCache = Mem(BranchPredictorLine(), 1 << historyRamSizeLog2) + val historyWrite = historyCache.writePort + val historyWriteLast = RegNextWhen(historyWrite, iBusRsp.stages(0).output.ready) + val hazard = historyWriteLast.valid && historyWriteLast.address === (iBusRsp.stages(0).input.payload >> 2).resized + + case class DynamicContext() extends Bundle{ + val hazard = Bool + val line = BranchPredictorLine() + } + val fetchContext = DynamicContext() + fetchContext.hazard := hazard + fetchContext.line := historyCache.readSync((fetchPc.output.payload >> 2).resized, iBusRsp.stages(0).output.ready || externalFlush) + + object PREDICTION_CONTEXT extends Stageable(DynamicContext()) + decode.insert(PREDICTION_CONTEXT) := stage1ToInjectorPipe(fetchContext)._3 + val decodeContextPrediction = decode.input(PREDICTION_CONTEXT).line.history.msb + + val branchStage = decodePrediction.stage + val branchContext = branchStage.input(PREDICTION_CONTEXT) + val moreJump = decodePrediction.rsp.wasWrong ^ branchContext.line.history.msb + + historyWrite.address := branchStage.input(PC)(2, historyRamSizeLog2 bits) + (if(pipeline.config.withRvc) + ((!branchStage.input(IS_RVC) && branchStage.input(PC)(1)) ? U(1) | U(0)) + else + U(0)) + + historyWrite.data.history := branchContext.line.history + (moreJump ? S(-1) | S(1)) + val sat = (branchContext.line.history === (moreJump ? 
S(branchContext.line.history.minValue) | S(branchContext.line.history.maxValue))) + historyWrite.valid := !branchContext.hazard && branchStage.arbitration.isFiring && branchStage.input(BRANCH_CTRL) === BranchCtrlEnum.B && !sat + }) + + + val imm = IMM(decode.input(INSTRUCTION)) + + val conditionalBranchPrediction = prediction match { + case STATIC => imm.b_sext.msb + case DYNAMIC => dynamic.decodeContextPrediction + } + + decodePrediction.cmd.hadBranch := decode.input(BRANCH_CTRL) === BranchCtrlEnum.JAL || (decode.input(BRANCH_CTRL) === BranchCtrlEnum.B && conditionalBranchPrediction) + + val noPredictionOnMissaligned = (!pipeline.config.withRvc) generate new Area{ + val missaligned = decode.input(BRANCH_CTRL).mux( + BranchCtrlEnum.JAL -> imm.j_sext(1), + default -> imm.b_sext(1) + ) + decodePrediction.cmd.hadBranch clearWhen(missaligned) + } + + //TODO no more fireing depedancies + predictionJumpInterface.valid := decode.arbitration.isValid && decodePrediction.cmd.hadBranch + predictionJumpInterface.payload := decode.input(PC) + ((decode.input(BRANCH_CTRL) === BranchCtrlEnum.JAL) ? imm.j_sext | imm.b_sext).asUInt + decode.arbitration.flushNext setWhen(predictionJumpInterface.valid) + + if(relaxPredictorAddress) KeepAttribute(predictionJumpInterface.payload) + } + case DYNAMIC_TARGET => new Area{ +// assert(!compressedGen || cmdToRspStageCount == 1, "Can't combine DYNAMIC_TARGET and RVC as it could stop the instruction fetch mid-air") + + case class BranchPredictorLine() extends Bundle{ + val source = Bits(30 - historyRamSizeLog2 bits) + val branchWish = UInt(2 bits) + val last2Bytes = ifGen(compressedGen)(Bool) + val target = UInt(32 bits) + } + + val history = Mem(BranchPredictorLine(), 1 << historyRamSizeLog2) + val historyWriteDelayPatched = history.writePort + val historyWrite = cloneOf(historyWriteDelayPatched) + historyWriteDelayPatched.valid := historyWrite.valid + historyWriteDelayPatched.address := (if(predictionBuffer) historyWrite.address - 1 else historyWrite.address) + historyWriteDelayPatched.data := historyWrite.data + + + val writeLast = RegNextWhen(historyWriteDelayPatched, iBusRsp.stages(0).output.ready) + + //Avoid write to read hazard + val buffer = predictionBuffer generate new Area{ + val line = history.readSync((iBusRsp.stages(0).input.payload >> 2).resized, iBusRsp.stages(0).output.ready) + val pcCorrected = RegNextWhen(fetchPc.corrected, iBusRsp.stages(0).input.ready) + val hazard = (writeLast.valid && writeLast.address === (iBusRsp.stages(1).input.payload >> 2).resized) + } + + val (line, hazard) = predictionBuffer match { + case true => + (RegNextWhen(buffer.line, iBusRsp.stages(0).output.ready), + RegNextWhen(buffer.hazard, iBusRsp.stages(0).output.ready) || buffer.pcCorrected) + case false => + (history.readSync((iBusRsp.stages(0).input.payload >> 2).resized, + iBusRsp.stages(0).output.ready), writeLast.valid && writeLast.address === (iBusRsp.stages(1).input.payload >> 2).resized) + } + + val hit = line.source === (iBusRsp.stages(1).input.payload.asBits >> 2 + historyRamSizeLog2) + if(compressedGen) hit clearWhen(!line.last2Bytes && iBusRsp.stages(1).input.payload(1)) + + fetchPc.predictionPcLoad.valid := line.branchWish.msb && hit && !hazard && iBusRsp.stages(1).input.valid + fetchPc.predictionPcLoad.payload := line.target + + case class PredictionResult() extends Bundle{ + val hazard = Bool + val hit = Bool + val line = BranchPredictorLine() + } + + val fetchContext = PredictionResult() + fetchContext.hazard := hazard + fetchContext.hit := hit + 
fetchContext.line := line + + val (iBusRspContext, iBusRspContextOutput, injectorContext) = stage1ToInjectorPipe(fetchContext) + + object PREDICTION_CONTEXT extends Stageable(PredictionResult()) + pipeline.decode.insert(PREDICTION_CONTEXT) := injectorContext + val branchStage = fetchPrediction.stage + val branchContext = branchStage.input(PREDICTION_CONTEXT) + + fetchPrediction.cmd.hadBranch := branchContext.hit && !branchContext.hazard && branchContext.line.branchWish.msb + fetchPrediction.cmd.targetPc := branchContext.line.target + + + historyWrite.valid := False + historyWrite.address := fetchPrediction.rsp.sourceLastWord(2, historyRamSizeLog2 bits) + historyWrite.data.source := fetchPrediction.rsp.sourceLastWord.asBits >> 2 + historyRamSizeLog2 + historyWrite.data.target := fetchPrediction.rsp.finalPc + if(compressedGen) historyWrite.data.last2Bytes := fetchPrediction.stage.input(PC)(1) && fetchPrediction.stage.input(IS_RVC) + + when(fetchPrediction.rsp.wasRight) { + historyWrite.valid := branchContext.hit + historyWrite.data.branchWish := branchContext.line.branchWish + (branchContext.line.branchWish === 2).asUInt - (branchContext.line.branchWish === 1).asUInt + } otherwise { + when(branchContext.hit) { + historyWrite.valid := True + historyWrite.data.branchWish := branchContext.line.branchWish - (branchContext.line.branchWish.msb).asUInt + (!branchContext.line.branchWish.msb).asUInt + } otherwise { + historyWrite.valid := True + historyWrite.data.branchWish := "10" + } + } + + historyWrite.valid clearWhen(branchContext.hazard || !branchStage.arbitration.isFiring) + + val compressor = compressedGen generate new Area{ + val predictionBranch = iBusRspContext.hit && !iBusRspContext.hazard && iBusRspContext.line.branchWish(1) + val unalignedWordIssue = iBusRsp.output.valid && predictionBranch && iBusRspContext.line.last2Bytes && Mux(decompressor.unaligned, !decompressor.isInputHighRvc, decompressor.isInputLowRvc && !decompressor.isInputHighRvc) + + when(unalignedWordIssue){ + historyWrite.valid := True + historyWrite.address := (iBusRsp.stages(1).input.payload >> 2).resized + historyWrite.data.branchWish := 0 + + iBusRsp.redoFetch := True + } + + //Do not trigger prediction hit when it is one for the upper RVC word and we aren't there yet + iBusRspContextOutput.hit clearWhen(iBusRspContext.line.last2Bytes && (decompressor.bufferValid || (!decompressor.throw2Bytes && decompressor.isInputLowRvc))) + + decodePc.predictionPcLoad.valid := injectorContext.line.branchWish.msb && injectorContext.hit && !injectorContext.hazard && injector.decodeInput.fire + decodePc.predictionPcLoad.payload := injectorContext.line.target + + //Clean the RVC buffer when a prediction was made + when(iBusRspContext.line.branchWish.msb && iBusRspContextOutput.hit && !iBusRspContext.hazard && decompressor.output.fire){ + decompressor.bufferValid := False + decompressor.throw2BytesReg := False + decompressor.input.ready := True //Drop the remaining byte if any + } + } + } + } + + def stageXToIBusRsp[T <: Data](stage : Any, input : T): (T) ={ + iBusRsp.stages.dropWhile(_ != stage).tail.foldLeft(input)((data,stage) => RegNextWhen(data, stage.output.ready)) + } + + } +}
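IBusFetcherImpl above funnels every jump request created through createJumpInterface into a single pcLoad: requests are ordered so that later pipeline stages beat earlier ones, within the same stage a higher priority value wins, and OHMasking.first plus MuxOH then pick the first valid entry in that order. The plain-Scala sketch below (no SpinalHDL, made-up stage indices and addresses) restates that selection rule behaviourally.

// Behavioural sketch of the arbitration in IBusFetcherImpl.FetchArea.jump; illustration only.
object JumpArbitrationSketch extends App {
  // One pending jump request, as seen by the arbiter.
  case class JumpReq(stageIndex: Int, priority: Int, valid: Boolean, pc: Long)

  // Later stage wins; on a tie, the higher 'priority' wins; then take the first valid
  // request, which is what OHMasking.first + MuxOH implement in hardware.
  def arbitrate(reqs: Seq[JumpReq]): Option[Long] = {
    val sorted = reqs.sortWith { (a, b) =>
      a.stageIndex > b.stageIndex ||
        (a.stageIndex == b.stageIndex && a.priority > b.priority)
    }
    sorted.find(_.valid).map(_.pc)
  }

  // Hypothetical example: a predicted branch in decode loses against a trap taken
  // in a later stage during the same cycle.
  val decodeBranch  = JumpReq(stageIndex = 1, priority = 0, valid = true, pc = 0x80000010L)
  val lateStageTrap = JumpReq(stageIndex = 4, priority = 0, valid = true, pc = 0x80000000L)
  println(arbitrate(Seq(decodeBranch, lateStageTrap))) // Some(2147483648), i.e. 0x80000000
}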
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/FormalPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/FormalPlugin.scala new file mode 100644 index 0000000..2d70ebd --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/FormalPlugin.scala @@ -0,0 +1,135 @@ +package vexriscv.plugin + +import spinal.core._ +import spinal.lib._ +import vexriscv.VexRiscv + +case class RvfiPortRsRead() extends Bundle{ + val addr = UInt(5 bits) + val rdata = Bits(32 bits) +} + +case class RvfiPortRsWrite() extends Bundle{ + val addr = UInt(5 bits) + val wdata = Bits(32 bits) +} + +case class RvfiPortPc() extends Bundle{ + val rdata = UInt(32 bits) + val wdata = UInt(32 bits) +} + + +case class RvfiPortMem() extends Bundle{ + val addr = UInt(32 bits) + val rmask = Bits(4 bits) + val wmask = Bits(4 bits) + val rdata = Bits(32 bits) + val wdata = Bits(32 bits) +} + +case class RvfiPort() extends Bundle with IMasterSlave { + val valid = Bool + val order = UInt(64 bits) + val insn = Bits(32 bits) + val trap = Bool + val halt = Bool + val intr = Bool + val mode = Bits(2 bits) + val ixl = Bits(2 bits) + val rs1 = RvfiPortRsRead() + val rs2 = RvfiPortRsRead() + val rd = RvfiPortRsWrite() + val pc = RvfiPortPc() + val mem = RvfiPortMem() + + override def asMaster(): Unit = out(this) +} + + +//Tool stuff +//https://www.reddit.com/r/yosys/comments/77g5hn/unsupported_cell_type_error_adff/ +//rd_addr == 0 => no rd_wdata check +//instruction that doesn't use RSx have to force the formal port address to zero + +//feature added +//Halt CPU on decoding exception + +//VexRiscv changes +// + +//VexRiscv bug +//1) pcManagerService.createJumpInterface(pipeline.execute) +// pcManagerService.createJumpInterface(if(earlyBranch) pipeline.execute else pipeline.memory) +//2) JALR => clear PC(0) +//3) input(INSTRUCTION)(5) REGFILE_WRITE_VALID memory read with exception would not fire properly + +class FormalPlugin extends Plugin[VexRiscv]{ + + var rvfi : RvfiPort = null + + + override def setup(pipeline: VexRiscv): Unit = { + rvfi = master(RvfiPort()).setName("rvfi") + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + import vexriscv.Riscv._ + + writeBack plug new Area{ + import writeBack._ + + val order = Reg(UInt(64 bits)) init(0) + when(arbitration.isFiring){ + order := order + 1 + } + + + rvfi.valid := arbitration.isFiring + rvfi.order := order + rvfi.insn := output(FORMAL_INSTRUCTION) + rvfi.trap := False + rvfi.halt := False + rvfi.intr := False + rvfi.mode := 3 + rvfi.ixl := 1 +// rvfi.rs1.addr := output(INSTRUCTION)(rs1Range).asUInt +// rvfi.rs2.addr := output(INSTRUCTION)(rs2Range).asUInt +// rvfi.rs1.rdata := output(RS1) +// rvfi.rs2.rdata := output(RS2) + rvfi.rs1.addr := output(RS1_USE) ? output(INSTRUCTION)(rs1Range).asUInt | U(0) + rvfi.rs2.addr := output(RS2_USE) ? output(INSTRUCTION)(rs2Range).asUInt | U(0) + rvfi.rs1.rdata := output(RS1_USE) ? output(RS1) | B(0) + rvfi.rs2.rdata := output(RS2_USE) ? output(RS2) | B(0) + rvfi.rd.addr := output(REGFILE_WRITE_VALID) ? (output(INSTRUCTION)(rdRange).asUInt) | U(0) + rvfi.rd.wdata := output(REGFILE_WRITE_VALID) ? 
output(REGFILE_WRITE_DATA) | B(0) + rvfi.pc.rdata := output(PC) + rvfi.pc.wdata := output(FORMAL_PC_NEXT) + rvfi.mem.addr := output(FORMAL_MEM_ADDR) + rvfi.mem.rmask := output(FORMAL_MEM_RMASK) + rvfi.mem.wmask := output(FORMAL_MEM_WMASK) + rvfi.mem.rdata := output(FORMAL_MEM_RDATA) + rvfi.mem.wdata := output(FORMAL_MEM_WDATA) + + val haltRequest = False + stages.map(s => { + when(s.arbitration.isValid && s.output(FORMAL_HALT)){ //Stage is exception halted + when(stages.drop(indexOf(s) + 1).map(!_.arbitration.isValid).foldLeft(True)(_ && _)){ //There nothing in futher stages + haltRequest := True + } + } + }) + + when(Delay(haltRequest, 5, init=False)){ //Give time for value propagation from decode stage to writeback stage + rvfi.valid := True + rvfi.trap := True + rvfi.halt := True + } + + val haltFired = RegInit(False) setWhen(rvfi.valid && rvfi.halt) + rvfi.valid clearWhen(haltFired) + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/FpuPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/FpuPlugin.scala new file mode 100644 index 0000000..3e664f5 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/FpuPlugin.scala @@ -0,0 +1,314 @@ +package vexriscv.plugin + +import spinal.core._ +import spinal.core.internals.{BoolLiteral, Literal} +import spinal.lib._ +import vexriscv._ +import vexriscv.Riscv._ +import vexriscv.ip.fpu._ + +import scala.collection.mutable.ArrayBuffer + +class FpuPlugin(externalFpu : Boolean = false, + simHalt : Boolean = false, + val p : FpuParameter) extends Plugin[VexRiscv] with VexRiscvRegressionArg { + + object FPU_ENABLE extends Stageable(Bool()) + object FPU_COMMIT extends Stageable(Bool()) + object FPU_COMMIT_SYNC extends Stageable(Bool()) + object FPU_COMMIT_LOAD extends Stageable(Bool()) + object FPU_RSP extends Stageable(Bool()) + object FPU_FORKED extends Stageable(Bool()) + object FPU_OPCODE extends Stageable(FpuOpcode()) + object FPU_ARG extends Stageable(Bits(2 bits)) + object FPU_FORMAT extends Stageable(FpuFormat()) + + var port : FpuPort = null //Commit port is already isolated + + override def getVexRiscvRegressionArgs(): Seq[String] = { + var args = List[String]() + args :+= "RVF=yes" + if(p.withDouble) args :+= "RVD=yes" + args + } + + override def setup(pipeline: VexRiscv): Unit = { + import pipeline.config._ + + type ENC = (Stageable[_ <: BaseType],Any) + + val intRfWrite = List[ENC]( + FPU_ENABLE -> True, + FPU_COMMIT -> False, + FPU_RSP -> True, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> False, + BYPASSABLE_MEMORY_STAGE -> False + ) + + val floatRfWrite = List[ENC]( + FPU_ENABLE -> True, + FPU_COMMIT -> True, + FPU_RSP -> False + ) + + val addSub = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.ADD + val mul = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.MUL + val fma = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.FMA + val div = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.DIV + val sqrt = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.SQRT + val fsgnj = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.SGNJ + val fminMax = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.MIN_MAX + val fmvWx = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.FMV_W_X :+ RS1_USE -> True + val fcvtI2f = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.I2F :+ RS1_USE -> True + val fcvtxx = floatRfWrite :+ FPU_OPCODE -> FpuOpcode.FCVT_X_X + + val fcmp = intRfWrite :+ FPU_OPCODE -> FpuOpcode.CMP + val fclass = intRfWrite :+ FPU_OPCODE -> FpuOpcode.FCLASS + val fmvXw = intRfWrite :+ FPU_OPCODE -> FpuOpcode.FMV_X_W + val fcvtF2i = intRfWrite :+ FPU_OPCODE -> FpuOpcode.F2I + + val fl = 
List[ENC]( + FPU_ENABLE -> True, + FPU_OPCODE -> FpuOpcode.LOAD, + FPU_COMMIT -> True, + FPU_RSP -> False + ) + + val fs = List[ENC]( + FPU_ENABLE -> True, + FPU_OPCODE -> FpuOpcode.STORE, + FPU_COMMIT -> False, + FPU_RSP -> True + ) + + + def arg(v : Int) = FPU_ARG -> B(v, 2 bits) + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(FPU_ENABLE, False) + + val f32 = FPU_FORMAT -> FpuFormat.FLOAT + val f64 = FPU_FORMAT -> FpuFormat.DOUBLE + + decoderService.add(List( + FADD_S -> (addSub :+ f32 :+ arg(0)), + FSUB_S -> (addSub :+ f32 :+ arg(1)), + FMADD_S -> (fma :+ f32 :+ arg(0)), + FMSUB_S -> (fma :+ f32 :+ arg(2)), + FNMADD_S -> (fma :+ f32 :+ arg(3)), + FNMSUB_S -> (fma :+ f32 :+ arg(1)), + FMUL_S -> (mul :+ f32 :+ arg(0)), + FDIV_S -> (div :+ f32 ), + FSQRT_S -> (sqrt :+ f32 ), + FLW -> (fl :+ f32 ), + FSW -> (fs :+ f32 ), + FCVT_S_WU -> (fcvtI2f :+ f32 :+ arg(0)), + FCVT_S_W -> (fcvtI2f :+ f32 :+ arg(1)), + FCVT_WU_S -> (fcvtF2i :+ f32 :+ arg(0)), + FCVT_W_S -> (fcvtF2i :+ f32 :+ arg(1)), + FCLASS_S -> (fclass :+ f32 ), + FLE_S -> (fcmp :+ f32 :+ arg(0)), + FEQ_S -> (fcmp :+ f32 :+ arg(2)), + FLT_S -> (fcmp :+ f32 :+ arg(1)), + FSGNJ_S -> (fsgnj :+ f32 :+ arg(0)), + FSGNJN_S -> (fsgnj :+ f32 :+ arg(1)), + FSGNJX_S -> (fsgnj :+ f32 :+ arg(2)), + FMIN_S -> (fminMax :+ f32 :+ arg(0)), + FMAX_S -> (fminMax :+ f32 :+ arg(1)), + FMV_X_W -> (fmvXw :+ f32 ), + FMV_W_X -> (fmvWx :+ f32 ) + )) + + if(p.withDouble){ + decoderService.add(List( + FADD_D -> (addSub :+ f64 :+ arg(0)), + FSUB_D -> (addSub :+ f64 :+ arg(1)), + FMADD_D -> (fma :+ f64 :+ arg(0)), + FMSUB_D -> (fma :+ f64 :+ arg(2)), + FNMADD_D -> (fma :+ f64 :+ arg(3)), + FNMSUB_D -> (fma :+ f64 :+ arg(1)), + FMUL_D -> (mul :+ f64 :+ arg(0)), + FDIV_D -> (div :+ f64 ), + FSQRT_D -> (sqrt :+ f64 ), + FLD -> (fl :+ f64 ), + FSD -> (fs :+ f64 ), + FCVT_D_WU -> (fcvtI2f :+ f64 :+ arg(0)), + FCVT_D_W -> (fcvtI2f :+ f64 :+ arg(1)), + FCVT_WU_D -> (fcvtF2i :+ f64 :+ arg(0)), + FCVT_W_D -> (fcvtF2i :+ f64 :+ arg(1)), + FCLASS_D -> (fclass :+ f64 ), + FLE_D -> (fcmp :+ f64 :+ arg(0)), + FEQ_D -> (fcmp :+ f64 :+ arg(2)), + FLT_D -> (fcmp :+ f64 :+ arg(1)), + FSGNJ_D -> (fsgnj :+ f64 :+ arg(0)), + FSGNJN_D -> (fsgnj :+ f64 :+ arg(1)), + FSGNJX_D -> (fsgnj :+ f64 :+ arg(2)), + FMIN_D -> (fminMax :+ f64 :+ arg(0)), + FMAX_D -> (fminMax :+ f64 :+ arg(1)), + FCVT_D_S -> (fcvtxx :+ f32), + FCVT_S_D -> (fcvtxx :+ f64) + )) + } + //TODO FMV_X_X + doubles + + port = FpuPort(p).addTag(Verilator.public) + if(externalFpu) master(port) + + val dBusEncoding = pipeline.service(classOf[DBusEncodingService]) + dBusEncoding.addLoadWordEncoding(FLW) + dBusEncoding.addStoreWordEncoding(FSW) + if(p.withDouble) { + dBusEncoding.addLoadWordEncoding(FLD) + dBusEncoding.addStoreWordEncoding(FSD) + } + +// exposeEncoding() + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + import Riscv._ + + val internal = (!externalFpu).generate (pipeline plug new Area{ + val fpu = FpuCore(1, p) + if(simHalt) { + val cmdHalt = in(Bool).setName("fpuCmdHalt").addAttribute(Verilator.public) + val commitHalt = in(Bool).setName("fpuCommitHalt").addAttribute(Verilator.public) + val rspHalt = in(Bool).setName("fpuRspHalt").addAttribute(Verilator.public) + fpu.io.port(0).cmd << port.cmd.haltWhen(cmdHalt) + fpu.io.port(0).commit << port.commit.haltWhen(commitHalt) + fpu.io.port(0).rsp.haltWhen(rspHalt) >> port.rsp + fpu.io.port(0).completion <> port.completion + } else { + fpu.io.port(0).cmd << port.cmd + 
fpu.io.port(0).commit << port.commit + fpu.io.port(0).rsp >> port.rsp + fpu.io.port(0).completion <> port.completion + } + }) + + + val csr = pipeline plug new Area{ + val pendings = Reg(UInt(6 bits)) init(0) + pendings := pendings + U(port.cmd.fire) - U(port.completion.fire) - U(port.rsp.fire) + + val hasPending = pendings =/= 0 + + val flags = Reg(FpuFlags()) + flags.NV init(False) setWhen(port.completion.fire && port.completion.flags.NV) + flags.DZ init(False) setWhen(port.completion.fire && port.completion.flags.DZ) + flags.OF init(False) setWhen(port.completion.fire && port.completion.flags.OF) + flags.UF init(False) setWhen(port.completion.fire && port.completion.flags.UF) + flags.NX init(False) setWhen(port.completion.fire && port.completion.flags.NX) + + val service = pipeline.service(classOf[CsrInterface]) + val rm = Reg(Bits(3 bits)) init(0) + + service.rw(CSR.FCSR, 5, rm) + service.rw(CSR.FCSR, 0, flags) + service.rw(CSR.FRM, 0, rm) + service.rw(CSR.FFLAGS, 0, flags) + + val csrActive = service.duringAny() + execute.arbitration.haltByOther setWhen(csrActive && hasPending) // pessimistic + + val fs = Reg(Bits(2 bits)) init(1) + val sd = fs === 3 + + when(stages.last.arbitration.isFiring && stages.last.input(FPU_ENABLE) && stages.last.input(FPU_OPCODE) =/= FpuOpcode.STORE){ + fs := 3 //DIRTY + } + + service.rw(CSR.SSTATUS, 13, fs) + service.rw(CSR.MSTATUS, 13, fs) + + service.r(CSR.SSTATUS, 31, sd) + service.r(CSR.MSTATUS, 31, sd) + } + + decode plug new Area{ + import decode._ + + //Maybe it might be better to not fork before fire to avoid RF stall on commits + val forked = Reg(Bool) setWhen(port.cmd.fire) clearWhen(!arbitration.isStuck) init(False) + + val hazard = csr.pendings.msb || csr.csrActive + + arbitration.haltItself setWhen(arbitration.isValid && input(FPU_ENABLE) && hazard) + arbitration.haltItself setWhen(port.cmd.isStall) + + val iRoundMode = input(INSTRUCTION)(funct3Range) + val roundMode = (input(INSTRUCTION)(funct3Range) === B"111") ? 
csr.rm | input(INSTRUCTION)(funct3Range) + + port.cmd.valid := arbitration.isValid && input(FPU_ENABLE) && !forked && !hazard + port.cmd.opcode := input(FPU_OPCODE) + port.cmd.arg := input(FPU_ARG) + port.cmd.rs1 := input(INSTRUCTION)(rs1Range).asUInt + port.cmd.rs2 := input(INSTRUCTION)(rs2Range).asUInt + port.cmd.rs3 := input(INSTRUCTION)(rs3Range).asUInt + port.cmd.rd := input(INSTRUCTION)(rdRange).asUInt + port.cmd.format := (if(p.withDouble) input(FPU_FORMAT) else FpuFormat.FLOAT()) + port.cmd.roundMode := roundMode.as(FpuRoundMode()) + + insert(FPU_FORKED) := forked || port.cmd.fire + + insert(FPU_COMMIT_SYNC) := List(FpuOpcode.LOAD, FpuOpcode.FMV_W_X, FpuOpcode.I2F).map(_ === input(FPU_OPCODE)).orR + insert(FPU_COMMIT_LOAD) := input(FPU_OPCODE) === FpuOpcode.LOAD + + if(serviceExist(classOf[IWake])) when(forked){ + service(classOf[IWake]).askWake() //Ensure that no WFI followed by a FPU stall the FPU interface for other CPU + } + } + + writeBack plug new Area{ //WARNING IF STAGE CHANGE, update the regression rsp capture filter for the golden model (top->VexRiscv->lastStageIsFiring) + import writeBack._ + + val dBusEncoding = pipeline.service(classOf[DBusEncodingService]) + val isRsp = input(FPU_FORKED) && input(FPU_RSP) + val isCommit = input(FPU_FORKED) && input(FPU_COMMIT) + val storeFormated = CombInit(port.rsp.value) + if(p.withDouble) when(!input(INSTRUCTION)(12)){ + storeFormated(32, 32 bits) := port.rsp.value(0, 32 bits) + } + //Manage $store and port.rsp + port.rsp.ready := False + when(isRsp){ + when(arbitration.isValid) { + dBusEncoding.bypassStore(storeFormated) + output(REGFILE_WRITE_DATA) := port.rsp.value(31 downto 0) + when(!arbitration.isStuck && !arbitration.isRemoved){ + csr.flags.NV setWhen(port.rsp.NV) + csr.flags.NX setWhen(port.rsp.NX) + } + } + when(!port.rsp.valid){ + arbitration.haltByOther := True + } elsewhen(!arbitration.haltItself){ + port.rsp.ready := True + } + } + + // Manage $load + val commit = Stream(FpuCommit(p)).addTag(Verilator.public) + commit.valid := isCommit && !arbitration.isStuck + commit.value(31 downto 0) := (input(FPU_COMMIT_LOAD) ? 
dBusEncoding.loadData()(31 downto 0) | input(RS1)) + if(p.withDouble) commit.value(63 downto 32) := dBusEncoding.loadData()(63 downto 32) + commit.write := arbitration.isValid && !arbitration.removeIt + commit.opcode := input(FPU_OPCODE) + commit.rd := input(INSTRUCTION)(rdRange).asUInt + + when(isCommit && !commit.ready){ + arbitration.haltByOther := True + } + + port.commit << commit.pipelined(s2m = true, m2s = false) + } + + pipeline.stages.dropRight(1).foreach(s => s.output(FPU_FORKED) clearWhen(s.arbitration.isStuck)) + + Component.current.afterElaboration{ + pipeline.stages.tail.foreach(_.input(FPU_FORKED).init(False)) + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/HaltOnExceptionPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/HaltOnExceptionPlugin.scala new file mode 100644 index 0000000..b104223 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/HaltOnExceptionPlugin.scala @@ -0,0 +1,44 @@ + +package vexriscv.plugin + +import spinal.core._ +import spinal.lib._ +import vexriscv._ +import vexriscv.Riscv._ + +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable + + +class HaltOnExceptionPlugin() extends Plugin[VexRiscv] with ExceptionService { + def xlen = 32 + + //Mannage ExceptionService calls + val exceptionPortsInfos = ArrayBuffer[ExceptionPortInfo]() + def exceptionCodeWidth = 4 + override def newExceptionPort(stage : Stage, priority : Int = 0, codeWidth : Int = 4) = { + val interface = Flow(ExceptionCause(4)) + exceptionPortsInfos += ExceptionPortInfo(interface,stage,priority, codeWidth) + interface + } + override def isExceptionPending(stage : Stage): Bool = False + + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + stages.head.insert(FORMAL_HALT) := False + stages.foreach(stage => { + val stagePorts = exceptionPortsInfos.filter(_.stage == stage) + if(stagePorts.nonEmpty) { + when(stagePorts.map(info => info.port.valid).orR) { + stage.output(FORMAL_HALT) := True + stage.arbitration.haltItself := True + } + for(stage <- stages){ + stage.output(FORMAL_HALT) clearWhen(stage.arbitration.isFlushed) + } + } + }) + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/HazardPessimisticPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/HazardPessimisticPlugin.scala new file mode 100644 index 0000000..5a8f4d3 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/HazardPessimisticPlugin.scala @@ -0,0 +1,24 @@ +package vexriscv.plugin + +import vexriscv._ +import spinal.core._ +import spinal.lib._ + + +class HazardPessimisticPlugin() extends Plugin[VexRiscv] { + import Riscv._ + + override def setup(pipeline: VexRiscv): Unit = { + import pipeline.config._ + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(HAS_SIDE_EFFECT, False) + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + val writesInPipeline = stages.dropWhile(_ != execute).map(s => s.arbitration.isValid && s.input(REGFILE_WRITE_VALID)) :+ RegNext(stages.last.arbitration.isValid && stages.last.input(REGFILE_WRITE_VALID)) + decode.arbitration.haltByOther.setWhen(decode.arbitration.isValid && writesInPipeline.orR) + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/HazardSimplePlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/HazardSimplePlugin.scala new file mode 100644 index 0000000..1b650e3 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/HazardSimplePlugin.scala @@ -0,0 +1,125 @@ +package 
vexriscv.plugin + +import vexriscv._ +import spinal.core._ +import spinal.lib._ + +trait HazardService{ + def hazardOnExecuteRS : Bool +} + +class HazardSimplePlugin(bypassExecute : Boolean = false, + bypassMemory: Boolean = false, + bypassWriteBack: Boolean = false, + bypassWriteBackBuffer : Boolean = false, + pessimisticUseSrc : Boolean = false, + pessimisticWriteRegFile : Boolean = false, + pessimisticAddressMatch : Boolean = false) extends Plugin[VexRiscv] with HazardService{ + import Riscv._ + + + def hazardOnExecuteRS = { + if(pipeline.service(classOf[RegFileService]).readStage() == pipeline.execute) pipeline.execute.arbitration.isStuckByOthers else False //TODO not so nice + } + + override def setup(pipeline: VexRiscv): Unit = { + import pipeline.config._ + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(HAS_SIDE_EFFECT, False) //TODO implement it in each plugin + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + pipeline plug new Area { + val src0Hazard = False + val src1Hazard = False + + val readStage = service(classOf[RegFileService]).readStage() + + def trackHazardWithStage(stage: Stage, bypassable: Boolean, runtimeBypassable: Stageable[Bool]): Unit = { + val runtimeBypassableValue = if (runtimeBypassable != null) stage.input(runtimeBypassable) else True + val addr0Match = if (pessimisticAddressMatch) True else stage.input(INSTRUCTION)(rdRange) === readStage.input(INSTRUCTION)(rs1Range) + val addr1Match = if (pessimisticAddressMatch) True else stage.input(INSTRUCTION)(rdRange) === readStage.input(INSTRUCTION)(rs2Range) + when(stage.arbitration.isValid && stage.input(REGFILE_WRITE_VALID)) { + if (bypassable) { + when(runtimeBypassableValue) { + when(addr0Match) { + readStage.input(RS1) := stage.output(REGFILE_WRITE_DATA) + } + when(addr1Match) { + readStage.input(RS2) := stage.output(REGFILE_WRITE_DATA) + } + } + } + } + when(stage.arbitration.isValid && (if (pessimisticWriteRegFile) True else stage.input(REGFILE_WRITE_VALID))) { + when((Bool(!bypassable) || !runtimeBypassableValue)) { + when(addr0Match) { + src0Hazard := True + } + when(addr1Match) { + src1Hazard := True + } + } + } + } + + + val writeBackWrites = Flow(cloneable(new Bundle { + val address = Bits(5 bits) + val data = Bits(32 bits) + })) + writeBackWrites.valid := stages.last.output(REGFILE_WRITE_VALID) && stages.last.arbitration.isFiring + writeBackWrites.address := stages.last.output(INSTRUCTION)(rdRange) + writeBackWrites.data := stages.last.output(REGFILE_WRITE_DATA) + val writeBackBuffer = writeBackWrites.stage() + + val addr0Match = if (pessimisticAddressMatch) True else writeBackBuffer.address === readStage.input(INSTRUCTION)(rs1Range) + val addr1Match = if (pessimisticAddressMatch) True else writeBackBuffer.address === readStage.input(INSTRUCTION)(rs2Range) + when(writeBackBuffer.valid) { + if (bypassWriteBackBuffer) { + when(addr0Match) { + readStage.input(RS1) := writeBackBuffer.data + } + when(addr1Match) { + readStage.input(RS2) := writeBackBuffer.data + } + } else { + when(addr0Match) { + src0Hazard := True + } + when(addr1Match) { + src1Hazard := True + } + } + } + + if (withWriteBackStage) trackHazardWithStage(writeBack, bypassWriteBack, null) + if (withMemoryStage) trackHazardWithStage(memory, bypassMemory, if (stages.last == memory) null else BYPASSABLE_MEMORY_STAGE) + if (readStage != execute) trackHazardWithStage(execute, bypassExecute, if (stages.last == execute) null else 
BYPASSABLE_EXECUTE_STAGE) + + + if (!pessimisticUseSrc) { + when(!readStage.input(RS1_USE)) { + src0Hazard := False + } + when(!readStage.input(RS2_USE)) { + src1Hazard := False + } + } + + when(readStage.arbitration.isValid && (src0Hazard || src1Hazard)) { + readStage.arbitration.haltByOther := True + } + } + } +} + + +class NoHazardPlugin extends Plugin[VexRiscv] with HazardService { + override def build(pipeline: VexRiscv): Unit = {} + + def hazardOnExecuteRS = False +}
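HazardSimplePlugin above resolves read-after-write conflicts per downstream stage: on an rd/rs address match it either forwards REGFILE_WRITE_DATA into the register-file read stage (when the stage is configured as bypassable and its runtime-bypassable flag is set) or raises srcXHazard, which halts the read stage; unused source operands never create a hazard unless pessimisticUseSrc is enabled. The plain-Scala sketch below restates that per-stage decision behaviourally; it ignores the pessimistic options and the write-back buffer, and the names are illustrative only.

// Behavioural sketch of HazardSimplePlugin.trackHazardWithStage; illustration only.
object HazardDecisionSketch extends App {
  // State of one downstream pipeline stage, reduced to what the hazard check needs.
  case class DownstreamInstr(valid: Boolean, writesRf: Boolean, rd: Int)

  sealed trait Decision
  case object NoConflict extends Decision
  case object Forward    extends Decision // readStage.input(RSx) := stage.output(REGFILE_WRITE_DATA)
  case object Stall      extends Decision // srcXHazard := True -> readStage haltByOther

  def resolve(rsAddr: Int, rsUsed: Boolean, stage: DownstreamInstr,
              bypassable: Boolean, runtimeBypassable: Boolean): Decision = {
    val addrMatch = stage.valid && stage.writesRf && stage.rd == rsAddr
    if (!addrMatch || !rsUsed) NoConflict
    else if (bypassable && runtimeBypassable) Forward
    else Stall
  }

  // Hypothetical example: a result not yet available in the memory stage
  // (runtimeBypassable = false, cf. BYPASSABLE_MEMORY_STAGE) must stall the reader.
  println(resolve(rsAddr = 5, rsUsed = true,
    DownstreamInstr(valid = true, writesRf = true, rd = 5),
    bypassable = true, runtimeBypassable = false)) // Stall
}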
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala new file mode 100644 index 0000000..035c5dc --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/IBusCachedPlugin.scala @@ -0,0 +1,290 @@ +package vexriscv.plugin + +import vexriscv.{plugin, _} +import vexriscv.ip._ +import spinal.core._ +import spinal.lib._ + +import scala.collection.mutable.ArrayBuffer + +//class IBusCachedPlugin(config : InstructionCacheConfig, memoryTranslatorPortConfig : Any = null) extends Plugin[VexRiscv] { +// var iBus : InstructionCacheMemBus = null +// override def build(pipeline: VexRiscv): Unit = ??? +//} + +case class TightlyCoupledBus() extends Bundle with IMasterSlave { + val enable = Bool() + val address = UInt(32 bits) + val data = Bits(32 bits) + + override def asMaster(): Unit = { + out(enable, address) + in(data) + } +} + +case class TightlyCoupledPortParameter(name : String, hit : UInt => Bool) +case class TightlyCoupledPort(p : TightlyCoupledPortParameter, var bus : TightlyCoupledBus) +class IBusCachedPlugin(resetVector : BigInt = 0x80000000l, + relaxedPcCalculation : Boolean = false, + prediction : BranchPrediction = NONE, + historyRamSizeLog2 : Int = 10, + compressedGen : Boolean = false, + keepPcPlus4 : Boolean = false, + val config : InstructionCacheConfig, + memoryTranslatorPortConfig : Any = null, + injectorStage : Boolean = false, + withoutInjectorStage : Boolean = false, + relaxPredictorAddress : Boolean = true, + predictionBuffer : Boolean = true) extends IBusFetcherImpl( + resetVector = resetVector, + keepPcPlus4 = keepPcPlus4, + decodePcGen = compressedGen, + compressedGen = compressedGen, + cmdToRspStageCount = (if(config.twoCycleCache) 2 else 1) + (if(relaxedPcCalculation) 1 else 0), + allowPcRegReusedForSecondStage = true, + injectorReadyCutGen = false, + prediction = prediction, + historyRamSizeLog2 = historyRamSizeLog2, + injectorStage = (!config.twoCycleCache && !withoutInjectorStage) || injectorStage, + relaxPredictorAddress = relaxPredictorAddress, + fetchRedoGen = true, + predictionBuffer = predictionBuffer) with VexRiscvRegressionArg{ + import config._ + + + + assert(isPow2(cacheSize)) + assert(!(memoryTranslatorPortConfig != null && config.cacheSize/config.wayCount > 4096), "When the I$ is used with MMU, each way can't be bigger than a page (4096 bytes)") + + + assert(!(withoutInjectorStage && injectorStage)) + + + override def getVexRiscvRegressionArgs(): Seq[String] = { + var args = List[String]() + args :+= "IBUS=CACHED" + args :+= s"IBUS_DATA_WIDTH=$memDataWidth" + args :+= s"COMPRESSED=${if(compressedGen) "yes" else "no"}" + args + } + + var iBus : InstructionCacheMemBus = null + var mmuBus : MemoryTranslatorBus = null + var privilegeService : PrivilegeService = null + var decodeExceptionPort : Flow[ExceptionCause] = null + val tightlyCoupledPorts = ArrayBuffer[TightlyCoupledPort]() + def tightlyGen = tightlyCoupledPorts.nonEmpty + + def newTightlyCoupledPort(p : TightlyCoupledPortParameter) = { + val port = TightlyCoupledPort(p, null) + tightlyCoupledPorts += port + this + } + + + object FLUSH_ALL extends Stageable(Bool) + object IBUS_ACCESS_ERROR extends Stageable(Bool) + object IBUS_MMU_MISS extends Stageable(Bool) + object IBUS_ILLEGAL_ACCESS extends Stageable(Bool) + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + + super.setup(pipeline) + + val decoderService = 
pipeline.service(classOf[DecoderService]) + decoderService.addDefault(FLUSH_ALL, False) + decoderService.add(FENCE_I, List( + FLUSH_ALL -> True + )) + + if(catchSomething) { + val exceptionService = pipeline.service(classOf[ExceptionService]) + decodeExceptionPort = exceptionService.newExceptionPort(pipeline.decode,1) + } + + if(pipeline.serviceExist(classOf[MemoryTranslator])) + mmuBus = pipeline.service(classOf[MemoryTranslator]).newTranslationPort(MemoryTranslatorPort.PRIORITY_INSTRUCTION, memoryTranslatorPortConfig) + + privilegeService = pipeline.serviceElse(classOf[PrivilegeService], PrivilegeServiceDefault()) + + if(pipeline.serviceExist(classOf[ReportService])){ + val report = pipeline.service(classOf[ReportService]) + report.add("iBus" -> { + val e = new BusReport() + val c = new CacheReport() + e.kind = "cached" + e.flushInstructions.add(0x100F) //FENCE.I + e.flushInstructions.add(0x13) + e.flushInstructions.add(0x13) + e.flushInstructions.add(0x13) + + e.info = c + c.size = cacheSize + c.bytePerLine = bytePerLine + + e + }) + } + } + + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + pipeline plug new FetchArea(pipeline) { + val cache = new InstructionCache(IBusCachedPlugin.this.config.copy(bypassGen = tightlyGen), if(mmuBus != null) mmuBus.p else MemoryTranslatorBusParameter(0,0)) + iBus = master(new InstructionCacheMemBus(IBusCachedPlugin.this.config)).setName("iBus") + iBus <> cache.io.mem + iBus.cmd.address.allowOverride := cache.io.mem.cmd.address + + //Memory bandwidth counter + val rspCounter = Reg(UInt(32 bits)) init(0) + when(iBus.rsp.valid){ + rspCounter := rspCounter + 1 + } + + val stageOffset = if(relaxedPcCalculation) 1 else 0 + def stages = iBusRsp.stages.drop(stageOffset) + + tightlyCoupledPorts.foreach(p => p.bus = master(TightlyCoupledBus()).setName(p.p.name)) + + val s0 = new Area { + //address decoding + val tightlyCoupledHits = Vec(tightlyCoupledPorts.map(_.p.hit(stages(0).input.payload))) + val tightlyCoupledHit = tightlyCoupledHits.orR + + for((port, hit) <- (tightlyCoupledPorts, tightlyCoupledHits).zipped){ + port.bus.enable := stages(0).input.fire && hit + port.bus.address := stages(0).input.payload(31 downto 2) @@ U"00" + } + + //Connect prefetch cache side + cache.io.cpu.prefetch.isValid := stages(0).input.valid && !tightlyCoupledHit + cache.io.cpu.prefetch.pc := stages(0).input.payload + stages(0).halt setWhen (cache.io.cpu.prefetch.haltIt) + + if(mmuBus != null && mmuBus.p.latency == 1) { + stages(0).halt setWhen(mmuBus.busy) + mmuBus.cmd(0).isValid := cache.io.cpu.prefetch.isValid + mmuBus.cmd(0).isStuck := !stages(0).input.ready + mmuBus.cmd(0).virtualAddress := cache.io.cpu.prefetch.pc + mmuBus.cmd(0).bypassTranslation := False + } + } + + + val s1 = new Area { + val tightlyCoupledHits = RegNextWhen(s0.tightlyCoupledHits, stages(1).input.ready) + val tightlyCoupledHit = RegNextWhen(s0.tightlyCoupledHit, stages(1).input.ready) + + if(tightlyGen) cache.io.cpu.fetch.dataBypassValid := tightlyCoupledHit + if(tightlyGen) cache.io.cpu.fetch.dataBypass := MuxOH(tightlyCoupledHits, tightlyCoupledPorts.map(e => CombInit(e.bus.data))) + + //Connect fetch cache side + cache.io.cpu.fetch.isValid := stages(1).input.valid && !tightlyCoupledHit + cache.io.cpu.fetch.isStuck := !stages(1).input.ready + cache.io.cpu.fetch.pc := stages(1).input.payload + + if(mmuBus != null) { + mmuBus.cmd.last.isValid := cache.io.cpu.fetch.isValid + mmuBus.cmd.last.isStuck := !stages(1).input.ready + 
mmuBus.cmd.last.virtualAddress := cache.io.cpu.fetch.pc + mmuBus.cmd.last.bypassTranslation := False + mmuBus.end := stages(1).input.ready || externalFlush + if (mmuBus.p.latency == 0) stages(1).halt setWhen (mmuBus.busy) + } + + + if (!twoCycleCache) { + cache.io.cpu.fetch.isUser := privilegeService.isUser() + } + } + + val s2 = twoCycleCache generate new Area { + val tightlyCoupledHit = RegNextWhen(s1.tightlyCoupledHit, stages(2).input.ready) + cache.io.cpu.decode.isValid := stages(2).input.valid && !tightlyCoupledHit + cache.io.cpu.decode.isStuck := !stages(2).input.ready + cache.io.cpu.decode.pc := stages(2).input.payload + cache.io.cpu.decode.isUser := privilegeService.isUser() + + if ((!twoCycleRam || wayCount == 1) && !compressedGen && !injectorStage) { + decode.insert(INSTRUCTION_ANTICIPATED) := Mux(decode.arbitration.isStuck, decode.input(INSTRUCTION), cache.io.cpu.fetch.data) + } + } + + val rsp = new Area { + val iBusRspOutputHalt = False + + val cacheRsp = if (twoCycleCache) cache.io.cpu.decode else cache.io.cpu.fetch + val cacheRspArbitration = stages(if (twoCycleCache) 2 else 1) + var issueDetected = False + val redoFetch = False + + + //Refill / redo + assert(decodePcGen == compressedGen) + cache.io.cpu.fill.valid := redoFetch && !cacheRsp.mmuRefilling + cache.io.cpu.fill.payload := cacheRsp.physicalAddress + + + if (catchSomething) { + decodeExceptionPort.valid := False + decodeExceptionPort.code.assignDontCare() + decodeExceptionPort.badAddr := cacheRsp.pc(31 downto 2) @@ U"00" + } + + when(cacheRsp.isValid && cacheRsp.mmuRefilling && !issueDetected) { + issueDetected \= True + redoFetch := True + } + + if(catchIllegalAccess) when(cacheRsp.isValid && cacheRsp.mmuException && !issueDetected) { + issueDetected \= True + decodeExceptionPort.valid := iBusRsp.readyForError + decodeExceptionPort.code := 12 + } + + when(cacheRsp.isValid && cacheRsp.cacheMiss && !issueDetected) { + issueDetected \= True + cache.io.cpu.fill.valid := True + redoFetch := True + } + + if(catchAccessFault) when(cacheRsp.isValid && cacheRsp.error && !issueDetected) { + issueDetected \= True + decodeExceptionPort.valid := iBusRsp.readyForError + decodeExceptionPort.code := 1 + } + + when(redoFetch) { + iBusRsp.redoFetch := True + } + + + cacheRspArbitration.halt setWhen (issueDetected || iBusRspOutputHalt) + iBusRsp.output.valid := cacheRspArbitration.output.valid + cacheRspArbitration.output.ready := iBusRsp.output.ready + iBusRsp.output.rsp.inst := cacheRsp.data + iBusRsp.output.pc := cacheRspArbitration.output.payload + } + + if (mmuBus != null) { + cache.io.cpu.fetch.mmuRsp <> mmuBus.rsp + } else { + cache.io.cpu.fetch.mmuRsp.physicalAddress := cache.io.cpu.fetch.pc + cache.io.cpu.fetch.mmuRsp.allowExecute := True + cache.io.cpu.fetch.mmuRsp.allowRead := True + cache.io.cpu.fetch.mmuRsp.allowWrite := True + cache.io.cpu.fetch.mmuRsp.isIoAccess := False + cache.io.cpu.fetch.mmuRsp.exception := False + cache.io.cpu.fetch.mmuRsp.refilling := False + } + + val flushStage = decode + cache.io.flush := flushStage.arbitration.isValid && flushStage.input(FLUSH_ALL) + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala new file mode 100644 index 0000000..1bb02bf --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/IBusSimplePlugin.scala @@ -0,0 +1,418 @@ +package vexriscv.plugin + +import vexriscv._ +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.amba3.ahblite.{AhbLite3, AhbLite3Config, 
AhbLite3Master} +import spinal.lib.bus.amba4.axi._ +import spinal.lib.bus.avalon.{AvalonMM, AvalonMMConfig} +import spinal.lib.bus.bmb.{Bmb, BmbParameter} +import spinal.lib.bus.wishbone.{Wishbone, WishboneConfig} +import spinal.lib.bus.simple._ +import vexriscv.Riscv.{FENCE, FENCE_I} + + +case class IBusSimpleCmd() extends Bundle{ + val pc = UInt(32 bits) +} + +case class IBusSimpleRsp() extends Bundle with IMasterSlave{ + val error = Bool + val inst = Bits(32 bits) + + override def asMaster(): Unit = { + out(error,inst) + } +} + + +object IBusSimpleBus{ + def getAxi4Config() = Axi4Config( + addressWidth = 32, + dataWidth = 32, + useId = false, + useRegion = false, + useBurst = false, + useLock = false, + useQos = false, + useLen = false, + useResp = true, + useSize = false + ) + + def getAvalonConfig() = AvalonMMConfig.pipelined( + addressWidth = 32, + dataWidth = 32 + ).getReadOnlyConfig.copy( + useResponse = true, + maximumPendingReadTransactions = 8 + ) + + def getWishboneConfig() = WishboneConfig( + addressWidth = 30, + dataWidth = 32, + selWidth = 4, + useSTALL = false, + useLOCK = false, + useERR = true, + useRTY = false, + tgaWidth = 0, + tgcWidth = 0, + tgdWidth = 0, + useBTE = true, + useCTI = true + ) + + def getPipelinedMemoryBusConfig() = PipelinedMemoryBusConfig( + addressWidth = 32, + dataWidth = 32 + ) + + + def getAhbLite3Config() = AhbLite3Config( + addressWidth = 32, + dataWidth = 32 + ) + + def getBmbParameter(plugin : IBusSimplePlugin = null) = BmbParameter( + addressWidth = 32, + dataWidth = 32, + lengthWidth = 2, + sourceWidth = 0, + contextWidth = 0, + canRead = true, + canWrite = false, + alignment = BmbParameter.BurstAlignement.LENGTH, + maximumPendingTransaction = if(plugin != null) plugin.pendingMax else Int.MaxValue + ) +} + + +case class IBusSimpleBus(plugin: IBusSimplePlugin) extends Bundle with IMasterSlave { + var cmd = Stream(IBusSimpleCmd()) + var rsp = Flow(IBusSimpleRsp()) + + override def asMaster(): Unit = { + master(cmd) + slave(rsp) + } + + + def cmdS2mPipe() : IBusSimpleBus = { + val s = IBusSimpleBus(plugin) + s.cmd << this.cmd.s2mPipe() + this.rsp << s.rsp + s + } + + + def toAxi4ReadOnly(): Axi4ReadOnly = { + assert(plugin.cmdForkPersistence) + val axi = Axi4ReadOnly(IBusSimpleBus.getAxi4Config()) + + axi.ar.valid := cmd.valid + axi.ar.addr := cmd.pc(axi.readCmd.addr.getWidth -1 downto 2) @@ U"00" + axi.ar.prot := "110" + axi.ar.cache := "1111" + cmd.ready := axi.ar.ready + + + rsp.valid := axi.r.valid + rsp.inst := axi.r.data + rsp.error := !axi.r.isOKAY() + axi.r.ready := True + + axi + } + + def toAvalon(): AvalonMM = { + assert(plugin.cmdForkPersistence) + val avalonConfig = IBusSimpleBus.getAvalonConfig() + val mm = AvalonMM(avalonConfig) + + mm.read := cmd.valid + mm.address := (cmd.pc >> 2) @@ U"00" + cmd.ready := mm.waitRequestn + + rsp.valid := mm.readDataValid + rsp.inst := mm.readData + rsp.error := mm.response =/= AvalonMM.Response.OKAY + + mm + } + + def toWishbone(): Wishbone = { + val wishboneConfig = IBusSimpleBus.getWishboneConfig() + val bus = Wishbone(wishboneConfig) + val cmdPipe = cmd.stage() + + bus.ADR := (cmdPipe.pc >> 2) + bus.CTI := B"000" + bus.BTE := "00" + bus.SEL := "1111" + bus.WE := False + bus.DAT_MOSI.assignDontCare() + bus.CYC := cmdPipe.valid + bus.STB := cmdPipe.valid + + + cmdPipe.ready := cmdPipe.valid && bus.ACK + rsp.valid := bus.CYC && bus.ACK + rsp.inst := bus.DAT_MISO + rsp.error := False //TODO + bus + } + + def toPipelinedMemoryBus(): PipelinedMemoryBus = { + val pipelinedMemoryBusConfig = 
IBusSimpleBus.getPipelinedMemoryBusConfig() + val bus = PipelinedMemoryBus(pipelinedMemoryBusConfig) + bus.cmd.arbitrationFrom(cmd) + bus.cmd.address := cmd.pc.resized + bus.cmd.write := False + bus.cmd.mask.assignDontCare() + bus.cmd.data.assignDontCare() + rsp.valid := bus.rsp.valid + rsp.inst := bus.rsp.payload.data + rsp.error := False + bus + } + + + //cmdForkPersistence need to bet set + def toAhbLite3Master(): AhbLite3Master = { + assert(plugin.cmdForkPersistence) + val bus = AhbLite3Master(IBusSimpleBus.getAhbLite3Config()) + bus.HADDR := this.cmd.pc + bus.HWRITE := False + bus.HSIZE := 2 + bus.HBURST := 0 + bus.HPROT := "1110" + bus.HTRANS := this.cmd.valid ## B"0" + bus.HMASTLOCK := False + bus.HWDATA.assignDontCare() + this.cmd.ready := bus.HREADY + + val pending = RegInit(False) clearWhen(bus.HREADY) setWhen(this.cmd.fire) + this.rsp.valid := bus.HREADY && pending + this.rsp.inst := bus.HRDATA + this.rsp.error := bus.HRESP + bus + } + + def toBmb() : Bmb = { + val pipelinedMemoryBusConfig = IBusSimpleBus.getBmbParameter(plugin) + val bus = Bmb(pipelinedMemoryBusConfig) + bus.cmd.arbitrationFrom(cmd) + bus.cmd.opcode := Bmb.Cmd.Opcode.READ + bus.cmd.address := cmd.pc.resized + bus.cmd.length := 3 + bus.cmd.last := True + rsp.valid := bus.rsp.valid + rsp.inst := bus.rsp.data + rsp.error := bus.rsp.isError + bus.rsp.ready := True + bus + } +} + + + + + + +class IBusSimplePlugin( resetVector : BigInt, + val cmdForkOnSecondStage : Boolean, + val cmdForkPersistence : Boolean, + val catchAccessFault : Boolean = false, + prediction : BranchPrediction = NONE, + historyRamSizeLog2 : Int = 10, + keepPcPlus4 : Boolean = false, + compressedGen : Boolean = false, + val busLatencyMin : Int = 1, + val pendingMax : Int = 7, + injectorStage : Boolean = true, + val rspHoldValue : Boolean = false, + val singleInstructionPipeline : Boolean = false, + val memoryTranslatorPortConfig : Any = null, + relaxPredictorAddress : Boolean = true, + predictionBuffer : Boolean = true, + bigEndian : Boolean = false, + vecRspBuffer : Boolean = false + ) extends IBusFetcherImpl( + resetVector = resetVector, + keepPcPlus4 = keepPcPlus4, + decodePcGen = compressedGen, + compressedGen = compressedGen, + cmdToRspStageCount = busLatencyMin + (if(cmdForkOnSecondStage) 1 else 0), + allowPcRegReusedForSecondStage = !(cmdForkOnSecondStage && cmdForkPersistence), + injectorReadyCutGen = false, + prediction = prediction, + historyRamSizeLog2 = historyRamSizeLog2, + injectorStage = injectorStage, + relaxPredictorAddress = relaxPredictorAddress, + fetchRedoGen = memoryTranslatorPortConfig != null, + predictionBuffer = predictionBuffer){ + + var iBus : IBusSimpleBus = null + var decodeExceptionPort : Flow[ExceptionCause] = null + val catchSomething = memoryTranslatorPortConfig != null || catchAccessFault + var mmuBus : MemoryTranslatorBus = null + +// if(rspHoldValue) assert(busLatencyMin <= 1) + assert(!rspHoldValue, "rspHoldValue not supported yet") + assert(!singleInstructionPipeline) + + override def setup(pipeline: VexRiscv): Unit = { + super.setup(pipeline) + iBus = master(IBusSimpleBus(this)).setName("iBus") + + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.add(FENCE_I, Nil) + + if(catchSomething) { + decodeExceptionPort = pipeline.service(classOf[ExceptionService]).newExceptionPort(pipeline.decode,1) + } + + if(memoryTranslatorPortConfig != null) { + mmuBus = pipeline.service(classOf[MemoryTranslator]).newTranslationPort(MemoryTranslatorPort.PRIORITY_INSTRUCTION, 
memoryTranslatorPortConfig) + } + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + pipeline plug new FetchArea(pipeline) { + var cmd = Stream(IBusSimpleCmd()) + val cmdWithS2mPipe = cmdForkPersistence && (!cmdForkOnSecondStage || mmuBus != null) + iBus.cmd << (if(cmdWithS2mPipe) cmd.s2mPipe() else cmd) + + //Avoid sending to many iBus cmd + val pending = new Area{ + val inc, dec = Bool() + val value = Reg(UInt(log2Up(pendingMax + 1) bits)) init (0) + val next = value + U(inc) - U(dec) + value := next + } + + val secondStagePersistence = cmdForkPersistence && cmdForkOnSecondStage && !cmdWithS2mPipe + def cmdForkStage = if(!secondStagePersistence) iBusRsp.stages(if(cmdForkOnSecondStage) 1 else 0) else iBusRsp.stages(1) + + val cmdFork = if(!secondStagePersistence) new Area { + //This implementation keep the cmd on the bus until it's executed or the the pipeline is flushed + def stage = cmdForkStage + val canEmit = stage.output.ready && pending.value =/= pendingMax + stage.halt setWhen(stage.input.valid && (!canEmit || !cmd.ready)) + cmd.valid := stage.input.valid && canEmit + pending.inc := cmd.fire + } else new Area{ + //This implementation keep the cmd on the bus until it's executed, even if the pipeline is flushed + def stage = cmdForkStage + val pendingFull = pending.value === pendingMax + val enterTheMarket = Bool() + val cmdKeep = RegInit(False) setWhen(enterTheMarket) clearWhen(cmd.ready) + val cmdFired = RegInit(False) setWhen(cmd.fire) clearWhen(stage.input.ready) + enterTheMarket := stage.input.valid && !pendingFull && !cmdFired && !cmdKeep +// stage.halt setWhen(cmd.isStall || (pendingFull && !cmdFired)) //(cmd.isStall) + stage.halt setWhen(pendingFull && !cmdFired && !cmdKeep) + stage.halt setWhen(!cmd.ready && !cmdFired) + cmd.valid := enterTheMarket || cmdKeep + pending.inc := enterTheMarket + } + + val mmu = (mmuBus != null) generate new Area { + mmuBus.cmd.last.isValid := cmdForkStage.input.valid + mmuBus.cmd.last.virtualAddress := cmdForkStage.input.payload + mmuBus.cmd.last.bypassTranslation := False + mmuBus.end := cmdForkStage.output.fire || externalFlush + + cmd.pc := mmuBus.rsp.physicalAddress(31 downto 2) @@ U"00" + + //do not emit memory request if MMU had issues + when(cmdForkStage.input.valid) { + when(mmuBus.rsp.refilling) { + cmdForkStage.halt := True + cmd.valid := False + } + when(mmuBus.rsp.exception) { + cmdForkStage.halt := False + cmd.valid := False + } + } + + val joinCtx = stageXToIBusRsp(cmdForkStage, mmuBus.rsp) + } + + val mmuLess = (mmuBus == null) generate new Area{ + cmd.pc := cmdForkStage.input.payload(31 downto 2) @@ U"00" + } + + val rspJoin = new Area { + import iBusRsp._ + //Manage flush for iBus transactions in flight + val rspBuffer = new Area { + val output = Stream(IBusSimpleRsp()) + val c = new StreamFifoLowLatency(IBusSimpleRsp(), busLatencyMin + (if(cmdForkOnSecondStage && cmdForkPersistence) 1 else 0), useVec = vecRspBuffer) + val discardCounter = Reg(UInt(log2Up(pendingMax + 1) bits)) init (0) + discardCounter := discardCounter - (c.io.pop.valid && discardCounter =/= 0).asUInt + when(iBusRsp.flush) { + discardCounter := (if(cmdForkOnSecondStage) pending.next else pending.value - U(pending.dec)) + } + + c.io.push << iBus.rsp.toStream +// if(compressedGen) c.io.flush setWhen(decompressor.consumeCurrent) +// if(!compressedGen && isDrivingDecode(IBUS_RSP)) c.io.flush setWhen(decode.arbitration.flushNext && iBusRsp.output.ready) + val flush = discardCounter =/= 0 || iBusRsp.flush + 
output.valid := c.io.pop.valid && discardCounter === 0 + output.payload := c.io.pop.payload + c.io.pop.ready := output.ready || flush + + pending.dec := c.io.pop.fire // iBus.rsp.valid && flush || c.io.pop.valid && output.ready instead to avoid unecessary dependancies ? + } + + val fetchRsp = FetchRsp() + fetchRsp.pc := stages.last.output.payload + fetchRsp.rsp := rspBuffer.output.payload + fetchRsp.rsp.error.clearWhen(!rspBuffer.output.valid) //Avoid interference with instruction injection from the debug plugin + if(bigEndian){ + // instructions are stored in little endian byteorder + fetchRsp.rsp.inst.allowOverride + fetchRsp.rsp.inst := EndiannessSwap(rspBuffer.output.payload.inst) + } + + val join = Stream(FetchRsp()) + val exceptionDetected = False + join.valid := stages.last.output.valid && rspBuffer.output.valid + join.payload := fetchRsp + stages.last.output.ready := stages.last.output.valid ? join.fire | join.ready + rspBuffer.output.ready := join.fire + output << join.haltWhen(exceptionDetected) + + if(memoryTranslatorPortConfig != null){ + when(stages.last.input.valid && mmu.joinCtx.refilling) { + iBusRsp.redoFetch := True + } + } + + + if(catchSomething){ + decodeExceptionPort.code.assignDontCare() + decodeExceptionPort.badAddr := join.pc(31 downto 2) @@ U"00" + + if(catchAccessFault) when(join.valid && join.rsp.error){ + decodeExceptionPort.code := 1 + exceptionDetected := True + } + if(memoryTranslatorPortConfig != null) { + val privilegeService = pipeline.serviceElse(classOf[PrivilegeService], PrivilegeServiceDefault()) + when(stages.last.input.valid && !mmu.joinCtx.refilling && (mmu.joinCtx.exception || !mmu.joinCtx.allowExecute)){ + decodeExceptionPort.code := 12 + exceptionDetected := True + } + } + decodeExceptionPort.valid := exceptionDetected && iBusRsp.readyForError + } + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/IntAluPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/IntAluPlugin.scala new file mode 100644 index 0000000..0520c2f --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/IntAluPlugin.scala @@ -0,0 +1,100 @@ +package vexriscv.plugin + +import vexriscv._ +import spinal.core._ +object IntAluPlugin{ + object AluBitwiseCtrlEnum extends SpinalEnum(binarySequential){ + val XOR, OR, AND = newElement() + } + object AluCtrlEnum extends SpinalEnum(binarySequential){ + val ADD_SUB, SLT_SLTU, BITWISE = newElement() + } + + object ALU_BITWISE_CTRL extends Stageable(AluBitwiseCtrlEnum()) + object ALU_CTRL extends Stageable(AluCtrlEnum()) +} + +class IntAluPlugin extends Plugin[VexRiscv]{ + import IntAluPlugin._ + + + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + + val immediateActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.IMI, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> True, + BYPASSABLE_MEMORY_STAGE -> True, + RS1_USE -> True + ) + + val nonImmediateActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.RS, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> True, + BYPASSABLE_MEMORY_STAGE -> True, + RS1_USE -> True, + RS2_USE -> True + ) + + val otherAction = List[(Stageable[_ <: BaseType],Any)]( + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> True, + BYPASSABLE_MEMORY_STAGE -> True + ) + + + + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.add(List( + ADD -> (nonImmediateActions ++ List(ALU_CTRL -> 
AluCtrlEnum.ADD_SUB, SRC_USE_SUB_LESS -> False)), + SUB -> (nonImmediateActions ++ List(ALU_CTRL -> AluCtrlEnum.ADD_SUB, SRC_USE_SUB_LESS -> True)), + SLT -> (nonImmediateActions ++ List(ALU_CTRL -> AluCtrlEnum.SLT_SLTU, SRC_USE_SUB_LESS -> True, SRC_LESS_UNSIGNED -> False)), + SLTU -> (nonImmediateActions ++ List(ALU_CTRL -> AluCtrlEnum.SLT_SLTU, SRC_USE_SUB_LESS -> True, SRC_LESS_UNSIGNED -> True)), + XOR -> (nonImmediateActions ++ List(ALU_CTRL -> AluCtrlEnum.BITWISE, ALU_BITWISE_CTRL -> AluBitwiseCtrlEnum.XOR)), + OR -> (nonImmediateActions ++ List(ALU_CTRL -> AluCtrlEnum.BITWISE, ALU_BITWISE_CTRL -> AluBitwiseCtrlEnum.OR)), + AND -> (nonImmediateActions ++ List(ALU_CTRL -> AluCtrlEnum.BITWISE, ALU_BITWISE_CTRL -> AluBitwiseCtrlEnum.AND)) + )) + + decoderService.add(List( + ADDI -> (immediateActions ++ List(ALU_CTRL -> AluCtrlEnum.ADD_SUB, SRC_USE_SUB_LESS -> False)), + SLTI -> (immediateActions ++ List(ALU_CTRL -> AluCtrlEnum.SLT_SLTU, SRC_USE_SUB_LESS -> True, SRC_LESS_UNSIGNED -> False)), + SLTIU -> (immediateActions ++ List(ALU_CTRL -> AluCtrlEnum.SLT_SLTU, SRC_USE_SUB_LESS -> True, SRC_LESS_UNSIGNED -> True)), + XORI -> (immediateActions ++ List(ALU_CTRL -> AluCtrlEnum.BITWISE, ALU_BITWISE_CTRL -> AluBitwiseCtrlEnum.XOR)), + ORI -> (immediateActions ++ List(ALU_CTRL -> AluCtrlEnum.BITWISE, ALU_BITWISE_CTRL -> AluBitwiseCtrlEnum.OR)), + ANDI -> (immediateActions ++ List(ALU_CTRL -> AluCtrlEnum.BITWISE, ALU_BITWISE_CTRL -> AluBitwiseCtrlEnum.AND)) + )) + + decoderService.add(List( + LUI -> (otherAction ++ List(ALU_CTRL -> AluCtrlEnum.ADD_SUB, SRC1_CTRL -> Src1CtrlEnum.IMU, SRC_USE_SUB_LESS -> False, SRC_ADD_ZERO -> True)), + AUIPC -> (otherAction ++ List(ALU_CTRL -> AluCtrlEnum.ADD_SUB, SRC_USE_SUB_LESS -> False, SRC1_CTRL -> Src1CtrlEnum.IMU, SRC2_CTRL -> Src2CtrlEnum.PC)) + )) + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + + execute plug new Area{ + import execute._ + + val bitwise = input(ALU_BITWISE_CTRL).mux( + AluBitwiseCtrlEnum.AND -> (input(SRC1) & input(SRC2)), + AluBitwiseCtrlEnum.OR -> (input(SRC1) | input(SRC2)), + AluBitwiseCtrlEnum.XOR -> (input(SRC1) ^ input(SRC2)) + ) + + // mux results + insert(REGFILE_WRITE_DATA) := input(ALU_CTRL).mux( + AluCtrlEnum.BITWISE -> bitwise, + AluCtrlEnum.SLT_SLTU -> input(SRC_LESS).asBits(32 bit), + AluCtrlEnum.ADD_SUB -> input(SRC_ADD_SUB) + ) + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala new file mode 100644 index 0000000..081b11d --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/MemoryTranslatorPlugin.scala @@ -0,0 +1,159 @@ +package vexriscv.plugin + +import vexriscv.{VexRiscv, _} +import spinal.core._ +import spinal.lib._ + +import scala.collection.mutable.ArrayBuffer + +object MemoryTranslatorPort{ + val PRIORITY_DATA = 1 + val PRIORITY_INSTRUCTION = 0 +} +case class MemoryTranslatorPort(bus : MemoryTranslatorBus, priority : Int, args : MemoryTranslatorPortConfig/*, exceptionBus: Flow[ExceptionCause]*/) + +case class MemoryTranslatorPortConfig(portTlbSize : Int) + +class MemoryTranslatorPlugin(tlbSize : Int, + virtualRange : UInt => Bool, + ioRange : UInt => Bool) extends Plugin[VexRiscv] with MemoryTranslator { + assert(isPow2(tlbSize)) + + val portsInfo = ArrayBuffer[MemoryTranslatorPort]() + + override def newTranslationPort(priority : Int,args : Any): MemoryTranslatorBus = { + val config = args.asInstanceOf[MemoryTranslatorPortConfig] + val 
port = MemoryTranslatorPort(MemoryTranslatorBus(MemoryTranslatorBusParameter(wayCount = 0)),priority, config/*,exceptionBus*/) + portsInfo += port + port.bus + } + + object IS_TLB extends Stageable(Bool) + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + def TLBW0 = M"0000000----------111-----0001111" + def TLBW1 = M"0000001----------111-----0001111" + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(IS_TLB, False) + decoderService.add(TLBW0, List(IS_TLB -> True, RS1_USE -> True, SRC1_CTRL -> Src1CtrlEnum.RS)) + decoderService.add(TLBW1, List(IS_TLB -> True, RS1_USE -> True, RS2_USE -> True, SRC1_CTRL -> Src1CtrlEnum.RS)) + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + import Riscv._ + + //Sorted by priority + val sortedPortsInfo = portsInfo.sortWith((a,b) => a.priority > b.priority) + + case class CacheLine() extends Bundle { + val valid = Bool + val virtualAddress = UInt(20 bits) + val physicalAddress = UInt(20 bits) + val allowRead, allowWrite, allowExecute, allowUser = Bool + + def init = { + valid init (False) + this + } + } + + val core = pipeline plug new Area { + val shared = new Area { + val cache = Mem(CacheLine(), tlbSize) + var free = True + val readAddr = cache.addressType().assignDontCare() + val readData = RegNext(cache.readSync(readAddr)) + } + + val ports = for ((port, portId) <- sortedPortsInfo.zipWithIndex) yield new Area { + val cache = Vec(Reg(CacheLine()) init, port.args.portTlbSize) + val cacheHits = cache.map(line => line.valid && line.virtualAddress === port.bus.cmd.last.virtualAddress(31 downto 12)) + val cacheHit = cacheHits.asBits.orR + val cacheLine = MuxOH(cacheHits, cache) + val isInMmuRange = virtualRange(port.bus.cmd.last.virtualAddress) && !port.bus.cmd.last.bypassTranslation + + val sharedMiss = RegInit(False) + val sharedIterator = Reg(UInt(log2Up(tlbSize + 1) bits)) + val sharedAccessed = RegInit(B"00") + val entryToReplace = Counter(port.args.portTlbSize) + + val sharedAccessAsked = RegNext(port.bus.cmd.last.isValid && !cacheHit && sharedIterator < tlbSize && isInMmuRange) + val sharedAccessGranted = sharedAccessAsked && shared.free + when(sharedAccessGranted) { + shared.readAddr := sharedIterator.resized + sharedIterator := sharedIterator + 1 + } + sharedAccessed := (sharedAccessed ## sharedAccessGranted).resized + when(sharedAccessAsked){ + shared.free \= False + } + + when(sharedAccessed.msb){ + when(shared.readData.virtualAddress === port.bus.cmd.last.virtualAddress(31 downto 12)){ + cache(entryToReplace) := shared.readData + entryToReplace.increment() + } + } + + sharedMiss.setWhen(sharedIterator >= tlbSize && sharedAccessed === B"00") + when(port.bus.end){ + sharedIterator := 0 + sharedMiss.clear() + sharedAccessAsked.clear() + sharedAccessed := 0 + } + + + when(isInMmuRange) { + port.bus.rsp.physicalAddress := cacheLine.physicalAddress @@ port.bus.cmd.last.virtualAddress(11 downto 0) + port.bus.rsp.allowRead := cacheLine.allowRead + port.bus.rsp.allowWrite := cacheLine.allowWrite + port.bus.rsp.allowExecute := cacheLine.allowExecute + ??? +// port.bus.rsp.hit := cacheHit +// port.stage.arbitration.haltItself setWhen (port.bus.cmd.isValid && !cacheHit && !sharedMiss) + } otherwise { + port.bus.rsp.physicalAddress := port.bus.cmd.last.virtualAddress + port.bus.rsp.allowRead := True + port.bus.rsp.allowWrite := True + port.bus.rsp.allowExecute := True + ??? 
+// port.bus.rsp.hit := True + } + port.bus.rsp.isIoAccess := ioRange(port.bus.rsp.physicalAddress) + ??? +// port.bus.rsp.miss := sharedMiss + } + } + + //Manage TLBW0 and TLBW1 instructions + //TODO not exception safe (sideeffect) + execute plug new Area{ + import execute._ + val tlbWriteBuffer = Reg(UInt(20 bits)) + when(arbitration.isFiring && input(IS_TLB)){ + switch(input(INSTRUCTION)(25 downto 25)){ + is(0){ + tlbWriteBuffer := input(SRC1).asUInt.resized + } + is(1){ + val line = CacheLine() + line.virtualAddress := tlbWriteBuffer + line.physicalAddress := input(RS2)(19 downto 0).asUInt + line.allowUser := input(RS2)(27) + line.allowRead := input(RS2)(28) + line.allowWrite := input(RS2)(29) + line.allowExecute := input(RS2)(30) + line.valid := input(RS2)(31) + core.shared.cache(input(SRC1)(log2Up(tlbSize)-1 downto 0).asUInt) := line + + core.ports.foreach(_.cache.foreach(_.valid := False)) //Invalidate all ports caches + } + } + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/Misc.scala b/VexRiscv/src/main/scala/vexriscv/plugin/Misc.scala new file mode 100644 index 0000000..979d246 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/Misc.scala @@ -0,0 +1,214 @@ +package vexriscv.plugin + +import spinal.core._ +import spinal.lib._ + +object RvcDecompressor{ + + def main(args: Array[String]): Unit = { + SpinalVerilog(new Component{ + out(Delay((apply(Delay(in Bits(16 bits),2), false, false)),2)) + }.setDefinitionName("Decompressor")) + } + + def apply(i : Bits, rvf : Boolean, rvd : Boolean): Bits ={ + val ret = Bits(32 bits).assignDontCare() + + val rch = B"01" ## i(9 downto 7) + val rcl = B"01" ## i(4 downto 2) + + val addi5spnImm = B"00" ## i(10 downto 7) ## i(12 downto 11) ## i(5) ## i(6) ## B"00" + val lwImm = B"00000" ## i(5) ## i(12 downto 10) ## i(6) ## B"00" + def swImm = lwImm + val ldImm = B"0000" ## i(6 downto 5) ## i(12 downto 10) ## B"000" + def sdImm = ldImm + val addImm = B((11 downto 5) -> i(12), (4 downto 0) -> i(6 downto 2)) + def lImm = addImm + val jalImm = B((9 downto 0) -> i(12)) ## i(8) ## i(10 downto 9) ## i(6) ## i(7) ## i(2) ## i(11) ## i(5 downto 3) ## B"0" + val luiImm = B((14 downto 0) -> i(12)) ## i(6 downto 2) ## B"0000_0000_0000" + val shiftImm = i(6 downto 2) + val addi16spImm = B((2 downto 0) -> i(12)) ## i(4 downto 3) ## i(5) ## i(2) ## i(6) ## B"0000" + val jImm = B((9 downto 0) -> i(12)) ## i(8) ## i(10 downto 9) ## i(6) ## i(7) ## i(2) ## i(11) ## i(5 downto 3) ## B"0" + val bImm = B((4 downto 0) -> i(12)) ## i(6 downto 5) ## i(2) ## i(11 downto 10) ## i(4 downto 3) ## B"0" + + def lwspImm = B"0000" ## i(3 downto 2) ## i(12) ## i(6 downto 4) ## B"00" + def swspImm = B"0000" ## i(8 downto 7) ## i(12 downto 9) ## B"00" + def ldspImm = B"000" ## i(4 downto 2) ## i(12) ## i(6 downto 5) ## B"000" + def sdspImm = B"000" ## i(9 downto 7) ## i(12 downto 10) ## B"000" + + + val x0 = B"00000" + val x1 = B"00001" + val x2 = B"00010" + + switch(i(1 downto 0) ## i(15 downto 13)){ + is(0){ret := addi5spnImm ## B"00010" ## B"000" ## rcl ## B"0010011"} //C.ADDI4SPN -> addi rd0, x2, nzuimm[9:2]. 
+ if(rvd) is(1){ret := ldImm ## rch ## B"011" ## rcl ## B"0000111"} // C.FLD + is(2){ret := lwImm ## rch ## B"010" ## rcl ## B"0000011"} //C.LW -> lw rd', offset[6:2](rs1') + if(rvf) is(3){ret := lwImm ## rch ## B"010" ## rcl ## B"0000111"} // C.FLW + if(rvd) is(5){ret := sdImm(11 downto 5) ## rcl ## rch ## B"011" ## sdImm(4 downto 0) ## B"0100111"} // C.FSD + is(6){ret := swImm(11 downto 5) ## rcl ## rch ## B"010" ## swImm(4 downto 0) ## B"0100011"} //C.SW -> sw rs2',offset[6:2](rs1') + if(rvf) is(7){ret := swImm(11 downto 5) ## rcl ## rch ## B"010" ## swImm(4 downto 0) ## B"0100111"} // C.FSW + is(8){ret := addImm ## i(11 downto 7) ## B"000" ## i(11 downto 7) ## B"0010011"} //C.ADDI -> addi rd, rd, nzimm[5:0]. + is(9){ret := jalImm(20) ## jalImm(10 downto 1) ## jalImm(11) ## jalImm(19 downto 12) ## x1 ## B"1101111"} //C.JAL -> jalr x1, rs1, 0. + is(10){ret := lImm ## B"00000" ## B"000" ## i(11 downto 7) ## B"0010011"} //C.LI -> addi rd, x0, imm[5:0]. + is(11){ //C.ADDI16SP C.LUI -> + val addi16sp = addi16spImm ## i(11 downto 7) ## B"000" ## i(11 downto 7) ## B"0010011" + val lui = luiImm(31 downto 12) ## i(11 downto 7) ## B"0110111" + ret := (i(11 downto 7) === 2) ? addi16sp | lui + } + is(12){ + val isImmediate = i(11 downto 10) =/= B"11" + val isShift = !i(11) + val func3 = i(11 downto 10).mux( + 0 -> B"101", + 1 -> B"101", + 2 -> B"111", + 3 -> i(6 downto 5).mux( + 0 -> B"000", + 1 -> B"100", + 2 -> B"110", + 3 -> B"111" + ) + ) + val msbs = Mux( + sel = i(11 downto 10) === B"10", + whenTrue = B((6 downto 0) -> i(12)), //andi + whenFalse = B"0" ## (i(11 downto 10) === B"01" || (i(11 downto 10) === B"11" && i(6 downto 5) === B"00")) ## B"00000" + ) + val rs2Shift = (isShift || isImmediate) ? shiftImm | rcl + val opc = (isImmediate ? B"0010011" | B"0110011") + ret := msbs ## rs2Shift ## rch ## func3 ## rch ## opc + } + is(13){ ret := jImm(20) ## jImm(10 downto 1) ## jImm(11) ## jImm(19 downto 12) ## x0 ## B"1101111"} + is(14){ ret := bImm(12) ## bImm(10 downto 5) ## x0 ## rch ## B"000" ## bImm(4 downto 1) ## bImm(11) ## B"1100011" } + is(15){ ret := bImm(12) ## bImm(10 downto 5) ## x0 ## rch ## B"001" ## bImm(4 downto 1) ## bImm(11) ## B"1100011" } + is(16){ ret := B"0000000" ## i(6 downto 2) ## i(11 downto 7) ## B"001" ## i(11 downto 7) ## B"0010011" } + if(rvd) is(17){ret := ldspImm ## x2 ## B"011" ## i(11 downto 7) ## B"0000111" } // C.FLDSP + is(18){ ret := lwspImm ## x2 ## B"010" ## i(11 downto 7) ## B"0000011" } + if(rvf) is(19){ret := lwspImm ## x2 ## B"010" ## i(11 downto 7) ## B"0000111" } // C.FLWSP + is(20) { + val add = B"000_0000" ## i(6 downto 2) ## (i(12) ? i(11 downto 7) | x0) ## B"000" ## i(11 downto 7) ## B"0110011" //add => add rd, rd, rs2 mv => add rd, x0, rs2 + val j = B"0000_0000_0000" ## i(11 downto 7) ## B"000" ## (i(12) ? x1 | x0) ## B"1100111" //jr => jalr x0, rs1, 0. jalr => jalr x1, rs1, 0. + val ebreak = B"000000000001_00000_000_00000_1110011" //EBREAK + val addJ = (i(6 downto 2) === 0) ? j | add + ret := (i(12 downto 2) === B"100_0000_0000") ? 
ebreak | addJ + } + + if(rvd) is(21){ret := sdspImm(11 downto 5) ## i(6 downto 2) ## x2 ## B"011" ## sdspImm(4 downto 0) ## B"0100111" } // C.FSDSP + is(22){ ret := swspImm(11 downto 5) ## i(6 downto 2) ## x2 ## B"010" ## swspImm(4 downto 0) ## B"0100011" } + if(rvf) is(23){ret := swspImm(11 downto 5) ## i(6 downto 2) ## x2 ## B"010" ## swspImm(4 downto 0) ## B"0100111" } // C.FSwSP + } + + ret + } +} + + +object StreamForkVex{ + def apply[T <: Data](input : Stream[T], portCount: Int, flush : Bool/*, flushDiscardInput : Boolean*/) : Vec[Stream[T]] = { + val outputs = Vec(cloneOf(input), portCount) + val linkEnable = Vec(RegInit(True), portCount) + + input.ready := True + for (i <- 0 until portCount) { + when(!outputs(i).ready && linkEnable(i)) { + input.ready := False + } + } + + for (i <- 0 until portCount) { + outputs(i).valid := input.valid && linkEnable(i) + outputs(i).payload := input.payload + when(outputs(i).fire) { + linkEnable(i) := False + } + } + + when(input.ready || flush) { + linkEnable.foreach(_ := True) + } + outputs + } +} + + +object StreamVexPimper{ + implicit class StreamFlushPimper[T <: Data](pimped : Stream[T]){ + def m2sPipeWithFlush(flush : Bool, discardInput : Boolean = true, collapsBubble : Boolean = true, flushInput : Bool = null): Stream[T] = { + val ret = cloneOf(pimped).setCompositeName(pimped, "m2sPipe", true) + + val rValid = RegInit(False) + val rData = Reg(pimped.payloadType) + if(!discardInput) rValid.clearWhen(flush) + + pimped.ready := (Bool(collapsBubble) && !ret.valid) || ret.ready + + when(pimped.ready) { + if(flushInput == null) + rValid := pimped.valid + else + rValid := pimped.valid && !flushInput + rData := pimped.payload + } + + ret.valid := rValid + ret.payload := rData + + if(discardInput) rValid.clearWhen(flush) + + ret + } + + def s2mPipe(flush : Bool): Stream[T] = { + val ret = cloneOf(pimped) + + val rValid = RegInit(False) + val rBits = Reg(pimped.payloadType) + + ret.valid := pimped.valid || rValid + pimped.ready := !rValid + ret.payload := Mux(rValid, rBits, pimped.payload) + + when(ret.ready) { + rValid := False + } + + when(pimped.ready && (!ret.ready)) { + rValid := pimped.valid + rBits := pimped.payload + } + + rValid.clearWhen(flush) + + ret + } + } + +} + + + +//case class FlowFifoLowLatency[T <: Data](dataType: T, depth: Int) extends Component { +// require(depth >= 1) +// val io = new Bundle { +// val push = slave Flow (dataType) +// val pop = master Stream (dataType) +// val flush = in Bool() +// } +// +// +// val mem = Vec(Reg(dataType), depth) +// val rPtr, wPtr = Counter(depth + 1) +// when(io.push.valid){ +// mem(wPtr) := io.push.payload +// wPtr.increment() +// } +// +// when(io.pop.fire){ +// rPtr.increment() +// } +// io.pop.valid := rPtr =/= wPtr +// +// +//}
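The densest part of RvcDecompressor above is the immediate reshuffling (for example addi5spnImm, which reorders i(10 downto 7), i(12 downto 11), i(5) and i(6) into a 12-bit immediate). A small self-checking sketch of that same mapping for C.ADDI4SPN, written as plain Scala bit twiddling rather than SpinalHDL (not part of the sources; helper names are illustrative):

object RvcAddi4SpnModel {
  private def bit(x: Int, i: Int): Int = (x >>> i) & 1
  private def bits(x: Int, hi: Int, lo: Int): Int = (x >>> lo) & ((1 << (hi - lo + 1)) - 1)

  /** Expand C.ADDI4SPN (quadrant 0, funct3 000) into addi rd', x2, nzuimm.
    * nzuimm == 0 is reserved in the spec and not checked here. */
  def decompress(c: Int): Int = {
    require(bits(c, 1, 0) == 0 && bits(c, 15, 13) == 0, "not C.ADDI4SPN")
    val rdPrime = 8 + bits(c, 4, 2)                       // rd' maps to x8..x15
    val nzuimm  = (bits(c, 10, 7) << 6) | (bits(c, 12, 11) << 4) |
                  (bit(c, 5) << 3) | (bit(c, 6) << 2)     // same reordering as addi5spnImm, bits 1:0 = 0
    (nzuimm << 20) | (2 << 15) | (0 << 12) | (rdPrime << 7) | 0x13  // addi rd', x2, nzuimm
  }

  def main(args: Array[String]): Unit = {
    // c.addi4spn x8, x2, 16 encodes as 0x0800 and should expand to addi x8, x2, 16 (0x01010413).
    assert(decompress(0x0800) == 0x01010413)
    println(f"0x${decompress(0x0800)}%08x")
  }
}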
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/MmuPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/MmuPlugin.scala new file mode 100644 index 0000000..093f59a --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/MmuPlugin.scala @@ -0,0 +1,313 @@ +package vexriscv.plugin + +import vexriscv.{VexRiscv, _} +import spinal.core._ +import spinal.lib._ + +import scala.collection.mutable.ArrayBuffer + +trait DBusAccessService{ + def newDBusAccess() : DBusAccess +} + +case class DBusAccessCmd() extends Bundle { + val address = UInt(32 bits) + val size = UInt(2 bits) + val write = Bool + val data = Bits(32 bits) + val writeMask = Bits(4 bits) +} + +case class DBusAccessRsp() extends Bundle { + val data = Bits(32 bits) + val error = Bool() + val redo = Bool() +} + +case class DBusAccess() extends Bundle { + val cmd = Stream(DBusAccessCmd()) + val rsp = Flow(DBusAccessRsp()) +} + + +object MmuPort{ + val PRIORITY_DATA = 1 + val PRIORITY_INSTRUCTION = 0 +} +case class MmuPort(bus : MemoryTranslatorBus, priority : Int, args : MmuPortConfig, id : Int) + +case class MmuPortConfig(portTlbSize : Int, latency : Int = 0, earlyRequireMmuLockup : Boolean = false, earlyCacheHits : Boolean = false) + +class MmuPlugin(ioRange : UInt => Bool, + virtualRange : UInt => Bool = address => True, +// allowUserIo : Boolean = false, + enableMmuInMachineMode : Boolean = false) extends Plugin[VexRiscv] with MemoryTranslator { + + var dBusAccess : DBusAccess = null + val portsInfo = ArrayBuffer[MmuPort]() + + override def newTranslationPort(priority : Int,args : Any): MemoryTranslatorBus = { + val config = args.asInstanceOf[MmuPortConfig] + val port = MmuPort(MemoryTranslatorBus(MemoryTranslatorBusParameter(wayCount = config.portTlbSize, latency = config.latency)),priority, config, portsInfo.length) + portsInfo += port + port.bus + } + + object IS_SFENCE_VMA2 extends Stageable(Bool) + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(IS_SFENCE_VMA2, False) + decoderService.add(SFENCE_VMA, List(IS_SFENCE_VMA2 -> True)) + + + dBusAccess = pipeline.service(classOf[DBusAccessService]).newDBusAccess() + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + import Riscv._ + val csrService = pipeline.service(classOf[CsrInterface]) + + //Sorted by priority + val sortedPortsInfo = portsInfo.sortBy(_.priority) + + case class CacheLine() extends Bundle { + val valid, exception, superPage = Bool + val virtualAddress = Vec(UInt(10 bits), UInt(10 bits)) + val physicalAddress = Vec(UInt(10 bits), UInt(10 bits)) + val allowRead, allowWrite, allowExecute, allowUser = Bool + + def init = { + valid init (False) + this + } + } + + val csr = pipeline plug new Area{ + val status = new Area{ + val sum, mxr, mprv = RegInit(False) + } + val satp = new Area { + val mode = RegInit(False) + val asid = Reg(Bits(9 bits)) + val ppn = Reg(UInt(20 bits)) + } + + for(offset <- List(CSR.MSTATUS, CSR.SSTATUS)) csrService.rw(offset, 19 -> status.mxr, 18 -> status.sum, 17 -> status.mprv) + csrService.rw(CSR.SATP, 31 -> satp.mode, 22 -> satp.asid, 0 -> satp.ppn) + } + + val core = pipeline plug new Area { + val ports = for (port <- sortedPortsInfo) yield new Area { + val handle = port + val id = port.id + val privilegeService = pipeline.serviceElse(classOf[PrivilegeService], PrivilegeServiceDefault()) + val cache = Vec(Reg(CacheLine()) 
init, port.args.portTlbSize) + val dirty = RegInit(False).allowUnsetRegToAvoidLatch + if(port.args.earlyRequireMmuLockup){ + dirty clearWhen(!port.bus.cmd.last.isStuck) + } + + def toRsp[T <: Data](data : T, from : MemoryTranslatorCmd) : T = from match { + case _ if from == port.bus.cmd.last => data + case _ => { + val next = port.bus.cmd.dropWhile(_ != from)(1) + toRsp(RegNextWhen(data, !next.isStuck), next) + } + } + val requireMmuLockupCmd = port.bus.cmd.takeRight(if(port.args.earlyRequireMmuLockup) 2 else 1).head + + val requireMmuLockupCalc = virtualRange(requireMmuLockupCmd.virtualAddress) && !requireMmuLockupCmd.bypassTranslation && csr.satp.mode + if(!enableMmuInMachineMode) { + requireMmuLockupCalc clearWhen(!csr.status.mprv && privilegeService.isMachine()) + when(privilegeService.isMachine()) { + if (port.priority == MmuPort.PRIORITY_DATA) { + requireMmuLockupCalc clearWhen (!csr.status.mprv || pipeline(MPP) === 3) + } else { + requireMmuLockupCalc := False + } + } + } + + val cacheHitsCmd = port.bus.cmd.takeRight(if(port.args.earlyCacheHits) 2 else 1).head + val cacheHitsCalc = B(cache.map(line => line.valid && line.virtualAddress(1) === cacheHitsCmd.virtualAddress(31 downto 22) && (line.superPage || line.virtualAddress(0) === cacheHitsCmd.virtualAddress(21 downto 12)))) + + + val requireMmuLockup = toRsp(requireMmuLockupCalc, requireMmuLockupCmd) + val cacheHits = toRsp(cacheHitsCalc, cacheHitsCmd) + + val cacheHit = cacheHits.asBits.orR + val cacheLine = MuxOH(cacheHits, cache) + val entryToReplace = Counter(port.args.portTlbSize) + + + when(requireMmuLockup) { + port.bus.rsp.physicalAddress := cacheLine.physicalAddress(1) @@ (cacheLine.superPage ? port.bus.cmd.last.virtualAddress(21 downto 12) | cacheLine.physicalAddress(0)) @@ port.bus.cmd.last.virtualAddress(11 downto 0) + port.bus.rsp.allowRead := cacheLine.allowRead || csr.status.mxr && cacheLine.allowExecute + port.bus.rsp.allowWrite := cacheLine.allowWrite + port.bus.rsp.allowExecute := cacheLine.allowExecute + port.bus.rsp.exception := !dirty && cacheHit && (cacheLine.exception || cacheLine.allowUser && privilegeService.isSupervisor() && !csr.status.sum || !cacheLine.allowUser && privilegeService.isUser()) + port.bus.rsp.refilling := dirty || !cacheHit + port.bus.rsp.isPaging := True + } otherwise { + port.bus.rsp.physicalAddress := port.bus.cmd.last.virtualAddress + port.bus.rsp.allowRead := True + port.bus.rsp.allowWrite := True + port.bus.rsp.allowExecute := True + port.bus.rsp.exception := False + port.bus.rsp.refilling := False + port.bus.rsp.isPaging := False + } + port.bus.rsp.isIoAccess := ioRange(port.bus.rsp.physicalAddress) + + port.bus.rsp.bypassTranslation := !requireMmuLockup + for(wayId <- 0 until port.args.portTlbSize){ + port.bus.rsp.ways(wayId).sel := cacheHits(wayId) + port.bus.rsp.ways(wayId).physical := cache(wayId).physicalAddress(1) @@ (cache(wayId).superPage ? port.bus.cmd.last.virtualAddress(21 downto 12) | cache(wayId).physicalAddress(0)) @@ port.bus.cmd.last.virtualAddress(11 downto 0) + } + + // Avoid keeping any invalid line in the cache after an exception. 
+ // https://github.com/riscv/riscv-linux/blob/8fe28cb58bcb235034b64cbbb7550a8a43fd88be/arch/riscv/include/asm/pgtable.h#L276 + when(service(classOf[IContextSwitching]).isContextSwitching) { + for (line <- cache) { + when(line.exception) { + line.valid := False + } + } + } + } + + val shared = new Area { + val State = new SpinalEnum{ + val IDLE, L1_CMD, L1_RSP, L0_CMD, L0_RSP = newElement() + } + val state = RegInit(State.IDLE) + val vpn = Reg(Vec(UInt(10 bits), UInt(10 bits))) + val portSortedOh = Reg(Bits(portsInfo.length bits)) + case class PTE() extends Bundle { + val V, R, W ,X, U, G, A, D = Bool() + val RSW = Bits(2 bits) + val PPN0 = UInt(10 bits) + val PPN1 = UInt(12 bits) + } + + val dBusRspStaged = dBusAccess.rsp.stage() + val dBusRsp = new Area{ + val pte = PTE() + pte.assignFromBits(dBusRspStaged.data) + val exception = !pte.V || (!pte.R && pte.W) || dBusRspStaged.error + val leaf = pte.R || pte.X + } + + val pteBuffer = RegNextWhen(dBusRsp.pte, dBusRspStaged.valid && !dBusRspStaged.redo) + + dBusAccess.cmd.valid := False + dBusAccess.cmd.write := False + dBusAccess.cmd.size := 2 + dBusAccess.cmd.address.assignDontCare() + dBusAccess.cmd.data.assignDontCare() + dBusAccess.cmd.writeMask.assignDontCare() + + val refills = OHMasking.last(B(ports.map(port => port.handle.bus.cmd.last.isValid && port.requireMmuLockup && !port.dirty && !port.cacheHit))) + switch(state){ + is(State.IDLE){ + when(refills.orR){ + portSortedOh := refills + state := State.L1_CMD + val address = MuxOH(refills, sortedPortsInfo.map(_.bus.cmd.last.virtualAddress)) + vpn(1) := address(31 downto 22) + vpn(0) := address(21 downto 12) + } +// for(port <- portsInfo.sortBy(_.priority)){ +// when(port.bus.cmd.isValid && port.bus.rsp.refilling){ +// vpn(1) := port.bus.cmd.virtualAddress(31 downto 22) +// vpn(0) := port.bus.cmd.virtualAddress(21 downto 12) +// portId := port.id +// state := State.L1_CMD +// } +// } + } + is(State.L1_CMD){ + dBusAccess.cmd.valid := True + dBusAccess.cmd.address := csr.satp.ppn @@ vpn(1) @@ U"00" + when(dBusAccess.cmd.ready){ + state := State.L1_RSP + } + } + is(State.L1_RSP){ + when(dBusRspStaged.valid){ + state := State.L0_CMD + when(dBusRsp.leaf || dBusRsp.exception){ + state := State.IDLE + } + when(dBusRspStaged.redo){ + state := State.L1_CMD + } + } + } + is(State.L0_CMD){ + dBusAccess.cmd.valid := True + dBusAccess.cmd.address := pteBuffer.PPN1(9 downto 0) @@ pteBuffer.PPN0 @@ vpn(0) @@ U"00" + when(dBusAccess.cmd.ready){ + state := State.L0_RSP + } + } + is(State.L0_RSP){ + when(dBusRspStaged.valid) { + state := State.IDLE + when(dBusRspStaged.redo){ + state := State.L0_CMD + } + } + } + } + + for((port, id) <- sortedPortsInfo.zipWithIndex) { + port.bus.busy := state =/= State.IDLE && portSortedOh(id) + } + + when(dBusRspStaged.valid && !dBusRspStaged.redo && (dBusRsp.leaf || dBusRsp.exception)){ + for((port, id) <- ports.zipWithIndex) { + when(portSortedOh(id)) { + port.entryToReplace.increment() + if(port.handle.args.earlyRequireMmuLockup) { + port.dirty := True + } //Avoid having non coherent TLB lookup + for ((line, lineId) <- port.cache.zipWithIndex) { + when(port.entryToReplace === lineId){ + val superPage = state === State.L1_RSP + line.valid := True + line.exception := dBusRsp.exception || (superPage && dBusRsp.pte.PPN0 =/= 0) + line.virtualAddress := vpn + line.physicalAddress := Vec(dBusRsp.pte.PPN0, dBusRsp.pte.PPN1(9 downto 0)) + line.allowRead := dBusRsp.pte.R + line.allowWrite := dBusRsp.pte.W + line.allowExecute := dBusRsp.pte.X + line.allowUser := dBusRsp.pte.U + 
line.superPage := state === State.L1_RSP + } + } + } + } + } + } + } + + val fenceStage = execute + + //Both SFENCE_VMA and SATP reschedule the next instruction in the CsrPlugin itself with one extra cycle to ensure side effect propagation. + fenceStage plug new Area{ + import fenceStage._ + when(arbitration.isValid && arbitration.isFiring && input(IS_SFENCE_VMA2)){ + for(port <- core.ports; line <- port.cache) line.valid := False + } + + csrService.onWrite(CSR.SATP){ + for(port <- core.ports; line <- port.cache) line.valid := False + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/Mul16Plugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/Mul16Plugin.scala new file mode 100644 index 0000000..f2a63c3 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/Mul16Plugin.scala @@ -0,0 +1,119 @@ +package vexriscv.plugin + +import vexriscv._ +import vexriscv.plugin._ +import spinal.core._ + +/** + * A multiplication plugin using only 16-bit multiplications + */ +class Mul16Plugin extends Plugin[VexRiscv]{ + + object MUL_LL extends Stageable(UInt(32 bits)) + object MUL_LH extends Stageable(UInt(32 bits)) + object MUL_HL extends Stageable(UInt(32 bits)) + object MUL_HH extends Stageable(UInt(32 bits)) + + object MUL extends Stageable(Bits(64 bits)) + + object IS_MUL extends Stageable(Bool) + + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + + + val actions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.RS, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> False, + BYPASSABLE_MEMORY_STAGE -> False, + RS1_USE -> True, + RS2_USE -> True, + IS_MUL -> True + ) + + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(IS_MUL, False) + decoderService.add(List( + MULX -> actions + )) + + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + // Prepare signed inputs for the multiplier in the next stage. + // This will map them best to an FPGA DSP. + execute plug new Area { + import execute._ + val a,b = Bits(32 bit) + + a := input(SRC1) + b := input(SRC2) + + val aLow = a(15 downto 0).asUInt + val bLow = b(15 downto 0).asUInt + val aHigh = a(31 downto 16).asUInt + val bHigh = b(31 downto 16).asUInt + + insert(MUL_LL) := aLow * bLow + insert(MUL_LH) := aLow * bHigh + insert(MUL_HL) := aHigh * bLow + insert(MUL_HH) := aHigh * bHigh + } + + memory plug new Area { + import memory._ + + val ll = UInt(32 bits) + val lh = UInt(33 bits) + val hl = UInt(32 bits) + val hh = UInt(32 bits) + + ll := input(MUL_LL) + lh := input(MUL_LH).resized + hl := input(MUL_HL) + hh := input(MUL_HH) + + val hllh = lh + hl + insert(MUL) := ((hh ## ll(31 downto 16)).asUInt + hllh) ## ll(15 downto 0) + } + + writeBack plug new Area { + import writeBack._ + val aSigned,bSigned = Bool + switch(input(INSTRUCTION)(13 downto 12)) { + is(B"01") { + aSigned := True + bSigned := True + } + is(B"10") { + aSigned := True + bSigned := False + } + default { + aSigned := False + bSigned := False + } + } + + val a = (aSigned && input(SRC1).msb) ? input(SRC2).asUInt | U(0) + val b = (bSigned && input(SRC2).msb) ? 
input(SRC1).asUInt | U(0) + + when(arbitration.isValid && input(IS_MUL)){ + switch(input(INSTRUCTION)(13 downto 12)){ + is(B"00"){ + output(REGFILE_WRITE_DATA) := input(MUL)(31 downto 0) + } + is(B"01",B"10",B"11"){ + output(REGFILE_WRITE_DATA) := (((input(MUL)(63 downto 32)).asUInt + ~a) + (~b + 2)).asBits + } + } + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/MulDivIterativePlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/MulDivIterativePlugin.scala new file mode 100644 index 0000000..fff12ef --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/MulDivIterativePlugin.scala @@ -0,0 +1,188 @@ +package vexriscv.plugin + +import spinal.core._ +import spinal.lib._ +import vexriscv.{VexRiscv, _} + +object MulDivIterativePlugin{ + object IS_MUL extends Stageable(Bool) + object IS_DIV extends Stageable(Bool) + object IS_REM extends Stageable(Bool) + object IS_RS1_SIGNED extends Stageable(Bool) + object IS_RS2_SIGNED extends Stageable(Bool) + object FAST_DIV_VALID extends Stageable(Bool) + object FAST_DIV_VALUE extends Stageable(UInt(4 bits)) +} + +class MulDivIterativePlugin(genMul : Boolean = true, + genDiv : Boolean = true, + mulUnrollFactor : Int = 1, + divUnrollFactor : Int = 1, + dhrystoneOpt : Boolean = false, + customMul : (UInt, UInt, Stage, VexRiscv) => Area = null) extends Plugin[VexRiscv] with VexRiscvRegressionArg { + import MulDivIterativePlugin._ + + override def getVexRiscvRegressionArgs(): Seq[String] = { + var args = List[String]() + if(genMul) args :+= "MUL=yes" + if(genDiv) args :+= "DIV=yes" + args + } + + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + + + val commonActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.RS, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> Bool(pipeline.stages.last == pipeline.execute), + BYPASSABLE_MEMORY_STAGE -> True, + RS1_USE -> True, + RS2_USE -> True + ) + + + val decoderService = pipeline.service(classOf[DecoderService]) + + if(genMul) { + val mulActions = commonActions ++ List(IS_MUL -> True) + decoderService.addDefault(IS_MUL, False) + decoderService.add(List( + MUL -> (mulActions ++ List(IS_RS1_SIGNED -> False, IS_RS2_SIGNED -> False)), + MULH -> (mulActions ++ List(IS_RS1_SIGNED -> True, IS_RS2_SIGNED -> True)), + MULHSU -> (mulActions ++ List(IS_RS1_SIGNED -> True, IS_RS2_SIGNED -> False)), + MULHU -> (mulActions ++ List(IS_RS1_SIGNED -> False, IS_RS2_SIGNED -> False)) + )) + } + + if(genDiv) { + val divActions = commonActions ++ List(IS_DIV -> True) + decoderService.addDefault(IS_DIV, False) + decoderService.add(List( + DIV -> (divActions ++ List(IS_RS1_SIGNED -> True, IS_RS2_SIGNED -> True)), + DIVU -> (divActions ++ List(IS_RS1_SIGNED -> False, IS_RS2_SIGNED -> False)), + REM -> (divActions ++ List(IS_RS1_SIGNED -> True, IS_RS2_SIGNED -> True)), + REMU -> (divActions ++ List(IS_RS1_SIGNED -> False, IS_RS2_SIGNED -> False)) + )) + } + + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + if(!genMul && !genDiv) return + + val flushStage = if(memory != null) memory else execute + flushStage plug new Area { + import flushStage._ + + //Shared ressources + val rs1 = Reg(UInt(33 bits)) + val rs2 = Reg(UInt(32 bits)) + val accumulator = Reg(UInt(65 bits)) + + //FrontendOK is only used for CPU configs without memory/writeback stages, were it is required to wait one extra cycle + // to let's the frontend process rs1 rs2 registers + val 
frontendOk = if(flushStage != execute) True else RegInit(False) setWhen(arbitration.isValid && !pipeline.service(classOf[HazardService]).hazardOnExecuteRS && ((if(genDiv) input(IS_DIV) else False) || (if(genMul) input(IS_MUL) else False))) clearWhen(arbitration.isMoving) + + val mul = ifGen(genMul) (if(customMul != null) customMul(rs1,rs2,memory,pipeline) else new Area{ + assert(isPow2(mulUnrollFactor)) + val counter = Counter(32 / mulUnrollFactor + 1) + val done = counter.willOverflowIfInc + when(arbitration.isValid && input(IS_MUL)){ + when(!frontendOk || !done){ + arbitration.haltItself := True + } + when(frontendOk && !done){ + arbitration.haltItself := True + counter.increment() + rs2 := rs2 |>> mulUnrollFactor + val sumElements = ((0 until mulUnrollFactor).map(i => rs2(i) ? (rs1 << i) | U(0)) :+ (accumulator >> 32)) + val sumResult = sumElements.map(_.asSInt.resize(32 + mulUnrollFactor + 1).asUInt).reduceBalancedTree(_ + _) + accumulator := (sumResult @@ accumulator(31 downto 0)) >> mulUnrollFactor + } + output(REGFILE_WRITE_DATA) := ((input(INSTRUCTION)(13 downto 12) === B"00") ? accumulator(31 downto 0) | accumulator(63 downto 32)).asBits + } + when(!arbitration.isStuck) { + counter.clear() + } + }) + + + val div = ifGen(genDiv) (new Area{ + assert(isPow2(divUnrollFactor)) + def area = this + //register allocation + def numerator = rs1(31 downto 0) + def denominator = rs2 + def remainder = accumulator(31 downto 0) + + val needRevert = Reg(Bool) + val counter = Counter(32 / divUnrollFactor + 2) + val done = Reg(Bool) setWhen(counter === counter.end-1) clearWhen(!arbitration.isStuck) + val result = Reg(Bits(32 bits)) + when(arbitration.isValid && input(IS_DIV)){ + when(!frontendOk || !done){ + arbitration.haltItself := True + } + when(frontendOk && !done){ + counter.increment() + + def stages(inNumerator: UInt, inRemainder: UInt, stage: Int): Unit = stage match { + case 0 => { + numerator := inNumerator + remainder := inRemainder + } + case _ => new Area { + val remainderShifted = (inRemainder ## inNumerator.msb).asUInt + val remainderMinusDenominator = remainderShifted - denominator + val outRemainder = !remainderMinusDenominator.msb ? remainderMinusDenominator.resize(32 bits) | remainderShifted.resize(32 bits) + val outNumerator = (inNumerator ## !remainderMinusDenominator.msb).asUInt.resize(32 bits) + stages(outNumerator, outRemainder, stage - 1) + }.setCompositeName(area, "stage_" + (divUnrollFactor-stage)) + } + + stages(numerator, remainder, divUnrollFactor) + + when(counter === 32 / divUnrollFactor){ + val selectedResult = (input(INSTRUCTION)(13) ? 
remainder | numerator) + result := selectedResult.twoComplement(needRevert).asBits.resized + } + } + + output(REGFILE_WRITE_DATA) := result + } + }) + + //Execute stage logic to drive memory stage's input regs + when(if(flushStage != execute) !arbitration.isStuck else !frontendOk){ + accumulator := 0 + def twoComplement(that : Bits, enable: Bool): UInt = (Mux(enable, ~that, that).asUInt + enable.asUInt) + val rs2NeedRevert = execute.input(RS2).msb && execute.input(IS_RS2_SIGNED) + val rs1NeedRevert = (if(genMul)(execute.input(IS_MUL) && rs2NeedRevert) else False) || + (if(genDiv)(execute.input(IS_DIV) && execute.input(RS1).msb && execute.input(IS_RS1_SIGNED)) else False) + val rs1Extended = B((32 downto 32) -> (execute.input(IS_RS1_SIGNED) && execute.input(RS1).msb), (31 downto 0) -> execute.input(RS1)) + + rs1 := twoComplement(rs1Extended, rs1NeedRevert).resized + rs2 := twoComplement(execute.input(RS2), rs2NeedRevert) + if(genDiv) div.needRevert := (rs1NeedRevert ^ (rs2NeedRevert && !execute.input(INSTRUCTION)(13))) && !(execute.input(RS2) === 0 && execute.input(IS_RS2_SIGNED) && !execute.input(INSTRUCTION)(13)) + if(genDiv) div.counter.clear() + } + + if(dhrystoneOpt) { + execute.insert(FAST_DIV_VALID) := execute.input(IS_DIV) && execute.input(INSTRUCTION)(13 downto 12) === B"00" && !execute.input(RS1).msb && !execute.input(RS2).msb && execute.input(RS1).asUInt < 16 && execute.input(RS2).asUInt < 16 && execute.input(RS2) =/= 0 + execute.insert(FAST_DIV_VALUE) := (0 to 15).flatMap(n => (0 to 15).map(d => U(if (d == 0) 0 else n / d, 4 bits))).read(U(execute.input(RS1)(3 downto 0)) @@ U(execute.input(RS2)(3 downto 0))) //(U(execute.input(RS1)(3 downto 0)) / U(execute.input(RS2)(3 downto 0)) + when(execute.input(FAST_DIV_VALID)) { + execute.output(IS_DIV) := False + } + when(input(FAST_DIV_VALID)) { + output(REGFILE_WRITE_DATA) := B(0, 28 bits) ## input(FAST_DIV_VALUE) + } + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/MulPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/MulPlugin.scala new file mode 100644 index 0000000..3e909a0 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/MulPlugin.scala @@ -0,0 +1,159 @@ +package vexriscv.plugin +import vexriscv._ +import vexriscv.VexRiscv +import spinal.core._ +import spinal.lib.KeepAttribute + +//Input buffer generaly avoid the FPGA synthesis to duplicate reg inside the DSP cell, which could stress timings quite much. 
+class MulPlugin(var inputBuffer : Boolean = false, + var outputBuffer : Boolean = false) extends Plugin[VexRiscv] with VexRiscvRegressionArg { + object MUL_LL extends Stageable(UInt(32 bits)) + object MUL_LH extends Stageable(SInt(34 bits)) + object MUL_HL extends Stageable(SInt(34 bits)) + object MUL_HH extends Stageable(SInt(34 bits)) + + object MUL_LOW extends Stageable(SInt(34+16+2 bits)) + + object IS_MUL extends Stageable(Bool) + + override def getVexRiscvRegressionArgs(): Seq[String] = { + List("MUL=yes") + } + + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + + + val actions = List[(Stageable[_ <: BaseType],Any)]( +// SRC1_CTRL -> Src1CtrlEnum.RS, +// SRC2_CTRL -> Src2CtrlEnum.RS, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> False, + BYPASSABLE_MEMORY_STAGE -> False, + RS1_USE -> True, + RS2_USE -> True, + IS_MUL -> True + ) + + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(IS_MUL, False) + decoderService.add(List( + MULX -> actions + )) + + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + + //Do partial multiplication, four times 16 bits * 16 bits + execute plug new Area { + import execute._ + val aSigned,bSigned = Bool + val a,b = Bits(32 bit) + +// a := input(SRC1) +// b := input(SRC2) + + val delay = (if(inputBuffer) 1 else 0) + (if(outputBuffer) 1 else 0) + + val delayLogic = (delay != 0) generate new Area{ + val counter = Reg(UInt(log2Up(delay+1) bits)) + when(arbitration.isValid && input(IS_MUL) && counter =/= delay){ + arbitration.haltItself := True + } + + counter := counter + 1 + when(!arbitration.isStuck || arbitration.isStuckByOthers){ + counter := 0 + } + } + + val withInputBuffer = inputBuffer generate new Area{ + val rs1 = RegNext(input(RS1)) + val rs2 = RegNext(input(RS2)) + a := rs1 + b := rs2 + } + + val noInputBuffer = (!inputBuffer) generate new Area{ + a := input(RS1) + b := input(RS2) + } + + switch(input(INSTRUCTION)(13 downto 12)) { + is(B"01") { + aSigned := True + bSigned := True + } + is(B"10") { + aSigned := True + bSigned := False + } + default { + aSigned := False + bSigned := False + } + } + + val aULow = a(15 downto 0).asUInt + val bULow = b(15 downto 0).asUInt + val aSLow = (False ## a(15 downto 0)).asSInt + val bSLow = (False ## b(15 downto 0)).asSInt + val aHigh = (((aSigned && a.msb) ## a(31 downto 16))).asSInt + val bHigh = (((bSigned && b.msb) ## b(31 downto 16))).asSInt + + val withOuputBuffer = outputBuffer generate new Area{ + val mul_ll = RegNext(aULow * bULow) + val mul_lh = RegNext(aSLow * bHigh) + val mul_hl = RegNext(aHigh * bSLow) + val mul_hh = RegNext(aHigh * bHigh) + + insert(MUL_LL) := mul_ll + insert(MUL_LH) := mul_lh + insert(MUL_HL) := mul_hl + insert(MUL_HH) := mul_hh + } + + val noOutputBuffer = (!outputBuffer) generate new Area{ + insert(MUL_LL) := aULow * bULow + insert(MUL_LH) := aSLow * bHigh + insert(MUL_HL) := aHigh * bSLow + insert(MUL_HH) := aHigh * bHigh + } + + Component.current.afterElaboration{ + //Avoid synthesis tools to retime RS1 RS2 from execute stage to decode stage leading to bad timings (ex : Vivado, even if retiming is disabled) + KeepAttribute(input(RS1)) + KeepAttribute(input(RS2)) + } + } + + //First aggregation of partial multiplication + memory plug new Area { + import memory._ + insert(MUL_LOW) := S(0, MUL_HL.dataType.getWidth + 16 + 2 bit) + (False ## input(MUL_LL)).asSInt + (input(MUL_LH) << 16) + (input(MUL_HL) << 16) + } + + //Final 
aggregation of partial multiplications, REGFILE_WRITE_DATA overriding + writeBack plug new Area { + import writeBack._ + val result = input(MUL_LOW) + (input(MUL_HH) << 32) + + + when(arbitration.isValid && input(IS_MUL)){ + switch(input(INSTRUCTION)(13 downto 12)){ + is(B"00"){ + output(REGFILE_WRITE_DATA) := input(MUL_LOW)(31 downto 0).asBits + } + is(B"01",B"10",B"11"){ + output(REGFILE_WRITE_DATA) := result(63 downto 32).asBits + } + } + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/MulSimplePlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/MulSimplePlugin.scala new file mode 100644 index 0000000..3b407e1 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/MulSimplePlugin.scala @@ -0,0 +1,92 @@ +package vexriscv.plugin +import vexriscv._ +import vexriscv.VexRiscv +import spinal.core._ + +class MulSimplePlugin extends Plugin[VexRiscv]{ + object MUL_OPA extends Stageable(SInt(33 bits)) + object MUL_OPB extends Stageable(SInt(33 bits)) + object MUL extends Stageable(Bits(64 bits)) + + object IS_MUL extends Stageable(Bool) + + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + + + val actions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.RS, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> Bool(pipeline.stages.last == pipeline.execute), + BYPASSABLE_MEMORY_STAGE -> Bool(pipeline.stages.last == pipeline.memory), + RS1_USE -> True, + RS2_USE -> True, + IS_MUL -> True + ) + + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(IS_MUL, False) + decoderService.add(List( + MULX -> actions + )) + + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + // Prepare signed inputs for the multiplier in the next stage. + // This will map them best to an FPGA DSP. + execute plug new Area { + import execute._ + val aSigned,bSigned = Bool + val a,b = Bits(32 bit) + + a := input(SRC1) + b := input(SRC2) + switch(input(INSTRUCTION)(13 downto 12)) { + is(B"01") { + aSigned := True + bSigned := True + } + is(B"10") { + aSigned := True + bSigned := False + } + default { + aSigned := False + bSigned := False + } + } + + insert(MUL_OPA) := ((aSigned ? a.msb | False) ## a).asSInt + insert(MUL_OPB) := ((bSigned ? 
b.msb | False) ## b).asSInt + } + + val injectionStage = if(pipeline.memory != null) pipeline.memory else pipeline.execute + injectionStage plug new Area { + import injectionStage._ + + insert(MUL) := (input(MUL_OPA) * input(MUL_OPB))(63 downto 0).asBits + } + + val memStage = stages.last + memStage plug new Area { + import memStage._ + + when(arbitration.isValid && input(IS_MUL)){ + switch(input(INSTRUCTION)(13 downto 12)){ + is(B"00"){ + output(REGFILE_WRITE_DATA) := input(MUL)(31 downto 0) + } + is(B"01",B"10",B"11"){ + output(REGFILE_WRITE_DATA) := input(MUL)(63 downto 32) + } + } + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/NoPipeliningPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/NoPipeliningPlugin.scala new file mode 100644 index 0000000..b4ad22b --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/NoPipeliningPlugin.scala @@ -0,0 +1,23 @@ +package vexriscv.plugin + +import spinal.core._ +import spinal.lib._ +import vexriscv._ + + +class NoPipeliningPlugin() extends Plugin[VexRiscv] { + + override def setup(pipeline: VexRiscv): Unit = { + import pipeline.config._ + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(HAS_SIDE_EFFECT, False) + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + val writesInPipeline = stages.dropWhile(_ != execute).map(s => s.arbitration.isValid && s.input(REGFILE_WRITE_VALID)) :+ RegNext(stages.last.arbitration.isValid && stages.last.input(REGFILE_WRITE_VALID)) + decode.arbitration.haltByOther.setWhen(stagesFromExecute.map(_.arbitration.isValid).orR) + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/PcManagerSimplePlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/PcManagerSimplePlugin.scala new file mode 100644 index 0000000..5b1226a --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/PcManagerSimplePlugin.scala @@ -0,0 +1,145 @@ +package vexriscv.plugin + +import vexriscv._ +import spinal.core._ +import spinal.lib._ + +import scala.collection.mutable.ArrayBuffer + + + + +class PcManagerSimplePlugin(resetVector : BigInt, + relaxedPcCalculation : Boolean = false, + keepPcPlus4 : Boolean = true) extends Plugin[VexRiscv]{ + override def build(pipeline: VexRiscv): Unit = {println("PcManagerSimplePlugin is now useless")} +} + + +//class PcManagerSimplePlugin(resetVector : BigInt, +// relaxedPcCalculation : Boolean = false, +// keepPcPlus4 : Boolean = true) extends Plugin[VexRiscv] with JumpService{ +// //FetchService interface +// case class JumpInfo(interface : Flow[UInt], stage: Stage, priority : Int) +// val jumpInfos = ArrayBuffer[JumpInfo]() +// override def createJumpInterface(stage: Stage, priority : Int = 0): Flow[UInt] = { +// val interface = Flow(UInt(32 bits)) +// jumpInfos += JumpInfo(interface,stage, priority) +// interface +// } +// var prefetchExceptionPort : Flow[ExceptionCause] = null +// +// override def setup(pipeline: VexRiscv): Unit = { +// if(!relaxedPcCalculation) pipeline.unremovableStages += pipeline.prefetch +// } +// +// +// override def build(pipeline: VexRiscv): Unit = { +// import pipeline.config._ +// import pipeline._ +// +// if(relaxedPcCalculation) +// relaxedImpl(pipeline) +// else +// cycleEffectiveImpl(pipeline) +// +// //Formal verification signals generation +// prefetch.insert(FORMAL_PC_NEXT) := prefetch.input(PC) + 4 +// jumpInfos.foreach(info => { +// when(info.interface.valid){ +// info.stage.output(FORMAL_PC_NEXT) := info.interface.payload +// } +// }) +// } 
+// +// //reduce combinatorial path, and expose the PC to the pipeline as a register +// def relaxedImpl(pipeline: VexRiscv): Unit = { +// import pipeline.config._ +// import pipeline._ +// +// prefetch plug new Area { +// import prefetch._ +// //Stage always valid +// arbitration.isValid := True +// +// //PC calculation without Jump +// val pcReg = Reg(UInt(32 bits)) init(resetVector) addAttribute(Verilator.public) +// val pcPlus4 = pcReg + 4 +// if(keepPcPlus4) KeepAttribute(pcPlus4) +// when(arbitration.isFiring){ +// pcReg := pcPlus4 +// } +// +// //JumpService hardware implementation +// val jump = if(jumpInfos.length != 0) new Area { +// val sortedByStage = jumpInfos.sortWith((a, b) => { +// (pipeline.indexOf(a.stage) > pipeline.indexOf(b.stage)) || +// (pipeline.indexOf(a.stage) == pipeline.indexOf(b.stage) && a.priority > b.priority) +// }) +// val valids = sortedByStage.map(_.interface.valid) +// val pcs = sortedByStage.map(_.interface.payload) +// +// val pcLoad = Flow(UInt(32 bits)) +// pcLoad.valid := jumpInfos.map(_.interface.valid).orR +// pcLoad.payload := MuxOH(OHMasking.first(valids.asBits), pcs) +// +// //application of the selected jump request +// when(pcLoad.valid) { +// pcReg := pcLoad.payload +// } +// } +// +// insert(PC_CALC_WITHOUT_JUMP) := pcReg +// insert(PC) := pcReg +// } +// } +// +// //Jump take effect instantly (save one cycle), but expose the PC to the pipeline as a 'long' combinatorial path +// def cycleEffectiveImpl(pipeline: VexRiscv): Unit = { +// import pipeline.config._ +// import pipeline.prefetch +// +// prefetch plug new Area { +// import prefetch._ +// //Stage always valid +// arbitration.isValid := True +// +// //PC calculation without Jump +// val pcReg = Reg(UInt(32 bits)) init(resetVector) addAttribute(Verilator.public) +// val inc = RegInit(False) +// val pcBeforeJumps = pcReg + (inc ## B"00").asUInt +// insert(PC_CALC_WITHOUT_JUMP) := pcBeforeJumps +// val pc = UInt(32 bits) +// pc := input(PC_CALC_WITHOUT_JUMP) +// +// val samplePcNext = False +// +// //JumpService hardware implementation +// val jump = if(jumpInfos.length != 0) new Area { +// val sortedByStage = jumpInfos.sortWith((a, b) => pipeline.indexOf(a.stage) > pipeline.indexOf(b.stage)) +// val valids = sortedByStage.map(_.interface.valid) +// val pcs = sortedByStage.map(_.interface.payload) +// +// val pcLoad = Flow(UInt(32 bits)) +// pcLoad.valid := jumpInfos.map(_.interface.valid).orR +// pcLoad.payload := MuxOH(OHMasking.first(valids.asBits), pcs) +// +// //application of the selected jump request +// when(pcLoad.valid) { +// inc := False +// samplePcNext := True +// pc := pcLoad.payload +// } +// } +// +// when(arbitration.isFiring){ +// inc := True +// samplePcNext := True +// } +// +// when(samplePcNext) { pcReg := pc } +// +// insert(PC) := pc +// } +// } +//}
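The commented-out jump arbitration above orders the pending requests so that the deepest pipeline stage wins and, among requests from the same stage, the larger priority value wins, then takes the first valid one (OHMasking.first feeding MuxOH). A plain-Scala sketch of that selection policy as used by relaxedImpl, with illustrative stage indexes:

object JumpArbitration extends App {
  case class JumpReq(stageIndex: Int, priority: Int, valid: Boolean, target: Long)

  // Deepest stage first, then highest priority, as in the sortWith(...) above;
  // the first valid request of the sorted list drives the PC register.
  def selectJump(reqs: Seq[JumpReq]): Option[Long] = {
    val sorted = reqs.sortWith((a, b) =>
      a.stageIndex > b.stageIndex ||
      (a.stageIndex == b.stageIndex && a.priority > b.priority))
    sorted.find(_.valid).map(_.target)
  }

  val reqs = Seq(
    JumpReq(stageIndex = 1, priority = 0, valid = true,  target = 0x100L), // early stage (illustrative)
    JumpReq(stageIndex = 3, priority = 0, valid = true,  target = 0x200L), // late stage, e.g. a trap redirect
    JumpReq(stageIndex = 2, priority = 0, valid = false, target = 0x300L)
  )
  println(selectJump(reqs).map(t => "0x" + t.toHexString)) // Some(0x200): the deepest valid request wins
}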
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/Plugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/Plugin.scala new file mode 100644 index 0000000..96d2bc6 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/Plugin.scala @@ -0,0 +1,25 @@ +package vexriscv.plugin + +import vexriscv.{Pipeline, Stage} +import spinal.core.{Area, Nameable} + +/** + * Created by PIC32F_USER on 03/03/2017. + */ +trait Plugin[T <: Pipeline] extends Nameable{ + var pipeline : T = null.asInstanceOf[T] + setName(this.getClass.getSimpleName.replace("$","")) + + // Used to setup things with other plugins + def setup(pipeline: T) : Unit = {} + + //Used to flush out the required hardware (called after setup) + def build(pipeline: T) : Unit + + implicit class implicitsStage(stage: Stage){ + def plug[T <: Area](area : T) : T = {area.setCompositeName(stage,getName()).reflectNames();area} + } + implicit class implicitsPipeline(stage: Pipeline){ + def plug[T <: Area](area : T) = {area.setName(getName()).reflectNames();area} + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/PmpPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/PmpPlugin.scala new file mode 100644 index 0000000..35951e5 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/PmpPlugin.scala @@ -0,0 +1,307 @@ +/* + * Copyright (c) 2021 Samuel Lindemer <samuel.lindemer@ri.se> + * + * SPDX-License-Identifier: MIT + */ + +package vexriscv.plugin + +import vexriscv.{VexRiscv, _} +import vexriscv.plugin.MemoryTranslatorPort.{_} +import spinal.core._ +import spinal.lib._ +import spinal.lib.fsm._ + +/* Each 32-bit pmpcfg# register contains four 8-bit configuration sections. + * These section numbers contain flags which apply to regions defined by the + * corresponding pmpaddr# register. + * + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | pmp3cfg | pmp2cfg | pmp1cfg | pmp0cfg | pmpcfg0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | pmp7cfg | pmp6cfg | pmp5cfg | pmp4cfg | pmpcfg2 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 7 6 5 4 3 2 1 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | L | 0 | A | X | W | R | pmp#cfg + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * L: locks configuration until system reset (including M-mode) + * 0: hardwired to zero + * A: 0 = OFF (null region / disabled) + * 1 = TOR (top of range) + * 2 = NA4 (naturally aligned four-byte region) + * 3 = NAPOT (naturally aligned power-of-two region, > 7 bytes) + * X: execute + * W: write + * R: read + * + * TOR: Each 32-bit pmpaddr# register defines the upper bound of the pmp region + * right-shifted by two bits. The lower bound of the region is the previous + * pmpaddr# register. In the case of pmpaddr0, the lower bound is address 0x0. + * + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | address[33:2] | pmpaddr# + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * NAPOT: Each 32-bit pmpaddr# register defines the region address and the size + * of the pmp region. The number of concurrent 1s begging at the LSB indicates + * the size of the region as a power of two (e.g. 0x...0 = 8-byte, 0x...1 = + * 16-byte, 0x...11 = 32-byte, etc.). 
+ * + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | address[33:2] |0|1|1|1|1| pmpaddr# + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * NA4: This is essentially an edge case of NAPOT where the entire pmpaddr# + * register defines a 4-byte wide region. + * + * N.B. THIS IMPLEMENTATION ONLY SUPPORTS NAPOT ADDRESSING. REGIONS ARE NOT + * ORDERED BY PRIORITY. A PERMISSION IS GRANTED TO AN ACCESS IF ANY MATCHING + * PMP REGION HAS THAT PERMISSION ENABLED. + */ + +trait Pmp { + def OFF = 0 + def TOR = 1 + def NA4 = 2 + def NAPOT = 3 + + def xlen = 32 + def rBit = 0 + def wBit = 1 + def xBit = 2 + def aBits = 4 downto 3 + def lBit = 7 +} + +class PmpSetter(cutoff : Int) extends Component with Pmp { + val io = new Bundle { + val addr = in UInt(xlen bits) + val base, mask = out UInt(xlen - cutoff bits) + } + + val ones = io.addr & ~(io.addr + 1) + io.base := io.addr(xlen - 3 downto cutoff - 2) ^ ones(xlen - 3 downto cutoff - 2) + io.mask := ~(ones(xlen - 4 downto cutoff - 2) @@ U"1") +} + +case class ProtectedMemoryTranslatorPort(bus : MemoryTranslatorBus) + +class PmpPlugin(regions : Int, granularity : Int, ioRange : UInt => Bool) extends Plugin[VexRiscv] with MemoryTranslator with Pmp { + assert(regions % 4 == 0 & regions <= 16) + assert(granularity >= 8) + + var setter : PmpSetter = null + var dPort, iPort : ProtectedMemoryTranslatorPort = null + val cutoff = log2Up(granularity) - 1 + + override def newTranslationPort(priority : Int, args : Any): MemoryTranslatorBus = { + val port = ProtectedMemoryTranslatorPort(MemoryTranslatorBus(new MemoryTranslatorBusParameter(0, 0))) + priority match { + case PRIORITY_INSTRUCTION => iPort = port + case PRIORITY_DATA => dPort = port + } + port.bus + } + + override def setup(pipeline: VexRiscv): Unit = { + setter = new PmpSetter(cutoff) + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline.config._ + import pipeline._ + import Riscv._ + + val csrService = pipeline.service(classOf[CsrInterface]) + val privilegeService = pipeline.service(classOf[PrivilegeService]) + + val state = pipeline plug new Area { + val pmpaddr = Mem(UInt(xlen bits), regions) + val pmpcfg = Vector.fill(regions)(Reg(Bits(8 bits)) init (0)) + val base, mask = Vector.fill(regions)(Reg(UInt(xlen - cutoff bits))) + } + + def machineMode : Bool = privilegeService.isMachine() + + execute plug new Area { + import execute._ + + val fsmPending = RegInit(False) clearWhen(!arbitration.isStuck) + val fsmComplete = False + val hazardFree = csrService.isHazardFree() + + val csrAddress = input(INSTRUCTION)(csrRange) + val pmpNcfg = csrAddress(log2Up(regions) - 1 downto 0).asUInt + val pmpcfgN = pmpNcfg(log2Up(regions) - 3 downto 0) + val pmpcfgCsr = input(INSTRUCTION)(31 downto 24) === 0x3a + val pmpaddrCsr = input(INSTRUCTION)(31 downto 24) === 0x3b + + val pmpNcfg_ = Reg(UInt(log2Up(regions) bits)) + val pmpcfgN_ = Reg(UInt(log2Up(regions) - 2 bits)) + val pmpcfgCsr_ = RegInit(False) + val pmpaddrCsr_ = RegInit(False) + val writeData_ = Reg(Bits(xlen bits)) + + csrService.duringAnyRead { + when (machineMode) { + when (pmpcfgCsr) { + csrService.allowCsr() + csrService.readData() := + state.pmpcfg(pmpcfgN @@ U(3, 2 bits)) ## + state.pmpcfg(pmpcfgN @@ U(2, 2 bits)) ## + state.pmpcfg(pmpcfgN @@ U(1, 2 bits)) ## + state.pmpcfg(pmpcfgN @@ U(0, 2 bits)) + } + when (pmpaddrCsr) { + csrService.allowCsr() + csrService.readData() := 
state.pmpaddr(pmpNcfg).asBits + } + } + } + + csrService.duringAnyWrite { + when ((pmpcfgCsr | pmpaddrCsr) & machineMode) { + csrService.allowCsr() + arbitration.haltItself := !fsmComplete + when (!fsmPending && hazardFree) { + fsmPending := True + writeData_ := csrService.writeData() + pmpNcfg_ := pmpNcfg + pmpcfgN_ := pmpcfgN + pmpcfgCsr_ := pmpcfgCsr + pmpaddrCsr_ := pmpaddrCsr + } + } + } + + val fsm = new StateMachine { + val fsmEnable = RegInit(False) + val fsmCounter = Reg(UInt(log2Up(regions) bits)) init(0) + + val stateIdle : State = new State with EntryPoint { + onEntry { + fsmPending := False + fsmEnable := False + fsmComplete := True + fsmCounter := 0 + } + whenIsActive { + when (fsmPending) { + goto(stateWrite) + } + } + } + + val stateWrite : State = new State { + whenIsActive { + when (pmpcfgCsr_) { + val overwrite = writeData_.subdivideIn(8 bits) + for (i <- 0 until 4) { + when (~state.pmpcfg(pmpcfgN_ @@ U(i, 2 bits))(lBit)) { + state.pmpcfg(pmpcfgN_ @@ U(i, 2 bits)).assignFromBits(overwrite(i)) + } + } + goto(stateCfg) + } + when (pmpaddrCsr_) { + when (~state.pmpcfg(pmpNcfg_)(lBit)) { + state.pmpaddr(pmpNcfg_) := writeData_.asUInt + } + goto(stateAddr) + } + } + onExit (fsmEnable := True) + } + + val stateCfg : State = new State { + onEntry (fsmCounter := pmpcfgN_ @@ U(0, 2 bits)) + whenIsActive { + fsmCounter := fsmCounter + 1 + when (fsmCounter(1 downto 0) === 3) { + goto(stateIdle) + } + } + } + + val stateAddr : State = new State { + onEntry (fsmCounter := pmpNcfg_) + whenIsActive (goto(stateIdle)) + } + + when (pmpaddrCsr_) { + setter.io.addr := writeData_.asUInt + } otherwise { + setter.io.addr := state.pmpaddr(fsmCounter) + } + + when (fsmEnable & ~state.pmpcfg(fsmCounter)(lBit)) { + state.base(fsmCounter) := setter.io.base + state.mask(fsmCounter) := setter.io.mask + } + } + } + + pipeline plug new Area { + def getHits(address : UInt) = { + (0 until regions).map(i => + ((address & state.mask(U(i, log2Up(regions) bits))) === state.base(U(i, log2Up(regions) bits))) & + (state.pmpcfg(i)(lBit) | ~machineMode) & (state.pmpcfg(i)(aBits) === NAPOT) + ) + } + + def getPermission(hits : IndexedSeq[Bool], bit : Int) = { + MuxOH(OHMasking.first(hits), state.pmpcfg.map(_(bit))) + } + + val dGuard = new Area { + val address = dPort.bus.cmd(0).virtualAddress + dPort.bus.rsp.physicalAddress := address + dPort.bus.rsp.isIoAccess := ioRange(address) + dPort.bus.rsp.isPaging := False + dPort.bus.rsp.exception := False + dPort.bus.rsp.refilling := False + dPort.bus.rsp.allowExecute := False + dPort.bus.busy := False + + val hits = getHits(address(31 downto cutoff)) + + when(~hits.orR) { + dPort.bus.rsp.allowRead := machineMode + dPort.bus.rsp.allowWrite := machineMode + } otherwise { + dPort.bus.rsp.allowRead := getPermission(hits, rBit) + dPort.bus.rsp.allowWrite := getPermission(hits, wBit) + } + } + + val iGuard = new Area { + val address = iPort.bus.cmd(0).virtualAddress + iPort.bus.rsp.physicalAddress := address + iPort.bus.rsp.isIoAccess := ioRange(address) + iPort.bus.rsp.isPaging := False + iPort.bus.rsp.exception := False + iPort.bus.rsp.refilling := False + iPort.bus.rsp.allowRead := False + iPort.bus.rsp.allowWrite := False + iPort.bus.busy := False + + val hits = getHits(address(31 downto cutoff)) + + when(~hits.orR) { + iPort.bus.rsp.allowExecute := machineMode + } otherwise { + iPort.bus.rsp.allowExecute := getPermission(hits, xBit) + } + } + } + } +}
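The PmpSetter above derives a region's base and comparison mask from a NAPOT-encoded pmpaddr by isolating its trailing ones. A plain-Scala model of the same decode, returning the byte base address and region size (BigInt arithmetic; names are illustrative):

object NapotDecode extends App {
  // pmpaddr = (base >> 2) | (size/8 - 1) for a naturally aligned power-of-two
  // region of 'size' bytes (size >= 8): the trailing ones encode the size.
  def decode(pmpaddr: BigInt): (BigInt, BigInt) = {
    val ones = pmpaddr & ~(pmpaddr + 1)  // isolate the trailing run of ones, as PmpSetter does
    val size = (ones + 1) << 3           // region size in bytes
    val base = (pmpaddr ^ ones) << 2     // clear the size bits, restore the two implicit low address bits
    (base, size)
  }

  // 0x200003FF encodes an 8 KiB region based at 0x80000000.
  val (base, size) = decode(BigInt("200003FF", 16))
  println("base = 0x" + base.toString(16) + ", size = " + size + " bytes") // base = 0x80000000, size = 8192 bytes
}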
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/PmpPluginOld.scala b/VexRiscv/src/main/scala/vexriscv/plugin/PmpPluginOld.scala new file mode 100644 index 0000000..0426902 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/PmpPluginOld.scala @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2020 Samuel Lindemer <samuel.lindemer@ri.se> + * + * SPDX-License-Identifier: MIT + */ + +package vexriscv.plugin + +import vexriscv.{VexRiscv, _} +import spinal.core._ +import spinal.lib._ +import scala.collection.mutable.ArrayBuffer + +/* Each 32-bit pmpcfg# register contains four 8-bit configuration sections. + * These section numbers contain flags which apply to regions defined by the + * corresponding pmpaddr# register. + * + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | pmp3cfg | pmp2cfg | pmp1cfg | pmp0cfg | pmpcfg0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | pmp7cfg | pmp6cfg | pmp5cfg | pmp4cfg | pmpcfg2 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * 7 6 5 4 3 2 1 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | L | 0 | A | X | W | R | pmp#cfg + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * L: locks configuration until system reset (including M-mode) + * 0: hardwired to zero + * A: 0 = OFF (null region / disabled) + * 1 = TOR (top of range) + * 2 = NA4 (naturally aligned four-byte region) + * 3 = NAPOT (naturally aligned power-of-two region, > 7 bytes) + * X: execute + * W: write + * R: read + * + * TOR: Each 32-bit pmpaddr# register defines the upper bound of the pmp region + * right-shifted by two bits. The lower bound of the region is the previous + * pmpaddr# register. In the case of pmpaddr0, the lower bound is address 0x0. + * + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | address[33:2] | pmpaddr# + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * NAPOT: Each 32-bit pmpaddr# register defines the region address and the size + * of the pmp region. The number of concurrent 1s begging at the LSB indicates + * the size of the region as a power of two (e.g. 0x...0 = 8-byte, 0x...1 = + * 16-byte, 0x...11 = 32-byte, etc.). + * + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | address[33:2] |0|1|1|1|1| pmpaddr# + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * NA4: This is essentially an edge case of NAPOT where the entire pmpaddr# + * register defines a 4-byte wide region. + */ + +case class PmpRegister(previous : PmpRegister) extends Area { + + def OFF = 0 + def TOR = 1 + def NA4 = 2 + def NAPOT = 3 + + val state = new Area { + val r, w, x = Reg(Bool) + val l = RegInit(False) + val a = Reg(UInt(2 bits)) init(0) + val addr = Reg(UInt(32 bits)) + } + + // CSR writes connect to these signals rather than the internal state + // registers. This makes locking and WARL possible. + val csr = new Area { + val r, w, x = Bool + val l = Bool + val a = UInt(2 bits) + val addr = UInt(32 bits) + } + + // Last valid assignment wins; nothing happens if a user-initiated write did + // not occur on this clock cycle. 
+ csr.r := state.r + csr.w := state.w + csr.x := state.x + csr.l := state.l + csr.a := state.a + csr.addr := state.addr + + // Computed PMP region bounds + val region = new Area { + val valid, locked = Bool + val start, end = UInt(32 bits) + } + + when(~state.l) { + state.r := csr.r + state.w := csr.w + state.x := csr.x + state.l := csr.l + state.a := csr.a + state.addr := csr.addr + + if (csr.l == True & csr.a == TOR) { + previous.state.l := True + } + } + + val shifted = state.addr |<< 2 + val mask = state.addr & ~(state.addr + 1) + val masked = (state.addr & ~mask) |<< 2 + + // PMP changes take effect two clock cycles after the initial CSR write (i.e., + // settings propagate from csr -> state -> region). + region.locked := state.l + region.valid := True + + switch(csr.a) { + is(TOR) { + if (previous == null) region.start := 0 + else region.start := previous.region.end + region.end := shifted + } + is(NA4) { + region.start := shifted + region.end := shifted + 4 + } + is(NAPOT) { + region.start := masked + region.end := masked + ((mask + 1) |<< 3) + } + default { + region.start := 0 + region.end := shifted + region.valid := False + } + } +} + + +class PmpPluginOld(regions : Int, ioRange : UInt => Bool) extends Plugin[VexRiscv] with MemoryTranslator { + + // Each pmpcfg# CSR configures four regions. + assert((regions % 4) == 0) + + val pmps = ArrayBuffer[PmpRegister]() + val portsInfo = ArrayBuffer[ProtectedMemoryTranslatorPort]() + + override def newTranslationPort(priority : Int, args : Any): MemoryTranslatorBus = { + val port = ProtectedMemoryTranslatorPort(MemoryTranslatorBus(new MemoryTranslatorBusParameter(0, 0))) + portsInfo += port + port.bus + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline.config._ + import pipeline._ + import Riscv._ + + val csrService = pipeline.service(classOf[CsrInterface]) + val privilegeService = pipeline.service(classOf[PrivilegeService]) + + val core = pipeline plug new Area { + + // Instantiate pmpaddr0 ... pmpaddr# CSRs. + for (i <- 0 until regions) { + if (i == 0) { + pmps += PmpRegister(null) + } else { + pmps += PmpRegister(pmps.last) + } + csrService.r(0x3b0 + i, pmps(i).state.addr) + csrService.w(0x3b0 + i, pmps(i).csr.addr) + } + + // Instantiate pmpcfg0 ... pmpcfg# CSRs. 
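Each pmpcfg# register handled below packs four per-region bytes, and every byte carries the R, W, X, A and L fields laid out in the header comment. A plain-Scala decode of one such 8-bit section, as a reference for the bit offsets wired up in the loop that follows (names are illustrative):

object PmpCfgDecode extends App {
  case class PmpCfg(r: Boolean, w: Boolean, x: Boolean, a: Int, l: Boolean)

  // Bit layout of one pmp#cfg byte: R=bit0, W=bit1, X=bit2, A=bits[4:3], L=bit7.
  def decode(cfg: Int): PmpCfg = PmpCfg(
    r = (cfg & 0x01) != 0,
    w = (cfg & 0x02) != 0,
    x = (cfg & 0x04) != 0,
    a = (cfg >> 3) & 0x3,   // 0=OFF, 1=TOR, 2=NA4, 3=NAPOT
    l = (cfg & 0x80) != 0
  )

  println(decode(0x9F)) // PmpCfg(true,true,true,3,true): a locked NAPOT region with full rwx
}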
+ for (i <- 0 until (regions / 4)) { + csrService.r(0x3a0 + i, + 31 -> pmps((i * 4) + 3).state.l, 23 -> pmps((i * 4) + 2).state.l, + 15 -> pmps((i * 4) + 1).state.l, 7 -> pmps((i * 4) ).state.l, + 27 -> pmps((i * 4) + 3).state.a, 26 -> pmps((i * 4) + 3).state.x, + 25 -> pmps((i * 4) + 3).state.w, 24 -> pmps((i * 4) + 3).state.r, + 19 -> pmps((i * 4) + 2).state.a, 18 -> pmps((i * 4) + 2).state.x, + 17 -> pmps((i * 4) + 2).state.w, 16 -> pmps((i * 4) + 2).state.r, + 11 -> pmps((i * 4) + 1).state.a, 10 -> pmps((i * 4) + 1).state.x, + 9 -> pmps((i * 4) + 1).state.w, 8 -> pmps((i * 4) + 1).state.r, + 3 -> pmps((i * 4) ).state.a, 2 -> pmps((i * 4) ).state.x, + 1 -> pmps((i * 4) ).state.w, 0 -> pmps((i * 4) ).state.r + ) + csrService.w(0x3a0 + i, + 31 -> pmps((i * 4) + 3).csr.l, 23 -> pmps((i * 4) + 2).csr.l, + 15 -> pmps((i * 4) + 1).csr.l, 7 -> pmps((i * 4) ).csr.l, + 27 -> pmps((i * 4) + 3).csr.a, 26 -> pmps((i * 4) + 3).csr.x, + 25 -> pmps((i * 4) + 3).csr.w, 24 -> pmps((i * 4) + 3).csr.r, + 19 -> pmps((i * 4) + 2).csr.a, 18 -> pmps((i * 4) + 2).csr.x, + 17 -> pmps((i * 4) + 2).csr.w, 16 -> pmps((i * 4) + 2).csr.r, + 11 -> pmps((i * 4) + 1).csr.a, 10 -> pmps((i * 4) + 1).csr.x, + 9 -> pmps((i * 4) + 1).csr.w, 8 -> pmps((i * 4) + 1).csr.r, + 3 -> pmps((i * 4) ).csr.a, 2 -> pmps((i * 4) ).csr.x, + 1 -> pmps((i * 4) ).csr.w, 0 -> pmps((i * 4) ).csr.r + ) + } + + // Connect memory ports to PMP logic. + val ports = for ((port, portId) <- portsInfo.zipWithIndex) yield new Area { + + val address = port.bus.cmd(0).virtualAddress + port.bus.rsp.physicalAddress := address + + // Only the first matching PMP region applies. + val hits = pmps.map(pmp => pmp.region.valid & + pmp.region.start <= address & + pmp.region.end > address & + (pmp.region.locked | ~privilegeService.isMachine())) + + // M-mode has full access by default, others have none. 
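A plain-Scala model of the policy implemented just below: the first matching, active region decides the permission, and with no match only machine mode is allowed (Region is an illustrative type, not part of the plugin):

object PmpFirstMatch extends App {
  case class Region(valid: Boolean, start: BigInt, end: BigInt, locked: Boolean,
                    r: Boolean, w: Boolean, x: Boolean)

  // Mirrors the 'hits' condition above: a region constrains M-mode only when
  // locked, and always constrains lower privilege levels.
  def allowRead(regions: Seq[Region], addr: BigInt, machineMode: Boolean): Boolean =
    regions.find(p => p.valid && p.start <= addr && addr < p.end && (p.locked || !machineMode))
      .map(_.r)
      .getOrElse(machineMode) // no region matched: access for M-mode only

  val regions = Seq(
    Region(valid = true, start = BigInt(0x80000000L), end = BigInt(0x80002000L),
           locked = true, r = true, w = false, x = true)
  )
  println(allowRead(regions, BigInt(0x80001000L), machineMode = false)) // true
  println(allowRead(regions, BigInt(0x90000000L), machineMode = false)) // false
}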
+ when(CountOne(hits) === 0) { + port.bus.rsp.allowRead := privilegeService.isMachine() + port.bus.rsp.allowWrite := privilegeService.isMachine() + port.bus.rsp.allowExecute := privilegeService.isMachine() + } otherwise { + port.bus.rsp.allowRead := MuxOH(OHMasking.first(hits), pmps.map(_.state.r)) + port.bus.rsp.allowWrite := MuxOH(OHMasking.first(hits), pmps.map(_.state.w)) + port.bus.rsp.allowExecute := MuxOH(OHMasking.first(hits), pmps.map(_.state.x)) + } + + port.bus.rsp.isIoAccess := ioRange(port.bus.rsp.physicalAddress) + port.bus.rsp.isPaging := False + port.bus.rsp.exception := False + port.bus.rsp.refilling := False + port.bus.busy := False + + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/RegFilePlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/RegFilePlugin.scala new file mode 100644 index 0000000..94a3f32 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/RegFilePlugin.scala @@ -0,0 +1,122 @@ +package vexriscv.plugin + +import vexriscv._ +import spinal.core._ +import spinal.lib._ + +import scala.collection.mutable + + +trait RegFileReadKind +object ASYNC extends RegFileReadKind +object SYNC extends RegFileReadKind + + +class RegFilePlugin(regFileReadyKind : RegFileReadKind, + zeroBoot : Boolean = false, + x0Init : Boolean = true, + writeRfInMemoryStage : Boolean = false, + readInExecute : Boolean = false, + syncUpdateOnStall : Boolean = true, + rv32e : Boolean = false, + withShadow : Boolean = false //shadow registers aren't transition hazard free + ) extends Plugin[VexRiscv] with RegFileService{ + import Riscv._ + + override def readStage(): Stage = if(readInExecute) pipeline.execute else pipeline.decode + + override def setup(pipeline: VexRiscv): Unit = { + import pipeline.config._ + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(RS1_USE,False) + decoderService.addDefault(RS2_USE,False) + decoderService.addDefault(REGFILE_WRITE_VALID,False) + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + val readStage = if(readInExecute) execute else decode + val writeStage = if(writeRfInMemoryStage) memory else stages.last + + val numRegisters = if(rv32e) 16 else 32 + def clipRange(that : Range) = if(rv32e) that.tail else that + + val global = pipeline plug new Area{ + val regFileSize = if(withShadow) numRegisters * 2 else numRegisters + val regFile = Mem(Bits(32 bits),regFileSize) addAttribute(Verilator.public) + if(zeroBoot) regFile.init(List.fill(regFileSize)(B(0, 32 bits))) + + val shadow = ifGen(withShadow)(new Area{ + val write, read, clear = RegInit(False) + + read clearWhen(clear && !readStage.arbitration.isStuck) + write clearWhen(clear && !writeStage.arbitration.isStuck) + + val csrService = pipeline.service(classOf[CsrInterface]) + csrService.w(0x7C0,2 -> clear, 1 -> read, 0 -> write) + }) + } + + //Disable rd0 write in decoding stage + when(decode.input(INSTRUCTION)(rdRange) === 0) { + decode.input(REGFILE_WRITE_VALID) := False + } + if(rv32e) when(decode.input(INSTRUCTION)(rdRange.head)) { + decode.input(REGFILE_WRITE_VALID) := False + } + + //Read register file + readStage plug new Area{ + import readStage._ + + //read register file + val srcInstruction = regFileReadyKind match{ + case `ASYNC` => input(INSTRUCTION) + case `SYNC` if !readInExecute => input(INSTRUCTION_ANTICIPATED) + case `SYNC` if readInExecute => if(syncUpdateOnStall) Mux(execute.arbitration.isStuck, execute.input(INSTRUCTION), decode.input(INSTRUCTION)) else 
decode.input(INSTRUCTION) + } + + def shadowPrefix(that : Bits) = if(withShadow) global.shadow.read ## that else that + val regFileReadAddress1 = U(shadowPrefix(srcInstruction(clipRange(Riscv.rs1Range)))) + val regFileReadAddress2 = U(shadowPrefix(srcInstruction(clipRange(Riscv.rs2Range)))) + + val (rs1Data,rs2Data) = regFileReadyKind match{ + case `ASYNC` => (global.regFile.readAsync(regFileReadAddress1),global.regFile.readAsync(regFileReadAddress2)) + case `SYNC` => + val enable = if(!syncUpdateOnStall) !readStage.arbitration.isStuck else null + (global.regFile.readSync(regFileReadAddress1, enable),global.regFile.readSync(regFileReadAddress2, enable)) + } + + insert(RS1) := rs1Data + insert(RS2) := rs2Data + } + + //Write register file + writeStage plug new Area { + import writeStage._ + + def shadowPrefix(that : Bits) = if(withShadow) global.shadow.write ## that else that + val regFileWrite = global.regFile.writePort.addAttribute(Verilator.public).setName("lastStageRegFileWrite") + regFileWrite.valid := output(REGFILE_WRITE_VALID) && arbitration.isFiring + regFileWrite.address := U(shadowPrefix(output(INSTRUCTION)(clipRange(rdRange)))) + regFileWrite.data := output(REGFILE_WRITE_DATA) + + //Ensure no boot glitches modify X0 + if(!x0Init && zeroBoot) when(regFileWrite.address === 0){ + regFileWrite.valid := False + } + + //CPU will initialise constant register zero in the first cycle + if(x0Init) { + val boot = RegNext(False) init (True) + regFileWrite.valid setWhen (boot) + when(boot) { + regFileWrite.address := 0 + regFileWrite.data := 0 + } + } + } + } +}
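RegFilePlugin above optionally doubles the register file and prepends a shadow-select bit to the 5-bit architectural index (shadowPrefix), while x0 stays zero either through the decode-stage write suppression or the first-cycle x0Init write. A rough plain-Scala model of that addressing (not the SpinalHDL memory; a single shadow flag stands in for the separate read/write shadow bits):

object RegFileModel extends App {
  class RegFile(withShadow: Boolean) {
    private val mem = Array.fill(if (withShadow) 64 else 32)(0L)

    // shadowPrefix(...): prepend the shadow bit to the 5-bit register index.
    private def index(shadow: Boolean, reg: Int): Int =
      (if (withShadow && shadow) 32 else 0) + (reg & 31)

    def read(shadow: Boolean, reg: Int): Long = mem(index(shadow, reg))

    // Writes to x0 are dropped, like REGFILE_WRITE_VALID being cleared for rd = 0.
    def write(shadow: Boolean, reg: Int, data: Long): Unit =
      if (reg != 0) mem(index(shadow, reg)) = data
  }

  val rf = new RegFile(withShadow = true)
  rf.write(shadow = false, reg = 5, data = 42)
  rf.write(shadow = true,  reg = 5, data = 7)
  println(rf.read(shadow = false, reg = 5)) // 42
  println(rf.read(shadow = true,  reg = 5)) // 7  (separate shadow copy)
  println(rf.read(shadow = false, reg = 0)) // 0  (x0 never written)
}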
\ No newline at end of file diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/ShiftPlugins.scala b/VexRiscv/src/main/scala/vexriscv/plugin/ShiftPlugins.scala new file mode 100644 index 0000000..a4ae716 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/ShiftPlugins.scala @@ -0,0 +1,193 @@ +package vexriscv.plugin + +import vexriscv._ +import spinal.core._ +import spinal.lib.Reverse + + + +class FullBarrelShifterPlugin(earlyInjection : Boolean = false) extends Plugin[VexRiscv]{ + object ShiftCtrlEnum extends SpinalEnum(binarySequential){ + val DISABLE, SLL, SRL, SRA = newElement() + } + + object SHIFT_CTRL extends Stageable(ShiftCtrlEnum()) + object SHIFT_RIGHT extends Stageable(Bits(32 bits)) + + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + + + + val immediateActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.IMI, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> Bool(earlyInjection), + BYPASSABLE_MEMORY_STAGE -> True, + RS1_USE -> True + ) + + val nonImmediateActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.RS, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> Bool(earlyInjection), + BYPASSABLE_MEMORY_STAGE -> True, + RS1_USE -> True, + RS2_USE -> True + ) + + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(SHIFT_CTRL, ShiftCtrlEnum.DISABLE) + decoderService.add(List( + SLL -> (nonImmediateActions ++ List(SHIFT_CTRL -> ShiftCtrlEnum.SLL)), + SRL -> (nonImmediateActions ++ List(SHIFT_CTRL -> ShiftCtrlEnum.SRL)), + SRA -> (nonImmediateActions ++ List(SHIFT_CTRL -> ShiftCtrlEnum.SRA)) + )) + + decoderService.add(List( + SLLI -> (immediateActions ++ List(SHIFT_CTRL -> ShiftCtrlEnum.SLL)), + SRLI -> (immediateActions ++ List(SHIFT_CTRL -> ShiftCtrlEnum.SRL)), + SRAI -> (immediateActions ++ List(SHIFT_CTRL -> ShiftCtrlEnum.SRA)) + )) + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + + execute plug new Area{ + import execute._ + val amplitude = input(SRC2)(4 downto 0).asUInt + val reversed = Mux(input(SHIFT_CTRL) === ShiftCtrlEnum.SLL, Reverse(input(SRC1)), input(SRC1)) + insert(SHIFT_RIGHT) := (Cat(input(SHIFT_CTRL) === ShiftCtrlEnum.SRA & reversed.msb, reversed).asSInt >> amplitude)(31 downto 0).asBits + } + + val injectionStage = if(earlyInjection) execute else memory + injectionStage plug new Area{ + import injectionStage._ + when(arbitration.isValid){ + switch(input(SHIFT_CTRL)) { + is(ShiftCtrlEnum.SLL) { + output(REGFILE_WRITE_DATA) := Reverse(input(SHIFT_RIGHT)) + } + is(ShiftCtrlEnum.SRL, ShiftCtrlEnum.SRA) { + output(REGFILE_WRITE_DATA) := input(SHIFT_RIGHT) + } + } + } + } + } +} + + + + + + + + + + +class LightShifterPlugin extends Plugin[VexRiscv]{ + object ShiftCtrlEnum extends SpinalEnum(binarySequential){ + val DISABLE, SLL, SRL, SRA = newElement() + } + + object SHIFT_CTRL extends Stageable(ShiftCtrlEnum()) + + override def setup(pipeline: VexRiscv): Unit = { + import Riscv._ + import pipeline.config._ + import IntAluPlugin._ + + val immediateActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.IMI, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> True, + BYPASSABLE_MEMORY_STAGE -> True, + RS1_USE -> True, + + //Get SRC1 through the MMU to the RF write path + ALU_CTRL -> AluCtrlEnum.ADD_SUB, + SRC_USE_SUB_LESS -> 
False, + SRC_ADD_ZERO -> True + ) + + val nonImmediateActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.RS, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> True, + BYPASSABLE_MEMORY_STAGE -> True, + RS1_USE -> True, + RS2_USE -> True, + + //Get SRC1 through the MMU to the RF write path + ALU_CTRL -> AluCtrlEnum.ADD_SUB, + SRC_USE_SUB_LESS -> False, + SRC_ADD_ZERO -> True + ) + + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(SHIFT_CTRL, ShiftCtrlEnum.DISABLE) + decoderService.add(List( + SLL -> (nonImmediateActions ++ List(SHIFT_CTRL -> ShiftCtrlEnum.SLL)), + SRL -> (nonImmediateActions ++ List(SHIFT_CTRL -> ShiftCtrlEnum.SRL)), + SRA -> (nonImmediateActions ++ List(SHIFT_CTRL -> ShiftCtrlEnum.SRA)) + )) + + decoderService.add(List( + SLLI -> (immediateActions ++ List(SHIFT_CTRL -> ShiftCtrlEnum.SLL)), + SRLI -> (immediateActions ++ List(SHIFT_CTRL -> ShiftCtrlEnum.SRL)), + SRAI -> (immediateActions ++ List(SHIFT_CTRL -> ShiftCtrlEnum.SRA)) + )) + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + + execute plug new Area{ + import execute._ + + val isActive = RegInit(False) + val isShift = input(SHIFT_CTRL) =/= ShiftCtrlEnum.DISABLE + val amplitudeReg = Reg(UInt(5 bits)) + val amplitude = isActive ? amplitudeReg | input(SRC2)(4 downto 0).asUInt + val shiftReg = ifGen(!withMemoryStage) (RegNextWhen(execute.output(REGFILE_WRITE_DATA), !arbitration.isStuckByOthers)) + val shiftInput = isActive ? (if(withMemoryStage) memory.input(REGFILE_WRITE_DATA) else shiftReg) | input(SRC1) + val done = amplitude(4 downto 1) === 0 + + if(withMemoryStage) memory.dontSampleStageable(REGFILE_WRITE_DATA, arbitration.isStuckByOthers) + + when(arbitration.isValid && isShift && input(SRC2)(4 downto 0) =/= 0){ + output(REGFILE_WRITE_DATA) := input(SHIFT_CTRL).mux( + ShiftCtrlEnum.SLL -> (shiftInput |<< 1), + default -> (((input(SHIFT_CTRL) === ShiftCtrlEnum.SRA && shiftInput.msb) ## shiftInput).asSInt >> 1).asBits //ALU.SRL,ALU.SRA + ) + + when(!arbitration.isStuckByOthers){ + isActive := True + amplitudeReg := amplitude - 1 + + when(done){ + isActive := False + } + } + + when(!done){ + arbitration.haltItself := True + } + } + when(arbitration.removeIt){ + isActive := False + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/SingleInstructionLimiterPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/SingleInstructionLimiterPlugin.scala new file mode 100644 index 0000000..c6c9706 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/SingleInstructionLimiterPlugin.scala @@ -0,0 +1,17 @@ +package vexriscv.plugin + +import vexriscv._ +import spinal.core._ +import spinal.lib._ + + +class SingleInstructionLimiterPlugin() extends Plugin[VexRiscv] { + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + val fetcher = pipeline.service(classOf[IBusFetcher]) + when(fetcher.incoming() || List(decode,execute,memory,writeBack).map(_.arbitration.isValid).orR) { + fetcher.haltIt() + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/SrcPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/SrcPlugin.scala new file mode 100644 index 0000000..d67e7cc --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/SrcPlugin.scala @@ -0,0 +1,83 @@ +package vexriscv.plugin + +import vexriscv._ +import spinal.core._ +import spinal.lib.KeepAttribute + + +class SrcPlugin(separatedAddSub : 
Boolean = false, executeInsertion : Boolean = false, decodeAddSub : Boolean = false) extends Plugin[VexRiscv]{ + object SRC2_FORCE_ZERO extends Stageable(Bool) + + + override def setup(pipeline: VexRiscv): Unit = { + import pipeline.config._ + + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(SRC_ADD_ZERO, False) //TODO avoid this default to simplify decoding ? + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + decode.insert(SRC2_FORCE_ZERO) := decode.input(SRC_ADD_ZERO) && !decode.input(SRC_USE_SUB_LESS) + + val insertionStage = if(executeInsertion) execute else decode + insertionStage plug new Area{ + import insertionStage._ + + val imm = Riscv.IMM(input(INSTRUCTION)) + insert(SRC1) := input(SRC1_CTRL).mux( + Src1CtrlEnum.RS -> output(RS1), + Src1CtrlEnum.PC_INCREMENT -> (if(pipeline.config.withRvc) Mux(input(IS_RVC), B(2), B(4)) else B(4)).resized, + Src1CtrlEnum.IMU -> imm.u.resized, + Src1CtrlEnum.URS1 -> input(INSTRUCTION)(Riscv.rs1Range).resized + ) + insert(SRC2) := input(SRC2_CTRL).mux( + Src2CtrlEnum.RS -> output(RS2), + Src2CtrlEnum.IMI -> imm.i_sext.resized, + Src2CtrlEnum.IMS -> imm.s_sext.resized, + Src2CtrlEnum.PC -> output(PC).asBits + ) + } + + val addSubStage = if(decodeAddSub) decode else execute + if(separatedAddSub) { + addSubStage plug new Area { + import addSubStage._ + + // ADD, SUB + val add = (U(input(SRC1)) + U(input(SRC2))).asBits.addAttribute("keep") + val sub = (U(input(SRC1)) - U(input(SRC2))).asBits.addAttribute("keep") + when(input(SRC_ADD_ZERO)){ add := input(SRC1) } + + // SLT, SLTU + val less = Mux(input(SRC1).msb === input(SRC2).msb, sub.msb, + Mux(input(SRC_LESS_UNSIGNED), input(SRC2).msb, input(SRC1).msb)) + + insert(SRC_ADD_SUB) := input(SRC_USE_SUB_LESS) ? 
sub | add + insert(SRC_ADD) := add + insert(SRC_SUB) := sub + insert(SRC_LESS) := less + } + }else{ + addSubStage plug new Area { + import addSubStage._ + + // ADD, SUB + val addSub = (input(SRC1).asSInt + Mux(input(SRC_USE_SUB_LESS), ~input(SRC2), input(SRC2)).asSInt + Mux(input(SRC_USE_SUB_LESS), S(1, 32 bits), S(0, 32 bits))).asBits + when(input(SRC2_FORCE_ZERO)){ addSub := input(SRC1) } + + + // SLT, SLTU + val less = Mux(input(SRC1).msb === input(SRC2).msb, addSub.msb, + Mux(input(SRC_LESS_UNSIGNED), input(SRC2).msb, input(SRC1).msb)) + + insert(SRC_ADD_SUB) := addSub + insert(SRC_ADD) := addSub + insert(SRC_SUB) := addSub + insert(SRC_LESS) := less + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala new file mode 100644 index 0000000..cafd8de --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/StaticMemoryTranslatorPlugin.scala @@ -0,0 +1,41 @@ +package vexriscv.plugin + +import vexriscv.{VexRiscv, _} +import spinal.core._ +import spinal.lib._ + +import scala.collection.mutable.ArrayBuffer +case class StaticMemoryTranslatorPort(bus : MemoryTranslatorBus, priority : Int) + +class StaticMemoryTranslatorPlugin(ioRange : UInt => Bool) extends Plugin[VexRiscv] with MemoryTranslator { + val portsInfo = ArrayBuffer[StaticMemoryTranslatorPort]() + + override def newTranslationPort(priority : Int,args : Any): MemoryTranslatorBus = { + val port = StaticMemoryTranslatorPort(MemoryTranslatorBus(MemoryTranslatorBusParameter(wayCount = 0)),priority) + portsInfo += port + port.bus + } + + override def setup(pipeline: VexRiscv): Unit = { + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + import Riscv._ + + val core = pipeline plug new Area { + val ports = for ((port, portId) <- portsInfo.zipWithIndex) yield new Area { + port.bus.rsp.physicalAddress := port.bus.cmd.last.virtualAddress + port.bus.rsp.allowRead := True + port.bus.rsp.allowWrite := True + port.bus.rsp.allowExecute := True + port.bus.rsp.isIoAccess := ioRange(port.bus.rsp.physicalAddress) + port.bus.rsp.isPaging := False + port.bus.rsp.exception := False + port.bus.rsp.refilling := False + port.bus.busy := False + } + } + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/VfuPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/VfuPlugin.scala new file mode 100644 index 0000000..a2c0930 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/VfuPlugin.scala @@ -0,0 +1,136 @@ +package vexriscv.plugin + +import vexriscv.{DecoderService, ExceptionCause, ExceptionService, Stage, Stageable, VexRiscv} +import spinal.core._ +import spinal.lib._ +import spinal.lib.bus.bmb.WeakConnector +import spinal.lib.bus.misc.{AddressMapping, DefaultMapping} +import vexriscv.Riscv.IMM + + +object VfuPlugin{ + val ROUND_MODE_WIDTH = 3 + +} + + +case class VfuParameter() //Empty for now + +case class VfuCmd( p : VfuParameter ) extends Bundle{ + val instruction = Bits(32 bits) + val inputs = Vec(Bits(32 bits), 2) + val rounding = Bits(VfuPlugin.ROUND_MODE_WIDTH bits) +} + +case class VfuRsp(p : VfuParameter) extends Bundle{ + val output = Bits(32 bits) +} + +case class VfuBus(p : VfuParameter) extends Bundle with IMasterSlave{ + val cmd = Stream(VfuCmd(p)) + val rsp = Stream(VfuRsp(p)) + + def <<(m : VfuBus) : Unit = { + val s = this + s.cmd << m.cmd + m.rsp << s.rsp + } + + override def asMaster(): Unit = { + master(cmd) + slave(rsp) + } +} + + + +class 
VfuPlugin(val stageCount : Int, + val allowZeroLatency : Boolean, + val parameter : VfuParameter) extends Plugin[VexRiscv]{ + def p = parameter + + var bus : VfuBus = null + + lazy val forkStage = pipeline.execute + lazy val joinStage = pipeline.stages(Math.min(pipeline.stages.length - 1, pipeline.indexOf(forkStage) + stageCount)) + + + object VFU_ENABLE extends Stageable(Bool()) + object VFU_IN_FLIGHT extends Stageable(Bool()) + + override def setup(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + bus = master(VfuBus(p)) + + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(VFU_ENABLE, False) + + decoderService.add( + key = M"-------------------------0001011", + values = List( + VFU_ENABLE -> True, + REGFILE_WRITE_VALID -> True, //If you want to write something back into the integer register file + BYPASSABLE_EXECUTE_STAGE -> Bool(stageCount == 0), + BYPASSABLE_MEMORY_STAGE -> Bool(stageCount <= 1), + RS1_USE -> True, + RS2_USE -> True + ) + ) + } + + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + + val csr = pipeline plug new Area{ + val factory = pipeline.service(classOf[CsrInterface]) + val rounding = Reg(Bits(VfuPlugin.ROUND_MODE_WIDTH bits)) + + factory.rw(csrAddress = 0xBC0, bitOffset = 0, that = rounding) + } + + + forkStage plug new Area{ + import forkStage._ + val hazard = stages.dropWhile(_ != forkStage).tail.map(s => s.arbitration.isValid && s.input(HAS_SIDE_EFFECT)).orR + val scheduleWish = arbitration.isValid && input(VFU_ENABLE) + val schedule = scheduleWish && !hazard + arbitration.haltItself setWhen(scheduleWish && hazard) + + val hold = RegInit(False) setWhen(schedule) clearWhen(bus.cmd.ready) + val fired = RegInit(False) setWhen(bus.cmd.fire) clearWhen(!arbitration.isStuck) + insert(VFU_IN_FLIGHT) := schedule || hold || fired + + bus.cmd.valid := (schedule || hold) && !fired + arbitration.haltItself setWhen(bus.cmd.valid && !bus.cmd.ready) + + bus.cmd.instruction := input(INSTRUCTION) + bus.cmd.inputs(0) := input(RS1) + bus.cmd.inputs(1) := input(RS2) + bus.cmd.rounding := csr.rounding + } + + joinStage plug new Area{ + import joinStage._ + + val rsp = if(forkStage != joinStage && allowZeroLatency) { + bus.rsp.s2mPipe() + } else { + bus.rsp.combStage() + } + + rsp.ready := False + when(input(VFU_IN_FLIGHT) && input(REGFILE_WRITE_VALID)){ + arbitration.haltItself setWhen(!bus.rsp.valid) + rsp.ready := !arbitration.isStuckByOthers + output(REGFILE_WRITE_DATA) := bus.rsp.output + } + } + + pipeline.stages.drop(1).foreach(s => s.output(VFU_IN_FLIGHT) clearWhen(s.arbitration.isStuck)) + addPrePopTask(() => stages.dropWhile(_ != memory).reverse.dropWhile(_ != joinStage).foreach(s => s.input(VFU_IN_FLIGHT).init(False))) + } +} + diff --git a/VexRiscv/src/main/scala/vexriscv/plugin/YamlPlugin.scala b/VexRiscv/src/main/scala/vexriscv/plugin/YamlPlugin.scala new file mode 100644 index 0000000..ca53e42 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/plugin/YamlPlugin.scala @@ -0,0 +1,32 @@ +package vexriscv.plugin + +import java.util + +import vexriscv.{ReportService, VexRiscv} +import org.yaml.snakeyaml.{DumperOptions, Yaml} + + +/** + * Created by spinalvm on 09.06.17. 
+ */ +class YamlPlugin(path : String) extends Plugin[VexRiscv] with ReportService{ + + val content = new util.HashMap[String, Object]() + + def add(that : (String,Object)) : Unit = content.put(that._1,that._2) + + override def setup(pipeline: VexRiscv): Unit = { + + } + + override def build(pipeline: VexRiscv): Unit = { + val options = new DumperOptions() + options.setWidth(50) + options.setIndent(4) + options.setCanonical(true) + options.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK) + + val yaml = new Yaml() + yaml.dump(content, new java.io.FileWriter(path)) + } +} diff --git a/VexRiscv/src/main/scala/vexriscv/test/Swing.scala b/VexRiscv/src/main/scala/vexriscv/test/Swing.scala new file mode 100644 index 0000000..b3a1637 --- /dev/null +++ b/VexRiscv/src/main/scala/vexriscv/test/Swing.scala @@ -0,0 +1,55 @@ +package vexriscv.test + +import java.awt.event.{MouseEvent, MouseListener} +import java.awt.{Color, Dimension, Graphics} +import javax.swing.JPanel + +abstract class JLedArray(ledCount : Int,ledDiameter : Int = 20, blackThickness : Int = 2) extends JPanel{ + def getValue() : BigInt + + override def paintComponent(g : Graphics) : Unit = { + val value = getValue() + for(i <- 0 to ledCount-1) { + g.setColor(Color.BLACK) + val x = i*ledDiameter + 1 + g.fillOval(x,1,ledDiameter,ledDiameter) + if (((value >> (ledCount-1-i)) & 1) != 0) { + g.setColor(Color.GREEN.darker()) + g.fillOval(x+blackThickness,3,ledDiameter-blackThickness*2,ledDiameter-blackThickness*2); + } + } + g.setColor(Color.BLACK) + } + this.setPreferredSize(new Dimension(ledDiameter*ledCount+2, ledDiameter+2)) +} + +class JSwitchArray(ledCount : Int,switchDiameter : Int = 20, blackThickness : Int = 2) extends JPanel{ + var value = BigInt(0) + def getValue() = value + addMouseListener(new MouseListener { + override def mouseExited(mouseEvent: MouseEvent): Unit = {} + override def mousePressed(mouseEvent: MouseEvent): Unit = {} + override def mouseReleased(mouseEvent: MouseEvent): Unit = {} + override def mouseEntered(mouseEvent: MouseEvent): Unit = {} + override def mouseClicked(mouseEvent: MouseEvent): Unit = { + val idx = ledCount-1-(mouseEvent.getX-2)/switchDiameter + value ^= BigInt(1) << idx + } + }) + override def paintComponent(g : Graphics) : Unit = { + for(i <- 0 to ledCount-1) { + g.setColor(Color.GRAY.darker()) + val x = i*switchDiameter + 1 + g.fillRect(x,1,switchDiameter,switchDiameter) + if (((value >> (ledCount-1-i)) & 1) != 0) { + g.setColor(Color.GRAY) + }else{ + g.setColor(Color.GRAY.brighter()) + } + g.fillRect(x+blackThickness,3,switchDiameter-blackThickness*2,switchDiameter-blackThickness*2); + + } + g.setColor(Color.BLACK) + } + this.setPreferredSize(new Dimension(switchDiameter*ledCount+2, switchDiameter+2)) +}
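JLedArray above is abstract (getValue() must be supplied) and JSwitchArray toggles one bit per mouse click, with the most significant bit drawn on the left. A small usage sketch, assuming a plain Swing frame and a periodic repaint (names outside vexriscv.test are illustrative):

import java.awt.BorderLayout
import java.awt.event.{ActionEvent, ActionListener}
import javax.swing.{JFrame, Timer, WindowConstants}
import vexriscv.test.{JLedArray, JSwitchArray}

object SwingDemo extends App {
  val switches = new JSwitchArray(8)
  val leds = new JLedArray(8) {
    override def getValue(): BigInt = switches.getValue() // LEDs mirror the switch state
  }

  val frame = new JFrame("JLedArray / JSwitchArray demo")
  frame.setLayout(new BorderLayout())
  frame.add(switches, BorderLayout.NORTH)
  frame.add(leds, BorderLayout.SOUTH)
  frame.pack()
  frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE)
  frame.setVisible(true)

  // Repaint the LEDs periodically so they track the switches.
  new Timer(100, new ActionListener {
    override def actionPerformed(e: ActionEvent): Unit = leds.repaint()
  }).start()
}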
\ No newline at end of file