diff --git a/coupledL2 b/coupledL2 index 394b7392f5..0f9f935158 160000 --- a/coupledL2 +++ b/coupledL2 @@ -1 +1 @@ -Subproject commit 394b7392f5899ae277b0ff55b9ad694afbf3e4f9 +Subproject commit 0f9f93515853e66b8b1480fbe3f73508b1e270cc diff --git a/huancun b/huancun index 3fc7e7e0c2..90aaf59352 160000 --- a/huancun +++ b/huancun @@ -1 +1 @@ -Subproject commit 3fc7e7e0c2127c601b2a7d180f49845421a86d8d +Subproject commit 90aaf5935206ff322e461c3d021436c20dd0ac85 diff --git a/openLLC b/openLLC index 466bfd7663..8bec4d029b 160000 --- a/openLLC +++ b/openLLC @@ -1 +1 @@ -Subproject commit 466bfd766349934a3898b78d8307dd343f3977e8 +Subproject commit 8bec4d029bdf985f075396e641d514d1a7a19c15 diff --git a/src/main/scala/top/Top.scala b/src/main/scala/top/Top.scala index 81d7983bc3..bc07105b20 100644 --- a/src/main/scala/top/Top.scala +++ b/src/main/scala/top/Top.scala @@ -354,6 +354,9 @@ class XSTop()(implicit p: Parameters) extends BaseXSSoc() with HasSoCParameter core_with_l2.zip(chi_openllc_opt.get.io.debugTopDown.addrMatch).foreach { case (tile, l3Match) => tile.module.io.debugTopDown.l3MissMatch := l3Match } + chi_openllc_opt.foreach { l3 => /* fan the single LLC miss flag out to every tile; zipping tiles with an Option pairs at most one */ + core_with_l2.foreach(_.module.io.l3Miss := l3.io.l3Miss) + } } } @@ -379,11 +382,14 @@ class XSTop()(implicit p: Parameters) extends BaseXSSoc() with HasSoCParameter } l3.module.io.debugTopDown.robHeadPaddr := core_with_l2.map(_.module.io.debugTopDown.robHeadPaddr) core_with_l2.zip(l3.module.io.debugTopDown.addrMatch).foreach { case (tile, l3Match) => tile.module.io.debugTopDown.l3MissMatch := l3Match } + core_with_l2.foreach(_.module.io.l3Miss := l3.module.io.l3Miss) case None => } (chi_openllc_opt, l3cacheOpt) match { - case (None, None) => core_with_l2.foreach(_.module.io.debugTopDown.l3MissMatch := false.B) + case (None, None) => + core_with_l2.foreach(_.module.io.debugTopDown.l3MissMatch := false.B) + core_with_l2.foreach(_.module.io.l3Miss := false.B) case _ => } diff --git a/src/main/scala/top/XSNoCTop.scala 
b/src/main/scala/top/XSNoCTop.scala index 34189ced58..f818ae05dc 100644 --- a/src/main/scala/top/XSNoCTop.scala +++ b/src/main/scala/top/XSNoCTop.scala @@ -205,6 +205,7 @@ class XSNoCTop()(implicit p: Parameters) extends BaseXSSoc with HasSoCParameter core_rst_node.out.head._1 := false.B.asAsyncReset core_with_l2.module.io.debugTopDown.l3MissMatch := false.B + core_with_l2.module.io.l3Miss := false.B } lazy val module = new XSNoCTopImp(this) diff --git a/src/main/scala/xiangshan/Bundle.scala b/src/main/scala/xiangshan/Bundle.scala index a72daa2e45..a424a19285 100644 --- a/src/main/scala/xiangshan/Bundle.scala +++ b/src/main/scala/xiangshan/Bundle.scala @@ -783,3 +783,15 @@ class L2ToL1Hint(implicit p: Parameters) extends XSBundle with HasDCacheParamete val isKeyword = Bool() // miss entry keyword -> L1 load queue replay } +class TopDownInfo(implicit p: Parameters) extends XSBundle { + val lqEmpty = Input(Bool()) + val sqEmpty = Input(Bool()) + val l1Miss = Input(Bool()) + val noUopsIssued = Output(Bool()) + val l2TopMiss = Input(new TopDownFromL2Top) +} + +class TopDownFromL2Top(implicit p: Parameters) extends XSBundle { + val l2Miss = Bool() + val l3Miss = Bool() +} diff --git a/src/main/scala/xiangshan/L2Top.scala b/src/main/scala/xiangshan/L2Top.scala index 937580f438..8a4f229e4e 100644 --- a/src/main/scala/xiangshan/L2Top.scala +++ b/src/main/scala/xiangshan/L2Top.scala @@ -183,6 +183,11 @@ class L2TopInlined()(implicit p: Parameters) extends LazyModule val robHeadPaddr = Flipped(Valid(UInt(36.W))) val l2MissMatch = Output(Bool()) } + val l2Miss = Output(Bool()) + val l3Miss = new Bundle { + val fromTile = Input(Bool()) + val toCore = Output(Bool()) + } val chi = if (enableCHI) Some(new PortIO) else None val nodeID = if (enableCHI) Some(Input(UInt(NodeIDWidth.W))) else None val l2_tlb_req = new TlbRequestIO(nRespDups = 2) @@ -201,6 +206,7 @@ class L2TopInlined()(implicit p: Parameters) extends LazyModule io.hartId.toCore := io.hartId.fromTile io.cpu_halt.toTile 
:= io.cpu_halt.fromCore io.cpu_critical_error.toTile := io.cpu_critical_error.fromCore + io.l3Miss.toCore := io.l3Miss.fromTile // trace interface val traceToTile = io.traceCoreInterface.toTile val traceFromCore = io.traceCoreInterface.fromCore @@ -244,6 +250,7 @@ class L2TopInlined()(implicit p: Parameters) extends LazyModule l2.io.debugTopDown.robHeadPaddr := io.debugTopDown.robHeadPaddr l2.io.debugTopDown.robTrueCommit := io.debugTopDown.robTrueCommit io.debugTopDown.l2MissMatch := l2.io.debugTopDown.l2MissMatch + io.l2Miss := l2.io.l2Miss /* l2 tlb */ io.l2_tlb_req.req.bits := DontCare @@ -290,6 +297,7 @@ class L2TopInlined()(implicit p: Parameters) extends LazyModule } else { io.l2_hint := 0.U.asTypeOf(io.l2_hint) io.debugTopDown <> DontCare + io.l2Miss := false.B io.l2_tlb_req.req.valid := false.B io.l2_tlb_req.req.bits := DontCare diff --git a/src/main/scala/xiangshan/XSCore.scala b/src/main/scala/xiangshan/XSCore.scala index d99279a4aa..8bd1908481 100644 --- a/src/main/scala/xiangshan/XSCore.scala +++ b/src/main/scala/xiangshan/XSCore.scala @@ -102,6 +102,10 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer) val l2MissMatch = Input(Bool()) val l3MissMatch = Input(Bool()) } + val topDownInfo = Input(new Bundle { + val l2Miss = Bool() + val l3Miss = Bool() + }) }) println(s"FPGAPlatform:${env.FPGAPlatform} EnableDebug:${env.EnableDebug}") @@ -253,6 +257,14 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer) io.resetInFrontend := memBlock.io.resetInFrontendBypass.toL2Top memBlock.io.traceCoreInterfaceBypass.fromBackend <> backend.io.traceCoreInterface io.traceCoreInterface <> memBlock.io.traceCoreInterfaceBypass.toL2Top + memBlock.io.topDownInfo.fromL2Top.l2Miss := io.topDownInfo.l2Miss + memBlock.io.topDownInfo.fromL2Top.l3Miss := io.topDownInfo.l3Miss + memBlock.io.topDownInfo.toBackend.noUopsIssued := backend.io.topDownInfo.noUopsIssued + backend.io.topDownInfo.lqEmpty := memBlock.io.topDownInfo.toBackend.lqEmpty + 
backend.io.topDownInfo.sqEmpty := memBlock.io.topDownInfo.toBackend.sqEmpty + backend.io.topDownInfo.l1Miss := memBlock.io.topDownInfo.toBackend.l1Miss + backend.io.topDownInfo.l2TopMiss.l2Miss := memBlock.io.topDownInfo.toBackend.l2TopMiss.l2Miss + backend.io.topDownInfo.l2TopMiss.l3Miss := memBlock.io.topDownInfo.toBackend.l2TopMiss.l3Miss if (debugOpts.ResetGen) { diff --git a/src/main/scala/xiangshan/XSTile.scala b/src/main/scala/xiangshan/XSTile.scala index e88be0db82..aadd04a0f9 100644 --- a/src/main/scala/xiangshan/XSTile.scala +++ b/src/main/scala/xiangshan/XSTile.scala @@ -110,6 +110,7 @@ class XSTile()(implicit p: Parameters) extends LazyModule val robHeadPaddr = Valid(UInt(PAddrBits.W)) val l3MissMatch = Input(Bool()) } + val l3Miss = Input(Bool()) val chi = if (enableCHI) Some(new PortIO) else None val nodeID = if (enableCHI) Some(Input(UInt(NodeIDWidth.W))) else None val clintTime = Input(ValidIO(UInt(64.W))) @@ -153,6 +154,7 @@ class XSTile()(implicit p: Parameters) extends LazyModule l2top.module.io.debugTopDown.robTrueCommit := core.module.io.debugTopDown.robTrueCommit l2top.module.io.l2_pmp_resp := core.module.io.l2_pmp_resp core.module.io.l2_tlb_req <> l2top.module.io.l2_tlb_req + core.module.io.topDownInfo.l2Miss := l2top.module.io.l2Miss core.module.io.perfEvents <> l2top.module.io.perfEvents } else { @@ -164,6 +166,7 @@ class XSTile()(implicit p: Parameters) extends LazyModule core.module.io.l2PfqBusy := false.B core.module.io.debugTopDown.l2MissMatch := false.B + core.module.io.topDownInfo.l2Miss := false.B core.module.io.l2_tlb_req.req.valid := false.B core.module.io.l2_tlb_req.req.bits := DontCare @@ -175,6 +178,8 @@ class XSTile()(implicit p: Parameters) extends LazyModule io.debugTopDown.robHeadPaddr := core.module.io.debugTopDown.robHeadPaddr core.module.io.debugTopDown.l3MissMatch := io.debugTopDown.l3MissMatch + l2top.module.io.l3Miss.fromTile := io.l3Miss + core.module.io.topDownInfo.l3Miss := l2top.module.io.l3Miss.toCore 
io.chi.foreach(_ <> l2top.module.io.chi.get) l2top.module.io.nodeID.foreach(_ := io.nodeID.get) diff --git a/src/main/scala/xiangshan/XSTileWrap.scala b/src/main/scala/xiangshan/XSTileWrap.scala index 3ce65b1ad9..76a10bcead 100644 --- a/src/main/scala/xiangshan/XSTileWrap.scala +++ b/src/main/scala/xiangshan/XSTileWrap.scala @@ -67,6 +67,7 @@ class XSTileWrap()(implicit p: Parameters) extends LazyModule val robHeadPaddr = Valid(UInt(PAddrBits.W)) val l3MissMatch = Input(Bool()) } + val l3Miss = Input(Bool()) val chi = EnableCHIAsyncBridge match { case Some(param) => new AsyncPortIO(param) case None => new PortIO @@ -97,6 +98,7 @@ class XSTileWrap()(implicit p: Parameters) extends LazyModule io.hartIsInReset := tile.module.io.hartIsInReset io.traceCoreInterface <> tile.module.io.traceCoreInterface io.debugTopDown <> tile.module.io.debugTopDown + tile.module.io.l3Miss := io.l3Miss tile.module.io.nodeID.foreach(_ := io.nodeID.get) // CLINT Async Queue Sink diff --git a/src/main/scala/xiangshan/backend/Backend.scala b/src/main/scala/xiangshan/backend/Backend.scala index b0e208b758..10d00c7c31 100644 --- a/src/main/scala/xiangshan/backend/Backend.scala +++ b/src/main/scala/xiangshan/backend/Backend.scala @@ -492,6 +492,11 @@ class BackendInlinedImp(override val wrapper: BackendInlined)(implicit p: Parame dataPath.io.fromBypassNetwork := bypassNetwork.io.toDataPath dataPath.io.fromVecExcpMod.r := vecExcpMod.o.toVPRF.r dataPath.io.fromVecExcpMod.w := vecExcpMod.o.toVPRF.w + dataPath.io.topDownInfo.lqEmpty := DelayN(io.topDownInfo.lqEmpty, 2) + dataPath.io.topDownInfo.sqEmpty := DelayN(io.topDownInfo.sqEmpty, 2) + dataPath.io.topDownInfo.l1Miss := RegNext(io.topDownInfo.l1Miss) + dataPath.io.topDownInfo.l2TopMiss.l2Miss := io.topDownInfo.l2TopMiss.l2Miss + dataPath.io.topDownInfo.l2TopMiss.l3Miss := io.topDownInfo.l2TopMiss.l3Miss og2ForVector.io.flush := ctrlBlock.io.toDataPath.flush og2ForVector.io.ldCancel := io.mem.ldCancel @@ -851,6 +856,8 @@ class 
BackendInlinedImp(override val wrapper: BackendInlined)(implicit p: Parame io.debugRolling := ctrlBlock.io.debugRolling + io.topDownInfo.noUopsIssued := RegNext(dataPath.io.topDownInfo.noUopsIssued) + if(backendParams.debugEn) { dontTouch(memScheduler.io) dontTouch(dataPath.io.toMemExu) @@ -897,10 +904,12 @@ class BackendInlinedImp(override val wrapper: BackendInlined)(implicit p: Parame val fpSchedulerPerf = fpScheduler.asInstanceOf[SchedulerArithImp].getPerfEvents val vecSchedulerPerf = vfScheduler.asInstanceOf[SchedulerArithImp].getPerfEvents val memSchedulerPerf = memScheduler.asInstanceOf[SchedulerMemImp].getPerfEvents + val dataPathPerf = dataPath.getPerfEvents val perfBackend = Seq() // let index = 0 be no event - val allPerfEvents = Seq(("noEvent", 0.U)) ++ ctrlBlockPerf ++ intSchedulerPerf ++ fpSchedulerPerf ++ vecSchedulerPerf ++ memSchedulerPerf ++ perfBackend + val allPerfEvents = Seq(("noEvent", 0.U)) ++ ctrlBlockPerf ++ dataPathPerf ++ + intSchedulerPerf ++ fpSchedulerPerf ++ vecSchedulerPerf ++ memSchedulerPerf ++ perfBackend if (printEventCoding) { @@ -1061,4 +1070,5 @@ class BackendIO(implicit p: Parameters, params: BackendParams) extends XSBundle val fromCore = new CoreDispatchTopDownIO } val debugRolling = new RobDebugRollingIO + val topDownInfo = new TopDownInfo } diff --git a/src/main/scala/xiangshan/backend/MemBlock.scala b/src/main/scala/xiangshan/backend/MemBlock.scala index b7472c9305..b881abc6af 100644 --- a/src/main/scala/xiangshan/backend/MemBlock.scala +++ b/src/main/scala/xiangshan/backend/MemBlock.scala @@ -351,6 +351,11 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer) val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true)) val toL2Top = new TraceCoreInterface } + + val topDownInfo = new Bundle { + val fromL2Top = Input(new TopDownFromL2Top) + val toBackend = Flipped(new TopDownInfo) + } }) dontTouch(io.inner_hartId) @@ -1993,6 +1998,13 @@ class MemBlockInlinedImp(outer: MemBlockInlined) 
extends LazyModuleImp(outer) dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay dcache.io.debugRolling := io.debugRolling + lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued + io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty + io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty + io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss + io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss) + io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss) + val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType))) val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType))) val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount diff --git a/src/main/scala/xiangshan/backend/datapath/DataPath.scala b/src/main/scala/xiangshan/backend/datapath/DataPath.scala index 7196d5de98..b7cb67ee20 100644 --- a/src/main/scala/xiangshan/backend/datapath/DataPath.scala +++ b/src/main/scala/xiangshan/backend/datapath/DataPath.scala @@ -18,6 +18,7 @@ import xiangshan.backend.issue.{FpScheduler, ImmExtractor, IntScheduler, MemSche import xiangshan.backend.issue.EntryBundles._ import xiangshan.backend.regfile._ import xiangshan.backend.regcache._ +import xiangshan.backend.fu.FuConfig import xiangshan.backend.fu.FuType.is0latency import xiangshan.mem.{LqPtr, SqPtr} @@ -36,7 +37,7 @@ class DataPath(params: BackendParams)(implicit p: Parameters) extends LazyModule } class DataPathImp(override val wrapper: DataPath)(implicit p: Parameters, params: BackendParams) - extends LazyModuleImp(wrapper) with HasXSParameter { + extends LazyModuleImp(wrapper) with HasXSParameter with HasPerfEvents { val io = IO(new DataPathIO()) @@ -837,6 +838,52 @@ class DataPathImp(override val wrapper: DataPath)(implicit p: Parameters, params 
XSPerfAccumulate(s"MEM_ExuId${exuParams.exuIdx}_src0_dataSource_zero", exu.fire && exu.bits.common.dataSources(0).readZero) } }) + + // Top-Down + def FewUops = 4 + + val lqEmpty = io.topDownInfo.lqEmpty + val sqEmpty = io.topDownInfo.sqEmpty + val l1Miss = io.topDownInfo.l1Miss + val l2Miss = io.topDownInfo.l2TopMiss.l2Miss + val l3Miss = io.topDownInfo.l2TopMiss.l3Miss + + val uopsIssued = fromIQ.flatten.map(_.fire).reduce(_ || _) + val uopsIssuedCnt = PopCount(fromIQ.flatten.map(_.fire)) + val fewUopsIssued = (0 until FewUops).map(_.U === uopsIssuedCnt).reduce(_ || _) + + val stallLoad = !uopsIssued + + val noStoreIssued = !fromMemIQ.flatten.filter(memIq => memIq.bits.exuParams.fuConfigs.contains(FuConfig.StaCfg) || + memIq.bits.exuParams.fuConfigs.contains(FuConfig.StdCfg) + ).map(_.fire).reduce(_ || _) + val stallStore = uopsIssued && noStoreIssued + + val stallLoadReg = DelayN(stallLoad, 2) + val stallStoreReg = DelayN(stallStore, 2) + + val memStallAnyLoad = stallLoadReg && !lqEmpty + val memStallStore = stallStoreReg && !sqEmpty + val memStallL1Miss = memStallAnyLoad && l1Miss + val memStallL2Miss = memStallL1Miss && l2Miss + val memStallL3Miss = memStallL2Miss && l3Miss + + io.topDownInfo.noUopsIssued := stallLoad + + XSPerfAccumulate("exec_stall_cycle", fewUopsIssued) + XSPerfAccumulate("mem_stall_store", memStallStore) + XSPerfAccumulate("mem_stall_l1miss", memStallL1Miss) + XSPerfAccumulate("mem_stall_l2miss", memStallL2Miss) + XSPerfAccumulate("mem_stall_l3miss", memStallL3Miss) + + val perfEvents = Seq( + ("EXEC_STALL_CYCLE", fewUopsIssued), + ("MEMSTALL_STORE", memStallStore), + ("MEMSTALL_L1MISS", memStallL1Miss), + ("MEMSTALL_L2MISS", memStallL2Miss), + ("MEMSTALL_L3MISS", memStallL3Miss), + ) + generatePerfEvent() } class DataPathIO()(implicit p: Parameters, params: BackendParams) extends XSBundle { @@ -923,4 +970,6 @@ class DataPathIO()(implicit p: Parameters, params: BackendParams) extends XSBund val diffV0Rat = if (params.basicDebugEn) 
Some(Input(Vec(1, UInt(log2Up(V0PhyRegs).W)))) else None val diffVlRat = if (params.basicDebugEn) Some(Input(Vec(1, UInt(log2Up(VlPhyRegs).W)))) else None val diffVl = if (params.basicDebugEn) Some(Output(UInt(VlData().dataWidth.W))) else None + + val topDownInfo = new TopDownInfo } diff --git a/src/main/scala/xiangshan/backend/decode/DecodeStage.scala b/src/main/scala/xiangshan/backend/decode/DecodeStage.scala index b003a39f1b..45cf5891a5 100644 --- a/src/main/scala/xiangshan/backend/decode/DecodeStage.scala +++ b/src/main/scala/xiangshan/backend/decode/DecodeStage.scala @@ -294,11 +294,20 @@ class DecodeStage(implicit p: Parameters) extends XSModule io.toCSR.trapInstInfo.valid := hasIllegalInst && !io.redirect io.toCSR.trapInstInfo.bits.fromDecodedInst(illegalInst) + val recoveryFlag = RegInit(false.B) + when(io.redirect) { + recoveryFlag := true.B + }.elsewhen(io.in.map(_.fire).reduce(_ || _)) { + recoveryFlag := false.B + } + XSPerfAccumulate("in_valid_count", PopCount(io.in.map(_.valid))) XSPerfAccumulate("in_fire_count", PopCount(io.in.map(_.fire))) XSPerfAccumulate("in_valid_not_ready_count", PopCount(io.in.map(x => x.valid && !x.ready))) XSPerfAccumulate("stall_cycle", io.in.head match { case x => x.valid && !x.ready}) XSPerfAccumulate("wait_cycle", !io.in.head.valid && io.out.head.ready) + XSPerfAccumulate("inst_spec", PopCount(io.in.map(_.fire))) + XSPerfAccumulate("recovery_bubble", recoveryFlag) XSPerfHistogram("in_valid_range", PopCount(io.in.map(_.valid)), true.B, 0, DecodeWidth + 1, 1) XSPerfHistogram("in_fire_range", PopCount(io.in.map(_.fire)), true.B, 0, DecodeWidth + 1, 1) @@ -312,6 +321,8 @@ class DecodeStage(implicit p: Parameters) extends XSModule ("decoder_waitInstr", PopCount(inValidNotReady) ), ("decoder_stall_cycle", hasValid && !io.out(0).ready), ("decoder_utilization", PopCount(io.in.map(_.valid))), + ("INST_SPEC", PopCount(io.in.map(_.fire))), + ("RECOVERY_BUBBLE", recoveryFlag) ) generatePerfEvent() diff --git 
a/src/main/scala/xiangshan/backend/rob/Rob.scala b/src/main/scala/xiangshan/backend/rob/Rob.scala index a83eabd6fa..92ef518092 100644 --- a/src/main/scala/xiangshan/backend/rob/Rob.scala +++ b/src/main/scala/xiangshan/backend/rob/Rob.scala @@ -137,6 +137,7 @@ class RobImp(override val wrapper: Rob)(implicit p: Parameters, params: BackendP val redirectWBs = io.writeback.filter(x => x.bits.redirect.nonEmpty).toSeq val vxsatWBs = io.exuWriteback.filter(x => x.bits.vxsat.nonEmpty).toSeq val branchWBs = io.exuWriteback.filter(_.bits.params.hasBrhFu).toSeq + val jmpWBs = io.exuWriteback.filter(_.bits.params.hasJmpFu).toSeq val csrWBs = io.exuWriteback.filter(x => x.bits.params.hasCSR).toSeq val numExuWbPorts = exuWBs.length @@ -611,6 +612,7 @@ class RobImp(override val wrapper: Rob)(implicit p: Parameters, params: BackendP io.flushOut.bits.ftqOffset := Mux(needModifyFtqIdxOffset, firstVInstrFtqOffset, deqPtrEntry.ftqOffset) io.flushOut.bits.level := Mux(deqHasReplayInst || intrEnable || deqHasException || needModifyFtqIdxOffset, RedirectLevel.flush, RedirectLevel.flushAfter) // TODO use this to implement "exception next" io.flushOut.bits.interrupt := true.B + XSPerfAccumulate("flush_num", io.flushOut.valid) XSPerfAccumulate("interrupt_num", io.flushOut.valid && intrEnable) XSPerfAccumulate("exception_num", io.flushOut.valid && deqHasException) XSPerfAccumulate("flush_pipe_num", io.flushOut.valid && isFlushPipe) @@ -1560,6 +1562,12 @@ class RobImp(override val wrapper: Rob)(implicit p: Parameters, params: BackendP } } + val brhMispred = PopCount(branchWBs.map(wb => wb.valid && wb.bits.redirect.get.valid)) + val jmpMispred = PopCount(jmpWBs.filterNot(_.bits.params.hasBrhFu).map(wb => wb.valid && wb.bits.redirect.get.valid)) /* ports that are also in branchWBs are counted by brhMispred, so skip them here */ + val misPred = brhMispred +& jmpMispred + + XSPerfAccumulate("br_mis_pred", misPred) + val commitLoadVec = VecInit(commitLoadValid) val commitBranchVec = VecInit(commitBranchValid) val commitStoreVec = VecInit(io.commits.commitValid.zip(commitIsStore).map { case (v, t) => v && t }) @@ 
-1580,6 +1588,8 @@ class RobImp(override val wrapper: Rob)(implicit p: Parameters, params: BackendP ("rob_2_4_valid ", numValidEntries > (RobSize / 4).U && numValidEntries <= (RobSize / 2).U), ("rob_3_4_valid ", numValidEntries > (RobSize / 2).U && numValidEntries <= (RobSize * 3 / 4).U), ("rob_4_4_valid ", numValidEntries > (RobSize * 3 / 4).U), + ("BR_MIS_PRED ", misPred), + ("TOTAL_FLUSH ", io.flushOut.valid) ) generatePerfEvent() diff --git a/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala b/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala index c145cb3d4c..1064ba6f84 100644 --- a/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala +++ b/src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala @@ -810,6 +810,7 @@ class DCacheIO(implicit p: Parameters) extends DCacheBundle { val l2_hint = Input(Valid(new L2ToL1Hint())) val cmoOpReq = Flipped(DecoupledIO(new CMOReq)) val cmoOpResp = DecoupledIO(new CMOResp) + val l1Miss = Output(Bool()) } private object ArbiterCtrl { @@ -1676,6 +1677,7 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame XSPerfAccumulate("num_loads", num_loads) io.mshrFull := missQueue.io.full + io.l1Miss := missQueue.io.l1Miss // performance counter // val ld_access = Wire(Vec(LoadPipelineWidth, missQueue.io.debug_early_replace.last.cloneType)) diff --git a/src/main/scala/xiangshan/cache/dcache/mainpipe/MissQueue.scala b/src/main/scala/xiangshan/cache/dcache/mainpipe/MissQueue.scala index f79050ae07..00a19839c7 100644 --- a/src/main/scala/xiangshan/cache/dcache/mainpipe/MissQueue.scala +++ b/src/main/scala/xiangshan/cache/dcache/mainpipe/MissQueue.scala @@ -414,6 +414,7 @@ class MissEntry(edge: TLEdgeOut, reqNum: Int)(implicit p: Parameters) extends DC } val nMaxPrefetchEntry = Input(UInt(64.W)) val matched = Output(Bool()) + val l1Miss = Output(Bool()) }) assert(!RegNext(io.primary_valid && !io.primary_ready)) @@ -846,6 +847,7 @@ class MissEntry(edge: TLEdgeOut, reqNum: Int)(implicit p: 
Parameters) extends DC prefetch := false.B } + io.l1Miss := req_valid // refill latency monitor val start_counting = GatedValidRegNext(io.mem_acquire.fire) || (GatedValidRegNextN(primary_fire, 2) && s_acquire) io.latency_monitor.load_miss_refilling := req_valid && req_primary_fire.isFromLoad && BoolStopWatch(start_counting, io.mem_grant.fire && !refill_done, true, true) @@ -948,6 +950,7 @@ class MissQueue(edge: TLEdgeOut, reqNum: Int)(implicit p: Parameters) extends DC val mq_enq_cancel = Output(Bool()) val debugTopDown = new DCacheTopDownIO + val l1Miss = Output(Bool()) }) // 128KBL1: FIXME: provide vaddr for l2 @@ -1199,6 +1202,7 @@ class MissQueue(edge: TLEdgeOut, reqNum: Int)(implicit p: Parameters) extends DC XSPerfAccumulate("max_inflight", max_inflight) QueuePerf(cfg.nMissEntries, num_valids, num_valids === cfg.nMissEntries.U) io.full := num_valids === cfg.nMissEntries.U + io.l1Miss := RegNext(Cat(entries.map(_.io.l1Miss)).orR) XSPerfHistogram("num_valids", num_valids, true.B, 0, cfg.nMissEntries, 1) XSPerfHistogram("L1DMLP_CPUData", PopCount(VecInit(entries.map(_.io.perf_pending_normal)).asUInt), true.B, 0, cfg.nMissEntries, 1) diff --git a/src/main/scala/xiangshan/frontend/Frontend.scala b/src/main/scala/xiangshan/frontend/Frontend.scala index 5491c6a2e8..fd416a38cb 100644 --- a/src/main/scala/xiangshan/frontend/Frontend.scala +++ b/src/main/scala/xiangshan/frontend/Frontend.scala @@ -410,8 +410,6 @@ class FrontendInlinedImp(outer: FrontendInlined) extends LazyModuleImp(outer) itlbRepeater1.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr - val frontendBubble = Mux(io.backend.canAccept, DecodeWidth.U - PopCount(ibuffer.io.out.map(_.valid)), 0.U) - XSPerfAccumulate("FrontendBubble", frontendBubble) io.frontendInfo.ibufFull := RegNext(ibuffer.io.full) io.resetInFrontend := reset.asBool diff --git a/src/main/scala/xiangshan/frontend/IBuffer.scala b/src/main/scala/xiangshan/frontend/IBuffer.scala index c33c3bfb24..1180031313 100644 --- 
a/src/main/scala/xiangshan/frontend/IBuffer.scala +++ b/src/main/scala/xiangshan/frontend/IBuffer.scala @@ -479,6 +479,11 @@ class IBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrH val FrontBubble = Mux(decodeCanAccept, DecodeWidth.U - numOut, 0.U) + val fetchLatency = decodeCanAccept && numOut === 0.U + + XSPerfAccumulate("if_fetch_bubble", FrontBubble) + XSPerfAccumulate("if_fetch_bubble_eq_max", fetchLatency) + val perfEvents = Seq( ("IBuffer_Flushed ", io.flush), ("IBuffer_hungry ", instrHungry), @@ -487,7 +492,8 @@ class IBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrH ("IBuffer_3_4_valid", (numValid >= (2 * (IBufSize / 4)).U) & (numValid < (3 * (IBufSize / 4)).U)), ("IBuffer_4_4_valid", (numValid >= (3 * (IBufSize / 4)).U) & (numValid < (4 * (IBufSize / 4)).U)), ("IBuffer_full ", numValid.andR), - ("Front_Bubble ", FrontBubble) + ("Front_Bubble ", FrontBubble), + ("Fetch_Latency_Bound", fetchLatency) ) generatePerfEvent() } diff --git a/src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala b/src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala index 4103509d82..cf84ba152d 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala @@ -128,6 +128,7 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete // top-down val debugTopDown = new LoadQueueTopDownIO + val noUopsIssued = Input(Bool()) }) val loadQueue = Module(new LoadQueue) @@ -288,6 +289,7 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete } loadQueue.io.debugTopDown <> io.debugTopDown + loadQueue.io.noUopsIssed := io.noUopsIssued assert(!(loadQueue.io.uncache.resp.valid && storeQueue.io.uncache.resp.valid)) when (!io.uncacheOutstanding) { diff --git a/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala b/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala index 6556be4c5d..d330306e2d 100644 --- 
a/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala @@ -204,6 +204,7 @@ class LoadQueue(implicit p: Parameters) extends XSModule val lqDeqPtr = Output(new LqPtr) val debugTopDown = new LoadQueueTopDownIO + val noUopsIssed = Input(Bool()) }) val loadQueueRAR = Module(new LoadQueueRAR) // read-after-read violation @@ -332,6 +333,8 @@ class LoadQueue(implicit p: Parameters) extends XSModule loadQueueReplay.io.debugTopDown <> io.debugTopDown + virtualLoadQueue.io.noUopsIssued := io.noUopsIssed + val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull) XSPerfAccumulate("full_mask_000", full_mask === 0.U) XSPerfAccumulate("full_mask_001", full_mask === 1.U) diff --git a/src/main/scala/xiangshan/mem/lsqueue/VirtualLoadQueue.scala b/src/main/scala/xiangshan/mem/lsqueue/VirtualLoadQueue.scala index ef87aa45b6..9b6817ffbf 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/VirtualLoadQueue.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/VirtualLoadQueue.scala @@ -52,6 +52,8 @@ class VirtualLoadQueue(implicit p: Parameters) extends XSModule // to dispatch val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W)) val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W)) + // for topdown + val noUopsIssued = Input(Bool()) }) println("VirtualLoadQueue: size: " + VirtualLoadQueueSize) @@ -276,7 +278,18 @@ class VirtualLoadQueue(implicit p: Parameters) extends XSModule val vecValidVec = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => allocated(i) && isvec(i)))) QueuePerf(VirtualLoadQueueSize, PopCount(vecValidVec), !allowEnqueue) io.lqFull := !allowEnqueue - val perfEvents: Seq[(String, UInt)] = Seq() + + def NLoadNotCompleted = 1 + val validCountReg = RegNext(validCount) + val noUopsIssued = io.noUopsIssued + val stallLoad = io.noUopsIssued && (validCountReg >= NLoadNotCompleted.U) + val memStallAnyLoad = RegNext(stallLoad) + + XSPerfAccumulate("mem_stall_anyload", 
memStallAnyLoad) + + val perfEvents: Seq[(String, UInt)] = Seq( + ("MEMSTALL_ANY_LOAD", memStallAnyLoad), + ) generatePerfEvent() // debug info