From 32f3804eb8bf2a9348ebd5d0867ddbaff0d01294 Mon Sep 17 00:00:00 2001 From: ywlcode Date: Wed, 27 May 2026 18:37:37 +0800 Subject: [PATCH 1/5] refactor(Prefetcher): improve request handling and logging in prefetch module --- src/main/scala/coupledL2/CoupledL2.scala | 10 +- .../scala/coupledL2/prefetch/Prefetcher.scala | 163 +++++++++--------- 2 files changed, 90 insertions(+), 83 deletions(-) diff --git a/src/main/scala/coupledL2/CoupledL2.scala b/src/main/scala/coupledL2/CoupledL2.scala index c85a767..8ad73f5 100644 --- a/src/main/scala/coupledL2/CoupledL2.scala +++ b/src/main/scala/coupledL2/CoupledL2.scala @@ -447,7 +447,9 @@ class CoupledL2(implicit p: Parameters) extends LazyModule with HasCoupledL2Para prefetchOpt.foreach { _ => fastArb(prefetchTrains.get, prefetcher.get.io.train, Some("prefetch_train")) - prefetcher.get.io.req.ready := Cat(prefetchReqsReady).orR + prefetcher.get.io.req.zip(prefetchReqsReady).foreach { + case (r, ready) => r.ready := ready + } prefetcher.get.hartId := io.hartId prefetcher.get.pfCtrlFromCore := io.pfCtrlFromCore fastArb(prefetchResps.get, prefetcher.get.io.resp, Some("prefetch_resp")) @@ -547,9 +549,9 @@ class CoupledL2(implicit p: Parameters) extends LazyModule with HasCoupledL2Para slice.io.prefetch.zip(prefetcher).foreach { case (s, p) => - s.req.valid := p.io.req.valid && bank_eq(Cat(p.io.req.bits.tag, p.io.req.bits.set), i, bankBits) - s.req.bits := p.io.req.bits - prefetchReqsReady(i) := s.req.ready && bank_eq(Cat(p.io.req.bits.tag, p.io.req.bits.set), i, bankBits) + s.req.valid := p.io.req(i).valid + s.req.bits := p.io.req(i).bits + prefetchReqsReady(i) := s.req.ready val train = Pipeline(s.train) val resp = Pipeline(s.resp) prefetchTrains.get(i) <> train diff --git a/src/main/scala/coupledL2/prefetch/Prefetcher.scala b/src/main/scala/coupledL2/prefetch/Prefetcher.scala index ccad4e1..f687b86 100644 --- a/src/main/scala/coupledL2/prefetch/Prefetcher.scala +++ b/src/main/scala/coupledL2/prefetch/Prefetcher.scala @@ 
-120,6 +120,7 @@ class PrefetchReq(implicit p: Parameters) extends PrefetchBundle { val pfSource = UInt(MemReqSource.reqSourceBits.W) def addr: UInt = Cat(tag, set, 0.U(offsetBits.W)) + def setaddr: UInt = Cat(tag, set) def isBOP:Bool = pfSource === MemReqSource.Prefetch2L2BOP.id.U def isPBOP:Bool = pfSource === MemReqSource.Prefetch2L2PBOP.id.U def isSMS:Bool = pfSource === MemReqSource.Prefetch2L2SMS.id.U @@ -131,7 +132,7 @@ class PrefetchReq(implicit p: Parameters) extends PrefetchBundle { pfSource === MemReqSource.Prefetch2L2PBOP.id.U || pfSource === MemReqSource.Prefetch2L2SMS.id.U || pfSource === MemReqSource.Prefetch2L2TP.id.U || - pfSource === MemReqSource.Prefetch2L2NL.id.U + pfSource === MemReqSource.Prefetch2L2NL.id.U } class PrefetchResp(implicit p: Parameters) extends PrefetchBundle { @@ -146,13 +147,13 @@ class PrefetchResp(implicit p: Parameters) extends PrefetchBundle { def isPBOP: Bool = pfSource === MemReqSource.Prefetch2L2PBOP.id.U def isSMS: Bool = pfSource === MemReqSource.Prefetch2L2SMS.id.U def isTP: Bool = pfSource === MemReqSource.Prefetch2L2TP.id.U - def isNL: Bool = pfSource === MemReqSource.Prefetch2L2NL.id.U + def isNL: Bool = pfSource === MemReqSource.Prefetch2L2NL.id.U def fromL2: Bool = pfSource === MemReqSource.Prefetch2L2BOP.id.U || pfSource === MemReqSource.Prefetch2L2PBOP.id.U || pfSource === MemReqSource.Prefetch2L2SMS.id.U || pfSource === MemReqSource.Prefetch2L2TP.id.U || - pfSource === MemReqSource.Prefetch2L2NL.id.U + pfSource === MemReqSource.Prefetch2L2NL.id.U } class PrefetchTrain(implicit p: Parameters) extends PrefetchBundle { @@ -181,8 +182,19 @@ class PrefetchIO(implicit p: Parameters) extends PrefetchBundle { })) } +class PrefetchTopIO(implicit p: Parameters) extends PrefetchBundle { + val train = Flipped(DecoupledIO(new PrefetchTrain)) + val tlb_req = new L2ToL1TlbIO(nRespDups= 1) + val req = Vec(1 << bankBits, DecoupledIO(new PrefetchReq)) + val resp = Flipped(DecoupledIO(new PrefetchResp)) + val recv_addr = 
Flipped(ValidIO(new Bundle() { + val addr = UInt(64.W) + val pfSource = UInt(MemReqSource.reqSourceBits.W) + })) +} + class Prefetcher(implicit p: Parameters) extends PrefetchModule { - val io = IO(new PrefetchIO) + val io = IO(new PrefetchTopIO) val tpio = IO(new Bundle() { val tpmeta_port = if (hasTPPrefetcher) Some(new tpmetaPortIO(hartIdLen, fullAddressBits, offsetBits)) else None }) @@ -249,8 +261,6 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { if (hasBOP) { vbop.get.io.enable := vbop_en vbop.get.io.pfCtrlOfDelayLatency := delay_latency - vbop.get.io.req.ready := (if(hasReceiver) !pfRcv.get.io.req.valid else true.B) && - (if(hasNLPrefetcher) !nl.get.io.req.valid else true.B) vbop.get.io.train <> io.train vbop.get.io.train.valid := io.train.valid && (io.train.bits.reqsource =/= MemReqSource.L1DataPrefetch.id.U) vbop.get.io.resp <> io.resp @@ -260,10 +270,6 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { pbop.get.io.enable := pbop_en pbop.get.io.pfCtrlOfDelayLatency := delay_latency - pbop.get.io.req.ready := - (if(hasReceiver) !pfRcv.get.io.req.valid else true.B) && - (if(hasNLPrefetcher) !nl.get.io.req.valid else true.B) && - (if(hasBOP) !vbop.get.io.req.valid else true.B) pbop.get.io.train <> io.train pbop.get.io.train.valid := io.train.valid && (io.train.bits.reqsource =/= MemReqSource.L1DataPrefetch.id.U) pbop.get.io.resp <> io.resp @@ -271,7 +277,6 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { } if (hasReceiver) { pfRcv.get.io_enable := pfRcv_en - pfRcv.get.io.req.ready := true.B pfRcv.get.io.recv_addr := ValidIODelay(io.recv_addr, 2) pfRcv.get.io.train.valid := false.B pfRcv.get.io.train.bits := 0.U.asTypeOf(new PrefetchTrain) @@ -291,9 +296,8 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { if (hasNLPrefetcher) { nl.get.io.enable := true.B - nl.get.io.train <> io.train - nl.get.io.resp <> io.resp - nl.get.io.req.ready := (if(hasReceiver) !pfRcv.get.io.req.valid else 
true.B) + nl.get.io.train <> io.train + nl.get.io.resp <> io.resp } if (hasTPPrefetcher) { @@ -301,9 +305,6 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { tp.get.io.train <> io.train tp.get.io.resp <> io.resp tp.get.io.hartid := hartId - tp.get.io.req.ready := (if(hasReceiver) !pfRcv.get.io.req.valid else true.B) && - (if(hasNLPrefetcher) !nl.get.io.req.valid else true.B) && - (if(hasBOP) !vbop.get.io.req.valid && !pbop.get.io.req.valid else true.B) tp.get.io.tpmeta_port <> tpio.tpmeta_port.get } @@ -311,64 +312,66 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { // =================== Connection of all Prefetchers ===================== /* prefetchers -> pftQueue -> pipe -> Slices.SinkA */ - val pftQueue = Module( - new OverwriteQueue( - gen = new PrefetchReq , - entries = inflightEntries, - hasFlow = true - ) - ) - val pipe = Module(new Pipeline(io.req.bits.cloneType, 1)) - private val SRC_NUM = 5 private val Seq(rcv_idx, nl_idx, vbop_idx, pbop_idx, tp_idx) = (0 until SRC_NUM).toSeq - val pftQueueEnqArb = Module(new Arbiter(new PrefetchReq, SRC_NUM)) - pftQueueEnqArb.io.in.foreach{ x => - x.valid := false.B - x.bits := 0.U.asTypeOf(new PrefetchReq) - } - if (hasReceiver) { - pftQueueEnqArb.io.in(rcv_idx).valid := pfRcv.get.io.req.valid - pftQueueEnqArb.io.in(rcv_idx).bits := pfRcv.get.io.req.bits - } - if (hasBOP) { - pftQueueEnqArb.io.in(vbop_idx).valid := vbop.get.io.req.valid - pftQueueEnqArb.io.in(vbop_idx).bits := vbop.get.io.req.bits - pftQueueEnqArb.io.in(pbop_idx).valid := pbop.get.io.req.valid - pftQueueEnqArb.io.in(pbop_idx).bits := pbop.get.io.req.bits + val reqs = Seq( + if (hasReceiver) Some(pfRcv.get.io.req) else None, + if (hasNLPrefetcher) Some(nl.get.io.req) else None, + if (hasBOP) Some(vbop.get.io.req) else None, + if (hasBOP) Some(pbop.get.io.req) else None, + if (hasTPPrefetcher) Some(tp.get.io.req) else None + ) + val reqsValid = reqs.map(_.map(_.valid).getOrElse(false.B)) + val reqsBits = 
reqs.map(_.map(_.bits).getOrElse(0.U.asTypeOf(new PrefetchReq))) + val reqsSetAddr = reqsBits.map(_.setaddr) + val banks = 1 << bankBits + val pftQueue = Seq.tabulate(banks) { _ => + Module(new OverwriteQueue( + gen = new PrefetchReq, + entries = inflightEntries, + hasFlow = true + )) } - if (hasTPPrefetcher) { - pftQueueEnqArb.io.in(tp_idx).valid := tp.get.io.req.valid - pftQueueEnqArb.io.in(tp_idx).bits := tp.get.io.req.bits + val pipe = Seq.tabulate(banks) { _ => Module(new Pipeline(new PrefetchReq, 1)) } + val select = Wire(Vec(banks, Vec(SRC_NUM, Bool()))) + val selectOH = Wire(Vec(banks, Vec(SRC_NUM, Bool()))) + + for (i <- 0 until banks) { + select(i) := VecInit(reqsValid.zip(reqsSetAddr).map { + case (valid, addr) => valid && bank_eq(addr, i, bankBits) + }) + selectOH(i) := VecInit(PriorityEncoderOH(select(i).asUInt).asBools) + pftQueue(i).io.enq.valid := select(i).asUInt.orR + pftQueue(i).io.enq.bits := ParallelPriorityMux(select(i).asUInt, reqsBits) + pipe(i).io.in <> pftQueue(i).io.deq + io.req(i) <> pipe(i).io.out } - if (hasNLPrefetcher) { - pftQueueEnqArb.io.in(nl_idx).valid := nl.get.io.req.valid - pftQueueEnqArb.io.in(nl_idx).bits := nl.get.io.req.bits + + for ((reqOpt, j) <- reqs.zipWithIndex) { + reqOpt.foreach { req => + req.ready := (0 until banks).map(i => selectOH(i)(j)).reduce(_ || _) + } } - pftQueue.io.enq <> pftQueueEnqArb.io.out - pipe.io.in <> pftQueue.io.deq - io.req <> pipe.io.out + val reqsFire = reqs.map(_.map(_.fire).getOrElse(false.B)) XSPerfAccumulate("prefetch_train_valid", io.train.valid) - XSPerfAccumulate("prefetch_req_fromL1", pftQueueEnqArb.io.in(rcv_idx).valid) - XSPerfAccumulate("prefetch_req_fromVBOP", pftQueueEnqArb.io.in(vbop_idx).valid) - XSPerfAccumulate("prefetch_req_fromPBOP", pftQueueEnqArb.io.in(pbop_idx).valid) - XSPerfAccumulate("prefetch_req_fromBOP", pftQueueEnqArb.io.in(vbop_idx).valid || pftQueueEnqArb.io.in(pbop_idx).valid) - XSPerfAccumulate("prefetch_req_fromTP", pftQueueEnqArb.io.in(tp_idx).valid) - 
XSPerfAccumulate("prefetch_req_fromNL", pftQueueEnqArb.io.in(nl_idx).valid) - - XSPerfAccumulate("prefetch_req_selectL1", pftQueueEnqArb.io.in(rcv_idx).fire) - XSPerfAccumulate("prefetch_req_selectVBOP", pftQueueEnqArb.io.in(vbop_idx).fire) - XSPerfAccumulate("prefetch_req_selectPBOP", pftQueueEnqArb.io.in(pbop_idx).fire) - XSPerfAccumulate("prefetch_req_selectBOP", pftQueueEnqArb.io.in(vbop_idx).fire || pftQueueEnqArb.io.in(pbop_idx).fire) - XSPerfAccumulate("prefetch_req_selectTP", pftQueueEnqArb.io.in(tp_idx).fire) - XSPerfAccumulate("prefetch_req_selectNL", pftQueueEnqArb.io.in(nl_idx).fire) + XSPerfAccumulate("prefetch_req_fromL1", reqsValid(rcv_idx)) + XSPerfAccumulate("prefetch_req_fromVBOP", reqsValid(vbop_idx)) + XSPerfAccumulate("prefetch_req_fromPBOP", reqsValid(pbop_idx)) + XSPerfAccumulate("prefetch_req_fromBOP", reqsValid(vbop_idx) || reqsValid(pbop_idx)) + XSPerfAccumulate("prefetch_req_fromTP", reqsValid(tp_idx)) + XSPerfAccumulate("prefetch_req_fromNL", reqsValid(nl_idx)) + + XSPerfAccumulate("prefetch_req_selectL1", reqsFire(rcv_idx)) + XSPerfAccumulate("prefetch_req_selectVBOP", reqsFire(vbop_idx)) + XSPerfAccumulate("prefetch_req_selectPBOP", reqsFire(pbop_idx)) + XSPerfAccumulate("prefetch_req_selectBOP", reqsFire(vbop_idx) || reqsFire(pbop_idx)) + XSPerfAccumulate("prefetch_req_selectTP", reqsFire(tp_idx)) + XSPerfAccumulate("prefetch_req_selectNL", reqsFire(nl_idx)) XSPerfAccumulate("prefetch_req_SMS_other_overlapped", - pftQueueEnqArb.io.in(rcv_idx).valid && ( - pftQueueEnqArb.io.in(vbop_idx).valid || pftQueueEnqArb.io.in(pbop_idx).valid || pftQueueEnqArb.io.in(tp_idx).valid - || pftQueueEnqArb.io.in(nl_idx).valid - ) + reqsValid(rcv_idx) && + (reqsValid(vbop_idx) || reqsValid(pbop_idx) || reqsValid(tp_idx) || reqsValid(nl_idx)) ) // NOTE: set basicDB false when debug over @@ -383,7 +386,7 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { val pfsource = UInt(PfSource.pfSourceBits.W) val reqsource = 
UInt(MemReqSource.reqSourceBits.W) } - val trainTT = ChiselDB.createTable("L2PrefetchTrainTable", new TrainEntry, basicDB = false) + val trainTT = ChiselDB.createTable("L2PrefetchTrainTable", new TrainEntry, basicDB = true) val e1 = Wire(new TrainEntry) e1.paddr := io.train.bits.addr e1.vaddr := io.train.bits.vaddr.getOrElse(0.U) << offsetBits @@ -396,7 +399,7 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { trainTT.log( data = e1, en = io.train.valid && (io.train.bits.reqsource =/= MemReqSource.L1DataPrefetch.id.U), - site = "L2Train_onlyBOP", + site = "L2Train", clock, reset ) @@ -405,15 +408,17 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { val needT = Bool() val pfsource = UInt(MemReqSource.reqSourceBits.W) } - val pfTT = ChiselDB.createTable("L2PrefetchPrefetchTable", new PrefetchEntry, basicDB = false) - val e2 = Wire(new PrefetchEntry) - e2.paddr := io.req.bits.addr - e2.needT := io.req.bits.needT - e2.pfsource := io.req.bits.pfSource - pfTT.log( - data = e2, - en = io.req.fire && io.req.bits.pfSource === MemReqSource.Prefetch2L2BOP.id.U, - site = "L2Prefetch_onlyBOP", - clock, reset - ) + val pfTT = ChiselDB.createTable("L2PrefetchReqTable", new PrefetchEntry, basicDB = true) + for (i <- 0 until banks) { + val e2 = Wire(new PrefetchEntry) + e2.paddr := io.req(i).bits.addr + e2.needT := io.req(i).bits.needT + e2.pfsource := io.req(i).bits.pfSource + pfTT.log( + data = e2, + en = io.req(i).fire, + site = "L2PrefetchReq", + clock, reset + ) + } } From 46e7330eef949ee4b9cc0b23a3e9ecf3f33f004b Mon Sep 17 00:00:00 2001 From: ywlcode <42512426+ywlcode@users.noreply.github.com> Date: Thu, 4 Jun 2026 15:42:42 +0800 Subject: [PATCH 2/5] chore: log all train requests in trainTT ChiselDB (drop L1DataPrefetch filter) --- src/main/scala/coupledL2/prefetch/Prefetcher.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/coupledL2/prefetch/Prefetcher.scala b/src/main/scala/coupledL2/prefetch/Prefetcher.scala index f687b86..f25f367 100644 ---
a/src/main/scala/coupledL2/prefetch/Prefetcher.scala +++ b/src/main/scala/coupledL2/prefetch/Prefetcher.scala @@ -398,7 +398,7 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { e1.reqsource := io.train.bits.reqsource trainTT.log( data = e1, - en = io.train.valid && (io.train.bits.reqsource =/= MemReqSource.L1DataPrefetch.id.U), + en = io.train.valid, site = "L2Train", clock, reset ) From f6736885a0e6ade223be3bd7f05055a8843ab92d Mon Sep 17 00:00:00 2001 From: ywlcode <42512426+ywlcode@users.noreply.github.com> Date: Thu, 4 Jun 2026 15:45:18 +0800 Subject: [PATCH 3/5] chore: disable basicDB for trainTT and pfTT ChiselDB tables --- src/main/scala/coupledL2/prefetch/Prefetcher.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/coupledL2/prefetch/Prefetcher.scala b/src/main/scala/coupledL2/prefetch/Prefetcher.scala index f25f367..bc5a61b 100644 --- a/src/main/scala/coupledL2/prefetch/Prefetcher.scala +++ b/src/main/scala/coupledL2/prefetch/Prefetcher.scala @@ -386,7 +386,7 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { val pfsource = UInt(PfSource.pfSourceBits.W) val reqsource = UInt(MemReqSource.reqSourceBits.W) } - val trainTT = ChiselDB.createTable("L2PrefetchTrainTable", new TrainEntry, basicDB = true) + val trainTT = ChiselDB.createTable("L2PrefetchTrainTable", new TrainEntry, basicDB = false) val e1 = Wire(new TrainEntry) e1.paddr := io.train.bits.addr e1.vaddr := io.train.bits.vaddr.getOrElse(0.U) << offsetBits @@ -408,7 +408,7 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { val needT = Bool() val pfsource = UInt(MemReqSource.reqSourceBits.W) } - val pfTT = ChiselDB.createTable("L2PrefetchReqTable", new PrefetchEntry, basicDB = true) + val pfTT = ChiselDB.createTable("L2PrefetchReqTable", new PrefetchEntry, basicDB = false) for (i <- 0 until banks) { val e2 = Wire(new PrefetchEntry) e2.paddr := io.req(i).bits.addr From 12b5606c6f20b177c8f32d8ad343a801a4dd962d Mon Sep 17 00:00:00 2001 From:
ywlcode Date: Sun, 7 Jun 2026 20:15:56 +0800 Subject: [PATCH 4/5] refactor(Prefetcher): move train/resp arbitration into Prefetcher and add train/resp perf counters --- src/main/scala/coupledL2/CoupledL2.scala | 4 +- .../scala/coupledL2/prefetch/Prefetcher.scala | 60 +++++++++++-------- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/src/main/scala/coupledL2/CoupledL2.scala b/src/main/scala/coupledL2/CoupledL2.scala index 8ad73f5..9ec61d2 100644 --- a/src/main/scala/coupledL2/CoupledL2.scala +++ b/src/main/scala/coupledL2/CoupledL2.scala @@ -446,13 +446,13 @@ class CoupledL2(implicit p: Parameters) extends LazyModule with HasCoupledL2Para io.l2_tlb_req <> DontCare // TODO: l2_tlb_req should be Option prefetchOpt.foreach { _ => - fastArb(prefetchTrains.get, prefetcher.get.io.train, Some("prefetch_train")) + prefetcher.get.io.train <> prefetchTrains.get prefetcher.get.io.req.zip(prefetchReqsReady).foreach { case (r, ready) => r.ready := ready } prefetcher.get.hartId := io.hartId prefetcher.get.pfCtrlFromCore := io.pfCtrlFromCore - fastArb(prefetchResps.get, prefetcher.get.io.resp, Some("prefetch_resp")) + prefetcher.get.io.resp <> prefetchResps.get prefetcher.get.io.tlb_req <> io.l2_tlb_req } pf_recv_node match { diff --git a/src/main/scala/coupledL2/prefetch/Prefetcher.scala b/src/main/scala/coupledL2/prefetch/Prefetcher.scala index bc5a61b..333e644 100644 --- a/src/main/scala/coupledL2/prefetch/Prefetcher.scala +++ b/src/main/scala/coupledL2/prefetch/Prefetcher.scala @@ -23,7 +23,7 @@ import utility._ import org.chipsalliance.cde.config.Parameters import utility.mbist.MbistPipeline import xscache.coupledL2._ -import xscache.coupledL2.utils.OverwriteQueue +import xscache.coupledL2.utils._ /* virtual address */ trait HasPrefetcherHelper extends HasCircularQueuePtrHelper with HasCoupledL2Parameters { @@ -183,10 +183,10 @@ class PrefetchIO(implicit p: Parameters) extends PrefetchBundle { } class PrefetchTopIO(implicit p: Parameters) extends PrefetchBundle { - val train = Flipped(DecoupledIO(new PrefetchTrain)) + val
train = Vec(1 << bankBits, Flipped(DecoupledIO(new PrefetchTrain))) val tlb_req = new L2ToL1TlbIO(nRespDups= 1) val req = Vec(1 << bankBits, DecoupledIO(new PrefetchReq)) - val resp = Flipped(DecoupledIO(new PrefetchResp)) + val resp = Vec(1 << bankBits, Flipped(DecoupledIO(new PrefetchResp))) val recv_addr = Flipped(ValidIO(new Bundle() { val addr = UInt(64.W) val pfSource = UInt(MemReqSource.reqSourceBits.W) @@ -207,6 +207,7 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { val vbop_en = pfCtrlFromCore.l2_pf_master_en && pfCtrlFromCore.l2_vbop_en val tp_en = pfCtrlFromCore.l2_pf_master_en && pfCtrlFromCore.l2_tp_en val delay_latency = pfCtrlFromCore.l2_pf_delay_latency + val banks = 1 << bankBits // =================== Prefetchers ===================== // TODO: consider separate VBOP and PBOP in prefetch param @@ -256,24 +257,29 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { // prefetch from upper level val pfRcv = if (hasReceiver) Some(Module(new PrefetchReceiver())) else None + val train = Wire(DecoupledIO(new PrefetchTrain)) + val resp = Wire(DecoupledIO(new PrefetchResp)) + fastArb(io.train, train, Some("prefetch_train")) + fastArb(io.resp, resp, Some("prefetch_resp")) + // =================== Connection for each Prefetcher ===================== // Rcv > NL >VBOP > PBOP > TP if (hasBOP) { vbop.get.io.enable := vbop_en vbop.get.io.pfCtrlOfDelayLatency := delay_latency - vbop.get.io.train <> io.train - vbop.get.io.train.valid := io.train.valid && (io.train.bits.reqsource =/= MemReqSource.L1DataPrefetch.id.U) - vbop.get.io.resp <> io.resp - vbop.get.io.resp.valid := io.resp.valid && io.resp.bits.isBOP + vbop.get.io.train <> train + vbop.get.io.train.valid := train.valid && (train.bits.reqsource =/= MemReqSource.L1DataPrefetch.id.U) + vbop.get.io.resp <> resp + vbop.get.io.resp.valid := resp.valid && resp.bits.isBOP vbop.get.io.tlb_req <> io.tlb_req vbop.get.io.pbopCrossPage := true.B // pbop.io.pbopCrossPage // let vbop 
have noting to do with pbop pbop.get.io.enable := pbop_en pbop.get.io.pfCtrlOfDelayLatency := delay_latency - pbop.get.io.train <> io.train - pbop.get.io.train.valid := io.train.valid && (io.train.bits.reqsource =/= MemReqSource.L1DataPrefetch.id.U) - pbop.get.io.resp <> io.resp - pbop.get.io.resp.valid := io.resp.valid && io.resp.bits.isPBOP + pbop.get.io.train <> train + pbop.get.io.train.valid := train.valid && (train.bits.reqsource =/= MemReqSource.L1DataPrefetch.id.U) + pbop.get.io.resp <> resp + pbop.get.io.resp.valid := resp.valid && resp.bits.isPBOP } if (hasReceiver) { pfRcv.get.io_enable := pfRcv_en @@ -296,14 +302,14 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { if (hasNLPrefetcher) { nl.get.io.enable := true.B - nl.get.io.train <> io.train - nl.get.io.resp <> io.resp + nl.get.io.train <> train + nl.get.io.resp <> resp } if (hasTPPrefetcher) { tp.get.io.enable := tp_en - tp.get.io.train <> io.train - tp.get.io.resp <> io.resp + tp.get.io.train <> train + tp.get.io.resp <> resp tp.get.io.hartid := hartId tp.get.io.tpmeta_port <> tpio.tpmeta_port.get @@ -324,7 +330,6 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { val reqsValid = reqs.map(_.map(_.valid).getOrElse(false.B)) val reqsBits = reqs.map(_.map(_.bits).getOrElse(0.U.asTypeOf(new PrefetchReq))) val reqsSetAddr = reqsBits.map(_.setaddr) - val banks = 1 << bankBits val pftQueue = Seq.tabulate(banks) { _ => Module(new OverwriteQueue( gen = new PrefetchReq, @@ -355,7 +360,10 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { val reqsFire = reqs.map(_.map(_.fire).getOrElse(false.B)) - XSPerfAccumulate("prefetch_train_valid", io.train.valid) + XSPerfAccumulate("prefetch_train_valid", train.valid) + XSPerfAccumulate("prefetch_train_in_valid", PopCount(io.train.map(_.valid))) + XSPerfAccumulate("prefetch_resp_valid", resp.valid) + XSPerfAccumulate("prefetch_resp_in_valid", PopCount(io.resp.map(_.valid))) XSPerfAccumulate("prefetch_req_fromL1", 
reqsValid(rcv_idx)) XSPerfAccumulate("prefetch_req_fromVBOP", reqsValid(vbop_idx)) XSPerfAccumulate("prefetch_req_fromPBOP", reqsValid(pbop_idx)) @@ -388,17 +396,17 @@ class Prefetcher(implicit p: Parameters) extends PrefetchModule { } val trainTT = ChiselDB.createTable("L2PrefetchTrainTable", new TrainEntry, basicDB = false) val e1 = Wire(new TrainEntry) - e1.paddr := io.train.bits.addr - e1.vaddr := io.train.bits.vaddr.getOrElse(0.U) << offsetBits - e1.needT := io.train.bits.needT - e1.hit := io.train.bits.hit - e1.prefetched := io.train.bits.prefetched - e1.source := io.train.bits.source - e1.pfsource := io.train.bits.pfsource - e1.reqsource := io.train.bits.reqsource + e1.paddr := train.bits.addr + e1.vaddr := train.bits.vaddr.getOrElse(0.U) << offsetBits + e1.needT := train.bits.needT + e1.hit := train.bits.hit + e1.prefetched := train.bits.prefetched + e1.source := train.bits.source + e1.pfsource := train.bits.pfsource + e1.reqsource := train.bits.reqsource trainTT.log( data = e1, - en = io.train.valid, + en = train.valid, site = "L2Train", clock, reset ) From a9d641b66c4eeb933696a57b242878478dd6bb28 Mon Sep 17 00:00:00 2001 From: ywlcode Date: Mon, 8 Jun 2026 15:10:17 +0800 Subject: [PATCH 5/5] refactor(PrefetchIO): simplify bank size calculation for train, req, and resp vectors --- src/main/scala/coupledL2/prefetch/Prefetcher.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/scala/coupledL2/prefetch/Prefetcher.scala b/src/main/scala/coupledL2/prefetch/Prefetcher.scala index 333e644..e06510d 100644 --- a/src/main/scala/coupledL2/prefetch/Prefetcher.scala +++ b/src/main/scala/coupledL2/prefetch/Prefetcher.scala @@ -183,10 +183,11 @@ class PrefetchIO(implicit p: Parameters) extends PrefetchBundle { } class PrefetchTopIO(implicit p: Parameters) extends PrefetchBundle { - val train = Vec(1 << bankBits, Flipped(DecoupledIO(new PrefetchTrain))) + val banks = 1 << bankBits + val train = Vec(banks, Flipped(DecoupledIO(new 
PrefetchTrain))) val tlb_req = new L2ToL1TlbIO(nRespDups= 1) - val req = Vec(1 << bankBits, DecoupledIO(new PrefetchReq)) - val resp = Vec(1 << bankBits, Flipped(DecoupledIO(new PrefetchResp))) + val req = Vec(banks, DecoupledIO(new PrefetchReq)) + val resp = Vec(banks, Flipped(DecoupledIO(new PrefetchResp))) val recv_addr = Flipped(ValidIO(new Bundle() { val addr = UInt(64.W) val pfSource = UInt(MemReqSource.reqSourceBits.W)