Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 169 additions & 44 deletions src/main/scala/coupledL2/CoupledL2.scala
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import freechips.rocketchip.util._
import org.chipsalliance.cde.config.{Field, Parameters}

import scala.math.max
import scala.util.Try
import coupledL2.prefetch._
import huancun.{BankBitsKey, TPmetaReq, TPmetaResp}
import utility.mbist.{MbistInterface, MbistPipeline}
Expand Down Expand Up @@ -121,7 +122,38 @@ trait HasCoupledL2Parameters {
def edgeOut = p(EdgeOutKey)
def bankBits = p(BankBitsKey)

def clientBits = edgeIn.client.clients.count(_.supports.probe)
/** Inward clients that support probing and have a non-empty visibility list,
  * ordered by the start of each client's source-ID range.
  */
def allProbeClients = {
  val visibleProbers = for {
    client <- edgeIn.client.clients
    if client.supports.probe && client.visibility.nonEmpty
  } yield client
  visibleProbers.sortBy(client => client.sourceId.start)
}
/** Maps a coherent client name to its channel index.
  *
  * @param name client name as it appears in the client parameters
  * @return `Some(0)` for "dcache", `Some(1)` for "dcache_ch1", `None` for any other name
  */
def coherentClientChannelId(name: String): Option[Int] =
  Map("dcache" -> 0, "dcache_ch1" -> 1).get(name)
/** The subset of `allProbeClients` whose name is recognised by `coherentClientChannelId`. */
def dcacheProbeClients = allProbeClients.filter { client =>
  coherentClientChannelId(client.name).isDefined
}
/** Probe-capable clients tracked by this slice.
  *
  * When `cacheParams.sliceCoherentClientMap` is set and `SliceIdKey` is present in the
  * parameters, the result is the single client whose name maps (through
  * `coherentClientChannelId`) to the channel configured for this slice. In every other
  * case all of `allProbeClients` is returned.
  *
  * Elaboration fails through `require` when the map is empty, the slice id does not index
  * the map, or the configured channel matches zero or more than one client.
  */
def selectedProbeClients = {
  // `lift` reports an absent key as None. Unlike wrapping the lookup in Try, it does not
  // hide unrelated failures raised while the parameters are being evaluated.
  val sliceIdOpt = p.lift(SliceIdKey)
  (cacheParams.sliceCoherentClientMap, sliceIdOpt) match {
    case (Some(map), Some(sliceId)) =>
      require(map.nonEmpty, "sliceCoherentClientMap must not be empty")
      // Reject negative ids as well, so a bad id reports this message instead of an index error.
      require(
        sliceId >= 0 && sliceId < map.length,
        s"sliceId $sliceId out of range for sliceCoherentClientMap"
      )
      val channelId = map(sliceId)
      val matchedClients = allProbeClients.filter(c => coherentClientChannelId(c.name).contains(channelId))
      require(
        matchedClients.nonEmpty,
        s"sliceCoherentClientMap($sliceId)=$channelId has no matching coherent client, candidates=${allProbeClients.map(_.name).mkString(",")}"
      )
      require(
        matchedClients.size == 1,
        s"sliceCoherentClientMap($sliceId)=$channelId matches multiple coherent clients=${matchedClients.map(_.name).mkString(",")}"
      )
      matchedClients
    case _ =>
      // No per-slice mapping configured, or no slice id available: track every probe client.
      allProbeClients
  }
}
// Clients used for the per-client bitmap and probe source lookup: an alias of selectedProbeClients.
def probeClients = selectedProbeClients
// Number of selected probe clients.
def clientBits = selectedProbeClients.size
def sourceIdBits = edgeIn.bundle.sourceBits // ids of L1
def msgSizeBits = edgeIn.bundle.sizeBits
def sourceIdAll = 1 << sourceIdBits
Expand Down Expand Up @@ -157,8 +189,7 @@ trait HasCoupledL2Parameters {
0.U
} else {
Cat(
edgeIn.client.clients
.filter(_.supports.probe)
probeClients
.map(c => {
c.sourceId.contains(sourceId).asInstanceOf[Bool]
})
Expand All @@ -173,9 +204,7 @@ trait HasCoupledL2Parameters {
} else {
Mux1H(
client,
edgeIn.client.clients
.filter(_.supports.probe)
.map(c => c.sourceId.start.U)
probeClients.map(c => c.sourceId.start.U)
)
}
}
Expand Down Expand Up @@ -334,11 +363,27 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has

require(banks == node.in.size)

// Probe-capable, visible clients on the first inward edge whose name maps to a coherent
// hint channel, ordered by the start of their source-ID range.
private val hintProbeClients = {
  val inwardClients = node.in.head._2.client.clients
  val coherentClients = inwardClients.filter { client =>
    client.supports.probe && client.visibility.nonEmpty && coherentClientChannelId(client.name).isDefined
  }
  coherentClients.sortBy(client => client.sourceId.start)
}
// Optional slice -> hint channel mapping; when present it must have one entry per bank.
private val hintSliceChannelMap = cacheParams.sliceCoherentClientMap
for (sliceMap <- hintSliceChannelMap) {
  require(sliceMap.length == banks, s"sliceCoherentClientMap length ${sliceMap.length} must match banks $banks")
}
// Number of hint channels: highest channel id among the hint clients plus one, or 1 if there are none.
private val hintChannelCount = {
  val channelIds = hintProbeClients.flatMap(client => coherentClientChannelId(client.name))
  if (channelIds.isEmpty) 1 else channelIds.foldLeft(0)(max) + 1
}
// Every channel id may be claimed by at most one client.
require(
  {
    val channelIds = hintProbeClients.flatMap(client => coherentClientChannelId(client.name))
    channelIds.distinct.size == channelIds.size
  },
  s"coherent hint channels must map to unique clients, got ${hintProbeClients.map(_.name).mkString(",")}"
)

val io = IO(new Bundle {
val hartId = Input(UInt(hartIdLen.W))
val pfCtrlFromCore = Input(new PrefetchCtrlFromCore)
// val l2_hint = Valid(UInt(32.W))
val l2_hint = ValidIO(new L2ToL1Hint()(l2ECCParams))
val l2_hint = Vec(hintChannelCount, ValidIO(new L2ToL1Hint()(l2ECCParams)))
val l2_tlb_req = new L2ToL1TlbIO(nRespDups = 1)(l2TlbParams)
val debugTopDown = new Bundle {
val robTrueCommit = Input(UInt(64.W))
Expand Down Expand Up @@ -421,20 +466,15 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has
// ** we need only [hintCycleAhead - 1] later
val sliceAhead = hintCycleAhead - 1

val master_num = node.in.head._2.client.clients.size
val hintChosenVec = Wire(Vec(master_num, Valid(new Bundle {
val sliceId = UInt(banks.W)
val hasData = Bool()
})))
val hintChosen = Wire(Vec(hintChannelCount, UInt(banks.W)))
val hintFire = Wire(Vec(hintChannelCount, Bool()))

// if Hint indicates that this slice should fireD, yet no D resp comes out of this slice
// then we releaseSourceD, enabling io.d.ready for other slices
// TODO: if the Hint for a single slice is 100% accurate, consider removing this
val releaseSourceD = Wire(Vec(banks, Bool()))
val allCanFire = (
RegNextN(!hintChosenVec.map(_.valid).reduce(_ || _), sliceAhead) &&
RegNextN(!hintChosenVec.map(h => h.valid && h.bits.hasData).reduce(_ || _), sliceAhead + 1)
) || Cat(releaseSourceD).orR
val anyHintFire = Cat(hintFire).orR
val allCanFire = (RegNextN(!anyHintFire, sliceAhead) && RegNextN(!anyHintFire, sliceAhead + 1)) || Cat(releaseSourceD).orR

val slices = node.in.zip(node.out).zipWithIndex.map {
case (((in, edgeIn), (out, edgeOut)), i) =>
Expand Down Expand Up @@ -463,8 +503,11 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has
// we will try our best to select the grant of slice X.
// If slice X has no grant then, it means that the hint at cycle T is wrong,
// so we relax the restriction on grant selection.
val sliceCanFire = RegNextN(hintChosenVec.map(h => h.valid && h.bits.sliceId === i.U).reduce(_ || _), sliceAhead) ||
RegNextN(hintChosenVec.map(h => h.valid && h.bits.sliceId === i.U && h.bits.hasData).reduce(_ || _), sliceAhead + 1)
val sliceMatched = Cat((0 until hintChannelCount).map { ch =>
hintFire(ch) && i.U === hintChosen(ch)
}).orR
val sliceCanFire = RegNextN(sliceMatched, sliceAhead) ||
RegNextN(sliceMatched, sliceAhead + 1)

releaseSourceD(i) := sliceCanFire && !slice.io.in.d.valid

Expand Down Expand Up @@ -543,34 +586,86 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has
if (enableHintGuidedGrant) {
// for timing consideration, hint should latch one cycle before sending to L1
// instead of adding a Pipeline/Queue to latch here, we just set hintQueue in GrantBuf & CustomL1Hint "flow=false"
val slices_l1Hint = VecInit(slices.map(_.io.l1Hint))
val l1HintValids = Cat(slices_l1Hint.map(_.valid).reverse)
val readysVec = Wire(Vec(node.in.head._2.client.clients.size, UInt(banks.W)))
(node.in.head._2.client.clients zip readysVec zip hintChosenVec).foreach {
case ((client, readysToSlice), hintChosen) =>
val master = Wire(Decoupled())
val contains = Cat(slices_l1Hint.map(hint => client.sourceId.contains(hint.bits.sourceId)).reverse)
val arbValids = l1HintValids & contains
val arbReadys = TLArbiter.roundRobin(arbValids.getWidth, arbValids, master.fire)
val fires = arbValids & arbReadys
assert(PopCount(fires) <= 1.U, "At most one hint per client may fire per cycle")
master.valid := fires.orR
val selectedHint = Mux1H(fires, slices_l1Hint.map(_.bits))
if (client.supports.probe) {
io.l2_hint.valid := master.fire && selectedHint.hasData
io.l2_hint.bits.isKeyword := selectedHint.isKeyword
io.l2_hint.bits.sourceId := selectedHint.sourceId - client.sourceId.start.U
val hintProbeClientsByChannel = Seq.tabulate(hintChannelCount) { ch =>
hintProbeClients.filter(c => coherentClientChannelId(c.name).contains(ch))
}
val l1HintArbs = Seq.tabulate(hintChannelCount)(_ => Module(new Arbiter(new L2ToL1HintInsideL2()(l2ECCParams), slices.size)))

slices.zipWithIndex.foreach { case (s, i) =>
hintSliceChannelMap match {
case Some(map) =>
val sliceHintChannel = map(i)
require(
sliceHintChannel < hintChannelCount,
s"slice $i maps to invalid hint channel $sliceHintChannel, hintChannelCount=$hintChannelCount"
)
for (ch <- 0 until hintChannelCount) {
l1HintArbs(ch).io.in(i).valid := (if (sliceHintChannel == ch) s.io.l1Hint.valid else false.B)
l1HintArbs(ch).io.in(i).bits := s.io.l1Hint.bits
}
s.io.l1Hint.ready := l1HintArbs(sliceHintChannel).io.in(i).ready
case None =>
val sliceHintChannelMatches = hintProbeClientsByChannel.map { channelClients =>
if (channelClients.nonEmpty) {
Cat(channelClients.map(_.sourceId.contains(s.io.l1Hint.bits.sourceId).asInstanceOf[Bool])).orR
} else {
false.B
}
}
assert(
!(s.io.l1Hint.valid && PopCount(Cat(sliceHintChannelMatches)) > 1.U),
"slice hint sourceId matches multiple dcache channels"
)
for (ch <- 0 until hintChannelCount) {
l1HintArbs(ch).io.in(i).valid := s.io.l1Hint.valid && sliceHintChannelMatches(ch)
l1HintArbs(ch).io.in(i).bits := s.io.l1Hint.bits
}
s.io.l1Hint.ready := Mux(
s.io.l1Hint.valid && Cat(sliceHintChannelMatches).orR,
Mux1H(sliceHintChannelMatches, l1HintArbs.map(_.io.in(i).ready)),
true.B
)
}
}

val hintFireVec = Wire(Vec(hintChannelCount, Bool()))
val hintChosenVec = Wire(Vec(hintChannelCount, UInt(banks.W)))
hintFireVec.foreach(_ := false.B)
hintChosenVec.foreach(_ := 0.U)

for (ch <- 0 until hintChannelCount) {
val channelClients = hintProbeClientsByChannel(ch)
if (channelClients.nonEmpty) {
val hintSourceId = l1HintArbs(ch).io.out.bits.sourceId
val dcacheSourceMatchVec = channelClients.map(_.sourceId.contains(hintSourceId).asInstanceOf[Bool])
val sourceIsDcache = Cat(dcacheSourceMatchVec).orR
val dcacheLocalSourceId = WireDefault(0.U.asTypeOf(io.l2_hint(ch).bits.sourceId))
when (sourceIsDcache) {
dcacheLocalSourceId := Mux1H(
dcacheSourceMatchVec,
channelClients.map(c => hintSourceId - c.sourceId.start.U)
)
}
hintChosen.valid := master.fire
hintChosen.bits.sliceId := OHToUInt(fires)
hintChosen.bits.hasData := selectedHint.hasData
readysToSlice := arbReadys & contains & Fill(arbValids.getWidth, master.ready)

master.ready := !RegNext(master.fire && selectedHint.hasData)
assert(!(l1HintArbs(ch).io.out.fire && PopCount(Cat(dcacheSourceMatchVec)) > 1.U),
s"l2_hint[$ch] sourceId matches multiple dcache clients")
io.l2_hint(ch).valid := l1HintArbs(ch).io.out.fire && l1HintArbs(ch).io.out.bits.hasData && sourceIsDcache
io.l2_hint(ch).bits.sourceId := dcacheLocalSourceId
io.l2_hint(ch).bits.isKeyword := l1HintArbs(ch).io.out.bits.isKeyword
// consecutive hints can only be sent every two cycles, since GrantData takes two cycles
l1HintArbs(ch).io.out.ready := !RegNext(io.l2_hint(ch).valid, false.B)
hintFireVec(ch) := io.l2_hint(ch).valid
hintChosenVec(ch) := l1HintArbs(ch).io.chosen
} else {
io.l2_hint(ch).valid := false.B
io.l2_hint(ch).bits := 0.U.asTypeOf(io.l2_hint(ch).bits)
l1HintArbs(ch).io.out.ready := true.B
}
}
slices_l1Hint.zipWithIndex.foreach {
case (hint, i) =>
hint.ready := readysVec.map(_(i)).reduce(_||_)

for (ch <- 0 until hintChannelCount) {
hintChosen(ch) := hintChosenVec(ch)
hintFire(ch) := hintFireVec(ch)
}
}

Expand Down Expand Up @@ -614,6 +709,36 @@ abstract class CoupledL2Base(implicit p: Parameters) extends LazyModule with Has
}
XSPerfAccumulate("grant_data_fire", PopCount(VecInit(grant_data_fire)))

// Perf counters that compare delayed hint source IDs against the source of the D-channel
// beat that fires, one pipeline pair per hint channel.
// NOTE(review): io.l2_hint(ch).bits.sourceId is driven with the client-local ID (global ID minus
// the client's sourceId.start), while grant_data_source is the raw d.bits.source — confirm the two
// are comparable when a client's source range does not start at 0.
val hint_source = io.l2_hint.map(_.bits.sourceId)

// Source ID of the D beat that fires; ParallelPriorityMux picks among slices by d.fire.
val grant_data_source = ParallelPriorityMux(slices.map {
s => (s.io.in.d.fire, s.io.in.d.bits.source)
})

// Per-channel pipeline constructed with parameter 2 (presumably a 2-stage delay — confirm
// against the Pipeline definition); always drained.
val hintPipe2 = Seq.tabulate(hintChannelCount)(_ => Module(new Pipeline(UInt(32.W), 2)))
hintPipe2.zipWithIndex.foreach { case (pipe, ch) =>
pipe.io.in.valid := io.l2_hint(ch).valid
pipe.io.in.bits := hint_source(ch)
pipe.io.out.ready := true.B
}

// Same as above, constructed with parameter 1.
val hintPipe1 = Seq.tabulate(hintChannelCount)(_ => Module(new Pipeline(UInt(32.W), 1)))
hintPipe1.zipWithIndex.foreach { case (pipe, ch) =>
pipe.io.in.valid := io.l2_hint(ch).valid
pipe.io.in.bits := hint_source(ch)
pipe.io.out.ready := true.B
}

// Number of channels whose hintPipe2 output matches the firing grant's source this cycle.
val accurateHint = PopCount(VecInit(hintPipe2.map(pipe =>
grant_data_fire.orR && pipe.io.out.valid && pipe.io.out.bits === grant_data_source
)))
XSPerfAccumulate("accurate3Hints", accurateHint)

// Number of channels whose hintPipe1 output matches the firing grant's source this cycle.
val okHint = PopCount(VecInit(hintPipe1.map(pipe =>
grant_data_fire.orR && pipe.io.out.valid && pipe.io.out.bits === grant_data_source
)))
XSPerfAccumulate("ok2Hints", okHint)

private val sigFromSrams = Option.when(cacheParams.hasDFT)(SramHelper.genBroadCastBundleTop())
private val cg = Option.when(cacheParams.hasMbist)(utility.ClockGate.genTeSrc)
if (cacheParams.hasMbist) {
Expand Down
23 changes: 18 additions & 5 deletions src/main/scala/coupledL2/GrantBuffer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ import coupledL2.prefetch.PrefetchResp
// Payload of one inflightGrant slot in the GrantBuffer. The fields are copied from the
// granted task when a slot is allocated; sourceId, opcode and channel are also printed by
// the "Inflight Grant Leak" assertion.
class InflightGrantEntry(implicit p: Parameters) extends L2Bundle {
// set of the granted task
val set = UInt(setBits.W)
// tag of the granted task
val tag = UInt(tagBits.W)
// sourceId of the granted task
val sourceId = UInt(sourceIdBits.W)
// opcode of the granted task
// NOTE(review): fixed at 4 bits — confirm this covers the task opcode width
val opcode = UInt(4.W)
// channel of the granted task
// NOTE(review): fixed at 3 bits — confirm this matches the task channel width
val channel = UInt(3.W)
}

class TaskWithData(implicit p: Parameters) extends L2Bundle {
Expand Down Expand Up @@ -156,6 +159,7 @@ class GrantBuffer(implicit p: Parameters) extends L2Module {
mergeAtask.mergeA := false.B
mergeAtask.aMergeTask := 0.U.asTypeOf(new MergeTaskBundle)
val inflight_insertIdx = PriorityEncoder(inflightGrant.map(!_.valid))
val inflightTask = Mux(io.d_task.bits.task.mergeA, mergeAtask, io.d_task.bits.task)
val grantQueue_enq_isKeyword = Mux(io.d_task.bits.task.mergeA, mergeAtask.isKeyword.getOrElse(false.B), io.d_task.bits.task.isKeyword.getOrElse(false.B))
// The following is organized in the order of data flow
// =========== save d_task in queue[FIFO] ===========
Expand Down Expand Up @@ -268,8 +272,11 @@ class GrantBuffer(implicit p: Parameters) extends L2Module {
// choose an empty entry
val entry = inflightGrant(inflight_insertIdx)
entry.valid := true.B
entry.bits.set := io.d_task.bits.task.set
entry.bits.tag := io.d_task.bits.task.tag
entry.bits.set := inflightTask.set
entry.bits.tag := inflightTask.tag
entry.bits.sourceId := inflightTask.sourceId
entry.bits.opcode := inflightTask.opcode
entry.bits.channel := inflightTask.channel
}
val inflight_full = Cat(inflightGrant.map(_.valid)).andR
assert(!(inflight_full & (io.d_task.fire && (dtaskOpcode === Grant || dtaskOpcode === GrantData || io.d_task.bits.task.mergeA))), "inflightGrant entries overflow")
Expand All @@ -284,6 +291,8 @@ class GrantBuffer(implicit p: Parameters) extends L2Module {

// When an E-channel beat fires, release the inflightGrant slot indexed by its sink field.
when (io.e.fire) {
// sink must index an existing slot
assert(io.e.bits.sink < grantBufInflightSize.U, "GrantBuf: e.sink overflow inflightGrant size")
// the addressed slot must currently be valid; an E beat for an empty slot is reported with its sink
assert(inflightGrant(io.e.bits.sink).valid,
"GrantBuf: received GrantAck for empty slot sink=%d", io.e.bits.sink)
inflightGrant(io.e.bits.sink).valid := false.B
}

Expand Down Expand Up @@ -328,11 +337,15 @@ class GrantBuffer(implicit p: Parameters) extends L2Module {
// =========== XSPerf ===========
if (cacheParams.enablePerf) {
val timers = RegInit(VecInit(Seq.fill(grantBufInflightSize){0.U(64.W)}))
inflightGrant zip timers map {
case (e, t) =>
inflightGrant.zip(timers).zipWithIndex.map {
case ((e, t), i) =>
when(e.valid) { t := t + 1.U }
when(RegNext(e.valid) && !e.valid) { t := 0.U }
assert(t < 10000.U, "Inflight Grant Leak")
assert(
t < 10000.U,
"Inflight Grant Leak idx=%d source=%d opcode=%d channel=0b%b set=0x%x tag=0x%x",
i.U, e.bits.sourceId, e.bits.opcode, e.bits.channel, e.bits.set, e.bits.tag
)

val enable = RegNext(e.valid) && !e.valid
XSPerfHistogram("grant_grantack_period", t, enable, 0, 12, 1)
Expand Down
3 changes: 3 additions & 0 deletions src/main/scala/coupledL2/L2Param.scala
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ case class L2Param(
pageBytes: Int = 4096,
channelBytes: TLChannelBeatBytes = TLChannelBeatBytes(32),
clientCaches: Seq[L1Param] = Nil,
// Optional static mapping from slice id to the coherent client index visible to that slice.
// This keeps the client bitmap precise when multiple coherent dcache channels share one L2.
sliceCoherentClientMap: Option[Seq[Int]] = None,
replacement: String = "drrip",
mshrs: Int = 16,
releaseData: Int = 3,
Expand Down
4 changes: 1 addition & 3 deletions src/main/scala/coupledL2/SourceB.scala
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ class SourceB(implicit p: Parameters) extends L2Module {
val grantStatus = Input(Vec(grantBufInflightSize, new GrantStatus))
})

val dcacheSourceIdStart = edgeIn.client.clients
.filter(_.supports.probe)
.map(c => c.sourceId.start.U).head
val dcacheSourceIdStart = probeClients.map(_.sourceId.start.U).head

def toTLBundleB(task: SourceBReq) = {
val b = Wire(new TLBundleB(edgeIn.bundle))
Expand Down
6 changes: 3 additions & 3 deletions src/main/scala/coupledL2/tl2chi/MSHR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -786,8 +786,8 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes {
),
clients = Mux(
req_prefetch,
Mux(dirResult.hit, meta.clients, Fill(clientBits, false.B)),
Fill(clientBits, !(req_get && (!dirResult.hit || meta_no_client)))
Mux(dirResult.hit, meta.clients, 0.U(clientBits.W)),
Mux(req_get, Mux(dirResult.hit, meta.clients, 0.U(clientBits.W)), getClientBitOH(req.sourceId))
),
alias = Some(aliasFinal),
prefetch = req_prefetch || dirResult.hit && meta_pft,
Expand Down Expand Up @@ -832,7 +832,7 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes {
TRUNK,
BRANCH
),
clients = Fill(clientBits, true.B),
clients = getClientBitOH(merge_task.sourceId),
alias = Some(merge_task.alias.getOrElse(0.U)),
prefetch = false.B,
accessed = true.B
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/coupledL2/tl2chi/MainPipe.scala
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ class MainPipe(implicit p: Parameters) extends TL2CHIL2Module with HasCHIOpcodes
val metaW_s3_a = MetaEntry(
dirty = metaOnHit_s3.dirty,
state = Mux(req_needT_s3 || sink_resp_s3_a_promoteT, TRUNK, metaOnHit_s3.state),
clients = Fill(clientBits, Mux(l2Error_s3, false.B, true.B)),
clients = Mux(l2Error_s3, 0.U(clientBits.W), getClientBitOH(req_s3.sourceId)),
alias = Some(metaW_s3_a_alias),
accessed = true.B,
tagErr = metaOnHit_s3.tagErr,
Expand Down
Loading