summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIru Cai <mytbk920423@gmail.com>2019-02-28 17:07:16 +0800
committerIru Cai <mytbk920423@gmail.com>2019-03-20 16:08:09 +0800
commit0ca254aa8381ba2fae61a4a056301e35da9ffab3 (patch)
tree612664055ade4cca58186a76fee4dd7522aeb305
parent476fd104a80095207eec0b594baa642937fbac01 (diff)
downloadgem5-0ca254aa8381ba2fae61a4a056301e35da9ffab3.tar.xz
invisispec-1.0 source
-rw-r--r--src/arch/arm/tlb.cc16
-rw-r--r--src/arch/arm/tlb.hh1
-rw-r--r--src/arch/generic/memhelpers.hh1
-rw-r--r--src/arch/x86/tlb.cc14
-rw-r--r--src/arch/x86/tlb.hh1
-rw-r--r--src/cpu/base_dyn_inst.hh188
-rw-r--r--src/cpu/base_dyn_inst_impl.hh5
-rw-r--r--src/cpu/o3/O3CPU.py16
-rw-r--r--src/cpu/o3/commit.hh17
-rw-r--r--src/cpu/o3/commit_impl.hh62
-rw-r--r--src/cpu/o3/cpu.cc3
-rw-r--r--src/cpu/o3/iew_impl.hh47
-rw-r--r--src/cpu/o3/inst_queue_impl.hh13
-rw-r--r--src/cpu/o3/lsq.hh21
-rw-r--r--src/cpu/o3/lsq_impl.hh50
-rw-r--r--src/cpu/o3/lsq_unit.hh201
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh697
-rw-r--r--src/cpu/o3/rename_impl.hh3
-rw-r--r--src/cpu/o3/rob.hh4
-rw-r--r--src/cpu/o3/rob_impl.hh79
-rw-r--r--src/mem/packet.cc19
-rw-r--r--src/mem/packet.hh113
-rw-r--r--src/mem/port.cc1
-rw-r--r--src/mem/protocol/MESI_Two_Level-L1cache.sm264
-rw-r--r--src/mem/protocol/MESI_Two_Level-L2cache.sm279
-rw-r--r--src/mem/protocol/MESI_Two_Level-dir.sm89
-rw-r--r--src/mem/protocol/MESI_Two_Level-msg.sm6
-rw-r--r--src/mem/protocol/RubySlicc_Defines.sm2
-rw-r--r--src/mem/protocol/RubySlicc_Exports.sm12
-rw-r--r--src/mem/protocol/RubySlicc_Types.sm3
-rw-r--r--src/mem/request.hh4
-rw-r--r--src/mem/ruby/SConscript3
-rw-r--r--src/mem/ruby/network/Network.cc6
-rw-r--r--src/mem/ruby/slicc_interface/AbstractController.cc82
-rw-r--r--src/mem/ruby/slicc_interface/AbstractController.hh15
-rw-r--r--src/mem/ruby/slicc_interface/RubyRequest.hh16
-rw-r--r--src/mem/ruby/structures/CacheMemory.cc4
-rw-r--r--src/mem/ruby/system/RubyPort.cc11
-rw-r--r--src/mem/ruby/system/RubyPort.hh2
-rw-r--r--src/mem/ruby/system/Sequencer.cc175
-rw-r--r--src/mem/ruby/system/Sequencer.hh24
41 files changed, 2479 insertions, 90 deletions
diff --git a/src/arch/arm/tlb.cc b/src/arch/arm/tlb.cc
index 79eef1b8d..521a43226 100644
--- a/src/arch/arm/tlb.cc
+++ b/src/arch/arm/tlb.cc
@@ -536,6 +536,11 @@ TLB::regStats()
.name(name() + ".prefetch_faults")
.desc("Number of TLB faults due to prefetch")
;
+
+ specTLBMisses
+ .name(name() + ".spec_tlb_misses")
+ .desc("Number of TLB misses from a speculative mem instructions")
+ ;
domainFaults
.name(name() + ".domain_faults")
@@ -1426,6 +1431,17 @@ TLB::getTE(TlbEntry **te, const RequestPtr &req, ThreadContext *tc, Mode mode,
vaddr_tainted, ArmFault::PrefetchTLBMiss, isStage2);
}
+ if (req->isSpec()) {
+ // if the request is a prefetch don't attempt to fill the TLB or go
+ // any further with the memory access (here we can safely use the
+ // fault status for the short desc. format in all cases)
+ specTLBMisses++;
+ //FIXME: currently resue the prefetch tlbmiss fault
+ //do not want to introduce new fault declaration
+ return std::make_shared<PrefetchAbort>(
+ vaddr_tainted, ArmFault::PrefetchTLBMiss, isStage2);
+ }
+
if (is_fetch)
instMisses++;
else if (is_write)
diff --git a/src/arch/arm/tlb.hh b/src/arch/arm/tlb.hh
index 336b31b78..9f2acda6c 100644
--- a/src/arch/arm/tlb.hh
+++ b/src/arch/arm/tlb.hh
@@ -177,6 +177,7 @@ class TLB : public BaseTLB
mutable Stats::Scalar flushedEntries;
mutable Stats::Scalar alignFaults;
mutable Stats::Scalar prefetchFaults;
+ mutable Stats::Scalar specTLBMisses;
mutable Stats::Scalar domainFaults;
mutable Stats::Scalar permsFaults;
diff --git a/src/arch/generic/memhelpers.hh b/src/arch/generic/memhelpers.hh
index 6fe1707a0..c77612d34 100644
--- a/src/arch/generic/memhelpers.hh
+++ b/src/arch/generic/memhelpers.hh
@@ -53,6 +53,7 @@
/// Initiate a read from memory in timing mode. Note that the 'mem'
/// parameter is unused; only the type of that parameter is used
/// to determine the size of the access.
+// XC: executeContextPtr [mengjia]
template <class XC, class MemT>
Fault
initiateMemRead(XC *xc, Trace::InstRecord *traceData, Addr addr,
diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc
index 8e83208f4..ba14d7ef3 100644
--- a/src/arch/x86/tlb.cc
+++ b/src/arch/x86/tlb.cc
@@ -340,6 +340,17 @@ TLB::translate(const RequestPtr &req,
wrAccesses++;
}
if (!entry) {
+ if(req->isSpec()){
+ // [InvisiSpec] do not perform TLB fill for
+ // speculative load
+ specMisses++;
+ DPRINTF(TLB, "Get a TLB miss for a speculative load "
+ "address %#x at pc %#x.\n",
+ vaddr, tc->instAddr());
+ //FIXME: currently reuse the GeneralProtection fault
+ //instead of creating new faults
+ return std::make_shared<GeneralProtection>(0);
+ }
DPRINTF(TLB, "Handling a TLB miss for "
"address %#x at pc %#x.\n",
vaddr, tc->instAddr());
@@ -472,6 +483,9 @@ TLB::regStats()
.name(name() + ".wrMisses")
.desc("TLB misses on write requests");
+ specMisses
+ .name(name() + ".spec_tlb_misses")
+ .desc("TLB misses on speculative memory requests");
}
void
diff --git a/src/arch/x86/tlb.hh b/src/arch/x86/tlb.hh
index 827ab8166..7213b8b41 100644
--- a/src/arch/x86/tlb.hh
+++ b/src/arch/x86/tlb.hh
@@ -105,6 +105,7 @@ namespace X86ISA
Stats::Scalar wrAccesses;
Stats::Scalar rdMisses;
Stats::Scalar wrMisses;
+ Stats::Scalar specMisses;
Fault translateInt(const RequestPtr &req, ThreadContext *tc);
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 93cafd694..0e81073cc 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -120,6 +120,28 @@ class BaseDynInst : public ExecContext, public RefCounted
/// instructions ahead of it
SerializeAfter, /// Needs to serialize instructions behind it
SerializeHandled, /// Serialization has been handled
+
+ SpecCompleted,
+ // [mengjia] indicates whether received specReadResp
+ ValidationCompleted,
+ // indicates whether validation finishes
+ ExposeCompleted,
+ ExposeSent,
+ // indicates whether expose finishes
+ // (should set when expose is sent out)
+ PrevInstsCompleted,
+ // indicate whether previous instructions completed
+ PrevBrsResolved,
+ // [mengjia] indicate whether previous branches are resolved
+ PrevInstsCommitted,
+ // indicate whether previous instructions committed
+ PrevBrsCommitted,
+ // [mengjia] indicate whether previous branches are committed
+ L1HitHigh,
+ L1HitLow,
+ SpecBuffObsoleteHigh,
+ SpecBuffObsoleteLow,
+ // [InvisiSpec] it hits in L1 and is open to invalidations
NumStatus
};
@@ -136,6 +158,22 @@ class BaseDynInst : public ExecContext, public RefCounted
IsStrictlyOrdered,
ReqMade,
MemOpDone,
+ // [mengjia] indicates need validation or expose
+ NeedPostFetch,
+ NeedDeletePostReq,
+ // [mengjia] indicates only need to expose, do not need to validate
+ NeedExposeOnly,
+ // [InvisiSpec] indicate the instruction needs to be delayed
+ // due to virtual fences before to defend against speculative attacks
+ FenceDelay,
+ // [InvisiSpec] indicate the load is legal to be visible
+ ReadyToExpose,
+ HitInvalidation,
+ HitExternalEviction,
+ ValidationFail,
+ OnlyWaitForFence,
+ OnlyWaitForExpose,
+ SpecTLBMiss,
MaxFlags
};
@@ -222,6 +260,9 @@ class BaseDynInst : public ExecContext, public RefCounted
/** Pointer to the data for the memory access. */
uint8_t *memData;
+ /** Pointer to the data for the validation result. */
+ uint8_t *vldData;
+
/** Load queue index. */
int16_t lqIdx;
@@ -238,6 +279,12 @@ class BaseDynInst : public ExecContext, public RefCounted
RequestPtr savedSreqLow;
RequestPtr savedSreqHigh;
+ /** [InvisiSpec]
+ * Saved memory requests (needed for post-fetch validation/expose).
+ */
+ RequestPtr postReq;
+ RequestPtr postSreqLow;
+ RequestPtr postSreqHigh;
/////////////////////// Checker //////////////////////
// Need a copy of main request pointer to verify on writes.
RequestPtr reqToVerify;
@@ -275,6 +322,43 @@ class BaseDynInst : public ExecContext, public RefCounted
bool memOpDone() const { return instFlags[MemOpDone]; }
void memOpDone(bool f) { instFlags[MemOpDone] = f; }
+ /** [mengjia] Whether or not need pseudo-validation.
+ * whether speculative laod finishes,
+ * whether validation completes or not (success) */
+ bool needExposeOnly() const { return instFlags[NeedExposeOnly]; }
+ void needExposeOnly(bool f) { instFlags[NeedExposeOnly] = f; }
+
+ bool needPostFetch() const { return instFlags[NeedPostFetch]; }
+ void needPostFetch(bool f) { instFlags[NeedPostFetch] = f; }
+
+ bool needDeletePostReq() const { return instFlags[NeedDeletePostReq]; }
+ void needDeletePostReq(bool f) { instFlags[NeedDeletePostReq] = f; }
+
+ bool fenceDelay() const { return instFlags[ReadyToExpose]; }
+ void fenceDelay(bool f) { instFlags[ReadyToExpose] = f; }
+
+ bool readyToExpose() const { return instFlags[FenceDelay]; }
+ void readyToExpose(bool f) { instFlags[FenceDelay] = f; }
+
+ bool hitInvalidation() const { return instFlags[HitInvalidation]; }
+ void hitInvalidation(bool f) { instFlags[HitInvalidation] = f; }
+
+ bool hitExternalEviction() const { return instFlags[HitExternalEviction]; }
+ void hitExternalEviction(bool f) { instFlags[HitExternalEviction] = f; }
+
+ bool validationFail() const { return instFlags[ValidationFail]; }
+ void validationFail(bool f) { instFlags[ValidationFail] = f; }
+
+ bool onlyWaitForFence() const { return instFlags[OnlyWaitForFence]; }
+ void onlyWaitForFence(bool f) { instFlags[OnlyWaitForFence] = f; }
+
+ bool onlyWaitForExpose() const { return instFlags[OnlyWaitForExpose]; }
+ void onlyWaitForExpose(bool f) { instFlags[OnlyWaitForExpose] = f; }
+
+ bool specTLBMiss() const { return instFlags[SpecTLBMiss]; }
+ void specTLBMiss(bool f) { instFlags[SpecTLBMiss] = f; }
+ /*[mengjia] added 2 new flags and corresponding functions*/
+
bool notAnInst() const { return instFlags[NotAnInst]; }
void setNotAnInst() { instFlags[NotAnInst] = true; }
@@ -704,6 +788,49 @@ class BaseDynInst : public ExecContext, public RefCounted
/** Returns whether or not this instruction is completed. */
bool isCompleted() const { return status[Completed]; }
+ /* [mengjia] new status for load operations */
+ //void setSpecSent() { status.set(SpecSent); }
+ //bool isSpecSent() const { return status[SpecSent]; }
+
+ void setSpecCompleted() { status.set(SpecCompleted); }
+ bool isSpecCompleted() const { return status[SpecCompleted]; }
+
+ void setValidationCompleted() { status.set(ValidationCompleted); }
+ bool isValidationCompleted() const { return status[ValidationCompleted]; }
+
+ void setExposeCompleted() { status.set(ExposeCompleted); }
+ bool isExposeCompleted() const { return status[ExposeCompleted]; }
+
+ void setExposeSent() { status.set(ExposeSent); }
+ bool isExposeSent() const { return status[ExposeSent]; }
+
+ void setL1HitHigh() { status.set(L1HitHigh); }
+ void clearL1HitHigh() { status.reset(L1HitHigh); }
+ bool isL1HitHigh() const { return status[L1HitHigh]; }
+
+ void setL1HitLow() { status.set(L1HitLow); }
+ void clearL1HitLow() { status.reset(L1HitLow); }
+ bool isL1HitLow() const { return status[L1HitLow]; }
+
+ void setPrevInstsCompleted() { status.set(PrevInstsCompleted); }
+ bool isPrevInstsCompleted() const { return status[PrevInstsCompleted]; }
+
+ void setSpecBuffObsoleteHigh() { status.set(SpecBuffObsoleteHigh); }
+ bool isSpecBuffObsoleteHigh() const { return status[SpecBuffObsoleteHigh]; }
+
+ void setSpecBuffObsoleteLow() { status.set(SpecBuffObsoleteLow); }
+ bool isSpecBuffObsoleteLow() const { return status[SpecBuffObsoleteLow]; }
+
+ void setPrevBrsResolved() { status.set(PrevBrsResolved); }
+ bool isPrevBrsResolved() const { return status[PrevBrsResolved]; }
+
+ void setPrevInstsCommitted() { status.set(PrevInstsCommitted); }
+ bool isPrevInstsCommitted() const { return status[PrevInstsCommitted]; }
+
+ void setPrevBrsCommitted() { status.set(PrevBrsCommitted); }
+ bool isPrevBrsCommitted() const { return status[PrevBrsCommitted]; }
+ /* Configure load related status */
+
/** Marks the result as ready. */
void setResultReady() { status.set(ResultReady); }
@@ -893,7 +1020,25 @@ Fault
BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
Request::Flags flags)
{
+ // [InvisiSpec] do not start translation if
+ // there is a virtual fence ahead
+ assert(!fenceDelay());
+
+ if ( (flags.isSet(Request::ATOMIC_RETURN_OP)
+ || flags.isSet(Request::ATOMIC_NO_RETURN_OP)
+ || flags.isSet(Request::UNCACHEABLE)
+ || flags.isSet(Request::LLSC)
+ || flags.isSet(Request::STRICT_ORDER))
+ && !readyToExpose()){
+ onlyWaitForExpose(true);
+ // FIXME: reschedule due to LLSC
+ // reuse TLBMiss for now
+ specTLBMiss(true);
+ return NoFault;
+ }
+
instFlags[ReqMade] = true;
+ instFlags[SpecTLBMiss] = false;
RequestPtr req = NULL;
RequestPtr sreqLow = NULL;
RequestPtr sreqHigh = NULL;
@@ -908,16 +1053,36 @@ BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
this->pc.instAddr(), thread->contextId());
req->taskId(cpu->taskId());
-
+ if(!readyToExpose()){
+ req->setFlags(Request::SPEC);
+ }
// Only split the request if the ISA supports unaligned accesses.
if (TheISA::HasUnalignedMemAcc) {
splitRequest(req, sreqLow, sreqHigh);
}
+
initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read);
+
}
if (translationCompleted()) {
+ // [InvisiSpec] to fix the memory leakage problem
+ // in the case the read is squashed and the request
+ // is never sent out due to a virtual fence ahead
if (fault == NoFault) {
+ /*
+ if (fenceDelay()){
+ translationStarted(false);
+ translationCompleted(false);
+ onlyWaitForFence(true);
+ delete req;
+ if (sreqLow){
+ delete sreqLow;
+ delete sreqHigh;
+ }
+ return NoFault;
+ }
+ */
effAddr = req->getVaddr();
effSize = size;
instFlags[EffAddrValid] = true;
@@ -925,10 +1090,30 @@ BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
if (cpu->checker) {
reqToVerify = std::make_shared<Request>(*req);
}
+
+ // issue load request [mengjia]
fault = cpu->read(req, sreqLow, sreqHigh, lqIdx);
} else {
// Commit will have to clean up whatever happened. Set this
// instruction as executed.
+
+ // [InvisiSpec] If it is a fault on translating a spec load
+ // Deffer it and retry when it is ready to expose
+ if (!readyToExpose()){
+ translationStarted(false);
+ translationCompleted(false);
+ onlyWaitForExpose(true);
+ specTLBMiss(true);
+ //delete req;
+ //if (sreqLow){
+ // delete sreqLow;
+ // delete sreqHigh;
+ //}
+ return NoFault;
+ }
+ // set it as executed and fault flag.
+ // when it enters ROB and try to commit,
+ // the commit stage will squash this inst [mengjia]
this->setExecuted();
}
}
@@ -939,6 +1124,7 @@ BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
return fault;
}
+
template<class Impl>
Fault
BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size, Addr addr,
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index b499fe4e6..d7b3d562c 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -87,6 +87,7 @@ void
BaseDynInst<Impl>::initVars()
{
memData = NULL;
+ vldData = NULL;
effAddr = 0;
physEffAddrLow = 0;
physEffAddrHigh = 0;
@@ -140,6 +141,10 @@ BaseDynInst<Impl>::~BaseDynInst()
delete [] memData;
}
+ if (vldData) {
+ delete [] vldData;
+ }
+
if (traceData) {
delete traceData;
}
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index b8152f663..371433eef 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -68,6 +68,10 @@ class DerivO3CPU(BaseCPU):
cacheStorePorts = Param.Unsigned(200, "Cache Ports. "
"Constrains stores only. Loads are constrained by load FUs.")
+ # we deal with validation very similar as store writes back
+ # FIXME: not sure whether it is the correct parameter or not
+ cacheValidationPorts = Param.Unsigned(200, "Validation Ports. "
+ "Constrains validations only. Loads are constrained by load FUs.")
decodeToFetchDelay = Param.Cycles(1, "Decode to fetch delay")
renameToFetchDelay = Param.Cycles(1 ,"Rename to fetch delay")
@@ -124,7 +128,7 @@ class DerivO3CPU(BaseCPU):
LFSTSize = Param.Unsigned(1024, "Last fetched store table size")
SSITSize = Param.Unsigned(1024, "Store set ID table size")
- numRobs = Param.Unsigned(1, "Number of Reorder Buffers");
+ numRobs = Param.Unsigned(1, "Number of Reorder Buffers")
numPhysIntRegs = Param.Unsigned(256, "Number of physical integer registers")
numPhysFloatRegs = Param.Unsigned(256, "Number of physical floating point "
@@ -157,10 +161,14 @@ class DerivO3CPU(BaseCPU):
smtCommitPolicy = Param.String('RoundRobin', "SMT Commit Policy")
branchPred = Param.BranchPredictor(TournamentBP(numThreads =
- Parent.numThreads),
+ Parent.numThreads),
"Branch Predictor")
- needsTSO = Param.Bool(buildEnv['TARGET_ISA'] == 'x86',
- "Enable TSO Memory model")
+
+ # [mengjia] add configuration variables
+ simulateScheme = Param.String('UnsafeBaseline',
+ "The scheme specificed for simulation")
+ needsTSO = Param.Bool(False, "Enable TSO Memory model")
+ allowSpecBuffHit = Param.Bool(True, "Enable hit/reuse spec buffer entries")
def addCheckerCpu(self):
if buildEnv['TARGET_ISA'] in ['arm']:
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index f508a372e..7fe4ad731 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -450,6 +450,7 @@ class DefaultCommit
/** The sequence number of the last commited instruction. */
InstSeqNum lastCommitedSeqNum[Impl::MaxThreads];
+ Tick lastCommitTick;
/** Records if there is a trap currently in flight. */
bool trapInFlight[Impl::MaxThreads];
@@ -479,6 +480,9 @@ class DefaultCommit
/** Updates commit stats based on this instruction. */
void updateComInstStats(DynInstPtr &inst);
+ /** [InvisiSpec] Updates squash stats based on this instruction. */
+ void updateSquashStats(DynInstPtr &inst);
+
/** Stat for the total number of squashed instructions discarded by commit.
*/
Stats::Scalar commitSquashedInsts;
@@ -488,6 +492,19 @@ class DefaultCommit
Stats::Scalar commitNonSpecStalls;
/** Stat for the total number of branch mispredicts that caused a squash. */
Stats::Scalar branchMispredicts;
+
+ // [InvisiSpec] count #squash
+ /** Stat for the total number of invalidation packets
+ * that caused a squash. */
+ Stats::Scalar loadHitInvalidations;
+ Stats::Scalar loadHitExternalEvictions;
+ /** Stat for the total number of failed validations
+ * that caused a squash. */
+ Stats::Scalar loadValidationFails;
+ // [InvisiSpec] count cycles stall due to waiting for
+ // validation responses
+ Stats::Scalar validationStalls;
+
/** Distribution of the number of committed instructions each cycle. */
Stats::Distribution numCommittedDist;
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index d32493cbc..d83011a66 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -143,6 +143,7 @@ DefaultCommit<Impl>::DefaultCommit(O3CPU *_cpu, DerivO3CPUParams *params)
lastCommitedSeqNum[tid] = 0;
squashAfterInst[tid] = NULL;
}
+ lastCommitTick = curTick();
interrupt = NoFault;
}
@@ -183,6 +184,28 @@ DefaultCommit<Impl>::regStats()
.desc("The number of times a branch was mispredicted")
.prereq(branchMispredicts);
+ // [InvisiSpec] stat for squash due to invalidation, failed validation
+ loadHitInvalidations
+ .name(name() + ".loadHitInvalidations")
+ .desc("The number of times a load hits a invalidation");
+ //.prereq(loadHitInvalidations);
+
+ loadHitExternalEvictions
+ .name(name() + ".loadHitExternalEvictions")
+ .desc("The number of times a load hits an external invalidation");
+ //.prereq(loadHitInvalidations);
+
+ loadValidationFails
+ .name(name() + ".loadValidationFails")
+ .desc("The number of times a load fails validation");
+ //.prereq(loadValidationFails);
+
+ validationStalls
+ .name(name() + ".validationStalls")
+ .desc("The number of ticks the commit is stalled due to waiting "
+ "for validation responses");
+ //.prereq(loadValidationFails);
+
numCommittedDist
.init(0,commitWidth,1)
.name(name() + ".committed_per_cycle")
@@ -579,6 +602,9 @@ DefaultCommit<Impl>::squashAll(ThreadID tid)
toIEW->commitInfo[tid].squashInst = NULL;
toIEW->commitInfo[tid].pc = pc[tid];
+
+ //TODO: send a packet to SpecBuffer to indicate flush
+ //
}
template <class Impl>
@@ -705,13 +731,21 @@ DefaultCommit<Impl>::tick()
} else if (!rob->isEmpty(tid)) {
DynInstPtr inst = rob->readHeadInst(tid);
+ if (inst->isExecuted() && inst->needPostFetch()
+ && !inst->isExposeCompleted()){
+ //stall due to waiting for validation response
+ if (curTick()-lastCommitTick > 0){
+ validationStalls+= curTick()-lastCommitTick;
+ }
+
+ }
ppCommitStall->notify(inst);
DPRINTF(Commit,"[tid:%i]: Can't commit, Instruction [sn:%lli] PC "
"%s is head of ROB and not ready\n",
tid, inst->seqNum, inst->pcState());
}
-
+ lastCommitTick = curTick();
DPRINTF(Commit, "[tid:%i]: ROB has %d insts & %d free entries.\n",
tid, rob->countInsts(tid), rob->numFreeEntries(tid));
}
@@ -832,6 +866,7 @@ DefaultCommit<Impl>::commit()
squashFromTrap(tid);
} else if (tcSquash[tid]) {
assert(commitStatus[tid] != TrapPending);
+ //TC: thread context. [mengjia]
squashFromTC(tid);
} else if (commitStatus[tid] == SquashAfterPending) {
// A squash from the previous cycle of the commit stage (i.e.,
@@ -1039,6 +1074,7 @@ DefaultCommit<Impl>::commitInsts()
toIEW->commitInfo[tid].doneSeqNum = head_inst->seqNum;
if (tid == 0) {
+ //maybe we can use this to mask interrupts [mengjia]
canHandleInterrupts = (!head_inst->isDelayedCommit()) &&
((THE_ISA != ALPHA_ISA) ||
(!(pc[0].instAddr() & 0x3)));
@@ -1219,6 +1255,8 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// execution doesn't generate extra squashes.
thread[tid]->noSquashFromTC = true;
+ // [InvisiSpec] update squash stat for invalidation or validation fails
+ updateSquashStats(head_inst);
// Execute the trap. Although it's slightly unrealistic in
// terms of timing (as it doesn't wait for the full timing of
// the trap event to complete before updating state), it's
@@ -1350,6 +1388,7 @@ DefaultCommit<Impl>::markCompletedInsts()
// Grab completed insts out of the IEW instruction queue, and mark
// instructions completed within the ROB.
for (int inst_num = 0; inst_num < fromIEW->size; ++inst_num) {
+ DPRINTF(Commit, "get the inst [num:%d]\n", inst_num);
assert(fromIEW->insts[inst_num]);
if (!fromIEW->insts[inst_num]->isSquashed()) {
DPRINTF(Commit, "[tid:%i]: Marking PC %s, [sn:%lli] ready "
@@ -1362,6 +1401,27 @@ DefaultCommit<Impl>::markCompletedInsts()
fromIEW->insts[inst_num]->setCanCommit();
}
}
+
+ // [InvisiSpec]
+ // update load status
+ // isPrevInstsCompleted; isPrevBrsResolved
+ rob->updateVisibleState();
+}
+
+// [InvisiSpec] update squash stat for loads
+template <class Impl>
+void
+DefaultCommit<Impl>::updateSquashStats(DynInstPtr &inst)
+{
+ if (inst->hitInvalidation()){
+ loadHitInvalidations++;
+ }
+ if (inst->validationFail()){
+ loadValidationFails++;
+ }
+ if (inst->hitExternalEviction()){
+ loadHitExternalEvictions++;
+ }
}
template <class Impl>
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index c4bc13fb4..27ad78e2e 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -593,6 +593,7 @@ FullO3CPU<Impl>::tick()
activityRec.advance();
+ DPRINTF(O3CPU, "activityRec.advance() complete\n");
if (removeInstsThisCycle) {
cleanUpRemovedInsts();
}
@@ -610,6 +611,8 @@ FullO3CPU<Impl>::tick()
schedule(tickEvent, clockEdge(Cycles(1)));
DPRINTF(O3CPU, "Scheduling next tick!\n");
}
+ } else {
+ DPRINTF(O3CPU, "tickEvent.scheduled == false, %lu", curTick());
}
if (!FullSystem)
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 8270a71b5..063394fdd 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -1186,9 +1186,16 @@ DefaultIEW<Impl>::executeInsts()
fetchRedirect[tid] = false;
}
+ // [mengjia] Validate/Expose any loads which are ready last cycle
+ // very tricky, need make the state consistent
+ // if we successfully commit sth, then we need to activate the stage or somehow
+ // problems happen when interacting with squash
+ // NOTE: we always send validations before execute load requests
+ ldstQueue.exposeLoads();
+
// Uncomment this if you want to see all available instructions.
// @todo This doesn't actually work anymore, we should fix it.
-// printAvailableInsts();
+ // printAvailableInsts();
// Execute/writeback any instructions that are available.
int insts_to_execute = fromIssue->size;
@@ -1235,18 +1242,40 @@ DefaultIEW<Impl>::executeInsts()
DPRINTF(IEW, "Execute: Calculating address for memory "
"reference.\n");
+ DPRINTF(IEW, "Execute: %s\n", inst->staticInst->getName());
// Tell the LDSTQ to execute this instruction (if it is a load).
if (inst->isLoad()) {
// Loads will mark themselves as executed, and their writeback
// event adds the instruction to the queue to commit
+
+ // [InvisiSpec] a lifetime of a load
+ // always let it translate --> translation not complete, defer
+ // if !loadInExec, need to check whether there
+ // is a virtual fence ahead
+ // --> if existing virtual fence, defer
+ if (inst->fenceDelay()){
+ DPRINTF(IEW, "Deferring load due to virtual fence.\n");
+ inst->onlyWaitForFence(true);
+ instQueue.deferMemInst(inst);
+ continue;
+ }
+
fault = ldstQueue.executeLoad(inst);
- if (inst->isTranslationDelayed() &&
+ // [InvisiSpec] delay the load if there is a virtual fence ahead
+ if ((inst->isTranslationDelayed() ) &&
fault == NoFault) {
// A hw page table walk is currently going on; the
// instruction must be deferred.
- DPRINTF(IEW, "Execute: Delayed translation, deferring "
- "load.\n");
+ DPRINTF(IEW, "Execute: Delayed translation, deferring load.\n");
+ instQueue.deferMemInst(inst);
+ continue;
+ }
+
+ if ((inst->specTLBMiss() ) &&
+ fault == NoFault) {
+ DPRINTF(IEW, "Execute: Speculative load gets a TLB miss,"
+ " deferring load.\n");
instQueue.deferMemInst(inst);
continue;
}
@@ -1381,10 +1410,11 @@ DefaultIEW<Impl>::executeInsts()
++memOrderViolationEvents;
}
}
- }
+ }
// Update and record activity if we processed any instructions.
if (inst_num) {
+
if (exeStatus == Idle) {
exeStatus = Running;
}
@@ -1476,16 +1506,18 @@ DefaultIEW<Impl>::tick()
dispatch(tid);
}
+ ldstQueue.updateVisibleState();
+
if (exeStatus != Squashing) {
executeInsts();
-
+
writebackInsts();
// Have the instruction queue try to schedule any ready instructions.
// (In actuality, this scheduling is for instructions that will
// be executed next cycle.)
instQueue.scheduleReadyInsts();
-
+
// Also should advance its own time buffers if the stage ran.
// Not the best place for it, but this works (hopefully).
issueToExecQueue.advance();
@@ -1502,6 +1534,7 @@ DefaultIEW<Impl>::tick()
// Writeback any stores using any leftover bandwidth.
ldstQueue.writebackStores();
+
// Check the committed load/store signals to see if there's a load
// or store to commit. Also check if it's being told to execute a
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 84ac5799c..504084165 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -1172,8 +1172,19 @@ InstructionQueue<Impl>::getDeferredMemInstToExecute()
{
for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
++it) {
- if ((*it)->translationCompleted() || (*it)->isSquashed()) {
+ // [InvisiSpec] we need to check the FenceDelay
+ // a load can be delayed due to
+ // 1. translation delay
+ // 2. virtual fence ahead
+ // 3. not ready to expose and gets a TLB miss
+ // for both (2, 3) we need to restart the translation
+ if ( (*it)->translationCompleted()
+ || ((*it)->onlyWaitForFence() && !(*it)->fenceDelay())
+ || ((*it)->onlyWaitForExpose() && (*it)->readyToExpose())
+ || (*it)->isSquashed()) {
DynInstPtr mem_inst = *it;
+ mem_inst->onlyWaitForFence(false);
+ mem_inst->onlyWaitForExpose(false);
deferredMemInsts.erase(it);
return mem_inst;
}
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 7c78156d5..518153990 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -136,6 +136,23 @@ class LSQ {
/** Same as above, but only for one thread. */
void writebackStores(ThreadID tid);
+
+ /** [mengjia]
+ * Attempts to validate loads until all cache ports are used or the
+ * interface becomes blocked.
+ */
+ int exposeLoads();
+ /** Same as above, but only for one thread. */
+ int exposeLoads(ThreadID tid);
+
+ /** [mengjia]
+ * attempt to update FenceDelay state for load insts
+ */
+ void updateVisibleState();
+ /** Same as above, but only for one thread. */
+ void updateVisibleState(ThreadID tid);
+
+
/**
* Squash instructions from a thread until the specified sequence number.
*/
@@ -257,6 +274,10 @@ class LSQ {
int numStoresToWB(ThreadID tid)
{ return thread[tid].numStoresToWB(); }
+ /** Returns the number of stores a specific thread has to write back. */
+ int numLoadsToVLD(ThreadID tid)
+ { return thread[tid].numLoadsToVLD(); }
+
/** Returns if the LSQ will write back to memory this cycle. */
bool willWB();
/** Returns if the LSQ of a specific thread will write back to memory this
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 56b95a5b6..1e78f534e 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -310,6 +310,44 @@ LSQ<Impl>::writebackStores()
}
}
+// [mengjia]
+template<class Impl>
+int
+LSQ<Impl>::exposeLoads()
+{
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
+
+ int exposedLoads = 0;
+ while (threads != end) {
+ ThreadID tid = *threads++;
+
+ if (numLoadsToVLD(tid) > 0) {
+ DPRINTF(Writeback,"[tid:%i] Validate loads. %i loads "
+ "available for Validate.\n", tid, numLoadsToVLD(tid));
+ }
+
+ exposedLoads += thread[tid].exposeLoads();
+ }
+ return exposedLoads;
+}
+
+
+// [mengjia]
+template<class Impl>
+void
+LSQ<Impl>::updateVisibleState()
+{
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
+
+ while (threads != end) {
+ ThreadID tid = *threads++;
+
+ thread[tid].updateVisibleState();
+ }
+}
+
template<class Impl>
bool
LSQ<Impl>::violation()
@@ -339,6 +377,7 @@ LSQ<Impl>::recvReqRetry()
}
}
+// [InvisiSpec] Callback function for receiving a response
template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
@@ -347,6 +386,17 @@ LSQ<Impl>::recvTimingResp(PacketPtr pkt)
DPRINTF(LSQ, "Got error packet back for address: %#X\n",
pkt->getAddr());
+ // for expose or validate request,
+ // if the instruction is squashed, maybe the req has been deleted
+ if (pkt->isValidate() || pkt->isExpose()){
+ if (!pkt->req){
+ delete pkt;
+ return true;
+ }
+ DPRINTF(LSQ, "Receive an expose/validate response, idx=%d\n",
+ pkt->reqIdx);
+ }
+
thread[cpu->contextToThread(pkt->req->contextId())]
.completeDataAccess(pkt);
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index f5b60b2fc..d3dd34ee2 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -136,6 +136,12 @@ class LSQUnit {
*/
void checkSnoop(PacketPtr pkt);
+ // [InvisiSpec] check whether current request will hit in the
+ // spec buffer or not
+ int checkSpecBuffHit(const RequestPtr req, const int req_idx);
+ void setSpecBuffState(const RequestPtr req);
+
+ bool checkPrevLoadsExecuted(const int req_idx);
/** Executes a load instruction. */
Fault executeLoad(DynInstPtr &inst);
@@ -154,6 +160,15 @@ class LSQUnit {
/** Writes back stores. */
void writebackStores();
+ /** [mengjia] Validate loads. */
+ int exposeLoads();
+
+ /** [mengjia] Update Visbible State.
+ * In the mode defence relying on fence: setup fenceDelay state.
+ * In the mode defence relying on invisibleSpec:
+ * setup readyToExpose*/
+ void updateVisibleState();
+
/** Completes the data access that has been returned from the
* memory system. */
void completeDataAccess(PacketPtr pkt);
@@ -219,6 +234,8 @@ class LSQUnit {
/** Returns the number of stores to writeback. */
int numStoresToWB() { return storesToWB; }
+ /** [InvisiSpec] Returns the number of loads to validate. */
+ int numLoadsToVLD() { return loadsToVLD; }
/** Returns if the LSQ unit will writeback on this cycle. */
bool willWB() { return storeQueue[storeWBIdx].canWB &&
@@ -235,18 +252,30 @@ class LSQUnit {
/** Writes back the instruction, sending it to IEW. */
void writeback(DynInstPtr &inst, PacketPtr pkt);
+ // [InvisiSpec] complete Validates
+ void completeValidate(DynInstPtr &inst, PacketPtr pkt);
+
/** Writes back a store that couldn't be completed the previous cycle. */
void writebackPendingStore();
+ /** Validates a load that couldn't be completed the previous cycle. */
+ void validatePendingLoad();
+
/** Handles completing the send of a store to memory. */
void storePostSend(PacketPtr pkt);
+ /** Handles completing the send of a validation to memory. */
+ //void validationPostSend(PacketPtr pkt, int loadVLDIdx);
+
/** Completes the store at the specified index. */
void completeStore(int store_idx);
/** Attempts to send a store to the cache. */
bool sendStore(PacketPtr data_pkt);
+ /** Attempts to send a validation to the cache. */
+ //bool sendValidation(PacketPtr data_pkt, int loadVLDIdx);
+
/** Increments the given store index (circular queue). */
inline void incrStIdx(int &store_idx) const;
/** Decrements the given store index (circular queue). */
@@ -409,6 +438,8 @@ class LSQUnit {
/** The number of load instructions in the LQ. */
int loads;
+ /** [mengjia] The number of store instructions in the SQ waiting to writeback. */
+ int loadsToVLD;
/** The number of store instructions in the SQ. */
int stores;
/** The number of store instructions in the SQ waiting to writeback. */
@@ -416,6 +447,10 @@ class LSQUnit {
/** The index of the head instruction in the LQ. */
int loadHead;
+ /** [mengjia] The index of the first instruction that may be ready to be
+ * validated, and has not yet been validated.
+ */
+ //int pendingLoadVLDIdx;
/** The index of the tail instruction in the LQ. */
int loadTail;
@@ -432,7 +467,7 @@ class LSQUnit {
/** The number of cache ports available each cycle (stores only). */
int cacheStorePorts;
- /** The number of used cache ports in this cycle by stores. */
+ /** [InvisiSpec] The number of used cache ports in this cycle by stores. */
int usedStorePorts;
//list<InstSeqNum> mshrSeqNums;
@@ -458,6 +493,9 @@ class LSQUnit {
/** Whehter or not a store is blocked due to the memory system. */
bool isStoreBlocked;
+ /** Whehter or not a validation is blocked due to the memory system. */
+ bool isValidationBlocked;
+
/** Whether or not a store is in flight. */
bool storeInFlight;
@@ -471,9 +509,21 @@ class LSQUnit {
/** The packet that is pending free cache ports. */
PacketPtr pendingPkt;
+ /* [mengjia] define scheme variables */
+ // Flag for whether issue packets in execution stage
+ bool loadInExec;
+
+ // Flag for whether to use invisible speculative load
+ bool isInvisibleSpec;
+
/** Flag for memory model. */
bool needsTSO;
+ // Flag for whether defending against spectre attack or future attacks
+ bool isFuturistic;
+ bool allowSpecBuffHit;
+ /* [mengjia] different schemes determine values of 4 variables. */
+
// Will also need how many read/write ports the Dcache has. Or keep track
// of that in stage that is one level up, and only call executeLoad/Store
// the appropriate number of times.
@@ -508,6 +558,12 @@ class LSQUnit {
/** Number of times the LSQ is blocked due to the cache. */
Stats::Scalar lsqCacheBlocked;
+ Stats::Scalar specBuffHits;
+ Stats::Scalar specBuffMisses;
+ Stats::Scalar numValidates;
+ Stats::Scalar numExposes;
+ Stats::Scalar numConvertedExposes;
+
public:
/** Executes the load at the given index. */
Fault read(const RequestPtr &req,
@@ -549,6 +605,8 @@ class LSQUnit {
bool isStalled() { return stalled; }
};
+
+// IMPORTANT: the function to issue packets, interact with memory [mengjia]
template <class Impl>
Fault
LSQUnit<Impl>::read(const RequestPtr &req,
@@ -578,6 +636,7 @@ LSQUnit<Impl>::read(const RequestPtr &req,
}
// Check the SQ for any previous stores that might lead to forwarding
+ // why we have store queue index for a load operation? [mengjia]
int store_idx = load_inst->sqIdx;
int store_size = 0;
@@ -587,6 +646,7 @@ LSQUnit<Impl>::read(const RequestPtr &req,
load_idx, store_idx, storeHead, req->getPaddr(),
sreqLow ? " split" : "");
+ // LLSC: load-link/store-conditional [mengjia]
if (req->isLLSC()) {
assert(!sreqLow);
// Disable recording the result temporarily. Writing to misc
@@ -597,12 +657,14 @@ LSQUnit<Impl>::read(const RequestPtr &req,
load_inst->recordResult(true);
}
+ // request to memory mapped register [mengjia]
if (req->isMmappedIpr()) {
assert(!load_inst->memData);
load_inst->memData = new uint8_t[64];
ThreadContext *thread = cpu->tcBase(lsqID);
Cycles delay(0);
+
PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq);
data_pkt->dataStatic(load_inst->memData);
@@ -750,6 +812,7 @@ LSQUnit<Impl>::read(const RequestPtr &req,
DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
load_inst->seqNum, load_inst->pcState());
+
// Allocate memory if this is the first time a load is issued.
if (!load_inst->memData) {
load_inst->memData = new uint8_t[req->getSize()];
@@ -757,10 +820,35 @@ LSQUnit<Impl>::read(const RequestPtr &req,
// if we the cache is not blocked, do cache access
bool completedFirst = false;
- PacketPtr data_pkt = Packet::createRead(req);
+
+ PacketPtr data_pkt = NULL;
PacketPtr fst_data_pkt = NULL;
PacketPtr snd_data_pkt = NULL;
+ // According to the isInsivisibleSpec variable to create
+ // corresponding type of packets [mengjia]
+ bool sendSpecRead = false;
+ if(isInvisibleSpec){
+ if(!load_inst->readyToExpose()){
+ assert(!req->isLLSC());
+ assert(!req->isStrictlyOrdered());
+ assert(!req->isMmappedIpr());
+ sendSpecRead = true;
+ DPRINTF(LSQUnit, "send a spec read for inst [sn:%lli]\n",
+ load_inst->seqNum);
+ }
+
+ }
+
+ assert( !(sendSpecRead && load_inst->isSpecCompleted()) &&
+ "Sending specRead twice for the same load insts");
+
+ if(sendSpecRead){
+ data_pkt = Packet::createReadSpec(req);
+ }else{
+ data_pkt = Packet::createRead(req);
+ }
+
data_pkt->dataStatic(load_inst->memData);
LSQSenderState *state = new LSQSenderState;
@@ -772,17 +860,64 @@ LSQUnit<Impl>::read(const RequestPtr &req,
if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
// Point the first packet at the main data packet.
fst_data_pkt = data_pkt;
+
+ fst_data_pkt->setFirst();
+ if (sendSpecRead){
+ int src_idx = checkSpecBuffHit(req, load_idx);
+ if (src_idx != -1) {
+ if (allowSpecBuffHit){
+ data_pkt->setOnlyAccessSpecBuff();
+ }
+ data_pkt->srcIdx = src_idx;
+ specBuffHits++;
+ }else{
+ specBuffMisses++;
+ }
+ }
+ fst_data_pkt->reqIdx = load_idx;
} else {
// Create the split packets.
- fst_data_pkt = Packet::createRead(sreqLow);
- snd_data_pkt = Packet::createRead(sreqHigh);
+ if(sendSpecRead){
+
+ fst_data_pkt = Packet::createReadSpec(sreqLow);
+ int fst_src_idx = checkSpecBuffHit(sreqLow, load_idx);
+ if ( fst_src_idx != -1 ) {
+ if (allowSpecBuffHit){
+ fst_data_pkt->setOnlyAccessSpecBuff();
+ }
+ fst_data_pkt->srcIdx = fst_src_idx;
+ specBuffHits++;
+ } else {
+ specBuffMisses++;
+ }
+
+ snd_data_pkt = Packet::createReadSpec(sreqHigh);
+ int snd_src_idx = checkSpecBuffHit(sreqHigh, load_idx);
+ if ( snd_src_idx != -1 ) {
+ if (allowSpecBuffHit){
+ snd_data_pkt->setOnlyAccessSpecBuff();
+ }
+ snd_data_pkt->srcIdx = snd_src_idx;
+ specBuffHits++;
+ } else {
+ specBuffMisses++;
+ }
+ }else{
+ fst_data_pkt = Packet::createRead(sreqLow);
+ snd_data_pkt = Packet::createRead(sreqHigh);
+ }
+ fst_data_pkt->setFirst();
fst_data_pkt->dataStatic(load_inst->memData);
snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
fst_data_pkt->senderState = state;
snd_data_pkt->senderState = state;
+ fst_data_pkt->reqIdx = load_idx;
+ snd_data_pkt->reqIdx = load_idx;
+ fst_data_pkt->isSplit = true;
+ snd_data_pkt->isSplit = true;
state->isSplit = true;
state->outstanding = 2;
state->mainPkt = data_pkt;
@@ -794,6 +929,8 @@ LSQUnit<Impl>::read(const RequestPtr &req,
// @todo We should account for cache port contention
// and arbitrate between loads and stores.
bool successful_load = true;
+ // MARK: here is the place memory request of read is sent [mengjia]
+ // [InvisiSpec] Sending out a memory request
if (!dcachePort->sendTimingReq(fst_data_pkt)) {
successful_load = false;
} else if (TheISA::HasUnalignedMemAcc && sreqLow) {
@@ -850,6 +987,62 @@ LSQUnit<Impl>::read(const RequestPtr &req,
return NoFault;
}
+ DPRINTF(LSQUnit, "successfully sent out packet(s) for inst [sn:%lli]\n",
+ load_inst->seqNum);
+ // Set everything ready for expose/validation after the read is
+ // successfully sent out
+ if(sendSpecRead){ // sending actual request
+
+ // [mengjia] Here we set the needExposeOnly flag
+ if (needsTSO && !load_inst->isDataPrefetch()){
+ // need to check whether previous load_instructions specComplete or not
+ if ( checkPrevLoadsExecuted(load_idx) ){
+ load_inst->needExposeOnly(true);
+ DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as "
+ "needExposeOnly\n",
+ load_inst->pcState(), load_inst->seqNum);
+ } else {
+ DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as "
+ "needValidation\n",
+ load_inst->pcState(), load_inst->seqNum);
+ }
+ }else{
+ //if RC, always only need expose
+ load_inst->needExposeOnly(true);
+ DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as needExposeOnly\n",
+ load_inst->pcState(), load_inst->seqNum);
+ }
+
+ load_inst->needPostFetch(true);
+ assert(!req->isMmappedIpr());
+ //save expose requestPtr
+ if (TheISA::HasUnalignedMemAcc && sreqLow) {
+ load_inst->postSreqLow = std::make_shared<Request>(*sreqLow);
+ load_inst->postSreqHigh = std::make_shared<Request>(*sreqHigh);
+ load_inst->postReq = nullptr;
+ }else{
+ load_inst->postReq = std::make_shared<Request>(*req);
+ load_inst->postSreqLow = nullptr;
+ load_inst->postSreqHigh = nullptr;
+ }
+ load_inst->needDeletePostReq(true);
+ DPRINTF(LSQUnit, "created validation/expose"
+ " request for inst [sn:%lli]"
+ "req=%#x, reqLow=%#x, reqHigh=%#x\n",
+ load_inst->seqNum, (Addr)(load_inst->postReq),
+ (Addr)(load_inst->postSreqLow),
+ (Addr)(load_inst->postSreqHigh));
+ } else {
+ load_inst->setExposeCompleted();
+ load_inst->needPostFetch(false);
+ if (TheISA::HasUnalignedMemAcc && sreqLow) {
+ setSpecBuffState(sreqLow);
+ setSpecBuffState(sreqHigh);
+ } else {
+ setSpecBuffState(req);
+ }
+ }
+
return NoFault;
}
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index c2750be7d..3da248977 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -89,6 +89,9 @@ LSQUnit<Impl>::WritebackEvent::description() const
return "Store writeback";
}
+
+// [InvisiSpec] This function deals with
+// acknowledge response to memory read/write
template<class Impl>
void
LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
@@ -107,6 +110,17 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
return;
}
+ // need to update hit info for corresponding instruction
+ if (pkt->isL1Hit() && pkt->isSpec() && pkt->isRead()){
+ if (state->isSplit && ! pkt->isFirst()){
+ inst->setL1HitHigh();
+ } else {
+ inst->setL1HitLow();
+ }
+ } else if (!pkt->isSpec()) {
+ setSpecBuffState(pkt->req);
+ }
+
// If this is a split access, wait until all packets are received.
if (TheISA::HasUnalignedMemAcc && !state->complete()) {
return;
@@ -117,7 +131,9 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
if (!state->noWB) {
// Only loads and store conditionals perform the writeback
// after receving the response from the memory
+ // [mengjia] validation also needs writeback, expose do not need
assert(inst->isLoad() || inst->isStoreConditional());
+
if (!TheISA::HasUnalignedMemAcc || !state->isSplit ||
!state->isLoad) {
writeback(inst, pkt);
@@ -129,6 +145,10 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
if (inst->isStore()) {
completeStore(state->idx);
}
+
+ if (pkt->isValidate() || pkt->isExpose()) {
+ completeValidate(inst, pkt);
+ }
}
if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) {
@@ -136,6 +156,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
}
pkt->req->setAccessLatency();
+ // probe point, not sure about the mechanism [mengjia]
cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
delete state;
@@ -143,8 +164,8 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
template <class Impl>
LSQUnit<Impl>::LSQUnit()
- : loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
- isStoreBlocked(false), storeInFlight(false), hasPendingPkt(false),
+ : loads(0), loadsToVLD(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
+ isStoreBlocked(false), isValidationBlocked(false), storeInFlight(false), hasPendingPkt(false),
pendingPkt(nullptr)
{
}
@@ -178,7 +199,52 @@ LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
depCheckShift = params->LSQDepCheckShift;
checkLoads = params->LSQCheckLoads;
cacheStorePorts = params->cacheStorePorts;
+
+ // According to the scheme, we need to define actions as follows.
+ // loadInExec: if False, no packets are sent in execution stage;
+ // if True, send either readReq or readSpecReq
+ // isInvisibleSpec: if True, send readSpecReq in execution statge;
+ // if False, send readReq
+ // needsTSO: if True, squash read on receiving invalidations, and only allow one outstanding write at a time;
+ // if False, no squash on receiving invalidaiton, and allow multiple outstanding writes.
+ // isConservative: if True, react after all preceding instructions complete/no exception;
+ // if False, react only after all preceding stores/brancehs complete
+ const std::string scheme = params->simulateScheme;
+ if (scheme.compare("UnsafeBaseline")==0){
+ loadInExec = true;
+ isInvisibleSpec = false; // send real request
+ isFuturistic = false; // not relevant in unsafe mode.
+ }else if (scheme.compare("FuturisticSafeFence")==0){
+ // "LFENCE" before every load
+ loadInExec = false;
+ isInvisibleSpec = false; // not used since loadInExec is false
+ isFuturistic = true; // send readReq at head of ROB
+ }else if (scheme.compare("FuturisticSafeInvisibleSpec")==0){
+ // only make load visible when all preceding instructions
+ // complete and no exception
+ loadInExec = true;
+ isInvisibleSpec = true; // send request but not change cache state
+ isFuturistic = true; // conservative condition to send validations
+ }else if (scheme.compare("SpectreSafeFence")==0){
+ // "LFENCE" after every branch
+ loadInExec = false;
+ isInvisibleSpec = false; // not used since loadInExec is false
+ isFuturistic = false; // commit when preceding branches are resolved
+ }else if (scheme.compare("SpectreSafeInvisibleSpec")==0){
+ // make load visible when all preceiding branches are resolved
+ loadInExec = true;
+ isInvisibleSpec = true; // send request but not change cache state
+ isFuturistic = false; // only deal with spectre attacks
+ }else {
+ cprintf("ERROR: unsupported simulation scheme: %s!\n", scheme);
+ exit(1);
+ }
needsTSO = params->needsTSO;
+ allowSpecBuffHit = params->allowSpecBuffHit;
+ cprintf("Info: simulation uses scheme: %s; "
+ "needsTSO=%d; allowSpecBuffHit=%d\n",
+ scheme, needsTSO, allowSpecBuffHit);
+ // [mengjia] end of setting configuration variables
resetState();
}
@@ -188,7 +254,7 @@ template<class Impl>
void
LSQUnit<Impl>::resetState()
{
- loads = stores = storesToWB = 0;
+ loads = stores = loadsToVLD = storesToWB = 0;
loadHead = loadTail = 0;
@@ -258,6 +324,26 @@ LSQUnit<Impl>::regStats()
lsqCacheBlocked
.name(name() + ".cacheBlocked")
.desc("Number of times an access to memory failed due to the cache being blocked");
+
+ specBuffHits
+ .name(name() + ".specBuffHits")
+ .desc("Number of times an access hits in speculative buffer");
+
+ specBuffMisses
+ .name(name() + ".specBuffMisses")
+ .desc("Number of times an access misses in speculative buffer");
+
+ numValidates
+ .name(name() + ".numValidates")
+ .desc("Number of validates sent to cache");
+
+ numExposes
+ .name(name() + ".numExposes")
+ .desc("Number of exposes sent to cache");
+
+ numConvertedExposes
+ .name(name() + ".numConvertedExposes")
+ .desc("Number of exposes converted from validation");
}
template<class Impl>
@@ -289,6 +375,7 @@ LSQUnit<Impl>::drainSanityCheck() const
assert(!loadQueue[i]);
assert(storesToWB == 0);
+ assert(loadsToVLD == 0);
assert(!retryPkt);
}
@@ -377,6 +464,7 @@ LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst)
incrLdIdx(loadTail);
++loads;
+
}
template <class Impl>
@@ -400,6 +488,7 @@ LSQUnit<Impl>::insertStore(DynInstPtr &store_inst)
++stores;
}
+// It is an empty function? why? [mengjia]
template <class Impl>
typename Impl::DynInstPtr
LSQUnit<Impl>::getMemDepViolator()
@@ -460,13 +549,16 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
Addr load_addr_high = ld_inst->physEffAddrHigh & cacheBlockMask;
// Check that this snoop didn't just invalidate our lock flag
- if (ld_inst->effAddrValid() && (load_addr_low == invalidate_addr
- || load_addr_high == invalidate_addr)
+ // [InvisiSpec] also make sure the instruction has been sent out
+ // otherwise, we cause unneccessary squash
+ if (ld_inst->effAddrValid() && !ld_inst->fenceDelay()
+ && (load_addr_low == invalidate_addr
+ || load_addr_high == invalidate_addr)
&& ld_inst->memReqFlags & Request::LLSC)
TheISA::handleLockedSnoopHit(ld_inst.get());
}
- // If this is the only load in the LSQ we don't care
+ // If not match any load entry, then do nothing [mengjia]
if (load_idx == loadTail)
return;
@@ -477,7 +569,10 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
while (load_idx != loadTail) {
DynInstPtr ld_inst = loadQueue[load_idx];
- if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
+ // [SafeSpce] check snoop violation when the load has
+ // been sent out; otherwise, unneccessary squash
+ if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()
+ || ld_inst->fenceDelay()) {
incrLdIdx(load_idx);
continue;
}
@@ -495,11 +590,29 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
// all other loads, this load as well as *all* subsequent loads
// need to be squashed to prevent possible load reordering.
force_squash = true;
+
+ // [InvisiSpec] in InvisiSpec, we do not need to squash
+ // the load at the head of LQ,
+ // as well as the one do not need validation
+ if (isInvisibleSpec &&
+ (load_idx==loadHead || ld_inst->needExposeOnly())){
+ force_squash = false;
+ }
+ if (!pkt->isExternalEviction() && isInvisibleSpec){
+ force_squash = false;
+ ld_inst->clearL1HitHigh();
+ ld_inst->clearL1HitLow();
+ }
}
if (ld_inst->possibleLoadViolation() || force_squash) {
DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
pkt->getAddr(), ld_inst->seqNum);
+ //[InvisiSpec] mark the load hit invalidation
+ ld_inst->hitInvalidation(true);
+ if (pkt->isExternalEviction()){
+ ld_inst->hitExternalEviction(true);
+ }
// Mark the load for re-execution
ld_inst->fault = std::make_shared<ReExec>();
} else {
@@ -524,6 +637,103 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
}
template <class Impl>
+bool
+LSQUnit<Impl>::checkPrevLoadsExecuted(int req_idx)
+{
+ int load_idx = loadHead;
+ while (load_idx != req_idx){
+ if (!loadQueue[load_idx]->isExecuted()){
+ // if at least on load ahead of current load
+ // does not finish spec access,
+ // then return false
+ return false;
+ }
+ incrLdIdx(load_idx);
+ }
+
+ //if all executed, return true
+ return true;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::setSpecBuffState(RequestPtr expose_req)
+{
+ Addr req_eff_addr1 = expose_req->getPaddr() & cacheBlockMask;
+
+ int load_idx = loadHead;
+ while (load_idx != loadTail){
+ DynInstPtr ld_inst = loadQueue[load_idx];
+ if (ld_inst->effAddrValid()){
+
+ Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask;
+ Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask;
+ if (ld_eff_addr1 == req_eff_addr1){
+ ld_inst->setSpecBuffObsoleteLow();
+ } else if (ld_eff_addr2 == req_eff_addr1){
+ ld_inst->setSpecBuffObsoleteHigh();
+ }
+ }
+ incrLdIdx(load_idx);
+ }
+}
+
+
+template <class Impl>
+int
+LSQUnit<Impl>::checkSpecBuffHit(RequestPtr req, int req_idx)
+{
+
+ Addr req_eff_addr1 = req->getPaddr() & cacheBlockMask;
+ //Addr req_eff_addr2 = (req->getPaddr() + req->getSize()-1) & cacheBlockMask;
+ // the req should be within the same cache line
+ //assert (req_eff_addr1 == req_eff_addr2);
+ assert (!loadQueue[req_idx]->isExecuted());
+
+ int load_idx = loadHead;
+
+ while (load_idx != loadTail){
+ DynInstPtr ld_inst = loadQueue[load_idx];
+ if (ld_inst->effAddrValid()){
+ Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask;
+ Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask;
+
+ if ((req_eff_addr1 == ld_eff_addr1 && ld_inst->isL1HitLow())
+ || (req_eff_addr1 == ld_eff_addr2 && ld_inst->isL1HitHigh())){
+ return -1;
+ //already in L1, do not copy from buffer
+ } else {
+
+ if (ld_inst->isExecuted() && ld_inst->needPostFetch()
+ && !ld_inst->isSquashed() && ld_inst->fault==NoFault){
+ if (req_eff_addr1 == ld_eff_addr1 && !ld_inst->isL1HitLow()
+ && !ld_inst->isSpecBuffObsoleteLow()){
+ DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] "
+ "and [sn:%lli] (low) at address %#x\n",
+ loadQueue[req_idx]->seqNum, ld_inst->seqNum,
+ req_eff_addr1);
+ return load_idx;
+ } else if ( ld_eff_addr2 !=0 &&
+ req_eff_addr1 == ld_eff_addr2 && !ld_inst->isL1HitHigh()
+ && !ld_inst->isSpecBuffObsoleteHigh()){
+ DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] "
+ "and [sn:%lli] (high) at address %#x\n",
+ loadQueue[req_idx]->seqNum, ld_inst->seqNum,
+ req_eff_addr1);
+ return load_idx;
+ }
+ }
+ }
+ }
+ incrLdIdx(load_idx);
+ }
+
+ return -1;
+}
+
+
+
+template <class Impl>
Fault
LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
{
@@ -537,7 +747,10 @@ LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
*/
while (load_idx != loadTail) {
DynInstPtr ld_inst = loadQueue[load_idx];
- if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
+ // [InvisiSpec] no need to check violation for unsent load
+ // otherwise, unneccessary squash
+ if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()
+ || ld_inst->fenceDelay()) {
incrLdIdx(load_idx);
continue;
}
@@ -616,14 +829,25 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
assert(!inst->isSquashed());
+ // use ISA interface to generate correct access request
+ // initiateAcc is implemented in dyn_inst_impl.hh
+ // The interface calls corresponding ISA defined function
+ // check buld/ARM/arch/generic/memhelper.hh for more info [mengjia]
load_fault = inst->initiateAcc();
- if (inst->isTranslationDelayed() &&
+ // if translation delay, deferMem [mengjia]
+ // in the case it is not the correct time to send the load
+ // also defer it
+ if ( (inst->isTranslationDelayed() || inst->fenceDelay()
+ || inst->specTLBMiss()) &&
load_fault == NoFault)
return load_fault;
// If the instruction faulted or predicated false, then we need to send it
// along to commit without the instruction completing.
+ //
+ // if it is faulty, not execute it, send it to commit, and commit statge will deal with it
+ // here is signling the ROB, the inst can commit [mengjia]
if (load_fault != NoFault || !inst->readPredicate()) {
// Send this instruction to commit, also make sure iew stage
// realizes there is activity. Mark it as executed unless it
@@ -671,6 +895,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
// address. If so, then we have a memory ordering violation.
int load_idx = store_inst->lqIdx;
+ // TODO: Check whether this store tries to get an exclusive copy
+ // of target line [mengjia]
Fault store_fault = store_inst->initiateAcc();
if (store_inst->isTranslationDelayed() &&
@@ -769,15 +995,343 @@ LSQUnit<Impl>::writebackPendingStore()
if (hasPendingPkt) {
assert(pendingPkt != NULL);
- // If the cache is blocked, this will store the packet for retry.
- if (sendStore(pendingPkt)) {
- storePostSend(pendingPkt);
+ if(pendingPkt->isWrite()){
+ // If the cache is blocked, this will store the packet for retry.
+ if (sendStore(pendingPkt)) {
+ storePostSend(pendingPkt);
+ }
+ pendingPkt = NULL;
+ hasPendingPkt = false;
}
- pendingPkt = NULL;
- hasPendingPkt = false;
}
}
+
+
+
+// [InvisiSpec] update FenceDelay State
+template <class Impl>
+void
+LSQUnit<Impl>::updateVisibleState()
+{
+ int load_idx = loadHead;
+
+ //iterate all the loads and update its fencedelay state accordingly
+ while (load_idx != loadTail && loadQueue[load_idx]){
+ DynInstPtr inst = loadQueue[load_idx];
+
+ if (!loadInExec){
+
+ if ( (isFuturistic && inst->isPrevInstsCommitted()) ||
+ (!isFuturistic && inst->isPrevBrsCommitted())){
+ if (inst->fenceDelay()){
+ DPRINTF(LSQUnit, "Clear virtual fence for "
+ "inst [sn:%lli] PC %s\n",
+ inst->seqNum, inst->pcState());
+ }
+ inst->fenceDelay(false);
+ }else {
+ if (!inst->fenceDelay()){
+ DPRINTF(LSQUnit, "Deffering an inst [sn:%lli] PC %s"
+ " due to virtual fence\n",
+ inst->seqNum, inst->pcState());
+ }
+ inst->fenceDelay(true);
+ }
+ inst->readyToExpose(true);
+ } else if (loadInExec && isInvisibleSpec){
+
+ if ( (isFuturistic && inst->isPrevInstsCompleted()) ||
+ (!isFuturistic && inst->isPrevBrsResolved())){
+ if (!inst->readyToExpose()){
+ DPRINTF(LSQUnit, "Set readyToExpose for "
+ "inst [sn:%lli] PC %s\n",
+ inst->seqNum, inst->pcState());
+ if (inst->needPostFetch()){
+ ++loadsToVLD;
+ }
+ }
+ inst->readyToExpose(true);
+ }else {
+ if (inst->readyToExpose()){
+ DPRINTF(LSQUnit, "The load can not be validated "
+ "[sn:%lli] PC %s\n",
+ inst->seqNum, inst->pcState());
+ assert(0);
+ //--loadsToVLD;
+ }
+ inst->readyToExpose(false);
+ }
+ inst->fenceDelay(false);
+ } else {
+ inst->readyToExpose(true);
+ inst->fenceDelay(false);
+ }
+ incrLdIdx(load_idx);
+ }
+}
+
+// [InvisiSpec] validate loads
+template <class Impl>
+int
+LSQUnit<Impl>::exposeLoads()
+{
+ if(!isInvisibleSpec){
+ assert(loadsToVLD==0
+ && "request validation on Non invisible Spec mode");
+ }
+
+ int old_loadsToVLD = loadsToVLD;
+
+ // [InvisiSpec] Note:
+ // need to iterate from the head every time
+ // since the load can be exposed out-of-order
+ int loadVLDIdx = loadHead;
+
+ while (loadsToVLD > 0 &&
+ loadVLDIdx != loadTail &&
+ loadQueue[loadVLDIdx]) {
+
+ if (loadQueue[loadVLDIdx]->isSquashed()){
+ incrLdIdx(loadVLDIdx);
+ continue;
+ }
+ // skip the loads that either do not need to expose
+ // or exposed already
+ if(!loadQueue[loadVLDIdx]->needPostFetch()
+ || loadQueue[loadVLDIdx]->isExposeSent() ){
+ incrLdIdx(loadVLDIdx);
+ continue;
+ }
+
+ DynInstPtr load_inst = loadQueue[loadVLDIdx];
+ if (loadQueue[loadVLDIdx]->fault!=NoFault){
+ //load is executed, so it wait for expose complete
+ //to send it to commit, regardless of whether it is ready
+ //to expose
+ load_inst->setExposeCompleted();
+ load_inst->setExposeSent();
+ loadsToVLD--;
+ if (load_inst->isExecuted()){
+ DPRINTF(LSQUnit, "Execute finished and gets violation fault."
+ "Send inst [sn:%lli] to commit stage.\n",
+ load_inst->seqNum);
+ iewStage->instToCommit(load_inst);
+ iewStage->activityThisCycle();
+ }
+ incrLdIdx(loadVLDIdx);
+ continue;
+ }
+
+ // skip the loads that need expose but
+ // are not ready
+ if (loadQueue[loadVLDIdx]->needPostFetch()
+ && !loadQueue[loadVLDIdx]->readyToExpose()){
+ incrLdIdx(loadVLDIdx);
+ continue;
+ }
+
+ assert(loadQueue[loadVLDIdx]->needPostFetch()
+ && loadQueue[loadVLDIdx]->readyToExpose() );
+
+ assert(!load_inst->isCommitted());
+
+
+ RequestPtr req = load_inst->postReq;
+ RequestPtr sreqLow = load_inst->postSreqLow;
+ RequestPtr sreqHigh = load_inst->postSreqHigh;
+
+ // we should not have both req and sreqLow not NULL
+ assert( !(req && sreqLow));
+
+ DPRINTF(LSQUnit, "Validate/Expose request for inst [sn:%lli]"
+ " PC= %s. req=%#x, reqLow=%#x, reqHigh=%#x\n",
+ load_inst->seqNum, load_inst->pcState(),
+ (Addr)(load_inst->postReq),
+ (Addr)(load_inst->postSreqLow), (Addr)(load_inst->postSreqHigh));
+
+ PacketPtr data_pkt = NULL;
+ PacketPtr snd_data_pkt = NULL;
+
+ LSQSenderState *state = new LSQSenderState;
+ state->isLoad = false;
+ state->idx = loadVLDIdx;
+ state->inst = load_inst;
+ state->noWB = true;
+
+ bool split = false;
+ if (TheISA::HasUnalignedMemAcc && sreqLow) {
+ split = true;
+ } else {
+ assert(req);
+ }
+
+ bool onlyExpose = false;
+ if (!split) {
+ if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){
+ data_pkt = Packet::createExpose(req);
+ onlyExpose = true;
+ }else {
+ data_pkt = Packet::createValidate(req);
+ if (!load_inst->vldData)
+ load_inst->vldData = new uint8_t[1];
+ data_pkt->dataStatic(load_inst->vldData);
+ }
+ data_pkt->senderState = state;
+ data_pkt->setFirst();
+ data_pkt->reqIdx = loadVLDIdx;
+ DPRINTF(LSQUnit, "contextid = %d\n", req->contextId());
+ } else {
+ // allocate memory if we need at least one validation
+ if (!load_inst->needExposeOnly() &&
+ (!load_inst->isL1HitLow() || !load_inst->isL1HitHigh())){
+ if (!load_inst->vldData)
+ load_inst->vldData = new uint8_t[2];
+ } else {
+ onlyExpose = true;
+ }
+
+ // Create the split packets. - first one
+ if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){
+ data_pkt = Packet::createExpose(sreqLow);
+ }else{
+ data_pkt = Packet::createValidate(sreqLow);
+ assert(load_inst->vldData);
+ data_pkt->dataStatic(load_inst->vldData);
+ }
+
+ // Create the split packets. - second one
+ if (load_inst->needExposeOnly() || load_inst->isL1HitHigh()){
+ snd_data_pkt = Packet::createExpose(sreqHigh);
+ } else {
+ snd_data_pkt = Packet::createValidate(sreqHigh);
+ assert(load_inst->vldData);
+ snd_data_pkt->dataStatic(&(load_inst->vldData[1]));
+ }
+
+ data_pkt->senderState = state;
+ data_pkt->setFirst();
+ snd_data_pkt->senderState = state;
+ data_pkt->reqIdx = loadVLDIdx;
+ snd_data_pkt->reqIdx = loadVLDIdx;
+
+ data_pkt->isSplit = true;
+ snd_data_pkt->isSplit = true;
+ state->isSplit = true;
+ state->outstanding = 2;
+ state->mainPkt = data_pkt;
+
+ DPRINTF(LSQUnit, "contextid = %d, %d\n",
+ sreqLow->contextId(), sreqHigh->contextId());
+ req = sreqLow;
+ }
+
+ assert(!req->isStrictlyOrdered());
+ assert(!req->isMmappedIpr());
+
+ DPRINTF(LSQUnit, "D-Cache: Validating/Exposing load idx:%i PC:%s "
+ "to Addr:%#x, data:%#x [sn:%lli]\n",
+ loadVLDIdx, load_inst->pcState(),
+ //FIXME: resultData not memData
+ req->getPaddr(), (int)*(load_inst->memData),
+ load_inst->seqNum);
+
+ bool successful_expose = true;
+ bool completedFirst = false;
+
+ if (!dcachePort->sendTimingReq(data_pkt)){
+ DPRINTF(IEW, "D-Cache became blocked when "
+ "validating [sn:%lli], will retry later\n",
+ load_inst->seqNum);
+ successful_expose = false;
+ } else {
+ if (split) {
+ // If split, try to send the second packet too
+ completedFirst = true;
+ assert(snd_data_pkt);
+
+ if (!dcachePort->sendTimingReq(snd_data_pkt)){
+ state->complete();
+ state->cacheBlocked = true;
+ successful_expose = false;
+ DPRINTF(IEW, "D-Cache became blocked when validating"
+ " [sn:%lli] second packet, will retry later\n",
+ load_inst->seqNum);
+ }
+ }
+ }
+
+ if (!successful_expose){
+ if (!split) {
+ delete state;
+ delete data_pkt;
+ }else{
+ if (!completedFirst){
+ delete state;
+ delete data_pkt;
+ delete snd_data_pkt;
+ } else {
+ delete snd_data_pkt;
+ }
+ }
+ //cpu->wakeCPU(); // This will cause issue(wrong activity count and affects the memory transactions
+ ++lsqCacheBlocked;
+ break;
+ } else {
+ // Here is to fix memory leakage
+ // it is ugly, but we have to do it now.
+ load_inst->needDeletePostReq(false);
+
+ // if all the packets we sent out is expose,
+ // we assume the expose is alreay completed
+ if (onlyExpose) {
+ load_inst->setExposeCompleted();
+ numExposes++;
+ } else {
+ numValidates++;
+ }
+ if (load_inst->needExposeOnly()){
+ numConvertedExposes++;
+ }
+ if (load_inst->isExecuted() && load_inst->isExposeCompleted()
+ && !load_inst->isSquashed()){
+ DPRINTF(LSQUnit, "Expose finished. Execution done."
+ "Send inst [sn:%lli] to commit stage.\n",
+ load_inst->seqNum);
+ iewStage->instToCommit(load_inst);
+ iewStage->activityThisCycle();
+ } else{
+ DPRINTF(LSQUnit, "Need validation or execution not finishes."
+ "Need to wait for readResp/validateResp "
+ "for inst [sn:%lli].\n",
+ load_inst->seqNum);
+ }
+
+ load_inst->setExposeSent();
+ --loadsToVLD;
+ incrLdIdx(loadVLDIdx);
+ if (!split){
+ setSpecBuffState(req);
+ } else {
+ setSpecBuffState(sreqLow);
+ setSpecBuffState(sreqHigh);
+ }
+ }
+ }
+
+ DPRINTF(LSQUnit, "Send validate/expose for %d insts. loadsToVLD=%d"
+ ". loadHead=%d. loadTail=%d.\n",
+ old_loadsToVLD-loadsToVLD, loadsToVLD, loadHead,
+ loadTail);
+
+ assert(loads>=0 && loadsToVLD >= 0);
+
+ return old_loadsToVLD-loadsToVLD;
+}
+
+
+
+
template <class Impl>
void
LSQUnit<Impl>::writebackStores()
@@ -797,7 +1351,7 @@ LSQUnit<Impl>::writebackStores()
if (isStoreBlocked) {
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
- " is blocked!\n");
+ " is blocked on stores!\n");
break;
}
@@ -1007,6 +1561,12 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
stallingLoadIdx = 0;
}
+ if (loadQueue[load_idx]->needPostFetch() &&
+ loadQueue[load_idx]->readyToExpose() &&
+ !loadQueue[load_idx]->isExposeSent()){
+ loadsToVLD --;
+ }
+
// Clear the smart pointer to make sure it is decremented.
loadQueue[load_idx]->setSquashed();
loadQueue[load_idx] = NULL;
@@ -1017,6 +1577,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
decrLdIdx(load_idx);
++lsqSquashedLoads;
+
}
if (memDepViolator && squashed_num < memDepViolator->seqNum) {
@@ -1071,6 +1632,10 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
}
}
+
+// after sent, we assume the store is complete
+// thus, we can wekeup and forward data
+// In TSO, mark inFlightStore as true to block following stores [mengjia]
template <class Impl>
void
LSQUnit<Impl>::storePostSend(PacketPtr pkt)
@@ -1100,9 +1665,58 @@ LSQUnit<Impl>::storePostSend(PacketPtr pkt)
storeInFlight = true;
}
+ DPRINTF(LSQUnit, "Post sending store for inst [sn:%lli]\n",
+ storeQueue[storeWBIdx].inst->seqNum);
incrStIdx(storeWBIdx);
}
+
+
+template <class Impl>
+void
+LSQUnit<Impl>::completeValidate(DynInstPtr &inst, PacketPtr pkt)
+{
+ iewStage->wakeCPU();
+ // if instruction fault, no need to check value,
+ // return directly
+ //assert(!inst->needExposeOnly());
+ if (inst->isExposeCompleted() || inst->isSquashed()){
+ //assert(inst->fault != NoFault);
+ //Already sent to commit, do nothing
+ return;
+ }
+ //Check validation result
+ bool validation_fail = false;
+ if (!inst->isL1HitLow() && inst->vldData[0]==0) {
+ validation_fail = true;
+ } else {
+ if (pkt->isSplit && !inst->isL1HitHigh()
+ && inst->vldData[1]==0){
+ validation_fail = true;
+ }
+ }
+ if (validation_fail){
+ // Mark the load for re-execution
+ inst->fault = std::make_shared<ReExec>();
+ inst->validationFail(true);
+ DPRINTF(LSQUnit, "Validation failed.\n",
+ inst->seqNum);
+ }
+
+ inst->setExposeCompleted();
+ if ( inst->isExecuted() && inst->isExposeCompleted() ){
+ DPRINTF(LSQUnit, "Validation finished. Execution done."
+ "Send inst [sn:%lli] to commit stage.\n",
+ inst->seqNum);
+ iewStage->instToCommit(inst);
+ iewStage->activityThisCycle();
+ } else{
+ DPRINTF(LSQUnit, "Validation done. Execution not finishes."
+ "Need to wait for readResp for inst [sn:%lli].\n",
+ inst->seqNum);
+ }
+}
+
template <class Impl>
void
LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
@@ -1116,6 +1730,11 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
return;
}
+ //DPRINTF(LSQUnit, "write back for inst [sn:%lli]\n", inst->seqNum);
+ assert(!(inst->isExecuted() && inst->isExposeCompleted() &&
+ inst->fault==NoFault) &&
+ "in this case, we will put it into ROB twice.");
+
if (!inst->isExecuted()) {
inst->setExecuted();
@@ -1135,8 +1754,42 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
}
}
- // Need to insert instruction into queue to commit
- iewStage->instToCommit(inst);
+ // [mengjia]
+ // check schemes to decide whether to set load can be committed
+ // on receiving readResp or readSpecResp
+ if(!isInvisibleSpec){
+ // if not invisibleSpec mode, we only receive readResp
+ assert(!pkt->isSpec() && !pkt->isValidate() &&
+ "Receiving spec or validation response "
+ "in non invisibleSpec mode");
+ iewStage->instToCommit(inst);
+ } else if (inst->fault != NoFault){
+ inst->setExposeCompleted();
+ inst->setExposeSent();
+ iewStage->instToCommit(inst);
+ } else {
+ //isInvisibleSpec == true
+ if (pkt->isSpec()) {
+ inst->setSpecCompleted();
+ }
+
+ assert(!pkt->isValidate() && "receiving validation response"
+ "in invisibleSpec RC mode");
+ assert(!pkt->isExpose() && "receiving expose response"
+ "on write back path");
+
+ // check whether the instruction can be committed
+ if ( !inst->isExposeCompleted() && inst->needPostFetch() ){
+ DPRINTF(LSQUnit, "Expose not finished. "
+ "Wait until expose completion"
+ " to send inst [sn:%lli] to commit stage\n", inst->seqNum);
+ }else{
+ DPRINTF(LSQUnit, "Expose and execution both finished. "
+ "Send inst [sn:%lli] to commit stage\n", inst->seqNum);
+ iewStage->instToCommit(inst);
+ }
+
+ }
iewStage->activityThisCycle();
@@ -1144,6 +1797,8 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
iewStage->checkMisprediction(inst);
}
+// set store to complete [mengjia]
+// complete the store after it commits
template <class Impl>
void
LSQUnit<Impl>::completeStore(int store_idx)
@@ -1219,9 +1874,12 @@ LSQUnit<Impl>::sendStore(PacketPtr data_pkt)
retryPkt = data_pkt;
return false;
}
+ setSpecBuffState(data_pkt->req);
return true;
}
+
+
template <class Impl>
void
LSQUnit<Impl>::recvRetry()
@@ -1229,6 +1887,7 @@ LSQUnit<Impl>::recvRetry()
if (isStoreBlocked) {
DPRINTF(LSQUnit, "Receiving retry: store blocked\n");
assert(retryPkt != NULL);
+ assert(retryPkt->isWrite());
LSQSenderState *state =
dynamic_cast<LSQSenderState *>(retryPkt->senderState);
@@ -1277,7 +1936,7 @@ template <class Impl>
inline void
LSQUnit<Impl>::incrLdIdx(int &load_idx) const
{
- if (++load_idx >= LQEntries)
+ if ((++load_idx) >= LQEntries)
load_idx = 0;
}
@@ -1285,7 +1944,7 @@ template <class Impl>
inline void
LSQUnit<Impl>::decrLdIdx(int &load_idx) const
{
- if (--load_idx < 0)
+ if ((--load_idx) < 0)
load_idx += LQEntries;
}
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index bc024f603..2720ab914 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -1370,7 +1370,8 @@ DefaultRename<Impl>::serializeAfter(InstQueue &inst_list, ThreadID tid)
// Mark a bit to say that I must serialize on the next instruction.
serializeOnNextInst[tid] = true;
return;
- }
+ }
+
// Set the next instruction as serializing.
inst_list.front()->setSerializeBefore();
diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh
index 1c3cc2815..7024d9920 100644
--- a/src/cpu/o3/rob.hh
+++ b/src/cpu/o3/rob.hh
@@ -212,6 +212,10 @@ class ROB
/** Updates the tail instruction with the new youngest instruction. */
void updateTail();
+ /** [SafeSpce] Updates load instructions visible condition
+ * set isPrevInstsCompleted and isPrevBrsResolved. */
+ void updateVisibleState();
+
/** Reads the PC of the oldest head instruction. */
// uint64_t readHeadPC();
diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh
index 5a9dc90f9..ebfbb9754 100644
--- a/src/cpu/o3/rob_impl.hh
+++ b/src/cpu/o3/rob_impl.hh
@@ -402,6 +402,85 @@ ROB<Impl>::doSquash(ThreadID tid)
}
+/* **************************
+ * [InvisiSpec] update load insts state
+ * isPrevInstsCompleted; isPrevBrsResolved
+ * *************************/
+template <class Impl>
+void
+ROB<Impl>::updateVisibleState()
+{
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
+
+ while (threads != end) {
+ ThreadID tid = *threads++;
+
+ if (instList[tid].empty())
+ continue;
+
+ InstIt inst_it = instList[tid].begin();
+ InstIt tail_inst_it = instList[tid].end();
+
+ bool prevInstsComplete=true;
+ bool prevBrsResolved=true;
+ bool prevInstsCommitted=true;
+ bool prevBrsCommitted=true;
+
+ while (inst_it != tail_inst_it) {
+ DynInstPtr inst = *inst_it++;
+
+ assert(inst!=0);
+
+ if (!prevInstsComplete &&
+ !prevBrsResolved) {
+ break;
+ }
+
+ if (inst->isLoad()) {
+ if (prevInstsComplete) {
+ inst->setPrevInstsCompleted();
+ }
+ if (prevBrsResolved){
+ inst->setPrevBrsResolved();
+ }
+ if (prevInstsCommitted) {
+ inst->setPrevInstsCommitted();
+ }
+ if (prevBrsCommitted) {
+ inst->setPrevBrsCommitted();
+ }
+ }
+
+ // Update prev control insts state
+ if (inst->isControl()){
+ prevBrsCommitted = false;
+ if (!inst->readyToCommit() || inst->getFault()!=NoFault
+ || inst->isSquashed()){
+ prevBrsResolved = false;
+ }
+ }
+
+ prevInstsCommitted = false;
+
+ // Update prev insts state
+ if (inst->isNonSpeculative() || inst->isStoreConditional()
+ || inst->isMemBarrier() || inst->isWriteBarrier() ||
+ (inst->isLoad() && inst->strictlyOrdered())){
+ //Some special instructions, directly set canCommit
+ //when entering ROB
+ prevInstsComplete = false;
+ }
+ if (!inst->readyToCommit() || inst->getFault()!=NoFault
+ || inst->isSquashed()){
+ prevInstsComplete = false;
+ }
+
+ }
+ }
+}
+
+
template <class Impl>
void
ROB<Impl>::updateHead()
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
index 866bc9051..d6089f9f7 100644
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -221,7 +221,24 @@ MemCmd::commandInfo[] =
InvalidateResp, "InvalidateReq" },
/* Invalidation Response */
{ SET2(IsInvalidate, IsResponse),
- InvalidCmd, "InvalidateResp" }
+ InvalidCmd, "InvalidateResp" },
+ /* [InvisiSpec] New command info */
+ { SET4(IsRead, IsRequest, NeedsResponse, IsSpec),
+ ReadSpecResp, "ReadSpecReq" },
+ { SET4(IsRead, IsResponse, HasData, IsSpec),
+ InvalidCmd, "ReadSpecResp" },
+ { SET4(IsRead, IsRequest, NeedsResponse, IsValidate),
+ ValidateResp, "ValidateReq" },
+ { SET4(IsRead, IsResponse, HasData, IsValidate),
+ InvalidCmd, "ValidateResp" },
+ { SET4(IsRead, IsRequest, NeedsResponse, IsExpose),
+ ExposeResp, "ExposeReq" },
+ { SET3(IsRead, IsResponse, IsExpose),
+ InvalidCmd, "ExposeResp" },
+ { SET3(IsRequest, NeedsResponse, IsSpecFlush),
+ SpecFlushResp, "SpecFlushReq" },
+ { SET2(IsResponse, IsSpecFlush),
+ InvalidCmd, "SpecFlushResp" }
};
bool
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 0f45a7bae..1e012e710 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -133,6 +133,15 @@ class MemCmd
FlushReq, //request for a cache flush
InvalidateReq, // request for address to be invalidated
InvalidateResp,
+ /* [InvisiSpec] New commands */
+ ReadSpecReq,
+ ReadSpecResp,
+ ValidateReq,
+ ValidateResp,
+ ExposeReq,
+ ExposeResp,
+ SpecFlushReq,
+ SpecFlushResp,
NUM_MEM_CMDS
};
@@ -160,6 +169,11 @@ class MemCmd
IsPrint, //!< Print state matching address (for debugging)
IsFlush, //!< Flush the address from caches
FromCache, //!< Request originated from a caching agent
+ /* [InvisiSpec] New attributes */
+ IsSpec, //!< Speculatively issued
+ IsValidate,
+ IsExpose,
+ IsSpecFlush,
NUM_COMMAND_ATTRIBUTES
};
@@ -226,6 +240,12 @@ class MemCmd
bool isPrint() const { return testCmdAttrib(IsPrint); }
bool isFlush() const { return testCmdAttrib(IsFlush); }
+ /// [InvisiSpec] InvisiSpec attributes
+ bool isSpec() const { return testCmdAttrib(IsSpec); }
+ bool isValidate() const { return testCmdAttrib(IsValidate); }
+ bool isExpose() const { return testCmdAttrib(IsExpose); }
+ bool isSpecFlush() const { return testCmdAttrib(IsSpecFlush); }
+
Command
responseCommand() const
{
@@ -306,7 +326,17 @@ class Packet : public Printable
// Signal block present to squash prefetch and cache evict packets
// through express snoop flag
- BLOCK_CACHED = 0x00010000
+ BLOCK_CACHED = 0x00010000,
+
+ // [InvisiSpec] ReadSpecReq was L1 hit.
+ L1_HIT = 0x00020000,
+
+ // [InvisiSpec] this packet is the first one of split packets
+ // maximum split is 2
+ FIRST_IN_SPLIT = 0x00040000,
+ ONLY_ACCESS_SPEC_BUFF = 0x00080000,
+
+ EXTERNAL_EVICTION = 0x00100000,
};
Flags flags;
@@ -379,6 +409,12 @@ class Packet : public Printable
*/
uint32_t payloadDelay;
+ //[InvisiSpec] indicate the source buffer entry
+ //if the load should get data from specbuffer
+ int srcIdx;
+ int reqIdx;
+ bool isSplit;
+
/**
* A virtual base opaque structure used to hold state associated
* with the packet (e.g., an MSHR), specific to a MemObject that
@@ -552,6 +588,45 @@ class Packet : public Printable
bool isPrint() const { return cmd.isPrint(); }
bool isFlush() const { return cmd.isFlush(); }
+ /// [InvisiSpec] InvisiSpec flags
+ bool isSpec() const { return cmd.isSpec(); }
+ bool isValidate() const { return cmd.isValidate(); }
+ bool isExpose() const { return cmd.isExpose(); }
+ bool isSpecFlush() const { return cmd.isSpecFlush(); }
+ bool isL1Hit() const { return flags.isSet(L1_HIT); }
+ bool isExternalEviction() const { return flags.isSet(EXTERNAL_EVICTION); }
+ // [InvisiSpec] Check whether it is the first in split packets
+ bool isFirst() const { return flags.isSet(FIRST_IN_SPLIT); }
+ bool onlyAccessSpecBuff() const
+ { return flags.isSet(ONLY_ACCESS_SPEC_BUFF); }
+
+ void setL1Hit()
+ {
+ assert(isSpec());
+ assert(!flags.isSet(L1_HIT));
+ flags.set(L1_HIT);
+ }
+
+ void setExternalEviction()
+ {
+ assert(!flags.isSet(EXTERNAL_EVICTION));
+ flags.set(EXTERNAL_EVICTION);
+ }
+
+ void setOnlyAccessSpecBuff()
+ {
+ assert(isSpec());
+ assert(!flags.isSet(ONLY_ACCESS_SPEC_BUFF));
+ flags.set(ONLY_ACCESS_SPEC_BUFF);
+ }
+
+ void setFirst()
+ {
+ //assert(isSpec());
+ assert(!flags.isSet(FIRST_IN_SPLIT));
+ flags.set(FIRST_IN_SPLIT);
+ }
+
//@{
/// Snoop flags
/**
@@ -771,7 +846,8 @@ class Packet : public Printable
: cmd(_cmd), id((PacketId)_req.get()), req(_req),
data(nullptr), addr(0), _isSecure(false), size(0),
_qosValue(0), headerDelay(0), snoopDelay(0),
- payloadDelay(0), senderState(NULL)
+ payloadDelay(0), srcIdx(-1), reqIdx(-1), isSplit(false),
+ senderState(NULL)
{
if (req->hasPaddr()) {
addr = req->getPaddr();
@@ -793,7 +869,8 @@ class Packet : public Printable
: cmd(_cmd), id(_id ? _id : (PacketId)_req.get()), req(_req),
data(nullptr), addr(0), _isSecure(false),
_qosValue(0), headerDelay(0),
- snoopDelay(0), payloadDelay(0), senderState(NULL)
+ snoopDelay(0), payloadDelay(0), srcIdx(-1), reqIdx(-1), isSplit(false),
+ senderState(NULL)
{
if (req->hasPaddr()) {
addr = req->getPaddr() & ~(_blkSize - 1);
@@ -820,6 +897,9 @@ class Packet : public Printable
headerDelay(pkt->headerDelay),
snoopDelay(0),
payloadDelay(pkt->payloadDelay),
+ srcIdx(pkt->srcIdx),
+ reqIdx(pkt->reqIdx),
+ isSplit(pkt->isSplit),
senderState(pkt->senderState)
{
if (!clear_flags)
@@ -894,6 +974,33 @@ class Packet : public Printable
}
/**
+ * [InvisiSpec] Methods that return Packets for InvisiSpec.
+ */
+ static PacketPtr
+ createReadSpec(const RequestPtr req)
+ {
+ return new Packet(req, MemCmd::ReadSpecReq);
+ }
+
+ static PacketPtr
+ createValidate(const RequestPtr req)
+ {
+ return new Packet(req, MemCmd::ValidateReq);
+ }
+
+ static PacketPtr
+ createExpose(const RequestPtr req)
+ {
+ return new Packet(req, MemCmd::ExposeReq);
+ }
+
+ static PacketPtr
+ createSpecFlush(const RequestPtr req)
+ {
+ return new Packet(req, MemCmd::SpecFlushReq);
+ }
+
+ /**
* clean up packet variables
*/
~Packet()
diff --git a/src/mem/port.cc b/src/mem/port.cc
index 47f56e633..318a65308 100644
--- a/src/mem/port.cc
+++ b/src/mem/port.cc
@@ -176,6 +176,7 @@ MasterPort::sendFunctional(PacketPtr pkt)
return _slavePort->recvFunctional(pkt);
}
+// [InvisiSpec] Request from CPU to Ruby
bool
MasterPort::sendTimingReq(PacketPtr pkt)
{
diff --git a/src/mem/protocol/MESI_Two_Level-L1cache.sm b/src/mem/protocol/MESI_Two_Level-L1cache.sm
index 87684ce10..846af7da5 100644
--- a/src/mem/protocol/MESI_Two_Level-L1cache.sm
+++ b/src/mem/protocol/MESI_Two_Level-L1cache.sm
@@ -76,10 +76,12 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
S, AccessPermission:Read_Only, desc="a L1 cache entry Shared";
E, AccessPermission:Read_Only, desc="a L1 cache entry Exclusive";
M, AccessPermission:Read_Write, desc="a L1 cache entry Modified", format="!b";
+ X, AccessPermission:Read_Only, desc="a L1 cache entry Speculatively observed";
// Transient States
IS, AccessPermission:Busy, desc="L1 idle, issued GETS, have not seen response yet";
IM, AccessPermission:Busy, desc="L1 idle, issued GETX, have not seen response yet";
+ IX, AccessPermission:Busy, desc="L1 idle, issued GETSPEC, have not seen response yet";
SM, AccessPermission:Read_Only, desc="L1 idle, issued GETX, have not seen response yet";
IS_I, AccessPermission:Busy, desc="L1 idle, issued GETS, saw Inv before data because directory doesn't block on GETS hit";
@@ -99,6 +101,8 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
Load, desc="Load request from the home processor";
Ifetch, desc="I-fetch request from the home processor";
Store, desc="Store request from the home processor";
+ SpecLoad, desc="SpecLoad request from the home processor";
+ Expose, desc="Expose request from the home processor";
Inv, desc="Invalidate request from L2 bank";
@@ -110,6 +114,8 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
Fwd_GETX, desc="GETX from other processor";
Fwd_GETS, desc="GETS from other processor";
Fwd_GET_INSTR, desc="GET_INSTR from other processor";
+ Fwd_GETSPEC, desc="GETSPEC from other processor";
+ Fwd_EXPOSE, desc="EXPOSE from other processor";
Data, desc="Data for processor";
Data_Exclusive, desc="Data for processor";
@@ -188,6 +194,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
State getState(TBE tbe, Entry cache_entry, Addr addr) {
+ // [InvisiSpec] The same cache line cannot be present in L1D and L1I at the same time.
assert((L1Dcache.isTagPresent(addr) && L1Icache.isTagPresent(addr)) == false);
if(is_valid(tbe)) {
@@ -265,6 +272,10 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
return Event:Ifetch;
} else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) {
return Event:Store;
+ } else if (type == RubyRequestType:SPEC_LD) {
+ return Event:SpecLoad;
+ } else if (type == RubyRequestType:EXPOSE) {
+ return Event:Expose;
} else {
error("Invalid RubyRequestType");
}
@@ -387,6 +398,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
trigger(Event:Data_Exclusive, in_msg.addr, cache_entry, tbe);
} else if(in_msg.Type == CoherenceResponseType:DATA) {
if ((getState(tbe, cache_entry, in_msg.addr) == State:IS ||
+ getState(tbe, cache_entry, in_msg.addr) == State:IX ||
getState(tbe, cache_entry, in_msg.addr) == State:IS_I ||
getState(tbe, cache_entry, in_msg.addr) == State:PF_IS ||
getState(tbe, cache_entry, in_msg.addr) == State:PF_IS_I) &&
@@ -433,6 +445,10 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
trigger(Event:Fwd_GETS, in_msg.addr, cache_entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:GET_INSTR) {
trigger(Event:Fwd_GET_INSTR, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:GETSPEC) {
+ trigger(Event:Fwd_GETSPEC, in_msg.addr, cache_entry, tbe);
+ } else if (in_msg.Type == CoherenceRequestType:EXPOSE) {
+ trigger(Event:Fwd_EXPOSE, in_msg.addr, cache_entry, tbe);
} else {
error("Invalid forwarded request type");
}
@@ -534,6 +550,43 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
out_msg.MessageSize := MessageSizeType:Control;
out_msg.Prefetch := in_msg.Prefetch;
out_msg.AccessMode := in_msg.AccessMode;
+ out_msg.idx := in_msg.idx;
+ }
+ }
+ }
+
+ action(as_issueGETSPEC, "as", desc="Issue GETSPEC") {
+ peek(mandatoryQueue_in, RubyRequest) {
+ enqueue(requestL1Network_out, RequestMsg, l1_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:GETSPEC;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits, intToID(0)));
+ DPRINTF(RubySlicc, "address: %#x, destination: %s\n",
+ address, out_msg.Destination);
+ out_msg.MessageSize := MessageSizeType:SPECLD_Control;
+ out_msg.Prefetch := in_msg.Prefetch;
+ out_msg.AccessMode := in_msg.AccessMode;
+ out_msg.idx := in_msg.idx;
+ }
+ }
+ }
+
+ action(ex_issueEXPOSE, "ex", desc="Issue EXPOSE") {
+ peek(mandatoryQueue_in, RubyRequest) {
+ enqueue(requestL1Network_out, RequestMsg, l1_request_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceRequestType:EXPOSE;
+ out_msg.Requestor := machineID;
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits, intToID(0)));
+ DPRINTF(RubySlicc, "address: %#x, destination: %s\n",
+ address, out_msg.Destination);
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Control;
+ out_msg.Prefetch := in_msg.Prefetch;
+ out_msg.AccessMode := in_msg.AccessMode;
+ out_msg.idx := in_msg.idx;
}
}
}
@@ -568,6 +621,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
out_msg.MessageSize := MessageSizeType:Control;
out_msg.Prefetch := in_msg.Prefetch;
out_msg.AccessMode := in_msg.AccessMode;
+ out_msg.idx := in_msg.idx;
}
}
}
@@ -606,6 +660,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
out_msg.MessageSize := MessageSizeType:Control;
out_msg.Prefetch := in_msg.Prefetch;
out_msg.AccessMode := in_msg.AccessMode;
+ out_msg.idx := in_msg.idx;
}
}
}
@@ -643,6 +698,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
out_msg.MessageSize := MessageSizeType:Control;
out_msg.Prefetch := in_msg.Prefetch;
out_msg.AccessMode := in_msg.AccessMode;
+ out_msg.idx := in_msg.idx;
}
}
}
@@ -662,6 +718,36 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
}
+ action(dex_sendDataToExposeRequestor, "dex", desc="send data to requestor") {
+ peek(requestL1Network_in, RequestMsg) {
+ enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.Dirty := cache_entry.Dirty;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ }
+ }
+ }
+
+ action(ds_sendDataToSpecRequestor, "ds", desc="send data to requestor") {
+ peek(requestL1Network_in, RequestMsg) {
+ enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.Dirty := cache_entry.Dirty;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.MessageSize := MessageSizeType:SPECLD_Data;
+ }
+ }
+ }
+
action(d2_sendDataToL2, "d2", desc="send data to the L2 cache because of M downgrade") {
enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
assert(is_valid(cache_entry));
@@ -676,6 +762,20 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
}
+ action(d2ex_sendExposeDataToL2, "d2ex", desc="send data to the L2 cache because of M downgrade") {
+ enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.Dirty := cache_entry.Dirty;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits, intToID(0)));
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ }
+ }
+
action(dt_sendDataToRequestor_fromTBE, "dt", desc="send data to requestor") {
peek(requestL1Network_in, RequestMsg) {
enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
@@ -691,6 +791,36 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
}
+ action(dtex_sendDataToExposeRequestor_fromTBE, "dtex", desc="send data to requestor") {
+ peek(requestL1Network_in, RequestMsg) {
+ enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
+ assert(is_valid(tbe));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Dirty := tbe.Dirty;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ }
+ }
+ }
+
+ action(dts_sendDataToSpecRequestor_fromTBE, "dts", desc="send data to requestor") {
+ peek(requestL1Network_in, RequestMsg) {
+ enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
+ assert(is_valid(tbe));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Dirty := tbe.Dirty;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.MessageSize := MessageSizeType:SPECLD_Data;
+ }
+ }
+ }
+
action(d2t_sendDataToL2_fromTBE, "d2t", desc="send data to the L2 cache") {
enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
assert(is_valid(tbe));
@@ -705,6 +835,20 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
}
+ action(d2tex_sendExposeDataToL2_fromTBE, "d2tex", desc="send data to the L2 cache") {
+ enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
+ assert(is_valid(tbe));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.DataBlk := tbe.DataBlk;
+ out_msg.Dirty := tbe.Dirty;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache,
+ l2_select_low_bit, l2_select_num_bits, intToID(0)));
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ }
+ }
+
action(e_sendAckToRequestor, "e", desc="send invalidate ack to requestor (could be L2 or L1)") {
peek(requestL1Network_in, RequestMsg) {
enqueue(responseL1Network_out, ResponseMsg, l1_response_latency) {
@@ -761,7 +905,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
action(forward_eviction_to_cpu, "\cc", desc="sends eviction information to the processor") {
if (send_evictions) {
DPRINTF(RubySlicc, "Sending invalidation for %#x to the CPU\n", address);
- sequencer.evictionCallback(address);
+ sequencer.evictionCallback(address, false);
+ }
+ }
+
+ action(forward_external_eviction_to_cpu, "\ccc", desc="sends external eviction information to the processor") {
+ if (send_evictions) {
+ DPRINTF(RubySlicc, "Sending invalidation for %#x to the CPU\n", address);
+ sequencer.evictionCallback(address, true);
}
}
@@ -822,6 +973,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
sequencer.readCallback(address, cache_entry.DataBlk);
}
+ action(h_spec_load_hit, "hs",
+ desc="Notify sequencer the spec load completed.")
+ {
+ assert(is_valid(cache_entry));
+ DPRINTF(RubySlicc, "%s\n", cache_entry.DataBlk);
+ sequencer.readCallback(address, cache_entry.DataBlk);
+ }
+
action(h_ifetch_hit, "hi", desc="Notify sequencer the instruction fetch completed.")
{
assert(is_valid(cache_entry));
@@ -839,6 +998,15 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
sequencer.readCallback(address, cache_entry.DataBlk, true);
}
+ action(hsx_spec_load_hit, "hsx", desc="Notify sequencer the external load completed.")
+ {
+ peek(responseL1Network_in, ResponseMsg) {
+ // [InvisiSpec] Hack for in_msg.DataBlk returning const DataBlk
+ tbe.DataBlk := in_msg.DataBlk;
+ sequencer.readCallback(address, tbe.DataBlk, true);
+ }
+ }
+
action(hh_store_hit, "\h", desc="Notify sequencer that store completed.")
{
assert(is_valid(cache_entry));
@@ -868,6 +1036,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
tbe.DataBlk := cache_entry.DataBlk;
}
+ action(iw_allocateTBEWithoutCacheEntry, "iw", desc="Allocate TBE without a cache entry") {
+ check_allocate(TBEs);
+ assert(!is_valid(cache_entry) || cache_entry.CacheState == State:I);
+ TBEs.allocate(address);
+ set_tbe(TBEs[address]);
+ tbe.isPrefetch := false;
+ }
+
action(k_popMandatoryQueue, "k", desc="Pop mandatory queue.") {
mandatoryQueue_in.dequeue(clockEdge());
}
@@ -989,13 +1165,19 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
cache_entry.isPrefetch := true;
}
+ action(x_expose_done, "xd",
+ desc="Notify sequencer the expose completed.")
+ {
+ sequencer.readCallback(address, cache_entry.DataBlk);
+ }
+
//*****************************************************
// TRANSITIONS
//*****************************************************
// Transitions for Load/Store/Replacement/WriteBack from transient states
- transition({IS, IM, IS_I, M_I, SM, SINK_WB_ACK}, {Load, Ifetch, Store, L1_Replacement}) {
+ transition({IS, IM, IX, IS_I, M_I, SM, SINK_WB_ACK}, {Load, Expose, SpecLoad, Ifetch, Store, L1_Replacement}) {
z_stallAndWaitMandatoryQueue;
}
@@ -1003,7 +1185,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
z_stallAndWaitMandatoryQueue;
}
- transition({PF_IM, PF_SM}, {Load, Ifetch, L1_Replacement}) {
+ transition({PF_IM, PF_SM}, {Load, Expose, SpecLoad, Ifetch, L1_Replacement}) {
z_stallAndWaitMandatoryQueue;
}
@@ -1016,7 +1198,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
ff_deallocateL1CacheBlock;
}
- transition({S,E,M,IS,IM,SM,IS_I,PF_IS_I,M_I,SINK_WB_ACK,PF_IS,PF_IM},
+ transition({S,E,M,IS,IM,IX,SM,IS_I,PF_IS_I,M_I,SINK_WB_ACK,PF_IS,PF_IM},
{PF_Load, PF_Store, PF_Ifetch}) {
pq_popPrefetchQueue;
}
@@ -1030,6 +1212,21 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
k_popMandatoryQueue;
}
+ transition({NP,I}, Expose, IS) {
+ oo_allocateL1DCacheBlock;
+ i_allocateTBE;
+ ex_issueEXPOSE;
+ uu_profileDataMiss;
+ //po_observeMiss;
+ k_popMandatoryQueue;
+ }
+
+ transition({NP,I}, SpecLoad, IX) {
+ iw_allocateTBEWithoutCacheEntry;
+ as_issueGETSPEC;
+ k_popMandatoryQueue;
+ }
+
transition({NP,I}, PF_Load, PF_IS) {
oo_allocateL1DCacheBlock;
i_allocateTBE;
@@ -1037,13 +1234,13 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
pq_popPrefetchQueue;
}
- transition(PF_IS, Load, IS) {
+ transition(PF_IS, {Load, Expose}, IS) {
uu_profileDataMiss;
ppm_observePfMiss;
k_popMandatoryQueue;
}
- transition(PF_IS_I, Load, IS_I) {
+ transition(PF_IS_I, {Load, Expose}, IS_I) {
uu_profileDataMiss;
ppm_observePfMiss;
k_popMandatoryQueue;
@@ -1055,6 +1252,10 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
k_popMandatoryQueue;
}
+ transition({PF_IS, PF_IS_I}, SpecLoad) {
+ k_popMandatoryQueue;
+ }
+
transition({NP,I}, Ifetch, IS) {
pp_allocateL1ICacheBlock;
i_allocateTBE;
@@ -1107,19 +1308,24 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
k_popMandatoryQueue;
}
- transition({NP, I}, Inv) {
+ transition({NP, I, IX}, Inv) {
fi_sendInvAck;
l_popRequestQueue;
}
// Transitions from Shared
- transition({S,E,M}, Load) {
+ transition({S,E,M}, {Load, Expose}) {
h_load_hit;
uu_profileDataHit;
po_observeHit;
k_popMandatoryQueue;
}
+ transition({S,E,M}, SpecLoad) {
+ h_spec_load_hit;
+ k_popMandatoryQueue;
+ }
+
transition({S,E,M}, Ifetch) {
h_ifetch_hit;
uu_profileInstHit;
@@ -1140,7 +1346,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
transition(S, Inv, I) {
- forward_eviction_to_cpu;
+ forward_external_eviction_to_cpu;
fi_sendInvAck;
l_popRequestQueue;
}
@@ -1164,13 +1370,13 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
transition(E, Inv, I) {
// don't send data
- forward_eviction_to_cpu;
+ forward_external_eviction_to_cpu;
fi_sendInvAck;
l_popRequestQueue;
}
transition(E, Fwd_GETX, I) {
- forward_eviction_to_cpu;
+ forward_external_eviction_to_cpu;
d_sendDataToRequestor;
l_popRequestQueue;
}
@@ -1181,6 +1387,17 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
l_popRequestQueue;
}
+ transition({E, M}, Fwd_GETSPEC) {
+ ds_sendDataToSpecRequestor;
+ l_popRequestQueue;
+ }
+
+ transition({E, M}, Fwd_EXPOSE, S) {
+ dex_sendDataToExposeRequestor;
+ d2ex_sendExposeDataToL2;
+ l_popRequestQueue;
+ }
+
// Transitions from Modified
transition(M, {L1_Replacement, PF_L1_Replacement}, M_I) {
@@ -1197,7 +1414,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
transition(M, Inv, I) {
- forward_eviction_to_cpu;
+ forward_external_eviction_to_cpu;
f_sendDataToL2;
l_popRequestQueue;
}
@@ -1208,7 +1425,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
}
transition(M, Fwd_GETX, I) {
- forward_eviction_to_cpu;
+ forward_external_eviction_to_cpu;
d_sendDataToRequestor;
l_popRequestQueue;
}
@@ -1230,6 +1447,17 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
l_popRequestQueue;
}
+ transition(M_I, Fwd_EXPOSE, SINK_WB_ACK) {
+ dtex_sendDataToExposeRequestor_fromTBE;
+ d2tex_sendExposeDataToL2_fromTBE;
+ l_popRequestQueue;
+ }
+
+ transition(M_I, Fwd_GETSPEC) {
+ dts_sendDataToSpecRequestor_fromTBE;
+ l_popRequestQueue;
+ }
+
// Transitions from IS
transition({IS, IS_I}, Inv, IS_I) {
fi_sendInvAck;
@@ -1341,6 +1569,14 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
kd_wakeUpDependents;
}
+ // [InvisiSpec] Data and Data_Exclusive are not possible at IX
+ transition(IX, {Data_all_Acks, DataS_fromL1}, I) {
+ hsx_spec_load_hit;
+ s_deallocateTBE;
+ o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
+ }
+
// Transitions from IM
transition(IM, Inv, IM) {
fi_sendInvAck;
@@ -1384,7 +1620,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP")
// transitions from SM
transition(SM, Inv, IM) {
- forward_eviction_to_cpu;
+ forward_external_eviction_to_cpu;
fi_sendInvAck;
dg_invalidate_sc;
l_popRequestQueue;
diff --git a/src/mem/protocol/MESI_Two_Level-L2cache.sm b/src/mem/protocol/MESI_Two_Level-L2cache.sm
index 5a8cfae6d..ea884133e 100644
--- a/src/mem/protocol/MESI_Two_Level-L2cache.sm
+++ b/src/mem/protocol/MESI_Two_Level-L2cache.sm
@@ -72,6 +72,8 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
ISS, AccessPermission:Busy, desc="L2 idle, got single L1_GETS, issued memory fetch, have not seen response yet";
IS, AccessPermission:Busy, desc="L2 idle, got L1_GET_INSTR or multiple L1_GETS, issued memory fetch, have not seen response yet";
IM, AccessPermission:Busy, desc="L2 idle, got L1_GETX, issued memory fetch, have not seen response(s) yet";
+ II, AccessPermission:Busy, desc="L2 idle, got single L1_GETSPEC, issued memory fetch, have not seen response yet";
+ IEE, AccessPermission:Busy, desc="L2 idle, got single L1_EXPOSE, issued memory fetch, have not seen response yet";
// Blocking states
SS_MB, AccessPermission:Busy, desc="Blocked for L1_GETX from SS";
@@ -96,6 +98,9 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
L1_PUTX, desc="L1 replacing data";
L1_PUTX_old, desc="L1 replacing data, but no longer sharer";
+ L1_GETSPEC, desc="L1 GETSPEC request for a block mapped to us";
+ L1_EXPOSE, desc="L1 EXPOSE request for a block mapped to us";
+
// events initiated by this L2
L2_Replacement, desc="L2 Replacement", format="!r";
L2_Replacement_clean, desc="L2 Replacement, but data is clean", format="!r";
@@ -135,6 +140,8 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
bool Dirty, default="false", desc="Data is Dirty";
NetDest L1_GetS_IDs, desc="Set of the internal processors that want the block in shared state";
+ NetDest L1_GetSPEC_IDs, desc="Set of the internal processors that want the block speculatively";
+ NetDest L1_Expose_IDs, desc="Set of the internal processors that want the block to be exposed";
MachineID L1_GetX_ID, desc="ID of the L1 cache to forward the block to once we get a response";
int pendingAcks, desc="number of pending acks for invalidates during writeback";
}
@@ -267,6 +274,10 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
} else {
return Event:L1_PUTX_old;
}
+ } else if (type == CoherenceRequestType:GETSPEC) {
+ return Event:L1_GETSPEC;
+ } else if (type == CoherenceRequestType:EXPOSE) {
+ return Event:L1_EXPOSE;
} else {
DPRINTF(RubySlicc, "address: %#x, Request Type: %s\n", addr, type);
error("Invalid L1 forwarded request type");
@@ -399,10 +410,12 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
peek(L1RequestL2Network_in, RequestMsg) {
enqueue(DirRequestL2Network_out, RequestMsg, l2_request_latency) {
out_msg.addr := address;
- out_msg.Type := CoherenceRequestType:GETS;
+ out_msg.Type := in_msg.Type;
out_msg.Requestor := machineID;
out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
- out_msg.MessageSize := MessageSizeType:Control;
+ out_msg.MessageSize := in_msg.MessageSize;
+ out_msg.idx := in_msg.idx;
+ out_msg.origin := in_msg.Requestor;
}
}
}
@@ -420,6 +433,32 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
}
+ action(bs_forwardSpecRequestToExclusive, "bs", desc="Forward request to the exclusive L1") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ enqueue(L1RequestL2Network_out, RequestMsg, to_l1_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.add(cache_entry.Exclusive);
+ out_msg.MessageSize := MessageSizeType:SPECLD_Request_Control;
+ }
+ }
+ }
+
+ action(bex_forwardExposeRequestToExclusive, "bex", desc="Forward request to the exclusive L1") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ enqueue(L1RequestL2Network_out, RequestMsg, to_l1_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := in_msg.Type;
+ out_msg.Requestor := in_msg.Requestor;
+ out_msg.Destination.add(cache_entry.Exclusive);
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Request_Control;
+ }
+ }
+ }
+
action(c_exclusiveReplacement, "c", desc="Send data to memory") {
enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) {
assert(is_valid(cache_entry));
@@ -494,6 +533,25 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
}
+ action(ddex_sendExclusiveDataToExposeRequestor, "ddex", desc="Send data from cache to reqeustor") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+
+ out_msg.AckCount := 0 - cache_entry.Sharers.count();
+ if (cache_entry.Sharers.isElement(in_msg.Requestor)) {
+ out_msg.AckCount := out_msg.AckCount + 1;
+ }
+ }
+ }
+ }
+
action(ds_sendSharedDataToRequestor, "ds", desc="Send data from cache to reqeustor") {
peek(L1RequestL2Network_in, RequestMsg) {
enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) {
@@ -509,9 +567,39 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
}
+ action(dss_sendSharedDataToSpecRequestor, "dss", desc="Send data from cache to reqeustor") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:SPECLD_Data;
+ out_msg.AckCount := 0;
+ }
+ }
+ }
+
+ action(dsex_sendSharedDataToExposeRequestor, "dsex", desc="Send data from cache to reqeustor") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ enqueue(responseL2Network_out, ResponseMsg, l2_response_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.Requestor);
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ out_msg.AckCount := 0;
+ }
+ }
+ }
+
action(e_sendDataToGetSRequestors, "e", desc="Send data from cache to all GetS IDs") {
assert(is_valid(tbe));
- assert(tbe.L1_GetS_IDs.count() > 0);
+ assert(tbe.L1_GetS_IDs.count() + tbe.L1_GetSPEC_IDs.count() + tbe.L1_Expose_IDs.count() > 0);
enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) {
assert(is_valid(cache_entry));
out_msg.addr := address;
@@ -523,9 +611,40 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
}
+ action(es_sendDataToGetSpecRequestors, "es", desc="Send data from cache to all GetSpec IDs") {
+ assert(is_valid(tbe));
+ assert(tbe.L1_GetS_IDs.count() + tbe.L1_GetSPEC_IDs.count() + tbe.L1_Expose_IDs.count() > 0);
+ peek(responseL2Network_in, ResponseMsg) {
+ enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.Sender := machineID;
+ out_msg.Destination := tbe.L1_GetSPEC_IDs; // internal nodes
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.MessageSize := MessageSizeType:SPECLD_Data;
+ }
+ }
+ }
+
+ action(eex_sendDataToExposeRequestors, "eex", desc="Send data from cache to all GetSpec IDs") {
+ assert(is_valid(tbe));
+ assert(tbe.L1_GetS_IDs.count() + tbe.L1_GetSPEC_IDs.count() + tbe.L1_Expose_IDs.count() > 0);
+ peek(responseL2Network_in, ResponseMsg) {
+ enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA;
+ out_msg.Sender := machineID;
+ out_msg.Destination := tbe.L1_Expose_IDs; // internal nodes
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ }
+ }
+ }
+
action(ex_sendExclusiveDataToGetSRequestors, "ex", desc="Send data from cache to all GetS IDs") {
assert(is_valid(tbe));
assert(tbe.L1_GetS_IDs.count() == 1);
+ assert(tbe.L1_GetSPEC_IDs.count() + tbe.L1_Expose_IDs.count() == 0);
enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) {
assert(is_valid(cache_entry));
out_msg.addr := address;
@@ -537,6 +656,21 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
}
+ action(exex_sendExclusiveDataToExposeRequestors, "exex", desc="Send data from cache to all GetS IDs") {
+ assert(is_valid(tbe));
+ assert(tbe.L1_Expose_IDs.count() == 1);
+ assert(tbe.L1_GetS_IDs.count() + tbe.L1_GetSPEC_IDs.count() == 0);
+ enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) {
+ assert(is_valid(cache_entry));
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
+ out_msg.Sender := machineID;
+ out_msg.Destination := tbe.L1_Expose_IDs; // internal nodes
+ out_msg.DataBlk := cache_entry.DataBlk;
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+ }
+ }
+
action(ee_sendDataToGetXRequestor, "ee", desc="Send data from cache to GetX ID") {
enqueue(responseL2Network_out, ResponseMsg, to_l1_latency) {
assert(is_valid(tbe));
@@ -598,11 +732,23 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
TBEs.allocate(address);
set_tbe(TBEs[address]);
tbe.L1_GetS_IDs.clear();
+ tbe.L1_GetSPEC_IDs.clear();
+ tbe.L1_Expose_IDs.clear();
tbe.DataBlk := cache_entry.DataBlk;
tbe.Dirty := cache_entry.Dirty;
tbe.pendingAcks := cache_entry.Sharers.count();
}
+ action(iw_allocateTBEWithoutCacheEntry, "iw", desc="Allocate TBE for request without a cache entry") {
+ check_allocate(TBEs);
+ assert(!is_valid(cache_entry));
+ TBEs.allocate(address);
+ set_tbe(TBEs[address]);
+ tbe.L1_GetS_IDs.clear();
+ tbe.L1_GetSPEC_IDs.clear();
+ tbe.L1_Expose_IDs.clear();
+ }
+
action(s_deallocateTBE, "s", desc="Deallocate external TBE") {
TBEs.deallocate(address);
unset_tbe();
@@ -668,6 +814,20 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
}
+ action(sss_recordGetSPECL1ID, "\sss", desc="Record L1 GetSpec for load response") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ assert(is_valid(tbe));
+ tbe.L1_GetSPEC_IDs.add(in_msg.Requestor);
+ }
+ }
+
+ action(ssss_recordExposeL1ID, "\ssss", desc="Record L1 Expose for load response") {
+ peek(L1RequestL2Network_in, RequestMsg) {
+ assert(is_valid(tbe));
+ tbe.L1_Expose_IDs.add(in_msg.Requestor);
+ }
+ }
+
action(xx_recordGetXL1ID, "\x", desc="Record L1 GetX for store response") {
peek(L1RequestL2Network_in, RequestMsg) {
assert(is_valid(tbe));
@@ -793,21 +953,22 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
// BASE STATE - I
// Transitions from I (Idle)
- transition({NP, IS, ISS, IM, SS, M, M_I, I_I, S_I, MT_IB, MT_SB}, L1_PUTX) {
+ transition({NP, IS, ISS, IEE, IM, II, SS, M, M_I, I_I, S_I, MT_IB, MT_SB}, L1_PUTX) {
t_sendWBAck;
jj_popL1RequestQueue;
}
- transition({NP, SS, M, MT, M_I, I_I, S_I, IS, ISS, IM, MT_IB, MT_SB}, L1_PUTX_old) {
+ transition({NP, SS, M, MT, M_I, I_I, S_I, IS, ISS, IEE, IM, II, MT_IB, MT_SB}, L1_PUTX_old) {
t_sendWBAck;
jj_popL1RequestQueue;
}
- transition({IM, IS, ISS, SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L2_Replacement, L2_Replacement_clean}) {
+ transition({IM, IS, ISS, IEE, II, SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L2_Replacement, L2_Replacement_clean}) {
zz_stallAndWaitL1RequestQueue;
}
- transition({IM, IS, ISS, SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, MEM_Inv) {
+ // [InvisiSpec] TODO: How to handle Mem_Inv at II? Stall or ignore?
+ transition({IM, IS, ISS, IEE, II, SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, MEM_Inv) {
zn_recycleResponseNetwork;
}
@@ -816,7 +977,7 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
- transition({SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L1_GETS, L1_GET_INSTR, L1_GETX, L1_UPGRADE}) {
+ transition({SS_MB, MT_MB, MT_IIB, MT_IB, MT_SB}, {L1_GETS, L1_EXPOSE, L1_GET_INSTR, L1_GETX, L1_UPGRADE, L1_GETSPEC}) {
zz_stallAndWaitL1RequestQueue;
}
@@ -832,6 +993,17 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
jj_popL1RequestQueue;
}
+ transition(NP, L1_EXPOSE, IEE) {
+ qq_allocateL2CacheBlock;
+ ll_clearSharers;
+ nn_addSharer;
+ i_allocateTBE;
+ ssss_recordExposeL1ID;
+ a_issueFetchToMemory;
+ uu_profileMiss;
+ jj_popL1RequestQueue;
+ }
+
transition(NP, L1_GET_INSTR, IS) {
qq_allocateL2CacheBlock;
ll_clearSharers;
@@ -854,12 +1026,28 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
jj_popL1RequestQueue;
}
+ transition(NP, L1_GETSPEC, II) {
+ iw_allocateTBEWithoutCacheEntry;
+ sss_recordGetSPECL1ID;
+ a_issueFetchToMemory;
+ jj_popL1RequestQueue;
+ }
+
// transitions from IS/IM
transition(ISS, Mem_Data, MT_MB) {
m_writeDataToCache;
ex_sendExclusiveDataToGetSRequestors;
+ es_sendDataToGetSpecRequestors;
+ s_deallocateTBE;
+ o_popIncomingResponseQueue;
+ }
+
+ transition(IEE, Mem_Data, MT_MB) {
+ m_writeDataToCache;
+ exex_sendExclusiveDataToExposeRequestors;
+ es_sendDataToGetSpecRequestors;
s_deallocateTBE;
o_popIncomingResponseQueue;
}
@@ -867,6 +1055,8 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
transition(IS, Mem_Data, SS) {
m_writeDataToCache;
e_sendDataToGetSRequestors;
+ es_sendDataToGetSpecRequestors;
+ eex_sendDataToExposeRequestors;
s_deallocateTBE;
o_popIncomingResponseQueue;
kd_wakeUpDependents;
@@ -879,18 +1069,48 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
o_popIncomingResponseQueue;
}
- transition({IS, ISS}, {L1_GETS, L1_GET_INSTR}, IS) {
+ transition(II, Mem_Data, NP) {
+ es_sendDataToGetSpecRequestors;
+ s_deallocateTBE;
+ o_popIncomingResponseQueue;
+ kd_wakeUpDependents;
+ }
+
+ transition({IS, ISS, IEE}, {L1_GETS, L1_GET_INSTR}, IS) {
nn_addSharer;
ss_recordGetSL1ID;
uu_profileMiss;
jj_popL1RequestQueue;
}
- transition({IS, ISS}, L1_GETX) {
+ transition({IS, ISS, IEE}, L1_EXPOSE, IS) {
+ nn_addSharer;
+ ssss_recordExposeL1ID;
+ uu_profileMiss;
+ jj_popL1RequestQueue;
+ }
+
+ transition({IS, ISS, IEE}, L1_GETSPEC, IS) {
+ sss_recordGetSPECL1ID;
+ jj_popL1RequestQueue;
+ }
+
+ transition(II, L1_GETSPEC) {
+ sss_recordGetSPECL1ID;
+ jj_popL1RequestQueue;
+ }
+
+ // [InvisiSpec] L1_GET_INSTR should not be received at II
+ transition(II, {L1_GETS, L1_EXPOSE}) {
zz_stallAndWaitL1RequestQueue;
}
- transition(IM, {L1_GETX, L1_GETS, L1_GET_INSTR}) {
+ // [InvisiSpec] TODO: Maybe we can optimize this?
+ transition({IS, ISS, IEE, II}, L1_GETX) {
+ zz_stallAndWaitL1RequestQueue;
+ }
+
+ transition(IM, {L1_GETX, L1_GETS, L1_EXPOSE, L1_GET_INSTR, L1_GETSPEC}) {
zz_stallAndWaitL1RequestQueue;
}
@@ -903,6 +1123,19 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
jj_popL1RequestQueue;
}
+ transition(SS, L1_EXPOSE) {
+ dsex_sendSharedDataToExposeRequestor;
+ nn_addSharer;
+ set_setMRU;
+ uu_profileHit;
+ jj_popL1RequestQueue;
+ }
+
+ transition({SS, M}, L1_GETSPEC) {
+ dss_sendSharedDataToSpecRequestor;
+ jj_popL1RequestQueue;
+ }
+
transition(SS, L1_GETX, SS_MB) {
d_sendDataToRequestor;
@@ -956,6 +1189,14 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
jj_popL1RequestQueue;
}
+ // [InvisiSpec] TODO
+ transition(M, L1_EXPOSE, MT_MB) {
+ ddex_sendExclusiveDataToExposeRequestor;
+ set_setMRU;
+ uu_profileHit;
+ jj_popL1RequestQueue;
+ }
+
transition(M, {L2_Replacement, MEM_Inv}, M_I) {
i_allocateTBE;
c_exclusiveReplacement;
@@ -986,6 +1227,20 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
jj_popL1RequestQueue;
}
+ // [InvisiSpec] TODO: Ack packets are currently not recorded as EXPOSE traffic.
+ transition(MT, L1_EXPOSE, MT_IIB) {
+ bex_forwardExposeRequestToExclusive;
+ uu_profileMiss;
+ set_setMRU;
+ jj_popL1RequestQueue;
+ }
+
+ // [InvisiSpec] Do we need to block?
+ transition(MT, L1_GETSPEC) {
+ bs_forwardSpecRequestToExclusive;
+ jj_popL1RequestQueue;
+ }
+
transition(MT, {L2_Replacement, MEM_Inv}, MT_I) {
i_allocateTBE;
f_sendInvToSharers;
@@ -1039,7 +1294,7 @@ machine(MachineType:L2Cache, "MESI Directory L2 Cache CMP")
}
// writeback states
- transition({I_I, S_I, MT_I, MCT_I, M_I}, {L1_GETX, L1_UPGRADE, L1_GETS, L1_GET_INSTR}) {
+ transition({I_I, S_I, MT_I, MCT_I, M_I}, {L1_GETX, L1_UPGRADE, L1_GETS, L1_EXPOSE, L1_GET_INSTR, L1_GETSPEC}) {
zz_stallAndWaitL1RequestQueue;
}
diff --git a/src/mem/protocol/MESI_Two_Level-dir.sm b/src/mem/protocol/MESI_Two_Level-dir.sm
index 991de5a2c..9934f57a8 100644
--- a/src/mem/protocol/MESI_Two_Level-dir.sm
+++ b/src/mem/protocol/MESI_Two_Level-dir.sm
@@ -49,6 +49,8 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
M, AccessPermission:Maybe_Stale, desc="memory copy may be stale, i.e. other modified copies may exist";
IM, AccessPermission:Busy, desc="Intermediate State I>M";
+ IE, AccessPermission:Busy, desc="Intermediate State I>M";
+ II, AccessPermission:Busy, desc="Intermediate State I>I for SpecFetch";
MI, AccessPermission:Busy, desc="Intermediate State M>I";
M_DRD, AccessPermission:Busy, desc="Intermediate State when there is a dma read";
M_DRDI, AccessPermission:Busy, desc="Intermediate State when there is a dma read";
@@ -59,6 +61,8 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
// Events
enumeration(Event, desc="Directory events") {
Fetch, desc="A memory fetch arrives";
+ Expose, desc="A memory expose arrives";
+ SpecFetch, desc="A memory fetch for speculative execution arrives";
Data, desc="writeback data arrives";
Memory_Data, desc="Fetched data from memory arrives";
Memory_Ack, desc="Writeback Ack from memory arrives";
@@ -198,6 +202,10 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
assert(in_msg.Destination.isElement(machineID));
if (isGETRequest(in_msg.Type)) {
trigger(Event:Fetch, in_msg.addr, TBEs[in_msg.addr]);
+ } else if (in_msg.Type == CoherenceRequestType:EXPOSE) {
+ trigger(Event:Expose, in_msg.addr, TBEs[in_msg.addr]);
+ } else if (in_msg.Type == CoherenceRequestType:GETSPEC) {
+ trigger(Event:SpecFetch, in_msg.addr, TBEs[in_msg.addr]);
} else if (in_msg.Type == CoherenceRequestType:DMA_READ) {
trigger(Event:DMA_READ, makeLineAddress(in_msg.addr),
TBEs[makeLineAddress(in_msg.addr)]);
@@ -275,6 +283,40 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
}
}
+ action(dex_sendExposeData, "dex", desc="Send data to requestor") {
+ peek(memQueue_in, MemoryMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, to_mem_ctrl_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:MEMORY_DATA;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.OriginalRequestorMachId);
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Dirty := false;
+ out_msg.MessageSize := MessageSizeType:EXPOSE_Data;
+
+ Entry e := getDirectoryEntry(in_msg.addr);
+ e.Owner := in_msg.OriginalRequestorMachId;
+ }
+ }
+ }
+
+ action(ds_sendSpecData, "ds", desc="Send data to requestor") {
+ peek(memQueue_in, MemoryMsg) {
+ enqueue(responseNetwork_out, ResponseMsg, to_mem_ctrl_latency) {
+ out_msg.addr := address;
+ out_msg.Type := CoherenceResponseType:MEMORY_DATA;
+ out_msg.Sender := machineID;
+ out_msg.Destination.add(in_msg.OriginalRequestorMachId);
+ out_msg.DataBlk := in_msg.DataBlk;
+ out_msg.Dirty := false;
+ out_msg.MessageSize := MessageSizeType:SPECLD_Data;
+
+ Entry e := getDirectoryEntry(in_msg.addr);
+ e.Owner := in_msg.OriginalRequestorMachId;
+ }
+ }
+ }
+
// Actions
action(aa_sendAck, "aa", desc="Send ack to L2") {
peek(memQueue_in, MemoryMsg) {
@@ -306,7 +348,19 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") {
peek(requestNetwork_in, RequestMsg) {
- queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency);
+ queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency, in_msg.origin, in_msg.idx, 0);
+ }
+ }
+
+ action(qfs_queueMemorySpecFetchRequest, "qfs", desc="Queue off-chip fetch request") {
+ peek(requestNetwork_in, RequestMsg) {
+ queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency, in_msg.origin, in_msg.idx, 1);
+ }
+ }
+
+ action(qfe_queueMemoryExposeRequest, "qfe", desc="Queue off-chip fetch request") {
+ peek(requestNetwork_in, RequestMsg) {
+ queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency, in_msg.origin, in_msg.idx, 2);
}
}
@@ -320,7 +374,8 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
//added by SS for dma
action(qf_queueMemoryFetchRequestDMA, "qfd", desc="Queue off-chip fetch request") {
peek(requestNetwork_in, RequestMsg) {
- queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency);
+ assert(false);
+ queueMemoryRead(in_msg.Requestor, address, to_mem_ctrl_latency, in_msg.Requestor, -1, -1);
}
}
@@ -425,7 +480,18 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
j_popIncomingRequestQueue;
}
- transition(M, Fetch) {
+ transition(I, Expose, IE) {
+ qfe_queueMemoryExposeRequest;
+ j_popIncomingRequestQueue;
+ }
+
+ transition(I, SpecFetch, II) {
+ qfs_queueMemorySpecFetchRequest;
+ j_popIncomingRequestQueue;
+ }
+
+ // [InvisiSpec] Is it secure?
+ transition(M, {Fetch, Expose, SpecFetch}) {
inv_sendCacheInvalidate;
z_stallAndWaitRequest;
}
@@ -435,6 +501,19 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
l_popMemQueue;
kd_wakeUpDependents;
}
+
+ transition(IE, Memory_Data, M) {
+ dex_sendExposeData;
+ l_popMemQueue;
+ kd_wakeUpDependents;
+ }
+
+ transition(II, Memory_Data, I) {
+ ds_sendSpecData;
+ l_popMemQueue;
+ kd_wakeUpDependents;
+ }
+
//added by SS
transition(M, CleanReplacement, I) {
a_sendAck;
@@ -481,11 +560,11 @@ machine(MachineType:Directory, "MESI Two Level directory protocol")
kd_wakeUpDependents;
}
- transition({ID, ID_W, M_DRDI, M_DWRI, IM, MI}, {Fetch, Data} ) {
+ transition({ID, ID_W, M_DRDI, M_DWRI, IM, IE, MI, II}, {Fetch, Expose, SpecFetch, Data} ) {
z_stallAndWaitRequest;
}
- transition({ID, ID_W, M_DRD, M_DRDI, M_DWR, M_DWRI, IM, MI}, {DMA_WRITE, DMA_READ} ) {
+ transition({ID, ID_W, M_DRD, M_DRDI, M_DWR, M_DWRI, IM, IE, MI, II}, {DMA_WRITE, DMA_READ} ) {
zz_recycleDMAQueue;
}
diff --git a/src/mem/protocol/MESI_Two_Level-msg.sm b/src/mem/protocol/MESI_Two_Level-msg.sm
index 738019e7b..d4269193d 100644
--- a/src/mem/protocol/MESI_Two_Level-msg.sm
+++ b/src/mem/protocol/MESI_Two_Level-msg.sm
@@ -36,6 +36,8 @@ enumeration(CoherenceRequestType, desc="...") {
GET_INSTR, desc="Get Instruction";
INV, desc="INValidate";
PUTX, desc="Replacement message";
+ GETSPEC, desc="Get Speculatively";
+ EXPOSE, desc="Expose";
WB_ACK, desc="Writeback ack";
@@ -68,7 +70,9 @@ structure(RequestMsg, desc="...", interface="Message") {
int Len;
bool Dirty, default="false", desc="Dirty bit";
PrefetchBit Prefetch, desc="Is this a prefetch request";
-
+ MachineID origin;
+ int idx, default="-1", desc="LQ index";
+
bool functionalRead(Packet *pkt) {
// Only PUTX messages contains the data block
if (Type == CoherenceRequestType:PUTX) {
diff --git a/src/mem/protocol/RubySlicc_Defines.sm b/src/mem/protocol/RubySlicc_Defines.sm
index eb235f8f3..7df82847e 100644
--- a/src/mem/protocol/RubySlicc_Defines.sm
+++ b/src/mem/protocol/RubySlicc_Defines.sm
@@ -35,7 +35,7 @@ Cycles recycle_latency;
// Functions implemented in the AbstractController class for
// making timing access to the memory maintained by the
// memory controllers.
-void queueMemoryRead(MachineID id, Addr addr, Cycles latency);
+void queueMemoryRead(MachineID id, Addr addr, Cycles latency, MachineID origin, int idx, int type);
void queueMemoryWrite(MachineID id, Addr addr, Cycles latency,
DataBlock block);
void queueMemoryWritePartial(MachineID id, Addr addr, Cycles latency,
diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
index 8e17f9849..be64706b9 100644
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -175,6 +175,11 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") {
Release, desc="Release operation";
Acquire, desc="Acquire opertion";
AcquireRelease, desc="Acquire and Release opertion";
+ // [InvisiSpec] New request types
+ SPEC_LD, desc="Speculative load";
+ EXPOSE, desc="Expose";
+ VALIDATE, desc="Validate";
+ SPEC_FLUSH, desc="Flush SpecBuffer";
}
enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
@@ -256,6 +261,12 @@ enumeration(MessageSizeType, desc="...") {
Unblock_Control, desc="Unblock control";
Persistent_Control, desc="Persistent request activation messages";
Completion_Control, desc="Completion messages";
+ SPECLD_Control, desc="SPECLD control message";
+ SPECLD_Request_Control, desc="SPECLD forward message";
+ SPECLD_Data, desc="SPECLD data response";
+ EXPOSE_Control, desc="EXPOSE control message";
+ EXPOSE_Request_Control, desc="EXPOSE forward request";
+ EXPOSE_Data, desc="EXPOSE data response";
}
// AccessType
@@ -345,6 +356,7 @@ enumeration(RequestStatus, desc="...", default="RequestStatus_NULL") {
Issued, desc="The sequencer successfully issued the request";
BufferFull, desc="Can not issue because the sequencer is full";
Aliased, desc="This request aliased with a currently outstanding request";
+ Merged, desc="This request merged with a currently outstanding request";
NULL, desc="";
}
diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm
index 27a045d29..5c73b4320 100644
--- a/src/mem/protocol/RubySlicc_Types.sm
+++ b/src/mem/protocol/RubySlicc_Types.sm
@@ -113,7 +113,7 @@ structure (Sequencer, external = "yes") {
Cycles, Cycles, Cycles);
void checkCoherence(Addr);
- void evictionCallback(Addr);
+ void evictionCallback(Addr, bool);
void recordRequestType(SequencerRequestType);
bool checkResourceAvailable(CacheResourceType, Addr);
void invalidateSC(Addr);
@@ -172,6 +172,7 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") {
HSAScope scope, desc="HSA scope";
HSASegment segment, desc="HSA segment";
PacketPtr pkt, desc="Packet associated with this request";
+ int idx, desc="LQ index";
}
structure(AbstractEntry, primitive="yes", external = "yes") {
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 3df29aa1c..f14d2d1da 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -149,6 +149,9 @@ class Request
MEM_SWAP = 0x00400000,
MEM_SWAP_COND = 0x00800000,
+ /** [InvisiSpec] it is a spec request */
+ SPEC = 0x00004000,
+
/** The request is a prefetch. */
PREFETCH = 0x01000000,
/** The request should be prefetched into the exclusive state. */
@@ -861,6 +864,7 @@ class Request
bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); }
bool isInstFetch() const { return _flags.isSet(INST_FETCH); }
bool isPrefetch() const { return _flags.isSet(PREFETCH); }
+ bool isSpec() const { return _flags.isSet(SPEC); }
bool isLLSC() const { return _flags.isSet(LLSC); }
bool isPriv() const { return _flags.isSet(PRIVILEGED); }
bool isLockedRMW() const { return _flags.isSet(LOCKED_RMW); }
diff --git a/src/mem/ruby/SConscript b/src/mem/ruby/SConscript
index be52c02d0..e6d45a419 100644
--- a/src/mem/ruby/SConscript
+++ b/src/mem/ruby/SConscript
@@ -59,6 +59,9 @@ DebugFlag('RubySystem')
DebugFlag('RubyTester')
DebugFlag('RubyStats')
DebugFlag('RubyResourceStalls')
+DebugFlag('SpecBuffer')
+DebugFlag('SpecBufferValidate')
+DebugFlag('MemSpecBuffer')
CompoundFlag('Ruby', [ 'RubyQueue', 'RubyNetwork', 'RubyTester',
'RubyGenerated', 'RubySlicc', 'RubySystem', 'RubyCache',
diff --git a/src/mem/ruby/network/Network.cc b/src/mem/ruby/network/Network.cc
index 57834f2e2..7d4d71eb3 100644
--- a/src/mem/ruby/network/Network.cc
+++ b/src/mem/ruby/network/Network.cc
@@ -144,12 +144,18 @@ Network::MessageSizeType_to_int(MessageSizeType size_type)
case MessageSizeType_Unblock_Control:
case MessageSizeType_Persistent_Control:
case MessageSizeType_Completion_Control:
+ case MessageSizeType_SPECLD_Control:
+ case MessageSizeType_SPECLD_Request_Control:
+ case MessageSizeType_EXPOSE_Control:
+ case MessageSizeType_EXPOSE_Request_Control:
return m_control_msg_size;
case MessageSizeType_Data:
case MessageSizeType_Response_Data:
case MessageSizeType_ResponseLocal_Data:
case MessageSizeType_ResponseL2hit_Data:
case MessageSizeType_Writeback_Data:
+ case MessageSizeType_SPECLD_Data:
+ case MessageSizeType_EXPOSE_Data:
return m_data_msg_size;
default:
panic("Invalid range for type MessageSizeType");
diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc
index de7e03dd7..a4ba1fe07 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.cc
+++ b/src/mem/ruby/slicc_interface/AbstractController.cc
@@ -41,6 +41,7 @@
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "debug/RubyQueue.hh"
+#include "debug/MemSpecBuffer.hh"
#include "mem/protocol/MemoryMsg.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
@@ -96,6 +97,14 @@ AbstractController::regStats()
.name(name() + ".fully_busy_cycles")
.desc("cycles for which number of transistions == max transitions")
.flags(Stats::nozero);
+ m_expose_hits
+ .name(name() + ".expose_hits")
+ .desc("number of expose hits at LLC spec buffer")
+ .flags(Stats::nozero);
+ m_expose_misses
+ .name(name() + ".expose_misses")
+ .desc("number of expose misses at LLC spec buffer")
+ .flags(Stats::nozero);
}
void
@@ -238,8 +247,67 @@ AbstractController::getMasterPort(const std::string &if_name,
void
AbstractController::queueMemoryRead(const MachineID &id, Addr addr,
- Cycles latency)
+ Cycles latency, MachineID origin, int idx, int type)
{
+ int coreId = origin.num;
+ int sbeId = idx;
+ // type 0: non-spec 1: spec 2: expose
+ // DPRINTFR(MemSpecBuffer, "%10s MemRead (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(addr));
+ // if idx == -1, it is a write request which cannot be spec or expose.
+ assert(!(type != 0 && sbeId == -1));
+ assert(sbeId >= -1 && sbeId <= 65);
+ assert(coreId < 8);
+ assert(type >=0 && type <= 2);
+ if (type == 0) {
+ for (int c = 0; c < 8; ++c) {
+ for (int i = 0; i < 66; ++i) {
+ if (m_specBuf[c][i].address == addr) {
+ DPRINTFR(MemSpecBuffer, "%10s Cleared by Read (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), c, type, i, printAddress(addr));
+ m_specBuf[c][i].address = 0;
+ m_specBuf[c][i].data.clear();
+ }
+ }
+ }
+ } else if (type == 1) {
+
+ } else if (type == 2) {
+ if (m_specBuf[coreId][sbeId].address == addr) {
+ DPRINTFR(MemSpecBuffer, "%10s Expose Hit (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(addr));
+ ++m_expose_hits;
+ assert(getMemoryQueue());
+ std::shared_ptr<MemoryMsg> msg = std::make_shared<MemoryMsg>(clockEdge());
+ (*msg).m_addr = addr;
+ (*msg).m_Sender = m_machineID;
+ (*msg).m_OriginalRequestorMachId = id;
+ (*msg).m_Type = MemoryRequestType_MEMORY_READ;
+ (*msg).m_MessageSize = MessageSizeType_Response_Data;
+ (*msg).m_DataBlk = m_specBuf[coreId][sbeId].data;
+ getMemoryQueue()->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)));
+ for (int c = 0; c < 8; ++c) {
+ for (int i = 0; i < 66; ++i) {
+ if (m_specBuf[c][i].address == addr) {
+ DPRINTFR(MemSpecBuffer, "%10s Cleared by Expose Hit (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), c, type, i, printAddress(addr));
+ m_specBuf[c][i].address = 0;
+ m_specBuf[c][i].data.clear();
+ }
+ }
+ }
+ return;
+ } else {
+ DPRINTFR(MemSpecBuffer, "%10s Expose Miss (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(addr));
+ ++m_expose_misses;
+ for (int c = 0; c < 8; ++c) {
+ for (int i = 0; i < 66; ++i) {
+ if (m_specBuf[c][i].address == addr) {
+ DPRINTFR(MemSpecBuffer, "%10s Cleared by Expose Miss (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), c, type, i, printAddress(addr));
+ m_specBuf[c][i].address = 0;
+ m_specBuf[c][i].data.clear();
+ }
+ }
+ }
+ }
+ }
+
RequestPtr req = std::make_shared<Request>(
addr, RubySystem::getBlockSizeBytes(), 0, m_masterId);
@@ -248,6 +316,9 @@ AbstractController::queueMemoryRead(const MachineID &id, Addr addr,
pkt->dataDynamic(newData);
SenderState *s = new SenderState(id);
+ s->type = type;
+ s->coreId = coreId;
+ s->sbeId = sbeId;
pkt->pushSenderState(s);
// Use functional rather than timing accesses during warmup
@@ -339,6 +410,9 @@ AbstractController::recvTimingResp(PacketPtr pkt)
SenderState *s = dynamic_cast<SenderState *>(pkt->senderState);
(*msg).m_OriginalRequestorMachId = s->id;
+ int type = s->type;
+ int coreId = s->coreId;
+ int sbeId = s->sbeId;
delete s;
if (pkt->isRead()) {
@@ -348,6 +422,12 @@ AbstractController::recvTimingResp(PacketPtr pkt)
// Copy data from the packet
(*msg).m_DataBlk.setData(pkt->getPtr<uint8_t>(), 0,
RubySystem::getBlockSizeBytes());
+ if (type == 1) {
+ DPRINTFR(MemSpecBuffer, "%10s Updated by ReadSpec (core=%d, type=%d, idx=%d, addr=%#x)\n", curTick(), coreId, type, sbeId, printAddress(pkt->getAddr()));
+ m_specBuf[coreId][sbeId].address = pkt->getAddr();
+ m_specBuf[coreId][sbeId].data.setData(pkt->getPtr<uint8_t>(), 0,
+ RubySystem::getBlockSizeBytes());
+ }
} else if (pkt->isWrite()) {
(*msg).m_Type = MemoryRequestType_MEMORY_WB;
(*msg).m_MessageSize = MessageSizeType_Writeback_Control;
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh
index 35cd3d2a5..b65a511d0 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -129,7 +129,7 @@ class AbstractController : public MemObject, public Consumer
BaseMasterPort& getMasterPort(const std::string& if_name,
PortID idx = InvalidPortID);
- void queueMemoryRead(const MachineID &id, Addr addr, Cycles latency);
+ void queueMemoryRead(const MachineID &id, Addr addr, Cycles latency, MachineID origin, int idx, int type);
void queueMemoryWrite(const MachineID &id, Addr addr, Cycles latency,
const DataBlock &block);
void queueMemoryWritePartial(const MachineID &id, Addr addr, Cycles latency,
@@ -199,6 +199,8 @@ class AbstractController : public MemObject, public Consumer
//! Counter for the number of cycles when the transitions carried out
//! were equal to the maximum allowed
Stats::Scalar m_fully_busy_cycles;
+ Stats::Scalar m_expose_hits;
+ Stats::Scalar m_expose_misses;
//! Histogram for profiling delay for the messages this controller
//! cares for
@@ -250,6 +252,9 @@ class AbstractController : public MemObject, public Consumer
{
// Id of the machine from which the request originated.
MachineID id;
+ int type;
+ int coreId;
+ int sbeId;
SenderState(MachineID _id) : id(_id)
{}
@@ -258,6 +263,14 @@ class AbstractController : public MemObject, public Consumer
private:
/** The address range to which the controller responds on the CPU side. */
const AddrRangeList addrRanges;
+
+ struct SBE
+ {
+ Addr address;
+ DataBlock data;
+ };
+
+ SBE m_specBuf[8][66];
};
#endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__
diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh
index 6c84f3823..2fc4c9f98 100644
--- a/src/mem/ruby/slicc_interface/RubyRequest.hh
+++ b/src/mem/ruby/slicc_interface/RubyRequest.hh
@@ -60,6 +60,7 @@ class RubyRequest : public Message
int m_wfid;
HSAScope m_scope;
HSASegment m_segment;
+ int m_idx;
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
@@ -82,6 +83,11 @@ class RubyRequest : public Message
m_segment(_segment)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
+ if (_pkt->reqIdx == -1) {
+ m_idx = _pkt->reqIdx;
+ } else {
+ m_idx = (_pkt->reqIdx) * 2 + (_pkt->isFirst()? 0 : 1);
+ }
}
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
@@ -109,6 +115,11 @@ class RubyRequest : public Message
m_segment(_segment)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
+ if (_pkt->reqIdx == -1) {
+ m_idx = _pkt->reqIdx;
+ } else {
+ m_idx = (_pkt->reqIdx) * 2 + (_pkt->isFirst()? 0 : 1);
+ }
}
RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len,
@@ -137,6 +148,11 @@ class RubyRequest : public Message
m_segment(_segment)
{
m_LineAddress = makeLineAddress(m_PhysicalAddress);
+ if (_pkt->reqIdx == -1) {
+ m_idx = _pkt->reqIdx;
+ } else {
+ m_idx = (_pkt->reqIdx) * 2 + (_pkt->isFirst()? 0 : 1);
+ }
}
diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc
index 8d99c90aa..dc5898bea 100644
--- a/src/mem/ruby/structures/CacheMemory.cc
+++ b/src/mem/ruby/structures/CacheMemory.cc
@@ -176,7 +176,9 @@ CacheMemory::tryCacheAccess(Addr address, RubyRequestType type,
return true;
}
if ((entry->m_Permission == AccessPermission_Read_Only) &&
- (type == RubyRequestType_LD || type == RubyRequestType_IFETCH)) {
+ (type == RubyRequestType_LD ||
+ type == RubyRequestType_IFETCH ||
+ type == RubyRequestType_SPEC_LD)) {
return true;
}
// The line must not be accessible
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index 84a70c0f1..15013e056 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -246,6 +246,7 @@ RubyPort::PioSlavePort::recvAtomic(PacketPtr pkt)
panic("Could not find address in Ruby PIO address ranges!\n");
}
+// [InvisiSpec] Request on the way from CPU to Ruby
bool
RubyPort::MemSlavePort::recvTimingReq(PacketPtr pkt)
{
@@ -429,6 +430,7 @@ RubyPort::MemSlavePort::recvFunctional(PacketPtr pkt)
}
}
+// [InvisiSpec] On the way from Ruby to CPU
void
RubyPort::ruby_hit_callback(PacketPtr pkt)
{
@@ -512,6 +514,7 @@ RubyPort::drain()
}
}
+// [InvisiSpec] Still on the way from Ruby to CPU
void
RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
{
@@ -545,7 +548,7 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
}
// Flush, acquire, release requests don't access physical memory
- if (pkt->isFlush() || pkt->cmd == MemCmd::MemFenceReq) {
+ if (pkt->isFlush() || pkt->isExpose() || pkt->cmd == MemCmd::MemFenceReq) {
accessPhysMem = false;
}
@@ -572,6 +575,7 @@ RubyPort::MemSlavePort::hitCallback(PacketPtr pkt)
// Ruby protocol.
schedTimingResp(pkt, curTick());
} else {
+ // [InvisiSpec] Delete the packet if a reponse is not required
delete pkt;
}
@@ -602,7 +606,7 @@ RubyPort::MemSlavePort::isPhysMemAddress(Addr addr) const
}
void
-RubyPort::ruby_eviction_callback(Addr address)
+RubyPort::ruby_eviction_callback(Addr address, bool external)
{
DPRINTF(RubyPort, "Sending invalidations.\n");
// Allocate the invalidate request and packet on the stack, as it is
@@ -615,6 +619,9 @@ RubyPort::ruby_eviction_callback(Addr address)
// Use a single packet to signal all snooping ports of the invalidation.
// This assumes that snooping ports do NOT modify the packet/request
Packet pkt(request, MemCmd::InvalidateReq);
+ if (external) {
+ pkt.setExternalEviction();
+ }
for (CpuPortIter p = slave_ports.begin(); p != slave_ports.end(); ++p) {
// check if the connected master port is snooping
if ((*p)->isSnooping()) {
diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh
index 146443282..9c0200829 100644
--- a/src/mem/ruby/system/RubyPort.hh
+++ b/src/mem/ruby/system/RubyPort.hh
@@ -172,7 +172,7 @@ class RubyPort : public MemObject
void trySendRetries();
void ruby_hit_callback(PacketPtr pkt);
void testDrainComplete();
- void ruby_eviction_callback(Addr address);
+ void ruby_eviction_callback(Addr address, bool external);
/**
* Called by the PIO port when receiving a timing response.
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index f30369710..5a11d3165 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -36,6 +36,8 @@
#include "debug/ProtocolTrace.hh"
#include "debug/RubySequencer.hh"
#include "debug/RubyStats.hh"
+#include "debug/SpecBuffer.hh"
+#include "debug/SpecBufferValidate.hh"
#include "mem/packet.hh"
#include "mem/protocol/PrefetchBit.hh"
#include "mem/protocol/RubyAccessMode.hh"
@@ -54,7 +56,9 @@ RubySequencerParams::create()
Sequencer::Sequencer(const Params *p)
: RubyPort(p), m_IncompleteTimes(MachineType_NUM),
- deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check")
+ deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check"),
+ m_specBuf(33),
+ specBufferHitEvent([this]{ specBufferHitCallback(); }, "Sequencer spec buffer hit")
{
m_outstanding_count = 0;
@@ -160,6 +164,7 @@ void Sequencer::resetStats()
}
}
+// [InvisiSpec] Request on the way from CPU to Ruby
// Insert the request on the correct request table. Return true if
// the entry was already present.
RequestStatus
@@ -190,6 +195,7 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
RequestTable::value_type default_entry(line_addr,
(SequencerRequest*) NULL);
+ // [InvisiSpec] If store
if ((request_type == RubyRequestType_ST) ||
(request_type == RubyRequestType_RMW_Read) ||
(request_type == RubyRequestType_RMW_Write) ||
@@ -217,6 +223,7 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
m_store_waiting_on_store++;
return RequestStatus_Aliased;
}
+ // [InvisiSpec] If load
} else {
// Check if there is any outstanding write request for the same
// cache line.
@@ -232,6 +239,16 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
RequestTable::iterator i = r.first;
i->second = new SequencerRequest(pkt, request_type, curCycle());
m_outstanding_count++;
+ } else if (request_type == RubyRequestType_SPEC_LD) {
+ auto i = m_readRequestTable.find(line_addr);
+ if (i->second->m_type == RubyRequestType_SPEC_LD) {
+ DPRINTFR(SpecBuffer, "%10s Merging (idx=%d-%d, addr=%#x) with %d\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()), i->second->pkt->reqIdx);
+ i->second->dependentSpecRequests.push_back(pkt);
+ return RequestStatus_Merged;
+ } else {
+ m_load_waiting_on_load++;
+ return RequestStatus_Aliased;
+ }
} else {
// There is an outstanding read request for the cache line
m_load_waiting_on_load++;
@@ -412,6 +429,19 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
initialRequestTime, forwardRequestTime, firstResponseTime);
}
+bool Sequencer::updateSBB(PacketPtr pkt, DataBlock& data, Addr dataAddress) {
+ uint8_t idx = pkt->reqIdx;
+ SBE& sbe = m_specBuf[idx];
+ int blkIdx = pkt->isFirst() ? 0 : 1;
+ SBB& sbb = sbe.blocks[blkIdx];
+ if (makeLineAddress(sbb.reqAddress) == dataAddress) {
+ sbb.data = data;
+ return true;
+ }
+ return false;
+}
+
+// [InvisiSpec] Called by Ruby to send a response to CPU.
void
Sequencer::readCallback(Addr address, DataBlock& data,
bool externalHit, const MachineType mach,
@@ -430,13 +460,79 @@ Sequencer::readCallback(Addr address, DataBlock& data,
markRemoved();
assert((request->m_type == RubyRequestType_LD) ||
+ (request->m_type == RubyRequestType_SPEC_LD) ||
+ (request->m_type == RubyRequestType_EXPOSE) ||
(request->m_type == RubyRequestType_IFETCH));
+
+ PacketPtr pkt = request->pkt;
+ if (pkt->isSpec()) {
+ assert(!pkt->onlyAccessSpecBuff());
+ DPRINTFR(SpecBuffer, "%10s SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+ updateSBB(pkt, data, address);
+ if (!externalHit) {
+ pkt->setL1Hit();
+ }
+ } else if (pkt->isExpose()) {
+ DPRINTFR(SpecBuffer, "%10s EXPOSE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+ } else if (pkt->isValidate()) {
+ DPRINTFR(SpecBuffer, "%10s VALIDATE callback (idx=%d-%d, addr=%#x)\n", curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+ uint8_t idx = pkt->reqIdx;
+ SBE& sbe = m_specBuf[idx];
+ int blkIdx = pkt->isFirst() ? 0 : 1;
+ SBB& sbb = sbe.blocks[blkIdx];
+ assert(makeLineAddress(sbb.reqAddress) == address);
+ if (!memcmp(sbb.data.getData(getOffset(pkt->getAddr()), pkt->getSize()), data.getData(getOffset(pkt->getAddr()), pkt->getSize()), pkt->getSize())) {
+ *(pkt->getPtr<uint8_t>()) = 1;
+ } else {
+ // std::ostringstream os;
+ // sbb.data.print(os);
+ // DPRINTFR(SpecBufferValidate, "%s\n", os.str());
+ // os.str("");
+ // data.print(os);
+ // DPRINTFR(SpecBufferValidate, "%s\n", os.str());
+ *(pkt->getPtr<uint8_t>()) = 0;
+ }
+ }
+
+ for (auto& dependentPkt : request->dependentSpecRequests) {
+ assert(!dependentPkt->onlyAccessSpecBuff());
+ DPRINTFR(SpecBuffer, "%10s Merged SPEC_LD callback (idx=%d-%d, addr=%#x)\n", curTick(), dependentPkt->reqIdx, dependentPkt->isFirst()? 0 : 1, printAddress(dependentPkt->getAddr()));
+ assert(dependentPkt->isSpec());
+ updateSBB(dependentPkt, data, address);
+ if (!externalHit) {
+ dependentPkt->setL1Hit();
+ }
+ memcpy(dependentPkt->getPtr<uint8_t>(),
+ data.getData(getOffset(dependentPkt->getAddr()), dependentPkt->getSize()),
+ dependentPkt->getSize());
+ ruby_hit_callback(dependentPkt);
+ }
hitCallback(request, data, true, mach, externalHit,
initialRequestTime, forwardRequestTime, firstResponseTime);
}
void
+Sequencer::specBufferHitCallback()
+{
+ assert(m_specRequestQueue.size());
+ while (m_specRequestQueue.size()) {
+ auto specReq = m_specRequestQueue.front();
+ if (specReq.second <= curTick()) {
+ PacketPtr pkt = specReq.first;
+ assert(pkt->onlyAccessSpecBuff());
+ DPRINTFR(SpecBuffer, "%10s SB Hit Callback (idx=%d, addr=%#x)\n", curTick(), pkt->reqIdx, printAddress(pkt->getAddr()));
+ ruby_hit_callback(pkt);
+ m_specRequestQueue.pop();
+ } else {
+ schedule(specBufferHitEvent, specReq.second);
+ break;
+ }
+ }
+}
+
+// [InvisiSpec] Response on the way from Ruby to CPU
+void
Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
bool llscSuccess,
const MachineType mach, const bool externalHit,
@@ -470,8 +566,9 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data,
if (RubySystem::getWarmupEnabled()) {
data.setData(pkt->getConstPtr<uint8_t>(),
getOffset(request_address), pkt->getSize());
- } else if (!pkt->isFlush()) {
+ } else if (!pkt->isFlush() && !pkt->isExpose() && !pkt->isValidate()) {
if ((type == RubyRequestType_LD) ||
+ (type == RubyRequestType_SPEC_LD) ||
(type == RubyRequestType_IFETCH) ||
(type == RubyRequestType_RMW_Read) ||
(type == RubyRequestType_Locked_RMW_Read) ||
@@ -533,6 +630,7 @@ Sequencer::empty() const
return m_writeRequestTable.empty() && m_readRequestTable.empty();
}
+// [InvisiSpec] Request on the way from CPU to Ruby
RequestStatus
Sequencer::makeRequest(PacketPtr pkt)
{
@@ -543,7 +641,56 @@ Sequencer::makeRequest(PacketPtr pkt)
RubyRequestType primary_type = RubyRequestType_NULL;
RubyRequestType secondary_type = RubyRequestType_NULL;
- if (pkt->isLLSC()) {
+ // [InvisiSpec] Handle new requests
+ if (pkt->isSpec()) {
+ assert(pkt->cmd == MemCmd::ReadSpecReq);
+ assert(pkt->isSplit || pkt->isFirst());
+ uint8_t idx = pkt->reqIdx;
+ SBE& sbe = m_specBuf[idx];
+ sbe.isSplit = pkt->isSplit;
+ int blkIdx = pkt->isFirst() ? 0 : 1;
+ SBB& sbb = sbe.blocks[blkIdx];
+ sbb.reqAddress = pkt->getAddr();
+ sbb.reqSize = pkt->getSize();
+ if (pkt->onlyAccessSpecBuff()) {
+ int srcIdx = pkt->srcIdx;
+ SBE& srcEntry = m_specBuf[srcIdx];
+ if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[0].reqAddress)) {
+ sbb.data = srcEntry.blocks[0].data;
+ } else if (makeLineAddress(sbb.reqAddress) == makeLineAddress(srcEntry.blocks[1].reqAddress)) {
+ sbb.data = srcEntry.blocks[1].data;
+ } else {
+ fatal("Requested address %#x is not present in the spec buffer\n", printAddress(sbb.reqAddress));
+ }
+ memcpy(pkt->getPtr<uint8_t>(),
+ sbb.data.getData(getOffset(sbb.reqAddress), sbb.reqSize),
+ sbb.reqSize);
+ m_specRequestQueue.push({pkt, curTick()});
+ DPRINTFR(SpecBuffer, "%10s SB Hit (idx=%d, addr=%#x) on (srcIdx=%d)\n", curTick(), idx, printAddress(sbb.reqAddress), srcIdx);
+ if (!specBufferHitEvent.scheduled()) {
+ schedule(specBufferHitEvent, clockEdge(Cycles(1)));
+ }
+ return RequestStatus_Issued;
+ } else {
+ // assert it is not in the buffer
+ primary_type = secondary_type = RubyRequestType_SPEC_LD;
+ }
+ } else if (pkt->isExpose() || pkt->isValidate()) {
+ assert(pkt->cmd == MemCmd::ExposeReq || pkt->cmd == MemCmd::ValidateReq);
+ assert(pkt->isSplit || pkt->isFirst());
+ uint8_t idx = pkt->reqIdx;
+ SBE& sbe = m_specBuf[idx];
+ sbe.isSplit = pkt->isSplit;
+ int blkIdx = pkt->isFirst() ? 0 : 1;
+ SBB& sbb = sbe.blocks[blkIdx];
+ if (sbb.reqAddress != pkt->getAddr()) {
+ fatal("sbb.reqAddress != pkt->getAddr: %#x != %#x\n", printAddress(sbb.reqAddress), printAddress(pkt->getAddr()));
+ }
+ if (sbb.reqSize != pkt->getSize()) {
+ fatal("sbb.reqSize != pkt->getSize(): %d != %d\n", sbb.reqSize, pkt->getSize());
+ }
+ primary_type = secondary_type = RubyRequestType_EXPOSE;
+ } else if (pkt->isLLSC()) {
//
// Alpha LL/SC instructions need to be handled carefully by the cache
// coherence protocol to ensure they follow the proper semantics. In
@@ -614,8 +761,22 @@ Sequencer::makeRequest(PacketPtr pkt)
}
RequestStatus status = insertRequest(pkt, primary_type);
- if (status != RequestStatus_Ready)
+ if (status == RequestStatus_Merged) {
+ return RequestStatus_Issued;
+ } else if (status != RequestStatus_Ready) {
return status;
+ }
+
+ if (pkt->isSpec()) {
+ DPRINTFR(SpecBuffer, "%10s Issuing SPEC_LD (idx=%d-%d, addr=%#x)\n",
+ curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+ } else if (pkt->isExpose()) {
+ DPRINTFR(SpecBuffer, "%10s Issuing EXPOSE (idx=%d-%d, addr=%#x)\n",
+ curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+ } else if (pkt->isValidate()) {
+ DPRINTFR(SpecBuffer, "%10s Issuing VALIDATE (idx=%d-%d, addr=%#x)\n",
+ curTick(), pkt->reqIdx, pkt->isFirst()? 0 : 1, printAddress(pkt->getAddr()));
+ }
issueRequest(pkt, secondary_type);
@@ -642,7 +803,7 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
// requests do not
std::shared_ptr<RubyRequest> msg =
std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
- pkt->isFlush() ?
+ pkt->isFlush() || pkt->isExpose() ?
nullptr : pkt->getPtr<uint8_t>(),
pkt->getSize(), pc, secondary_type,
RubyAccessMode_Supervisor, pkt,
@@ -716,9 +877,9 @@ Sequencer::recordRequestType(SequencerRequestType requestType) {
void
-Sequencer::evictionCallback(Addr address)
+Sequencer::evictionCallback(Addr address, bool external)
{
- ruby_eviction_callback(address);
+ ruby_eviction_callback(address, external);
}
void
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index fcfa8ad86..66ff92777 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -31,6 +31,7 @@
#include <iostream>
#include <unordered_map>
+#include <queue>
#include "mem/protocol/MachineType.hh"
#include "mem/protocol/RubyRequestType.hh"
@@ -45,6 +46,7 @@ struct SequencerRequest
PacketPtr pkt;
RubyRequestType m_type;
Cycles issue_time;
+ std::vector<PacketPtr> dependentSpecRequests;
SequencerRequest(PacketPtr _pkt, RubyRequestType _m_type,
Cycles _issue_time)
@@ -54,6 +56,19 @@ struct SequencerRequest
std::ostream& operator<<(std::ostream& out, const SequencerRequest& obj);
+struct SBB // SpecBufferBlock
+{
+ Addr reqAddress;
+ unsigned reqSize;
+ DataBlock data;
+};
+
+struct SBE // SpecBufferEntry
+{
+ bool isSplit;
+ SBB blocks[2];
+};
+
class Sequencer : public RubyPort
{
public:
@@ -83,6 +98,9 @@ class Sequencer : public RubyPort
const Cycles forwardRequestTime = Cycles(0),
const Cycles firstResponseTime = Cycles(0));
+ void specBufferHitCallback();
+ bool updateSBB(PacketPtr pkt, DataBlock& data, Addr dataAddress);
+
RequestStatus makeRequest(PacketPtr pkt);
bool empty() const;
int outstandingCount() const { return m_outstanding_count; }
@@ -97,7 +115,7 @@ class Sequencer : public RubyPort
void checkCoherence(Addr address);
void markRemoved();
- void evictionCallback(Addr address);
+ void evictionCallback(Addr address, bool external);
void invalidateSC(Addr address);
int coreId() const { return m_coreId; }
@@ -238,6 +256,10 @@ class Sequencer : public RubyPort
std::vector<Stats::Counter> m_IncompleteTimes;
EventFunctionWrapper deadlockCheckEvent;
+
+ std::vector<SBE> m_specBuf;
+ std::queue<std::pair<PacketPtr, Tick>> m_specRequestQueue;
+ EventFunctionWrapper specBufferHitEvent;
};
inline std::ostream&