summaryrefslogtreecommitdiff
path: root/src/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu')
-rw-r--r--src/cpu/base_dyn_inst.hh188
-rw-r--r--src/cpu/base_dyn_inst_impl.hh5
-rw-r--r--src/cpu/o3/O3CPU.py16
-rw-r--r--src/cpu/o3/commit.hh17
-rw-r--r--src/cpu/o3/commit_impl.hh62
-rw-r--r--src/cpu/o3/cpu.cc3
-rw-r--r--src/cpu/o3/iew_impl.hh47
-rw-r--r--src/cpu/o3/inst_queue_impl.hh13
-rw-r--r--src/cpu/o3/lsq.hh21
-rw-r--r--src/cpu/o3/lsq_impl.hh50
-rw-r--r--src/cpu/o3/lsq_unit.hh201
-rw-r--r--src/cpu/o3/lsq_unit_impl.hh697
-rw-r--r--src/cpu/o3/rename_impl.hh3
-rw-r--r--src/cpu/o3/rob.hh4
-rw-r--r--src/cpu/o3/rob_impl.hh79
15 files changed, 1368 insertions, 38 deletions
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index 93cafd694..0e81073cc 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -120,6 +120,28 @@ class BaseDynInst : public ExecContext, public RefCounted
/// instructions ahead of it
SerializeAfter, /// Needs to serialize instructions behind it
SerializeHandled, /// Serialization has been handled
+
+ SpecCompleted,
+ // [mengjia] indicates whether received specReadResp
+ ValidationCompleted,
+ // indicates whether validation finishes
+ ExposeCompleted,
+ ExposeSent,
+ // indicates whether expose finishes
+ // (should set when expose is sent out)
+ PrevInstsCompleted,
+ // indicate whether previous instructions completed
+ PrevBrsResolved,
+ // [mengjia] indicate whether previous branches are resolved
+ PrevInstsCommitted,
+ // indicate whether previous instructions committed
+ PrevBrsCommitted,
+ // [mengjia] indicate whether previous branches are committed
+ L1HitHigh,
+ L1HitLow,
+ SpecBuffObsoleteHigh,
+ SpecBuffObsoleteLow,
+ // [InvisiSpec] it hits in L1 and is open to invalidations
NumStatus
};
@@ -136,6 +158,22 @@ class BaseDynInst : public ExecContext, public RefCounted
IsStrictlyOrdered,
ReqMade,
MemOpDone,
+ // [mengjia] indicates need validation or expose
+ NeedPostFetch,
+ NeedDeletePostReq,
+ // [mengjia] indicates only need to expose, do not need to validate
+ NeedExposeOnly,
+ // [InvisiSpec] indicate the instruction needs to be delayed
+ // due to virtual fences before to defend against speculative attacks
+ FenceDelay,
+ // [InvisiSpec] indicate the load is legal to be visible
+ ReadyToExpose,
+ HitInvalidation,
+ HitExternalEviction,
+ ValidationFail,
+ OnlyWaitForFence,
+ OnlyWaitForExpose,
+ SpecTLBMiss,
MaxFlags
};
@@ -222,6 +260,9 @@ class BaseDynInst : public ExecContext, public RefCounted
/** Pointer to the data for the memory access. */
uint8_t *memData;
+ /** Pointer to the data for the validation result. */
+ uint8_t *vldData;
+
/** Load queue index. */
int16_t lqIdx;
@@ -238,6 +279,12 @@ class BaseDynInst : public ExecContext, public RefCounted
RequestPtr savedSreqLow;
RequestPtr savedSreqHigh;
+ /** [InvisiSpec]
+ * Saved memory requests (needed for post-fetch validation/expose).
+ */
+ RequestPtr postReq;
+ RequestPtr postSreqLow;
+ RequestPtr postSreqHigh;
/////////////////////// Checker //////////////////////
// Need a copy of main request pointer to verify on writes.
RequestPtr reqToVerify;
@@ -275,6 +322,43 @@ class BaseDynInst : public ExecContext, public RefCounted
bool memOpDone() const { return instFlags[MemOpDone]; }
void memOpDone(bool f) { instFlags[MemOpDone] = f; }
+ /** [mengjia] Whether or not need pseudo-validation.
+ * whether speculative laod finishes,
+ * whether validation completes or not (success) */
+ bool needExposeOnly() const { return instFlags[NeedExposeOnly]; }
+ void needExposeOnly(bool f) { instFlags[NeedExposeOnly] = f; }
+
+ bool needPostFetch() const { return instFlags[NeedPostFetch]; }
+ void needPostFetch(bool f) { instFlags[NeedPostFetch] = f; }
+
+ bool needDeletePostReq() const { return instFlags[NeedDeletePostReq]; }
+ void needDeletePostReq(bool f) { instFlags[NeedDeletePostReq] = f; }
+
+ bool fenceDelay() const { return instFlags[ReadyToExpose]; }
+ void fenceDelay(bool f) { instFlags[ReadyToExpose] = f; }
+
+ bool readyToExpose() const { return instFlags[FenceDelay]; }
+ void readyToExpose(bool f) { instFlags[FenceDelay] = f; }
+
+ bool hitInvalidation() const { return instFlags[HitInvalidation]; }
+ void hitInvalidation(bool f) { instFlags[HitInvalidation] = f; }
+
+ bool hitExternalEviction() const { return instFlags[HitExternalEviction]; }
+ void hitExternalEviction(bool f) { instFlags[HitExternalEviction] = f; }
+
+ bool validationFail() const { return instFlags[ValidationFail]; }
+ void validationFail(bool f) { instFlags[ValidationFail] = f; }
+
+ bool onlyWaitForFence() const { return instFlags[OnlyWaitForFence]; }
+ void onlyWaitForFence(bool f) { instFlags[OnlyWaitForFence] = f; }
+
+ bool onlyWaitForExpose() const { return instFlags[OnlyWaitForExpose]; }
+ void onlyWaitForExpose(bool f) { instFlags[OnlyWaitForExpose] = f; }
+
+ bool specTLBMiss() const { return instFlags[SpecTLBMiss]; }
+ void specTLBMiss(bool f) { instFlags[SpecTLBMiss] = f; }
+ /*[mengjia] added 2 new flags and corresponding functions*/
+
bool notAnInst() const { return instFlags[NotAnInst]; }
void setNotAnInst() { instFlags[NotAnInst] = true; }
@@ -704,6 +788,49 @@ class BaseDynInst : public ExecContext, public RefCounted
/** Returns whether or not this instruction is completed. */
bool isCompleted() const { return status[Completed]; }
+ /* [mengjia] new status for load operations */
+ //void setSpecSent() { status.set(SpecSent); }
+ //bool isSpecSent() const { return status[SpecSent]; }
+
+ void setSpecCompleted() { status.set(SpecCompleted); }
+ bool isSpecCompleted() const { return status[SpecCompleted]; }
+
+ void setValidationCompleted() { status.set(ValidationCompleted); }
+ bool isValidationCompleted() const { return status[ValidationCompleted]; }
+
+ void setExposeCompleted() { status.set(ExposeCompleted); }
+ bool isExposeCompleted() const { return status[ExposeCompleted]; }
+
+ void setExposeSent() { status.set(ExposeSent); }
+ bool isExposeSent() const { return status[ExposeSent]; }
+
+ void setL1HitHigh() { status.set(L1HitHigh); }
+ void clearL1HitHigh() { status.reset(L1HitHigh); }
+ bool isL1HitHigh() const { return status[L1HitHigh]; }
+
+ void setL1HitLow() { status.set(L1HitLow); }
+ void clearL1HitLow() { status.reset(L1HitLow); }
+ bool isL1HitLow() const { return status[L1HitLow]; }
+
+ void setPrevInstsCompleted() { status.set(PrevInstsCompleted); }
+ bool isPrevInstsCompleted() const { return status[PrevInstsCompleted]; }
+
+ void setSpecBuffObsoleteHigh() { status.set(SpecBuffObsoleteHigh); }
+ bool isSpecBuffObsoleteHigh() const { return status[SpecBuffObsoleteHigh]; }
+
+ void setSpecBuffObsoleteLow() { status.set(SpecBuffObsoleteLow); }
+ bool isSpecBuffObsoleteLow() const { return status[SpecBuffObsoleteLow]; }
+
+ void setPrevBrsResolved() { status.set(PrevBrsResolved); }
+ bool isPrevBrsResolved() const { return status[PrevBrsResolved]; }
+
+ void setPrevInstsCommitted() { status.set(PrevInstsCommitted); }
+ bool isPrevInstsCommitted() const { return status[PrevInstsCommitted]; }
+
+ void setPrevBrsCommitted() { status.set(PrevBrsCommitted); }
+ bool isPrevBrsCommitted() const { return status[PrevBrsCommitted]; }
+ /* Configure load related status */
+
/** Marks the result as ready. */
void setResultReady() { status.set(ResultReady); }
@@ -893,7 +1020,25 @@ Fault
BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
Request::Flags flags)
{
+ // [InvisiSpec] do not start translation if
+ // there is a virtual fence ahead
+ assert(!fenceDelay());
+
+ if ( (flags.isSet(Request::ATOMIC_RETURN_OP)
+ || flags.isSet(Request::ATOMIC_NO_RETURN_OP)
+ || flags.isSet(Request::UNCACHEABLE)
+ || flags.isSet(Request::LLSC)
+ || flags.isSet(Request::STRICT_ORDER))
+ && !readyToExpose()){
+ onlyWaitForExpose(true);
+ // FIXME: reschedule due to LLSC
+ // reuse TLBMiss for now
+ specTLBMiss(true);
+ return NoFault;
+ }
+
instFlags[ReqMade] = true;
+ instFlags[SpecTLBMiss] = false;
RequestPtr req = NULL;
RequestPtr sreqLow = NULL;
RequestPtr sreqHigh = NULL;
@@ -908,16 +1053,36 @@ BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
this->pc.instAddr(), thread->contextId());
req->taskId(cpu->taskId());
-
+ if(!readyToExpose()){
+ req->setFlags(Request::SPEC);
+ }
// Only split the request if the ISA supports unaligned accesses.
if (TheISA::HasUnalignedMemAcc) {
splitRequest(req, sreqLow, sreqHigh);
}
+
initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read);
+
}
if (translationCompleted()) {
+ // [InvisiSpec] to fix the memory leakage problem
+ // in the case the read is squashed and the request
+ // is never sent out due to a virtual fence ahead
if (fault == NoFault) {
+ /*
+ if (fenceDelay()){
+ translationStarted(false);
+ translationCompleted(false);
+ onlyWaitForFence(true);
+ delete req;
+ if (sreqLow){
+ delete sreqLow;
+ delete sreqHigh;
+ }
+ return NoFault;
+ }
+ */
effAddr = req->getVaddr();
effSize = size;
instFlags[EffAddrValid] = true;
@@ -925,10 +1090,30 @@ BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
if (cpu->checker) {
reqToVerify = std::make_shared<Request>(*req);
}
+
+ // issue load request [mengjia]
fault = cpu->read(req, sreqLow, sreqHigh, lqIdx);
} else {
// Commit will have to clean up whatever happened. Set this
// instruction as executed.
+
+ // [InvisiSpec] If it is a fault on translating a spec load
+ // Deffer it and retry when it is ready to expose
+ if (!readyToExpose()){
+ translationStarted(false);
+ translationCompleted(false);
+ onlyWaitForExpose(true);
+ specTLBMiss(true);
+ //delete req;
+ //if (sreqLow){
+ // delete sreqLow;
+ // delete sreqHigh;
+ //}
+ return NoFault;
+ }
+ // set it as executed and fault flag.
+ // when it enters ROB and try to commit,
+ // the commit stage will squash this inst [mengjia]
this->setExecuted();
}
}
@@ -939,6 +1124,7 @@ BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
return fault;
}
+
template<class Impl>
Fault
BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size, Addr addr,
diff --git a/src/cpu/base_dyn_inst_impl.hh b/src/cpu/base_dyn_inst_impl.hh
index b499fe4e6..d7b3d562c 100644
--- a/src/cpu/base_dyn_inst_impl.hh
+++ b/src/cpu/base_dyn_inst_impl.hh
@@ -87,6 +87,7 @@ void
BaseDynInst<Impl>::initVars()
{
memData = NULL;
+ vldData = NULL;
effAddr = 0;
physEffAddrLow = 0;
physEffAddrHigh = 0;
@@ -140,6 +141,10 @@ BaseDynInst<Impl>::~BaseDynInst()
delete [] memData;
}
+ if (vldData) {
+ delete [] vldData;
+ }
+
if (traceData) {
delete traceData;
}
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index b8152f663..371433eef 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -68,6 +68,10 @@ class DerivO3CPU(BaseCPU):
cacheStorePorts = Param.Unsigned(200, "Cache Ports. "
"Constrains stores only. Loads are constrained by load FUs.")
+ # we deal with validation very similar as store writes back
+ # FIXME: not sure whether it is the correct parameter or not
+ cacheValidationPorts = Param.Unsigned(200, "Validation Ports. "
+ "Constrains validations only. Loads are constrained by load FUs.")
decodeToFetchDelay = Param.Cycles(1, "Decode to fetch delay")
renameToFetchDelay = Param.Cycles(1 ,"Rename to fetch delay")
@@ -124,7 +128,7 @@ class DerivO3CPU(BaseCPU):
LFSTSize = Param.Unsigned(1024, "Last fetched store table size")
SSITSize = Param.Unsigned(1024, "Store set ID table size")
- numRobs = Param.Unsigned(1, "Number of Reorder Buffers");
+ numRobs = Param.Unsigned(1, "Number of Reorder Buffers")
numPhysIntRegs = Param.Unsigned(256, "Number of physical integer registers")
numPhysFloatRegs = Param.Unsigned(256, "Number of physical floating point "
@@ -157,10 +161,14 @@ class DerivO3CPU(BaseCPU):
smtCommitPolicy = Param.String('RoundRobin', "SMT Commit Policy")
branchPred = Param.BranchPredictor(TournamentBP(numThreads =
- Parent.numThreads),
+ Parent.numThreads),
"Branch Predictor")
- needsTSO = Param.Bool(buildEnv['TARGET_ISA'] == 'x86',
- "Enable TSO Memory model")
+
+ # [mengjia] add configuration variables
+ simulateScheme = Param.String('UnsafeBaseline',
+ "The scheme specificed for simulation")
+ needsTSO = Param.Bool(False, "Enable TSO Memory model")
+ allowSpecBuffHit = Param.Bool(True, "Enable hit/reuse spec buffer entries")
def addCheckerCpu(self):
if buildEnv['TARGET_ISA'] in ['arm']:
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index f508a372e..7fe4ad731 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -450,6 +450,7 @@ class DefaultCommit
/** The sequence number of the last commited instruction. */
InstSeqNum lastCommitedSeqNum[Impl::MaxThreads];
+ Tick lastCommitTick;
/** Records if there is a trap currently in flight. */
bool trapInFlight[Impl::MaxThreads];
@@ -479,6 +480,9 @@ class DefaultCommit
/** Updates commit stats based on this instruction. */
void updateComInstStats(DynInstPtr &inst);
+ /** [InvisiSpec] Updates squash stats based on this instruction. */
+ void updateSquashStats(DynInstPtr &inst);
+
/** Stat for the total number of squashed instructions discarded by commit.
*/
Stats::Scalar commitSquashedInsts;
@@ -488,6 +492,19 @@ class DefaultCommit
Stats::Scalar commitNonSpecStalls;
/** Stat for the total number of branch mispredicts that caused a squash. */
Stats::Scalar branchMispredicts;
+
+ // [InvisiSpec] count #squash
+ /** Stat for the total number of invalidation packets
+ * that caused a squash. */
+ Stats::Scalar loadHitInvalidations;
+ Stats::Scalar loadHitExternalEvictions;
+ /** Stat for the total number of failed validations
+ * that caused a squash. */
+ Stats::Scalar loadValidationFails;
+ // [InvisiSpec] count cycles stall due to waiting for
+ // validation responses
+ Stats::Scalar validationStalls;
+
/** Distribution of the number of committed instructions each cycle. */
Stats::Distribution numCommittedDist;
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
index d32493cbc..d83011a66 100644
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -143,6 +143,7 @@ DefaultCommit<Impl>::DefaultCommit(O3CPU *_cpu, DerivO3CPUParams *params)
lastCommitedSeqNum[tid] = 0;
squashAfterInst[tid] = NULL;
}
+ lastCommitTick = curTick();
interrupt = NoFault;
}
@@ -183,6 +184,28 @@ DefaultCommit<Impl>::regStats()
.desc("The number of times a branch was mispredicted")
.prereq(branchMispredicts);
+ // [InvisiSpec] stat for squash due to invalidation, failed validation
+ loadHitInvalidations
+ .name(name() + ".loadHitInvalidations")
+ .desc("The number of times a load hits a invalidation");
+ //.prereq(loadHitInvalidations);
+
+ loadHitExternalEvictions
+ .name(name() + ".loadHitExternalEvictions")
+ .desc("The number of times a load hits an external invalidation");
+ //.prereq(loadHitInvalidations);
+
+ loadValidationFails
+ .name(name() + ".loadValidationFails")
+ .desc("The number of times a load fails validation");
+ //.prereq(loadValidationFails);
+
+ validationStalls
+ .name(name() + ".validationStalls")
+ .desc("The number of ticks the commit is stalled due to waiting "
+ "for validation responses");
+ //.prereq(loadValidationFails);
+
numCommittedDist
.init(0,commitWidth,1)
.name(name() + ".committed_per_cycle")
@@ -579,6 +602,9 @@ DefaultCommit<Impl>::squashAll(ThreadID tid)
toIEW->commitInfo[tid].squashInst = NULL;
toIEW->commitInfo[tid].pc = pc[tid];
+
+ //TODO: send a packet to SpecBuffer to indicate flush
+ //
}
template <class Impl>
@@ -705,13 +731,21 @@ DefaultCommit<Impl>::tick()
} else if (!rob->isEmpty(tid)) {
DynInstPtr inst = rob->readHeadInst(tid);
+ if (inst->isExecuted() && inst->needPostFetch()
+ && !inst->isExposeCompleted()){
+ //stall due to waiting for validation response
+ if (curTick()-lastCommitTick > 0){
+ validationStalls+= curTick()-lastCommitTick;
+ }
+
+ }
ppCommitStall->notify(inst);
DPRINTF(Commit,"[tid:%i]: Can't commit, Instruction [sn:%lli] PC "
"%s is head of ROB and not ready\n",
tid, inst->seqNum, inst->pcState());
}
-
+ lastCommitTick = curTick();
DPRINTF(Commit, "[tid:%i]: ROB has %d insts & %d free entries.\n",
tid, rob->countInsts(tid), rob->numFreeEntries(tid));
}
@@ -832,6 +866,7 @@ DefaultCommit<Impl>::commit()
squashFromTrap(tid);
} else if (tcSquash[tid]) {
assert(commitStatus[tid] != TrapPending);
+ //TC: thread context. [mengjia]
squashFromTC(tid);
} else if (commitStatus[tid] == SquashAfterPending) {
// A squash from the previous cycle of the commit stage (i.e.,
@@ -1039,6 +1074,7 @@ DefaultCommit<Impl>::commitInsts()
toIEW->commitInfo[tid].doneSeqNum = head_inst->seqNum;
if (tid == 0) {
+ //maybe we can use this to mask interrupts [mengjia]
canHandleInterrupts = (!head_inst->isDelayedCommit()) &&
((THE_ISA != ALPHA_ISA) ||
(!(pc[0].instAddr() & 0x3)));
@@ -1219,6 +1255,8 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// execution doesn't generate extra squashes.
thread[tid]->noSquashFromTC = true;
+ // [InvisiSpec] update squash stat for invalidation or validation fails
+ updateSquashStats(head_inst);
// Execute the trap. Although it's slightly unrealistic in
// terms of timing (as it doesn't wait for the full timing of
// the trap event to complete before updating state), it's
@@ -1350,6 +1388,7 @@ DefaultCommit<Impl>::markCompletedInsts()
// Grab completed insts out of the IEW instruction queue, and mark
// instructions completed within the ROB.
for (int inst_num = 0; inst_num < fromIEW->size; ++inst_num) {
+ DPRINTF(Commit, "get the inst [num:%d]\n", inst_num);
assert(fromIEW->insts[inst_num]);
if (!fromIEW->insts[inst_num]->isSquashed()) {
DPRINTF(Commit, "[tid:%i]: Marking PC %s, [sn:%lli] ready "
@@ -1362,6 +1401,27 @@ DefaultCommit<Impl>::markCompletedInsts()
fromIEW->insts[inst_num]->setCanCommit();
}
}
+
+ // [InvisiSpec]
+ // update load status
+ // isPrevInstsCompleted; isPrevBrsResolved
+ rob->updateVisibleState();
+}
+
+// [InvisiSpec] update squash stat for loads
+template <class Impl>
+void
+DefaultCommit<Impl>::updateSquashStats(DynInstPtr &inst)
+{
+ if (inst->hitInvalidation()){
+ loadHitInvalidations++;
+ }
+ if (inst->validationFail()){
+ loadValidationFails++;
+ }
+ if (inst->hitExternalEviction()){
+ loadHitExternalEvictions++;
+ }
}
template <class Impl>
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index c4bc13fb4..27ad78e2e 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -593,6 +593,7 @@ FullO3CPU<Impl>::tick()
activityRec.advance();
+ DPRINTF(O3CPU, "activityRec.advance() complete\n");
if (removeInstsThisCycle) {
cleanUpRemovedInsts();
}
@@ -610,6 +611,8 @@ FullO3CPU<Impl>::tick()
schedule(tickEvent, clockEdge(Cycles(1)));
DPRINTF(O3CPU, "Scheduling next tick!\n");
}
+ } else {
+ DPRINTF(O3CPU, "tickEvent.scheduled == false, %lu", curTick());
}
if (!FullSystem)
diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh
index 8270a71b5..063394fdd 100644
--- a/src/cpu/o3/iew_impl.hh
+++ b/src/cpu/o3/iew_impl.hh
@@ -1186,9 +1186,16 @@ DefaultIEW<Impl>::executeInsts()
fetchRedirect[tid] = false;
}
+ // [mengjia] Validate/Expose any loads which are ready last cycle
+ // very tricky, need make the state consistent
+ // if we successfully commit sth, then we need to activate the stage or somehow
+ // problems happen when interacting with squash
+ // NOTE: we always send validations before execute load requests
+ ldstQueue.exposeLoads();
+
// Uncomment this if you want to see all available instructions.
// @todo This doesn't actually work anymore, we should fix it.
-// printAvailableInsts();
+ // printAvailableInsts();
// Execute/writeback any instructions that are available.
int insts_to_execute = fromIssue->size;
@@ -1235,18 +1242,40 @@ DefaultIEW<Impl>::executeInsts()
DPRINTF(IEW, "Execute: Calculating address for memory "
"reference.\n");
+ DPRINTF(IEW, "Execute: %s\n", inst->staticInst->getName());
// Tell the LDSTQ to execute this instruction (if it is a load).
if (inst->isLoad()) {
// Loads will mark themselves as executed, and their writeback
// event adds the instruction to the queue to commit
+
+ // [InvisiSpec] a lifetime of a load
+ // always let it translate --> translation not complete, defer
+ // if !loadInExec, need to check whether there
+ // is a virtual fence ahead
+ // --> if existing virtual fence, defer
+ if (inst->fenceDelay()){
+ DPRINTF(IEW, "Deferring load due to virtual fence.\n");
+ inst->onlyWaitForFence(true);
+ instQueue.deferMemInst(inst);
+ continue;
+ }
+
fault = ldstQueue.executeLoad(inst);
- if (inst->isTranslationDelayed() &&
+ // [InvisiSpec] delay the load if there is a virtual fence ahead
+ if ((inst->isTranslationDelayed() ) &&
fault == NoFault) {
// A hw page table walk is currently going on; the
// instruction must be deferred.
- DPRINTF(IEW, "Execute: Delayed translation, deferring "
- "load.\n");
+ DPRINTF(IEW, "Execute: Delayed translation, deferring load.\n");
+ instQueue.deferMemInst(inst);
+ continue;
+ }
+
+ if ((inst->specTLBMiss() ) &&
+ fault == NoFault) {
+ DPRINTF(IEW, "Execute: Speculative load gets a TLB miss,"
+ " deferring load.\n");
instQueue.deferMemInst(inst);
continue;
}
@@ -1381,10 +1410,11 @@ DefaultIEW<Impl>::executeInsts()
++memOrderViolationEvents;
}
}
- }
+ }
// Update and record activity if we processed any instructions.
if (inst_num) {
+
if (exeStatus == Idle) {
exeStatus = Running;
}
@@ -1476,16 +1506,18 @@ DefaultIEW<Impl>::tick()
dispatch(tid);
}
+ ldstQueue.updateVisibleState();
+
if (exeStatus != Squashing) {
executeInsts();
-
+
writebackInsts();
// Have the instruction queue try to schedule any ready instructions.
// (In actuality, this scheduling is for instructions that will
// be executed next cycle.)
instQueue.scheduleReadyInsts();
-
+
// Also should advance its own time buffers if the stage ran.
// Not the best place for it, but this works (hopefully).
issueToExecQueue.advance();
@@ -1502,6 +1534,7 @@ DefaultIEW<Impl>::tick()
// Writeback any stores using any leftover bandwidth.
ldstQueue.writebackStores();
+
// Check the committed load/store signals to see if there's a load
// or store to commit. Also check if it's being told to execute a
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 84ac5799c..504084165 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -1172,8 +1172,19 @@ InstructionQueue<Impl>::getDeferredMemInstToExecute()
{
for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
++it) {
- if ((*it)->translationCompleted() || (*it)->isSquashed()) {
+ // [InvisiSpec] we need to check the FenceDelay
+ // a load can be delayed due to
+ // 1. translation delay
+ // 2. virtual fence ahead
+ // 3. not ready to expose and gets a TLB miss
+ // for both (2, 3) we need to restart the translation
+ if ( (*it)->translationCompleted()
+ || ((*it)->onlyWaitForFence() && !(*it)->fenceDelay())
+ || ((*it)->onlyWaitForExpose() && (*it)->readyToExpose())
+ || (*it)->isSquashed()) {
DynInstPtr mem_inst = *it;
+ mem_inst->onlyWaitForFence(false);
+ mem_inst->onlyWaitForExpose(false);
deferredMemInsts.erase(it);
return mem_inst;
}
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 7c78156d5..518153990 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -136,6 +136,23 @@ class LSQ {
/** Same as above, but only for one thread. */
void writebackStores(ThreadID tid);
+
+ /** [mengjia]
+ * Attempts to validate loads until all cache ports are used or the
+ * interface becomes blocked.
+ */
+ int exposeLoads();
+ /** Same as above, but only for one thread. */
+ int exposeLoads(ThreadID tid);
+
+ /** [mengjia]
+ * attempt to update FenceDelay state for load insts
+ */
+ void updateVisibleState();
+ /** Same as above, but only for one thread. */
+ void updateVisibleState(ThreadID tid);
+
+
/**
* Squash instructions from a thread until the specified sequence number.
*/
@@ -257,6 +274,10 @@ class LSQ {
int numStoresToWB(ThreadID tid)
{ return thread[tid].numStoresToWB(); }
+ /** Returns the number of stores a specific thread has to write back. */
+ int numLoadsToVLD(ThreadID tid)
+ { return thread[tid].numLoadsToVLD(); }
+
/** Returns if the LSQ will write back to memory this cycle. */
bool willWB();
/** Returns if the LSQ of a specific thread will write back to memory this
diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh
index 56b95a5b6..1e78f534e 100644
--- a/src/cpu/o3/lsq_impl.hh
+++ b/src/cpu/o3/lsq_impl.hh
@@ -310,6 +310,44 @@ LSQ<Impl>::writebackStores()
}
}
+// [mengjia]
+template<class Impl>
+int
+LSQ<Impl>::exposeLoads()
+{
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
+
+ int exposedLoads = 0;
+ while (threads != end) {
+ ThreadID tid = *threads++;
+
+ if (numLoadsToVLD(tid) > 0) {
+ DPRINTF(Writeback,"[tid:%i] Validate loads. %i loads "
+ "available for Validate.\n", tid, numLoadsToVLD(tid));
+ }
+
+ exposedLoads += thread[tid].exposeLoads();
+ }
+ return exposedLoads;
+}
+
+
+// [mengjia]
+template<class Impl>
+void
+LSQ<Impl>::updateVisibleState()
+{
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
+
+ while (threads != end) {
+ ThreadID tid = *threads++;
+
+ thread[tid].updateVisibleState();
+ }
+}
+
template<class Impl>
bool
LSQ<Impl>::violation()
@@ -339,6 +377,7 @@ LSQ<Impl>::recvReqRetry()
}
}
+// [InvisiSpec] Callback function for receiving a response
template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
@@ -347,6 +386,17 @@ LSQ<Impl>::recvTimingResp(PacketPtr pkt)
DPRINTF(LSQ, "Got error packet back for address: %#X\n",
pkt->getAddr());
+ // for expose or validate request,
+ // if the instruction is squashed, maybe the req has been deleted
+ if (pkt->isValidate() || pkt->isExpose()){
+ if (!pkt->req){
+ delete pkt;
+ return true;
+ }
+ DPRINTF(LSQ, "Receive an expose/validate response, idx=%d\n",
+ pkt->reqIdx);
+ }
+
thread[cpu->contextToThread(pkt->req->contextId())]
.completeDataAccess(pkt);
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index f5b60b2fc..d3dd34ee2 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -136,6 +136,12 @@ class LSQUnit {
*/
void checkSnoop(PacketPtr pkt);
+ // [InvisiSpec] check whether current request will hit in the
+ // spec buffer or not
+ int checkSpecBuffHit(const RequestPtr req, const int req_idx);
+ void setSpecBuffState(const RequestPtr req);
+
+ bool checkPrevLoadsExecuted(const int req_idx);
/** Executes a load instruction. */
Fault executeLoad(DynInstPtr &inst);
@@ -154,6 +160,15 @@ class LSQUnit {
/** Writes back stores. */
void writebackStores();
+ /** [mengjia] Validate loads. */
+ int exposeLoads();
+
+ /** [mengjia] Update Visbible State.
+ * In the mode defence relying on fence: setup fenceDelay state.
+ * In the mode defence relying on invisibleSpec:
+ * setup readyToExpose*/
+ void updateVisibleState();
+
/** Completes the data access that has been returned from the
* memory system. */
void completeDataAccess(PacketPtr pkt);
@@ -219,6 +234,8 @@ class LSQUnit {
/** Returns the number of stores to writeback. */
int numStoresToWB() { return storesToWB; }
+ /** [InvisiSpec] Returns the number of loads to validate. */
+ int numLoadsToVLD() { return loadsToVLD; }
/** Returns if the LSQ unit will writeback on this cycle. */
bool willWB() { return storeQueue[storeWBIdx].canWB &&
@@ -235,18 +252,30 @@ class LSQUnit {
/** Writes back the instruction, sending it to IEW. */
void writeback(DynInstPtr &inst, PacketPtr pkt);
+ // [InvisiSpec] complete Validates
+ void completeValidate(DynInstPtr &inst, PacketPtr pkt);
+
/** Writes back a store that couldn't be completed the previous cycle. */
void writebackPendingStore();
+ /** Validates a load that couldn't be completed the previous cycle. */
+ void validatePendingLoad();
+
/** Handles completing the send of a store to memory. */
void storePostSend(PacketPtr pkt);
+ /** Handles completing the send of a validation to memory. */
+ //void validationPostSend(PacketPtr pkt, int loadVLDIdx);
+
/** Completes the store at the specified index. */
void completeStore(int store_idx);
/** Attempts to send a store to the cache. */
bool sendStore(PacketPtr data_pkt);
+ /** Attempts to send a validation to the cache. */
+ //bool sendValidation(PacketPtr data_pkt, int loadVLDIdx);
+
/** Increments the given store index (circular queue). */
inline void incrStIdx(int &store_idx) const;
/** Decrements the given store index (circular queue). */
@@ -409,6 +438,8 @@ class LSQUnit {
/** The number of load instructions in the LQ. */
int loads;
+ /** [mengjia] The number of store instructions in the SQ waiting to writeback. */
+ int loadsToVLD;
/** The number of store instructions in the SQ. */
int stores;
/** The number of store instructions in the SQ waiting to writeback. */
@@ -416,6 +447,10 @@ class LSQUnit {
/** The index of the head instruction in the LQ. */
int loadHead;
+ /** [mengjia] The index of the first instruction that may be ready to be
+ * validated, and has not yet been validated.
+ */
+ //int pendingLoadVLDIdx;
/** The index of the tail instruction in the LQ. */
int loadTail;
@@ -432,7 +467,7 @@ class LSQUnit {
/** The number of cache ports available each cycle (stores only). */
int cacheStorePorts;
- /** The number of used cache ports in this cycle by stores. */
+ /** [InvisiSpec] The number of used cache ports in this cycle by stores. */
int usedStorePorts;
//list<InstSeqNum> mshrSeqNums;
@@ -458,6 +493,9 @@ class LSQUnit {
/** Whehter or not a store is blocked due to the memory system. */
bool isStoreBlocked;
+ /** Whehter or not a validation is blocked due to the memory system. */
+ bool isValidationBlocked;
+
/** Whether or not a store is in flight. */
bool storeInFlight;
@@ -471,9 +509,21 @@ class LSQUnit {
/** The packet that is pending free cache ports. */
PacketPtr pendingPkt;
+ /* [mengjia] define scheme variables */
+ // Flag for whether issue packets in execution stage
+ bool loadInExec;
+
+ // Flag for whether to use invisible speculative load
+ bool isInvisibleSpec;
+
/** Flag for memory model. */
bool needsTSO;
+ // Flag for whether defending against spectre attack or future attacks
+ bool isFuturistic;
+ bool allowSpecBuffHit;
+ /* [mengjia] different schemes determine values of 4 variables. */
+
// Will also need how many read/write ports the Dcache has. Or keep track
// of that in stage that is one level up, and only call executeLoad/Store
// the appropriate number of times.
@@ -508,6 +558,12 @@ class LSQUnit {
/** Number of times the LSQ is blocked due to the cache. */
Stats::Scalar lsqCacheBlocked;
+ Stats::Scalar specBuffHits;
+ Stats::Scalar specBuffMisses;
+ Stats::Scalar numValidates;
+ Stats::Scalar numExposes;
+ Stats::Scalar numConvertedExposes;
+
public:
/** Executes the load at the given index. */
Fault read(const RequestPtr &req,
@@ -549,6 +605,8 @@ class LSQUnit {
bool isStalled() { return stalled; }
};
+
+// IMPORTANT: the function to issue packets, interact with memory [mengjia]
template <class Impl>
Fault
LSQUnit<Impl>::read(const RequestPtr &req,
@@ -578,6 +636,7 @@ LSQUnit<Impl>::read(const RequestPtr &req,
}
// Check the SQ for any previous stores that might lead to forwarding
+ // why we have store queue index for a load operation? [mengjia]
int store_idx = load_inst->sqIdx;
int store_size = 0;
@@ -587,6 +646,7 @@ LSQUnit<Impl>::read(const RequestPtr &req,
load_idx, store_idx, storeHead, req->getPaddr(),
sreqLow ? " split" : "");
+ // LLSC: load-link/store-conditional [mengjia]
if (req->isLLSC()) {
assert(!sreqLow);
// Disable recording the result temporarily. Writing to misc
@@ -597,12 +657,14 @@ LSQUnit<Impl>::read(const RequestPtr &req,
load_inst->recordResult(true);
}
+ // request to memory mapped register [mengjia]
if (req->isMmappedIpr()) {
assert(!load_inst->memData);
load_inst->memData = new uint8_t[64];
ThreadContext *thread = cpu->tcBase(lsqID);
Cycles delay(0);
+
PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq);
data_pkt->dataStatic(load_inst->memData);
@@ -750,6 +812,7 @@ LSQUnit<Impl>::read(const RequestPtr &req,
DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
load_inst->seqNum, load_inst->pcState());
+
// Allocate memory if this is the first time a load is issued.
if (!load_inst->memData) {
load_inst->memData = new uint8_t[req->getSize()];
@@ -757,10 +820,35 @@ LSQUnit<Impl>::read(const RequestPtr &req,
// if we the cache is not blocked, do cache access
bool completedFirst = false;
- PacketPtr data_pkt = Packet::createRead(req);
+
+ PacketPtr data_pkt = NULL;
PacketPtr fst_data_pkt = NULL;
PacketPtr snd_data_pkt = NULL;
+ // According to the isInsivisibleSpec variable to create
+ // corresponding type of packets [mengjia]
+ bool sendSpecRead = false;
+ if(isInvisibleSpec){
+ if(!load_inst->readyToExpose()){
+ assert(!req->isLLSC());
+ assert(!req->isStrictlyOrdered());
+ assert(!req->isMmappedIpr());
+ sendSpecRead = true;
+ DPRINTF(LSQUnit, "send a spec read for inst [sn:%lli]\n",
+ load_inst->seqNum);
+ }
+
+ }
+
+ assert( !(sendSpecRead && load_inst->isSpecCompleted()) &&
+ "Sending specRead twice for the same load insts");
+
+ if(sendSpecRead){
+ data_pkt = Packet::createReadSpec(req);
+ }else{
+ data_pkt = Packet::createRead(req);
+ }
+
data_pkt->dataStatic(load_inst->memData);
LSQSenderState *state = new LSQSenderState;
@@ -772,17 +860,64 @@ LSQUnit<Impl>::read(const RequestPtr &req,
if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
// Point the first packet at the main data packet.
fst_data_pkt = data_pkt;
+
+ fst_data_pkt->setFirst();
+ if (sendSpecRead){
+ int src_idx = checkSpecBuffHit(req, load_idx);
+ if (src_idx != -1) {
+ if (allowSpecBuffHit){
+ data_pkt->setOnlyAccessSpecBuff();
+ }
+ data_pkt->srcIdx = src_idx;
+ specBuffHits++;
+ }else{
+ specBuffMisses++;
+ }
+ }
+ fst_data_pkt->reqIdx = load_idx;
} else {
// Create the split packets.
- fst_data_pkt = Packet::createRead(sreqLow);
- snd_data_pkt = Packet::createRead(sreqHigh);
+ if(sendSpecRead){
+
+ fst_data_pkt = Packet::createReadSpec(sreqLow);
+ int fst_src_idx = checkSpecBuffHit(sreqLow, load_idx);
+ if ( fst_src_idx != -1 ) {
+ if (allowSpecBuffHit){
+ fst_data_pkt->setOnlyAccessSpecBuff();
+ }
+ fst_data_pkt->srcIdx = fst_src_idx;
+ specBuffHits++;
+ } else {
+ specBuffMisses++;
+ }
+
+ snd_data_pkt = Packet::createReadSpec(sreqHigh);
+ int snd_src_idx = checkSpecBuffHit(sreqHigh, load_idx);
+ if ( snd_src_idx != -1 ) {
+ if (allowSpecBuffHit){
+ snd_data_pkt->setOnlyAccessSpecBuff();
+ }
+ snd_data_pkt->srcIdx = snd_src_idx;
+ specBuffHits++;
+ } else {
+ specBuffMisses++;
+ }
+ }else{
+ fst_data_pkt = Packet::createRead(sreqLow);
+ snd_data_pkt = Packet::createRead(sreqHigh);
+ }
+ fst_data_pkt->setFirst();
fst_data_pkt->dataStatic(load_inst->memData);
snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
fst_data_pkt->senderState = state;
snd_data_pkt->senderState = state;
+ fst_data_pkt->reqIdx = load_idx;
+ snd_data_pkt->reqIdx = load_idx;
+ fst_data_pkt->isSplit = true;
+ snd_data_pkt->isSplit = true;
state->isSplit = true;
state->outstanding = 2;
state->mainPkt = data_pkt;
@@ -794,6 +929,8 @@ LSQUnit<Impl>::read(const RequestPtr &req,
// @todo We should account for cache port contention
// and arbitrate between loads and stores.
bool successful_load = true;
+ // MARK: here is the place memory request of read is sent [mengjia]
+ // [InvisiSpec] Sending out a memory request
if (!dcachePort->sendTimingReq(fst_data_pkt)) {
successful_load = false;
} else if (TheISA::HasUnalignedMemAcc && sreqLow) {
@@ -850,6 +987,62 @@ LSQUnit<Impl>::read(const RequestPtr &req,
return NoFault;
}
+ DPRINTF(LSQUnit, "successfully sent out packet(s) for inst [sn:%lli]\n",
+ load_inst->seqNum);
+ // Set everything ready for expose/validation after the read is
+ // successfully sent out
+ if(sendSpecRead){ // sending actual request
+
+ // [mengjia] Here we set the needExposeOnly flag
+ if (needsTSO && !load_inst->isDataPrefetch()){
+ // need to check whether previous load_instructions specComplete or not
+ if ( checkPrevLoadsExecuted(load_idx) ){
+ load_inst->needExposeOnly(true);
+ DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as "
+ "needExposeOnly\n",
+ load_inst->pcState(), load_inst->seqNum);
+ } else {
+ DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as "
+ "needValidation\n",
+ load_inst->pcState(), load_inst->seqNum);
+ }
+ }else{
+ //if RC, always only need expose
+ load_inst->needExposeOnly(true);
+ DPRINTF(LSQUnit, "Set load PC %s, [sn:%lli] as needExposeOnly\n",
+ load_inst->pcState(), load_inst->seqNum);
+ }
+
+ load_inst->needPostFetch(true);
+ assert(!req->isMmappedIpr());
+ //save expose requestPtr
+ if (TheISA::HasUnalignedMemAcc && sreqLow) {
+ load_inst->postSreqLow = std::make_shared<Request>(*sreqLow);
+ load_inst->postSreqHigh = std::make_shared<Request>(*sreqHigh);
+ load_inst->postReq = nullptr;
+ }else{
+ load_inst->postReq = std::make_shared<Request>(*req);
+ load_inst->postSreqLow = nullptr;
+ load_inst->postSreqHigh = nullptr;
+ }
+ load_inst->needDeletePostReq(true);
+ DPRINTF(LSQUnit, "created validation/expose"
+ " request for inst [sn:%lli]"
+ "req=%#x, reqLow=%#x, reqHigh=%#x\n",
+ load_inst->seqNum, (Addr)(load_inst->postReq),
+ (Addr)(load_inst->postSreqLow),
+ (Addr)(load_inst->postSreqHigh));
+ } else {
+ load_inst->setExposeCompleted();
+ load_inst->needPostFetch(false);
+ if (TheISA::HasUnalignedMemAcc && sreqLow) {
+ setSpecBuffState(sreqLow);
+ setSpecBuffState(sreqHigh);
+ } else {
+ setSpecBuffState(req);
+ }
+ }
+
return NoFault;
}
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
index c2750be7d..3da248977 100644
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -89,6 +89,9 @@ LSQUnit<Impl>::WritebackEvent::description() const
return "Store writeback";
}
+
+// [InvisiSpec] This function deals with
+// acknowledge response to memory read/write
template<class Impl>
void
LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
@@ -107,6 +110,17 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
return;
}
+ // need to update hit info for corresponding instruction
+ if (pkt->isL1Hit() && pkt->isSpec() && pkt->isRead()){
+ if (state->isSplit && ! pkt->isFirst()){
+ inst->setL1HitHigh();
+ } else {
+ inst->setL1HitLow();
+ }
+ } else if (!pkt->isSpec()) {
+ setSpecBuffState(pkt->req);
+ }
+
// If this is a split access, wait until all packets are received.
if (TheISA::HasUnalignedMemAcc && !state->complete()) {
return;
@@ -117,7 +131,9 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
if (!state->noWB) {
// Only loads and store conditionals perform the writeback
// after receving the response from the memory
+ // [mengjia] validation also needs writeback, expose do not need
assert(inst->isLoad() || inst->isStoreConditional());
+
if (!TheISA::HasUnalignedMemAcc || !state->isSplit ||
!state->isLoad) {
writeback(inst, pkt);
@@ -129,6 +145,10 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
if (inst->isStore()) {
completeStore(state->idx);
}
+
+ if (pkt->isValidate() || pkt->isExpose()) {
+ completeValidate(inst, pkt);
+ }
}
if (TheISA::HasUnalignedMemAcc && state->isSplit && state->isLoad) {
@@ -136,6 +156,7 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
}
pkt->req->setAccessLatency();
+ // probe point, not sure about the mechanism [mengjia]
cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
delete state;
@@ -143,8 +164,8 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
template <class Impl>
LSQUnit<Impl>::LSQUnit()
- : loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
- isStoreBlocked(false), storeInFlight(false), hasPendingPkt(false),
+ : loads(0), loadsToVLD(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
+ isStoreBlocked(false), isValidationBlocked(false), storeInFlight(false), hasPendingPkt(false),
pendingPkt(nullptr)
{
}
@@ -178,7 +199,52 @@ LSQUnit<Impl>::init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
depCheckShift = params->LSQDepCheckShift;
checkLoads = params->LSQCheckLoads;
cacheStorePorts = params->cacheStorePorts;
+
+ // According to the scheme, we need to define actions as follows.
+ // loadInExec: if False, no packets are sent in execution stage;
+ // if True, send either readReq or readSpecReq
+ // isInvisibleSpec: if True, send readSpecReq in execution statge;
+ // if False, send readReq
+ // needsTSO: if True, squash read on receiving invalidations, and only allow one outstanding write at a time;
+ // if False, no squash on receiving invalidaiton, and allow multiple outstanding writes.
+ // isConservative: if True, react after all preceding instructions complete/no exception;
+ // if False, react only after all preceding stores/brancehs complete
+ const std::string scheme = params->simulateScheme;
+ if (scheme.compare("UnsafeBaseline")==0){
+ loadInExec = true;
+ isInvisibleSpec = false; // send real request
+ isFuturistic = false; // not relevant in unsafe mode.
+ }else if (scheme.compare("FuturisticSafeFence")==0){
+ // "LFENCE" before every load
+ loadInExec = false;
+ isInvisibleSpec = false; // not used since loadInExec is false
+ isFuturistic = true; // send readReq at head of ROB
+ }else if (scheme.compare("FuturisticSafeInvisibleSpec")==0){
+ // only make load visible when all preceding instructions
+ // complete and no exception
+ loadInExec = true;
+ isInvisibleSpec = true; // send request but not change cache state
+ isFuturistic = true; // conservative condition to send validations
+ }else if (scheme.compare("SpectreSafeFence")==0){
+ // "LFENCE" after every branch
+ loadInExec = false;
+ isInvisibleSpec = false; // not used since loadInExec is false
+ isFuturistic = false; // commit when preceding branches are resolved
+ }else if (scheme.compare("SpectreSafeInvisibleSpec")==0){
+ // make load visible when all preceiding branches are resolved
+ loadInExec = true;
+ isInvisibleSpec = true; // send request but not change cache state
+ isFuturistic = false; // only deal with spectre attacks
+ }else {
+ cprintf("ERROR: unsupported simulation scheme: %s!\n", scheme);
+ exit(1);
+ }
needsTSO = params->needsTSO;
+ allowSpecBuffHit = params->allowSpecBuffHit;
+ cprintf("Info: simulation uses scheme: %s; "
+ "needsTSO=%d; allowSpecBuffHit=%d\n",
+ scheme, needsTSO, allowSpecBuffHit);
+ // [mengjia] end of setting configuration variables
resetState();
}
@@ -188,7 +254,7 @@ template<class Impl>
void
LSQUnit<Impl>::resetState()
{
- loads = stores = storesToWB = 0;
+ loads = stores = loadsToVLD = storesToWB = 0;
loadHead = loadTail = 0;
@@ -258,6 +324,26 @@ LSQUnit<Impl>::regStats()
lsqCacheBlocked
.name(name() + ".cacheBlocked")
.desc("Number of times an access to memory failed due to the cache being blocked");
+
+ specBuffHits
+ .name(name() + ".specBuffHits")
+ .desc("Number of times an access hits in speculative buffer");
+
+ specBuffMisses
+ .name(name() + ".specBuffMisses")
+ .desc("Number of times an access misses in speculative buffer");
+
+ numValidates
+ .name(name() + ".numValidates")
+ .desc("Number of validates sent to cache");
+
+ numExposes
+ .name(name() + ".numExposes")
+ .desc("Number of exposes sent to cache");
+
+ numConvertedExposes
+ .name(name() + ".numConvertedExposes")
+ .desc("Number of exposes converted from validation");
}
template<class Impl>
@@ -289,6 +375,7 @@ LSQUnit<Impl>::drainSanityCheck() const
assert(!loadQueue[i]);
assert(storesToWB == 0);
+ assert(loadsToVLD == 0);
assert(!retryPkt);
}
@@ -377,6 +464,7 @@ LSQUnit<Impl>::insertLoad(DynInstPtr &load_inst)
incrLdIdx(loadTail);
++loads;
+
}
template <class Impl>
@@ -400,6 +488,7 @@ LSQUnit<Impl>::insertStore(DynInstPtr &store_inst)
++stores;
}
+// It is an empty function? why? [mengjia]
template <class Impl>
typename Impl::DynInstPtr
LSQUnit<Impl>::getMemDepViolator()
@@ -460,13 +549,16 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
Addr load_addr_high = ld_inst->physEffAddrHigh & cacheBlockMask;
// Check that this snoop didn't just invalidate our lock flag
- if (ld_inst->effAddrValid() && (load_addr_low == invalidate_addr
- || load_addr_high == invalidate_addr)
+ // [InvisiSpec] also make sure the instruction has been sent out
+ // otherwise, we cause unneccessary squash
+ if (ld_inst->effAddrValid() && !ld_inst->fenceDelay()
+ && (load_addr_low == invalidate_addr
+ || load_addr_high == invalidate_addr)
&& ld_inst->memReqFlags & Request::LLSC)
TheISA::handleLockedSnoopHit(ld_inst.get());
}
- // If this is the only load in the LSQ we don't care
+ // If not match any load entry, then do nothing [mengjia]
if (load_idx == loadTail)
return;
@@ -477,7 +569,10 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
while (load_idx != loadTail) {
DynInstPtr ld_inst = loadQueue[load_idx];
- if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
+ // [SafeSpce] check snoop violation when the load has
+ // been sent out; otherwise, unneccessary squash
+ if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()
+ || ld_inst->fenceDelay()) {
incrLdIdx(load_idx);
continue;
}
@@ -495,11 +590,29 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
// all other loads, this load as well as *all* subsequent loads
// need to be squashed to prevent possible load reordering.
force_squash = true;
+
+ // [InvisiSpec] in InvisiSpec, we do not need to squash
+ // the load at the head of LQ,
+ // as well as the one do not need validation
+ if (isInvisibleSpec &&
+ (load_idx==loadHead || ld_inst->needExposeOnly())){
+ force_squash = false;
+ }
+ if (!pkt->isExternalEviction() && isInvisibleSpec){
+ force_squash = false;
+ ld_inst->clearL1HitHigh();
+ ld_inst->clearL1HitLow();
+ }
}
if (ld_inst->possibleLoadViolation() || force_squash) {
DPRINTF(LSQUnit, "Conflicting load at addr %#x [sn:%lli]\n",
pkt->getAddr(), ld_inst->seqNum);
+ //[InvisiSpec] mark the load hit invalidation
+ ld_inst->hitInvalidation(true);
+ if (pkt->isExternalEviction()){
+ ld_inst->hitExternalEviction(true);
+ }
// Mark the load for re-execution
ld_inst->fault = std::make_shared<ReExec>();
} else {
@@ -524,6 +637,103 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
}
template <class Impl>
+bool
+LSQUnit<Impl>::checkPrevLoadsExecuted(int req_idx)
+{
+ int load_idx = loadHead;
+ while (load_idx != req_idx){
+ if (!loadQueue[load_idx]->isExecuted()){
+ // if at least on load ahead of current load
+ // does not finish spec access,
+ // then return false
+ return false;
+ }
+ incrLdIdx(load_idx);
+ }
+
+ //if all executed, return true
+ return true;
+}
+
+template <class Impl>
+void
+LSQUnit<Impl>::setSpecBuffState(RequestPtr expose_req)
+{
+ Addr req_eff_addr1 = expose_req->getPaddr() & cacheBlockMask;
+
+ int load_idx = loadHead;
+ while (load_idx != loadTail){
+ DynInstPtr ld_inst = loadQueue[load_idx];
+ if (ld_inst->effAddrValid()){
+
+ Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask;
+ Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask;
+ if (ld_eff_addr1 == req_eff_addr1){
+ ld_inst->setSpecBuffObsoleteLow();
+ } else if (ld_eff_addr2 == req_eff_addr1){
+ ld_inst->setSpecBuffObsoleteHigh();
+ }
+ }
+ incrLdIdx(load_idx);
+ }
+}
+
+
+template <class Impl>
+int
+LSQUnit<Impl>::checkSpecBuffHit(RequestPtr req, int req_idx)
+{
+
+ Addr req_eff_addr1 = req->getPaddr() & cacheBlockMask;
+ //Addr req_eff_addr2 = (req->getPaddr() + req->getSize()-1) & cacheBlockMask;
+ // the req should be within the same cache line
+ //assert (req_eff_addr1 == req_eff_addr2);
+ assert (!loadQueue[req_idx]->isExecuted());
+
+ int load_idx = loadHead;
+
+ while (load_idx != loadTail){
+ DynInstPtr ld_inst = loadQueue[load_idx];
+ if (ld_inst->effAddrValid()){
+ Addr ld_eff_addr1 = ld_inst->physEffAddrLow & cacheBlockMask;
+ Addr ld_eff_addr2 = ld_inst->physEffAddrHigh & cacheBlockMask;
+
+ if ((req_eff_addr1 == ld_eff_addr1 && ld_inst->isL1HitLow())
+ || (req_eff_addr1 == ld_eff_addr2 && ld_inst->isL1HitHigh())){
+ return -1;
+ //already in L1, do not copy from buffer
+ } else {
+
+ if (ld_inst->isExecuted() && ld_inst->needPostFetch()
+ && !ld_inst->isSquashed() && ld_inst->fault==NoFault){
+ if (req_eff_addr1 == ld_eff_addr1 && !ld_inst->isL1HitLow()
+ && !ld_inst->isSpecBuffObsoleteLow()){
+ DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] "
+ "and [sn:%lli] (low) at address %#x\n",
+ loadQueue[req_idx]->seqNum, ld_inst->seqNum,
+ req_eff_addr1);
+ return load_idx;
+ } else if ( ld_eff_addr2 !=0 &&
+ req_eff_addr1 == ld_eff_addr2 && !ld_inst->isL1HitHigh()
+ && !ld_inst->isSpecBuffObsoleteHigh()){
+ DPRINTF(LSQUnit, "Detected Spec Hit with inst [sn:%lli] "
+ "and [sn:%lli] (high) at address %#x\n",
+ loadQueue[req_idx]->seqNum, ld_inst->seqNum,
+ req_eff_addr1);
+ return load_idx;
+ }
+ }
+ }
+ }
+ incrLdIdx(load_idx);
+ }
+
+ return -1;
+}
+
+
+
+template <class Impl>
Fault
LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
{
@@ -537,7 +747,10 @@ LSQUnit<Impl>::checkViolations(int load_idx, DynInstPtr &inst)
*/
while (load_idx != loadTail) {
DynInstPtr ld_inst = loadQueue[load_idx];
- if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
+ // [InvisiSpec] no need to check violation for unsent load
+ // otherwise, unneccessary squash
+ if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()
+ || ld_inst->fenceDelay()) {
incrLdIdx(load_idx);
continue;
}
@@ -616,14 +829,25 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
assert(!inst->isSquashed());
+ // use ISA interface to generate correct access request
+ // initiateAcc is implemented in dyn_inst_impl.hh
+ // The interface calls corresponding ISA defined function
+ // check buld/ARM/arch/generic/memhelper.hh for more info [mengjia]
load_fault = inst->initiateAcc();
- if (inst->isTranslationDelayed() &&
+ // if translation delay, deferMem [mengjia]
+ // in the case it is not the correct time to send the load
+ // also defer it
+ if ( (inst->isTranslationDelayed() || inst->fenceDelay()
+ || inst->specTLBMiss()) &&
load_fault == NoFault)
return load_fault;
// If the instruction faulted or predicated false, then we need to send it
// along to commit without the instruction completing.
+ //
+ // if it is faulty, not execute it, send it to commit, and commit statge will deal with it
+ // here is signling the ROB, the inst can commit [mengjia]
if (load_fault != NoFault || !inst->readPredicate()) {
// Send this instruction to commit, also make sure iew stage
// realizes there is activity. Mark it as executed unless it
@@ -671,6 +895,8 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
// address. If so, then we have a memory ordering violation.
int load_idx = store_inst->lqIdx;
+ // TODO: Check whether this store tries to get an exclusive copy
+ // of target line [mengjia]
Fault store_fault = store_inst->initiateAcc();
if (store_inst->isTranslationDelayed() &&
@@ -769,15 +995,343 @@ LSQUnit<Impl>::writebackPendingStore()
if (hasPendingPkt) {
assert(pendingPkt != NULL);
- // If the cache is blocked, this will store the packet for retry.
- if (sendStore(pendingPkt)) {
- storePostSend(pendingPkt);
+ if(pendingPkt->isWrite()){
+ // If the cache is blocked, this will store the packet for retry.
+ if (sendStore(pendingPkt)) {
+ storePostSend(pendingPkt);
+ }
+ pendingPkt = NULL;
+ hasPendingPkt = false;
}
- pendingPkt = NULL;
- hasPendingPkt = false;
}
}
+
+
+
+// [InvisiSpec] update FenceDelay State
+template <class Impl>
+void
+LSQUnit<Impl>::updateVisibleState()
+{
+ int load_idx = loadHead;
+
+ //iterate all the loads and update its fencedelay state accordingly
+ while (load_idx != loadTail && loadQueue[load_idx]){
+ DynInstPtr inst = loadQueue[load_idx];
+
+ if (!loadInExec){
+
+ if ( (isFuturistic && inst->isPrevInstsCommitted()) ||
+ (!isFuturistic && inst->isPrevBrsCommitted())){
+ if (inst->fenceDelay()){
+ DPRINTF(LSQUnit, "Clear virtual fence for "
+ "inst [sn:%lli] PC %s\n",
+ inst->seqNum, inst->pcState());
+ }
+ inst->fenceDelay(false);
+ }else {
+ if (!inst->fenceDelay()){
+ DPRINTF(LSQUnit, "Deffering an inst [sn:%lli] PC %s"
+ " due to virtual fence\n",
+ inst->seqNum, inst->pcState());
+ }
+ inst->fenceDelay(true);
+ }
+ inst->readyToExpose(true);
+ } else if (loadInExec && isInvisibleSpec){
+
+ if ( (isFuturistic && inst->isPrevInstsCompleted()) ||
+ (!isFuturistic && inst->isPrevBrsResolved())){
+ if (!inst->readyToExpose()){
+ DPRINTF(LSQUnit, "Set readyToExpose for "
+ "inst [sn:%lli] PC %s\n",
+ inst->seqNum, inst->pcState());
+ if (inst->needPostFetch()){
+ ++loadsToVLD;
+ }
+ }
+ inst->readyToExpose(true);
+ }else {
+ if (inst->readyToExpose()){
+ DPRINTF(LSQUnit, "The load can not be validated "
+ "[sn:%lli] PC %s\n",
+ inst->seqNum, inst->pcState());
+ assert(0);
+ //--loadsToVLD;
+ }
+ inst->readyToExpose(false);
+ }
+ inst->fenceDelay(false);
+ } else {
+ inst->readyToExpose(true);
+ inst->fenceDelay(false);
+ }
+ incrLdIdx(load_idx);
+ }
+}
+
+// [InvisiSpec] validate loads
+template <class Impl>
+int
+LSQUnit<Impl>::exposeLoads()
+{
+ if(!isInvisibleSpec){
+ assert(loadsToVLD==0
+ && "request validation on Non invisible Spec mode");
+ }
+
+ int old_loadsToVLD = loadsToVLD;
+
+ // [InvisiSpec] Note:
+ // need to iterate from the head every time
+ // since the load can be exposed out-of-order
+ int loadVLDIdx = loadHead;
+
+ while (loadsToVLD > 0 &&
+ loadVLDIdx != loadTail &&
+ loadQueue[loadVLDIdx]) {
+
+ if (loadQueue[loadVLDIdx]->isSquashed()){
+ incrLdIdx(loadVLDIdx);
+ continue;
+ }
+ // skip the loads that either do not need to expose
+ // or exposed already
+ if(!loadQueue[loadVLDIdx]->needPostFetch()
+ || loadQueue[loadVLDIdx]->isExposeSent() ){
+ incrLdIdx(loadVLDIdx);
+ continue;
+ }
+
+ DynInstPtr load_inst = loadQueue[loadVLDIdx];
+ if (loadQueue[loadVLDIdx]->fault!=NoFault){
+ //load is executed, so it wait for expose complete
+ //to send it to commit, regardless of whether it is ready
+ //to expose
+ load_inst->setExposeCompleted();
+ load_inst->setExposeSent();
+ loadsToVLD--;
+ if (load_inst->isExecuted()){
+ DPRINTF(LSQUnit, "Execute finished and gets violation fault."
+ "Send inst [sn:%lli] to commit stage.\n",
+ load_inst->seqNum);
+ iewStage->instToCommit(load_inst);
+ iewStage->activityThisCycle();
+ }
+ incrLdIdx(loadVLDIdx);
+ continue;
+ }
+
+ // skip the loads that need expose but
+ // are not ready
+ if (loadQueue[loadVLDIdx]->needPostFetch()
+ && !loadQueue[loadVLDIdx]->readyToExpose()){
+ incrLdIdx(loadVLDIdx);
+ continue;
+ }
+
+ assert(loadQueue[loadVLDIdx]->needPostFetch()
+ && loadQueue[loadVLDIdx]->readyToExpose() );
+
+ assert(!load_inst->isCommitted());
+
+
+ RequestPtr req = load_inst->postReq;
+ RequestPtr sreqLow = load_inst->postSreqLow;
+ RequestPtr sreqHigh = load_inst->postSreqHigh;
+
+ // we should not have both req and sreqLow not NULL
+ assert( !(req && sreqLow));
+
+ DPRINTF(LSQUnit, "Validate/Expose request for inst [sn:%lli]"
+ " PC= %s. req=%#x, reqLow=%#x, reqHigh=%#x\n",
+ load_inst->seqNum, load_inst->pcState(),
+ (Addr)(load_inst->postReq),
+ (Addr)(load_inst->postSreqLow), (Addr)(load_inst->postSreqHigh));
+
+ PacketPtr data_pkt = NULL;
+ PacketPtr snd_data_pkt = NULL;
+
+ LSQSenderState *state = new LSQSenderState;
+ state->isLoad = false;
+ state->idx = loadVLDIdx;
+ state->inst = load_inst;
+ state->noWB = true;
+
+ bool split = false;
+ if (TheISA::HasUnalignedMemAcc && sreqLow) {
+ split = true;
+ } else {
+ assert(req);
+ }
+
+ bool onlyExpose = false;
+ if (!split) {
+ if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){
+ data_pkt = Packet::createExpose(req);
+ onlyExpose = true;
+ }else {
+ data_pkt = Packet::createValidate(req);
+ if (!load_inst->vldData)
+ load_inst->vldData = new uint8_t[1];
+ data_pkt->dataStatic(load_inst->vldData);
+ }
+ data_pkt->senderState = state;
+ data_pkt->setFirst();
+ data_pkt->reqIdx = loadVLDIdx;
+ DPRINTF(LSQUnit, "contextid = %d\n", req->contextId());
+ } else {
+ // allocate memory if we need at least one validation
+ if (!load_inst->needExposeOnly() &&
+ (!load_inst->isL1HitLow() || !load_inst->isL1HitHigh())){
+ if (!load_inst->vldData)
+ load_inst->vldData = new uint8_t[2];
+ } else {
+ onlyExpose = true;
+ }
+
+ // Create the split packets. - first one
+ if (load_inst->needExposeOnly() || load_inst->isL1HitLow()){
+ data_pkt = Packet::createExpose(sreqLow);
+ }else{
+ data_pkt = Packet::createValidate(sreqLow);
+ assert(load_inst->vldData);
+ data_pkt->dataStatic(load_inst->vldData);
+ }
+
+ // Create the split packets. - second one
+ if (load_inst->needExposeOnly() || load_inst->isL1HitHigh()){
+ snd_data_pkt = Packet::createExpose(sreqHigh);
+ } else {
+ snd_data_pkt = Packet::createValidate(sreqHigh);
+ assert(load_inst->vldData);
+ snd_data_pkt->dataStatic(&(load_inst->vldData[1]));
+ }
+
+ data_pkt->senderState = state;
+ data_pkt->setFirst();
+ snd_data_pkt->senderState = state;
+ data_pkt->reqIdx = loadVLDIdx;
+ snd_data_pkt->reqIdx = loadVLDIdx;
+
+ data_pkt->isSplit = true;
+ snd_data_pkt->isSplit = true;
+ state->isSplit = true;
+ state->outstanding = 2;
+ state->mainPkt = data_pkt;
+
+ DPRINTF(LSQUnit, "contextid = %d, %d\n",
+ sreqLow->contextId(), sreqHigh->contextId());
+ req = sreqLow;
+ }
+
+ assert(!req->isStrictlyOrdered());
+ assert(!req->isMmappedIpr());
+
+ DPRINTF(LSQUnit, "D-Cache: Validating/Exposing load idx:%i PC:%s "
+ "to Addr:%#x, data:%#x [sn:%lli]\n",
+ loadVLDIdx, load_inst->pcState(),
+ //FIXME: resultData not memData
+ req->getPaddr(), (int)*(load_inst->memData),
+ load_inst->seqNum);
+
+ bool successful_expose = true;
+ bool completedFirst = false;
+
+ if (!dcachePort->sendTimingReq(data_pkt)){
+ DPRINTF(IEW, "D-Cache became blocked when "
+ "validating [sn:%lli], will retry later\n",
+ load_inst->seqNum);
+ successful_expose = false;
+ } else {
+ if (split) {
+ // If split, try to send the second packet too
+ completedFirst = true;
+ assert(snd_data_pkt);
+
+ if (!dcachePort->sendTimingReq(snd_data_pkt)){
+ state->complete();
+ state->cacheBlocked = true;
+ successful_expose = false;
+ DPRINTF(IEW, "D-Cache became blocked when validating"
+ " [sn:%lli] second packet, will retry later\n",
+ load_inst->seqNum);
+ }
+ }
+ }
+
+ if (!successful_expose){
+ if (!split) {
+ delete state;
+ delete data_pkt;
+ }else{
+ if (!completedFirst){
+ delete state;
+ delete data_pkt;
+ delete snd_data_pkt;
+ } else {
+ delete snd_data_pkt;
+ }
+ }
+ //cpu->wakeCPU(); // This will cause issue(wrong activity count and affects the memory transactions
+ ++lsqCacheBlocked;
+ break;
+ } else {
+ // Here is to fix memory leakage
+ // it is ugly, but we have to do it now.
+ load_inst->needDeletePostReq(false);
+
+ // if all the packets we sent out is expose,
+ // we assume the expose is alreay completed
+ if (onlyExpose) {
+ load_inst->setExposeCompleted();
+ numExposes++;
+ } else {
+ numValidates++;
+ }
+ if (load_inst->needExposeOnly()){
+ numConvertedExposes++;
+ }
+ if (load_inst->isExecuted() && load_inst->isExposeCompleted()
+ && !load_inst->isSquashed()){
+ DPRINTF(LSQUnit, "Expose finished. Execution done."
+ "Send inst [sn:%lli] to commit stage.\n",
+ load_inst->seqNum);
+ iewStage->instToCommit(load_inst);
+ iewStage->activityThisCycle();
+ } else{
+ DPRINTF(LSQUnit, "Need validation or execution not finishes."
+ "Need to wait for readResp/validateResp "
+ "for inst [sn:%lli].\n",
+ load_inst->seqNum);
+ }
+
+ load_inst->setExposeSent();
+ --loadsToVLD;
+ incrLdIdx(loadVLDIdx);
+ if (!split){
+ setSpecBuffState(req);
+ } else {
+ setSpecBuffState(sreqLow);
+ setSpecBuffState(sreqHigh);
+ }
+ }
+ }
+
+ DPRINTF(LSQUnit, "Send validate/expose for %d insts. loadsToVLD=%d"
+ ". loadHead=%d. loadTail=%d.\n",
+ old_loadsToVLD-loadsToVLD, loadsToVLD, loadHead,
+ loadTail);
+
+ assert(loads>=0 && loadsToVLD >= 0);
+
+ return old_loadsToVLD-loadsToVLD;
+}
+
+
+
+
template <class Impl>
void
LSQUnit<Impl>::writebackStores()
@@ -797,7 +1351,7 @@ LSQUnit<Impl>::writebackStores()
if (isStoreBlocked) {
DPRINTF(LSQUnit, "Unable to write back any more stores, cache"
- " is blocked!\n");
+ " is blocked on stores!\n");
break;
}
@@ -1007,6 +1561,12 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
stallingLoadIdx = 0;
}
+ if (loadQueue[load_idx]->needPostFetch() &&
+ loadQueue[load_idx]->readyToExpose() &&
+ !loadQueue[load_idx]->isExposeSent()){
+ loadsToVLD --;
+ }
+
// Clear the smart pointer to make sure it is decremented.
loadQueue[load_idx]->setSquashed();
loadQueue[load_idx] = NULL;
@@ -1017,6 +1577,7 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
decrLdIdx(load_idx);
++lsqSquashedLoads;
+
}
if (memDepViolator && squashed_num < memDepViolator->seqNum) {
@@ -1071,6 +1632,10 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
}
}
+
+// after sent, we assume the store is complete
+// thus, we can wekeup and forward data
+// In TSO, mark inFlightStore as true to block following stores [mengjia]
template <class Impl>
void
LSQUnit<Impl>::storePostSend(PacketPtr pkt)
@@ -1100,9 +1665,58 @@ LSQUnit<Impl>::storePostSend(PacketPtr pkt)
storeInFlight = true;
}
+ DPRINTF(LSQUnit, "Post sending store for inst [sn:%lli]\n",
+ storeQueue[storeWBIdx].inst->seqNum);
incrStIdx(storeWBIdx);
}
+
+
+template <class Impl>
+void
+LSQUnit<Impl>::completeValidate(DynInstPtr &inst, PacketPtr pkt)
+{
+ iewStage->wakeCPU();
+ // if instruction fault, no need to check value,
+ // return directly
+ //assert(!inst->needExposeOnly());
+ if (inst->isExposeCompleted() || inst->isSquashed()){
+ //assert(inst->fault != NoFault);
+ //Already sent to commit, do nothing
+ return;
+ }
+ //Check validation result
+ bool validation_fail = false;
+ if (!inst->isL1HitLow() && inst->vldData[0]==0) {
+ validation_fail = true;
+ } else {
+ if (pkt->isSplit && !inst->isL1HitHigh()
+ && inst->vldData[1]==0){
+ validation_fail = true;
+ }
+ }
+ if (validation_fail){
+ // Mark the load for re-execution
+ inst->fault = std::make_shared<ReExec>();
+ inst->validationFail(true);
+ DPRINTF(LSQUnit, "Validation failed.\n",
+ inst->seqNum);
+ }
+
+ inst->setExposeCompleted();
+ if ( inst->isExecuted() && inst->isExposeCompleted() ){
+ DPRINTF(LSQUnit, "Validation finished. Execution done."
+ "Send inst [sn:%lli] to commit stage.\n",
+ inst->seqNum);
+ iewStage->instToCommit(inst);
+ iewStage->activityThisCycle();
+ } else{
+ DPRINTF(LSQUnit, "Validation done. Execution not finishes."
+ "Need to wait for readResp for inst [sn:%lli].\n",
+ inst->seqNum);
+ }
+}
+
template <class Impl>
void
LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
@@ -1116,6 +1730,11 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
return;
}
+ //DPRINTF(LSQUnit, "write back for inst [sn:%lli]\n", inst->seqNum);
+ assert(!(inst->isExecuted() && inst->isExposeCompleted() &&
+ inst->fault==NoFault) &&
+ "in this case, we will put it into ROB twice.");
+
if (!inst->isExecuted()) {
inst->setExecuted();
@@ -1135,8 +1754,42 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
}
}
- // Need to insert instruction into queue to commit
- iewStage->instToCommit(inst);
+ // [mengjia]
+ // check schemes to decide whether to set load can be committed
+ // on receiving readResp or readSpecResp
+ if(!isInvisibleSpec){
+ // if not invisibleSpec mode, we only receive readResp
+ assert(!pkt->isSpec() && !pkt->isValidate() &&
+ "Receiving spec or validation response "
+ "in non invisibleSpec mode");
+ iewStage->instToCommit(inst);
+ } else if (inst->fault != NoFault){
+ inst->setExposeCompleted();
+ inst->setExposeSent();
+ iewStage->instToCommit(inst);
+ } else {
+ //isInvisibleSpec == true
+ if (pkt->isSpec()) {
+ inst->setSpecCompleted();
+ }
+
+ assert(!pkt->isValidate() && "receiving validation response"
+ "in invisibleSpec RC mode");
+ assert(!pkt->isExpose() && "receiving expose response"
+ "on write back path");
+
+ // check whether the instruction can be committed
+ if ( !inst->isExposeCompleted() && inst->needPostFetch() ){
+ DPRINTF(LSQUnit, "Expose not finished. "
+ "Wait until expose completion"
+ " to send inst [sn:%lli] to commit stage\n", inst->seqNum);
+ }else{
+ DPRINTF(LSQUnit, "Expose and execution both finished. "
+ "Send inst [sn:%lli] to commit stage\n", inst->seqNum);
+ iewStage->instToCommit(inst);
+ }
+
+ }
iewStage->activityThisCycle();
@@ -1144,6 +1797,8 @@ LSQUnit<Impl>::writeback(DynInstPtr &inst, PacketPtr pkt)
iewStage->checkMisprediction(inst);
}
+// set store to complete [mengjia]
+// complete the store after it commits
template <class Impl>
void
LSQUnit<Impl>::completeStore(int store_idx)
@@ -1219,9 +1874,12 @@ LSQUnit<Impl>::sendStore(PacketPtr data_pkt)
retryPkt = data_pkt;
return false;
}
+ setSpecBuffState(data_pkt->req);
return true;
}
+
+
template <class Impl>
void
LSQUnit<Impl>::recvRetry()
@@ -1229,6 +1887,7 @@ LSQUnit<Impl>::recvRetry()
if (isStoreBlocked) {
DPRINTF(LSQUnit, "Receiving retry: store blocked\n");
assert(retryPkt != NULL);
+ assert(retryPkt->isWrite());
LSQSenderState *state =
dynamic_cast<LSQSenderState *>(retryPkt->senderState);
@@ -1277,7 +1936,7 @@ template <class Impl>
inline void
LSQUnit<Impl>::incrLdIdx(int &load_idx) const
{
- if (++load_idx >= LQEntries)
+ if ((++load_idx) >= LQEntries)
load_idx = 0;
}
@@ -1285,7 +1944,7 @@ template <class Impl>
inline void
LSQUnit<Impl>::decrLdIdx(int &load_idx) const
{
- if (--load_idx < 0)
+ if ((--load_idx) < 0)
load_idx += LQEntries;
}
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index bc024f603..2720ab914 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -1370,7 +1370,8 @@ DefaultRename<Impl>::serializeAfter(InstQueue &inst_list, ThreadID tid)
// Mark a bit to say that I must serialize on the next instruction.
serializeOnNextInst[tid] = true;
return;
- }
+ }
+
// Set the next instruction as serializing.
inst_list.front()->setSerializeBefore();
diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh
index 1c3cc2815..7024d9920 100644
--- a/src/cpu/o3/rob.hh
+++ b/src/cpu/o3/rob.hh
@@ -212,6 +212,10 @@ class ROB
/** Updates the tail instruction with the new youngest instruction. */
void updateTail();
+ /** [SafeSpce] Updates load instructions visible condition
+ * set isPrevInstsCompleted and isPrevBrsResolved. */
+ void updateVisibleState();
+
/** Reads the PC of the oldest head instruction. */
// uint64_t readHeadPC();
diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh
index 5a9dc90f9..ebfbb9754 100644
--- a/src/cpu/o3/rob_impl.hh
+++ b/src/cpu/o3/rob_impl.hh
@@ -402,6 +402,85 @@ ROB<Impl>::doSquash(ThreadID tid)
}
+/* **************************
+ * [InvisiSpec] update load insts state
+ * isPrevInstsCompleted; isPrevBrsResolved
+ * *************************/
+template <class Impl>
+void
+ROB<Impl>::updateVisibleState()
+{
+ list<ThreadID>::iterator threads = activeThreads->begin();
+ list<ThreadID>::iterator end = activeThreads->end();
+
+ while (threads != end) {
+ ThreadID tid = *threads++;
+
+ if (instList[tid].empty())
+ continue;
+
+ InstIt inst_it = instList[tid].begin();
+ InstIt tail_inst_it = instList[tid].end();
+
+ bool prevInstsComplete=true;
+ bool prevBrsResolved=true;
+ bool prevInstsCommitted=true;
+ bool prevBrsCommitted=true;
+
+ while (inst_it != tail_inst_it) {
+ DynInstPtr inst = *inst_it++;
+
+ assert(inst!=0);
+
+ if (!prevInstsComplete &&
+ !prevBrsResolved) {
+ break;
+ }
+
+ if (inst->isLoad()) {
+ if (prevInstsComplete) {
+ inst->setPrevInstsCompleted();
+ }
+ if (prevBrsResolved){
+ inst->setPrevBrsResolved();
+ }
+ if (prevInstsCommitted) {
+ inst->setPrevInstsCommitted();
+ }
+ if (prevBrsCommitted) {
+ inst->setPrevBrsCommitted();
+ }
+ }
+
+ // Update prev control insts state
+ if (inst->isControl()){
+ prevBrsCommitted = false;
+ if (!inst->readyToCommit() || inst->getFault()!=NoFault
+ || inst->isSquashed()){
+ prevBrsResolved = false;
+ }
+ }
+
+ prevInstsCommitted = false;
+
+ // Update prev insts state
+ if (inst->isNonSpeculative() || inst->isStoreConditional()
+ || inst->isMemBarrier() || inst->isWriteBarrier() ||
+ (inst->isLoad() && inst->strictlyOrdered())){
+ //Some special instructions, directly set canCommit
+ //when entering ROB
+ prevInstsComplete = false;
+ }
+ if (!inst->readyToCommit() || inst->getFault()!=NoFault
+ || inst->isSquashed()){
+ prevInstsComplete = false;
+ }
+
+ }
+ }
+}
+
+
template <class Impl>
void
ROB<Impl>::updateHead()