summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/cpu/o3/probe/ElasticTrace.py62
-rw-r--r--src/cpu/o3/probe/SConscript5
-rw-r--r--src/cpu/o3/probe/elastic_trace.cc939
-rw-r--r--src/cpu/o3/probe/elastic_trace.hh537
-rw-r--r--src/proto/SConscript1
-rw-r--r--src/proto/inst_dep_record.proto75
-rw-r--r--src/proto/packet.proto3
7 files changed, 1622 insertions, 0 deletions
diff --git a/src/cpu/o3/probe/ElasticTrace.py b/src/cpu/o3/probe/ElasticTrace.py
new file mode 100644
index 000000000..fb3093a2c
--- /dev/null
+++ b/src/cpu/o3/probe/ElasticTrace.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2013 - 2015 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Radhika Jagtap
+# Andreas Hansson
+# Thomas Grass
+
+from Probe import *
+
+class ElasticTrace(ProbeListenerObject):
+    type = 'ElasticTrace'
+    cxx_header = 'cpu/o3/probe/elastic_trace.hh'
+
+    # Names of the two protobuf trace files, which are created in the output
+    # directory. Neither has a default, so the user has to supply both when
+    # instantiating this class.
+    instFetchTraceFile = Param.String(
+        desc="Protobuf trace file name for instruction fetch tracing")
+    dataDepTraceFile = Param.String(
+        desc="Protobuf trace file name for data dependency tracing")
+
+    # Must be at least the number of entries in the O3CPU ROB; a typical
+    # value is 3 times the ROB size.
+    depWindowSize = Param.Unsigned(
+        desc="Instruction window size used for recording and processing "
+             "data dependencies")
+
+    # Committed instruction count at which tracing begins.
+    startTraceInst = Param.UInt64(
+        0,
+        "The number of committed instructions after which to start "
+        "tracing. Default zero means start tracing from first committed "
+        "instruction.")
+
diff --git a/src/cpu/o3/probe/SConscript b/src/cpu/o3/probe/SConscript
index c8ab2b53f..54f12e21e 100644
--- a/src/cpu/o3/probe/SConscript
+++ b/src/cpu/o3/probe/SConscript
@@ -43,3 +43,8 @@ if 'O3CPU' in env['CPU_MODELS']:
SimObject('SimpleTrace.py')
Source('simple_trace.cc')
DebugFlag('SimpleTrace')
+
+ # elastic_trace.cc writes its traces through protobuf output streams, so
+ # the probe is only built when protobuf support is available.
+ if env['HAVE_PROTOBUF']:
+ SimObject('ElasticTrace.py')
+ Source('elastic_trace.cc')
+ DebugFlag('ElasticTrace')
diff --git a/src/cpu/o3/probe/elastic_trace.cc b/src/cpu/o3/probe/elastic_trace.cc
new file mode 100644
index 000000000..a7a0403f9
--- /dev/null
+++ b/src/cpu/o3/probe/elastic_trace.cc
@@ -0,0 +1,939 @@
+/*
+ * Copyright (c) 2013 - 2015 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Radhika Jagtap
+ * Andreas Hansson
+ * Thomas Grass
+ */
+
+#include "cpu/o3/probe/elastic_trace.hh"
+
+#include "base/callback.hh"
+#include "base/output.hh"
+#include "base/trace.hh"
+#include "cpu/reg_class.hh"
+#include "debug/ElasticTrace.hh"
+#include "mem/packet.hh"
+
+/**
+ * Set up the elastic trace probe listener.
+ *
+ * Validates the parameters, opens the instruction fetch and data dependency
+ * protobuf streams (file names resolved through simout and prefixed with
+ * the object name), writes a header message to each stream and registers
+ * flushTraces() as an exit callback.
+ *
+ * @param params parameters of the ElasticTrace SimObject
+ */
+ElasticTrace::ElasticTrace(const ElasticTraceParams* params)
+    :  ProbeListenerObject(params),
+       regEtraceListenersEvent(this),
+       firstWin(true),
+       lastClearedSeqNum(0),
+       depWindowSize(params->depWindowSize),
+       dataTraceStream(nullptr),
+       instTraceStream(nullptr),
+       startTraceInst(params->startTraceInst),
+       allProbesReg(false)
+{
+    // The probe manager must be an O3 CPU, nothing else exposes the probe
+    // points this listener relies on.
+    cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager);
+    fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "
+             "support dependency tracing.\n", name());
+
+    fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "
+             "Recommended size is 3x ROB size in the O3CPU.\n");
+
+    // Note the trailing space before the line break: without it the two
+    // literals are glued together as "forsingle-threaded".
+    fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for "
+             "single-threaded workload only", cpu->numThreads, name());
+
+    // Both trace file names are mandatory.
+    fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "
+             "trace file path to instFetchTraceFile");
+    fatal_if(params->dataDepTraceFile == "", "Assign data dependency "
+             "trace file path to dataDepTraceFile");
+
+    // Initialize the protobuf output streams.
+    std::string filename = simout.resolve(name() + "." +
+                                          params->instFetchTraceFile);
+    instTraceStream = new ProtoOutputStream(filename);
+    filename = simout.resolve(name() + "." + params->dataDepTraceFile);
+    dataTraceStream = new ProtoOutputStream(filename);
+
+    // Create a protobuf message for the instruction fetch trace header and
+    // write it to the stream.
+    ProtoMessage::PacketHeader inst_pkt_header;
+    inst_pkt_header.set_obj_id(name());
+    inst_pkt_header.set_tick_freq(SimClock::Frequency);
+    instTraceStream->write(inst_pkt_header);
+
+    // Create a protobuf message for the data dependency trace header, which
+    // additionally records the window size, and write it to the stream.
+    ProtoMessage::InstDepRecordHeader data_rec_header;
+    data_rec_header.set_obj_id(name());
+    data_rec_header.set_tick_freq(SimClock::Frequency);
+    data_rec_header.set_window_size(depWindowSize);
+    dataTraceStream->write(data_rec_header);
+
+    // Register a callback to flush trace records and close the output
+    // streams.
+    Callback* cb = new MakeCallback<ElasticTrace,
+        &ElasticTrace::flushTraces>(this);
+    registerExitCallback(cb);
+}
+
+/**
+ * Register the elastic trace probes either immediately or, when a start
+ * instruction count was given, once that many instructions have committed.
+ */
+void
+ElasticTrace::regProbeListeners()
+{
+    inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
+        curTick(), startTraceInst);
+
+    if (startTraceInst != 0) {
+        // Defer registration: schedule the event on the committed
+        // instruction queue of thread 0 so it fires after the requested
+        // number of instructions.
+        cpu->comInstEventQueue[(ThreadID)0]->schedule(
+            &regEtraceListenersEvent, startTraceInst);
+        return;
+    }
+
+    // Tracing is wanted from the start of the simulation, so hook up all
+    // elastic trace probes right away.
+    regEtraceListeners();
+}
+
+/**
+ * Attach one listener per probe point used for elastic tracing. Each
+ * listener forwards the probe notification to the matching member function.
+ * Must be called at most once.
+ */
+void
+ElasticTrace::regEtraceListeners()
+{
+    // Shorthands for the three listener flavours used below.
+    typedef ProbeListenerArg<ElasticTrace, RequestPtr> ReqListener;
+    typedef ProbeListenerArg<ElasticTrace, DynInstPtr> InstListener;
+    typedef ProbeListenerArg<ElasticTrace, SeqNumRegPair> RegPairListener;
+
+    assert(!allProbesReg);
+    inform("@%llu: No. of instructions committed = %llu, registering elastic"
+        " probe listeners", curTick(), cpu->numSimulatedInsts());
+
+    // Fetch stage: instruction fetch requests.
+    listeners.push_back(new ReqListener(this, "FetchRequest",
+        &ElasticTrace::fetchReqTrace));
+    // Execute and writeback timing of each instruction.
+    listeners.push_back(new InstListener(this, "Execute",
+        &ElasticTrace::recordExecTick));
+    listeners.push_back(new InstListener(this, "ToCommit",
+        &ElasticTrace::recordToCommTick));
+    // Register dependency tracking in rename.
+    listeners.push_back(new InstListener(this, "Rename",
+        &ElasticTrace::updateRegDep));
+    listeners.push_back(new RegPairListener(this, "SquashInRename",
+        &ElasticTrace::removeRegDepMapEntry));
+    // Retirement, either by squash or by commit.
+    listeners.push_back(new InstListener(this, "Squash",
+        &ElasticTrace::addSquashedInst));
+    listeners.push_back(new InstListener(this, "Commit",
+        &ElasticTrace::addCommittedInst));
+
+    allProbesReg = true;
+}
+
+/**
+ * Fetch probe callback: serialise an instruction fetch request into the
+ * instruction trace stream.
+ *
+ * @param req the fetch request that was issued
+ */
+void
+ElasticTrace::fetchReqTrace(const RequestPtr &req)
+{
+    DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
+             (MemCmd::ReadReq),
+             req->getPC(), req->getVaddr(), req->getPaddr(),
+             req->getFlags(), req->getSize(), curTick());
+
+    // Fill a protobuf packet with the request fields needed to recreate
+    // the request in the TraceCPU. A fetch is always recorded as a read.
+    ProtoMessage::Packet fetch_msg;
+    fetch_msg.set_tick(curTick());
+    fetch_msg.set_cmd(MemCmd::ReadReq);
+    fetch_msg.set_pc(req->getPC());
+    fetch_msg.set_flags(req->getFlags());
+    fetch_msg.set_addr(req->getPaddr());
+    fetch_msg.set_size(req->getSize());
+
+    // Append the packet to the instruction trace.
+    instTraceStream->write(fetch_msg);
+}
+
+/**
+ * Execute probe callback: record the tick at which an instruction starts
+ * execution in its execution info object, creating that object if the
+ * rename probe has not already done so.
+ *
+ * @param dyn_inst the instruction entering execute
+ */
+void
+ElasticTrace::recordExecTick(const DynInstPtr &dyn_inst)
+{
+    // In a corner case, a retired instruction is propagated backward to the
+    // IEW instruction queue to handle some side-channel information. But we
+    // must not process an instruction again. So we test the sequence number
+    // against the lastClearedSeqNum and skip adding the instruction for such
+    // corner cases.
+    if (dyn_inst->seqNum <= lastClearedSeqNum) {
+        // The message is built from adjacent literals rather than a
+        // backslash continuation inside the literal, which would embed the
+        // source indentation in the output, and it ends in a newline like
+        // every other DPRINTFR in this file.
+        DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction "
+                 "has already retired (mostly squashed)\n", dyn_inst->seqNum);
+        // Do nothing as program has proceeded and this inst has been
+        // propagated backwards to handle something.
+        return;
+    }
+
+    DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
+             curTick());
+
+    // Either the execution info object will already exist if this
+    // instruction had a register dependency recorded in the rename probe
+    // listener before entering execute stage or it will not exist and will
+    // need to be created here.
+    InstExecInfo* exec_info_ptr;
+    auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
+    if (itr_exec_info != tempStore.end()) {
+        exec_info_ptr = itr_exec_info->second;
+    } else {
+        exec_info_ptr = new InstExecInfo;
+        tempStore[dyn_inst->seqNum] = exec_info_ptr;
+    }
+
+    exec_info_ptr->executeTick = curTick();
+
+    // Keep the high-water mark of the temporary store up to date.
+    maxTempStoreSize = std::max(tempStore.size(),
+                                (std::size_t)maxTempStoreSize.value());
+}
+
+/**
+ * ToCommit probe callback: record the tick at which an instruction is sent
+ * to commit, provided the instruction is being tracked in the temporary
+ * store.
+ *
+ * @param dyn_inst the instruction being sent to commit
+ */
+void
+ElasticTrace::recordToCommTick(const DynInstPtr &dyn_inst)
+{
+    auto info_itr = tempStore.find(dyn_inst->seqNum);
+    if (info_itr != tempStore.end()) {
+        DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n",
+                 dyn_inst->seqNum, curTick());
+        info_itr->second->toCommitTick = curTick();
+        return;
+    }
+
+    // Right after tracing is enabled, instructions this far down the
+    // pipeline were never seen by the earlier probes, so nothing is known
+    // about their past (e.g. when they started executing). Skip them until
+    // an instruction shows up that is present in the tempStore.
+    DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store,"
+             " skipping.\n", dyn_inst->seqNum);
+}
+
+/**
+ * Rename probe callback: start tracking an instruction and record its
+ * physical register dependencies.
+ *
+ * A new execution info object is stored in tempStore under the sequence
+ * number of the instruction. Each renamed source register is looked up in
+ * physRegDepMap to find its last writer, and each renamed destination
+ * register (except misc registers and the zero register) is entered in the
+ * map with this instruction as the last writer.
+ *
+ * @param dyn_inst the instruction being renamed
+ */
+void
+ElasticTrace::updateRegDep(const DynInstPtr &dyn_inst)
+{
+ // Get the sequence number of the instruction
+ InstSeqNum seq_num = dyn_inst->seqNum;
+
+ // Instructions at or below lastClearedSeqNum have already been retired
+ // from the temporary store and must not show up in rename again.
+ assert(dyn_inst->seqNum > lastClearedSeqNum);
+
+ // Since this is the first probe activated in the pipeline, create
+ // a new execution info object to track this instruction as it
+ // progresses through the pipeline.
+ InstExecInfo* exec_info_ptr = new InstExecInfo;
+ tempStore[seq_num] = exec_info_ptr;
+
+ // Loop through the source registers and look up the dependency map. If
+ // the source register entry is found in the dependency map, add a
+ // dependency on the last writer.
+ // NOTE(review): the register count is held in an int8_t; confirm this
+ // matches the return type of numSrcRegs()/numDestRegs().
+ int8_t max_regs = dyn_inst->numSrcRegs();
+ for (int src_idx = 0; src_idx < max_regs; src_idx++) {
+ // Get the physical register index of the i'th source register.
+ PhysRegIndex src_reg = dyn_inst->renamedSrcRegIdx(src_idx);
+ DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg %i\n", seq_num,
+ src_reg);
+ auto itr_last_writer = physRegDepMap.find(src_reg);
+ if (itr_last_writer != physRegDepMap.end()) {
+ InstSeqNum last_writer = itr_last_writer->second;
+ // Additionally the dependency distance is kept less than the window
+ // size parameter to limit the memory allocation to nodes in the
+ // graph. If the window were tending to infinite we would have to
+ // load a large number of node objects during replay.
+ if (seq_num - last_writer < depWindowSize) {
+ // Record a physical register dependency.
+ exec_info_ptr->physRegDepSet.insert(last_writer);
+ }
+ }
+ }
+
+ // Loop through the destination registers of this instruction and update
+ // the physical register dependency map for last writers to registers.
+ max_regs = dyn_inst->numDestRegs();
+ for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
+ // For data dependency tracking the register must be an int, float or
+ // CC register and not a Misc register.
+ TheISA::RegIndex dest_reg = dyn_inst->destRegIdx(dest_idx);
+ if (regIdxToClass(dest_reg) != MiscRegClass) {
+ // Get the physical register index of the i'th destination register.
+ // NOTE(review): dest_reg is declared as TheISA::RegIndex but is
+ // reused here for the renamed index, whereas the source loop uses
+ // PhysRegIndex; confirm both types can hold the renamed index.
+ dest_reg = dyn_inst->renamedDestRegIdx(dest_idx);
+ if (dest_reg != TheISA::ZeroReg) {
+ DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg %i\n",
+ seq_num, dest_reg);
+ physRegDepMap[dest_reg] = seq_num;
+ }
+ }
+ }
+ // Track the largest size the dependency map has reached.
+ maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
+ (std::size_t)maxPhysRegDepMapSize.value());
+}
+
+/**
+ * SquashInRename probe callback: drop the last-writer entry of a physical
+ * register from the dependency map, if there is one.
+ *
+ * @param inst_reg_pair pair whose second member is the register to remove
+ */
+void
+ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
+{
+    DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
+             inst_reg_pair.second);
+    // Erasing by key is a no-op when the register has no entry.
+    physRegDepMap.erase(inst_reg_pair.second);
+}
+
+/**
+ * Squash probe callback: add a squashed load to the trace if it had already
+ * sent a read request, then release the temporary execution info up to and
+ * including this instruction.
+ *
+ * @param head_inst the instruction being squashed
+ */
+void
+ElasticTrace::addSquashedInst(const DynInstPtr &head_inst)
+{
+    // An instruction squashed before the execute stage processed it is not
+    // in the temporary store, in which case there is nothing to do.
+    auto info_itr = tempStore.find(head_inst->seqNum);
+    if (info_itr == tempStore.end()) {
+        return;
+    }
+
+    DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
+             head_inst->seqNum);
+
+    // Execution info object belonging to this instruction.
+    InstExecInfo* info = info_itr->second;
+
+    // Only a fault-free load that executed, was sent to commit and carries
+    // a request has issued a read before being squashed.
+    const bool load_was_sent = head_inst->isLoad() &&
+        info->executeTick != MaxTick &&
+        info->toCommitTick != MaxTick &&
+        head_inst->hasRequest() &&
+        head_inst->getFault() == NoFault;
+    if (load_was_sent) {
+        // Record it in depTrace, flagged as not committed.
+        addDepTraceRecord(head_inst, info, false);
+    }
+
+    // The execution info is of no further use, so drop it from the
+    // temporary store.
+    clearTempStoreUntil(head_inst);
+}
+
+/**
+ * Commit probe callback: add a committed instruction to the dependency
+ * trace unless it is a nop, faulted, was predicated false or is a memory
+ * reference without a request, then release the temporary execution info up
+ * to and including this instruction.
+ *
+ * @param head_inst the instruction being committed
+ */
+void
+ElasticTrace::addCommittedInst(const DynInstPtr &head_inst)
+{
+    DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
+             head_inst->seqNum);
+
+    // Nops never enter the depTrace; only their temporary state is freed.
+    if (head_inst->isNop()) {
+        clearTempStoreUntil(head_inst);
+        return;
+    }
+
+    // Right after tracing is enabled, committing instructions were never
+    // seen by the earlier probes, so their history (e.g. the tick they
+    // started executing) is unknown. Bail out until an instruction arrives
+    // that is present in the tempStore.
+    auto info_itr = tempStore.find(head_inst->seqNum);
+    if (info_itr == tempStore.end()) {
+        DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
+                 "store, skipping.\n", head_inst->seqNum);
+        return;
+    }
+
+    // Execution info object belonging to this instruction; both ticks must
+    // have been recorded by the time it commits.
+    InstExecInfo* info = info_itr->second;
+    assert(info->executeTick != MaxTick);
+    assert(info->toCommitTick != MaxTick);
+
+    // Faulting instructions, instructions predicated false (previous
+    // register values restored) and loads/stores without a request (e.g. a
+    // zero-sized request) are marked executed and reach this listener, yet
+    // they issue no request and write no registers. Execution would not
+    // stall on them, so they are left out of the trace. They could instead
+    // be kept as compute nodes to improve correlation accuracy, at the
+    // price of bigger traces.
+    if (head_inst->getFault() != NoFault) {
+        DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
+                "skip adding it to the trace\n",
+                (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
+                head_inst->seqNum);
+    } else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
+        DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so "
+                "skip adding it to the trace\n", head_inst->seqNum);
+    } else if (!head_inst->readPredicate()) {
+        DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
+                "skip adding it to the trace\n",
+                (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
+                head_inst->seqNum);
+    } else {
+        // Record it in depTrace, flagged as committed.
+        addDepTraceRecord(head_inst, info, true);
+    }
+
+    // The execution info is of no further use, so drop it from the
+    // temporary store.
+    clearTempStoreUntil(head_inst);
+}
+
+/**
+ * Create a trace record for an instruction and append it to depTrace.
+ *
+ * The record is filled from the instruction and its execution info, then
+ * given register dependencies, commit order dependencies (stores only) and,
+ * if it ended up with no dependency at all, an issue order dependency. Once
+ * depTrace holds twice depWindowSize records, the oldest depWindowSize
+ * records are written out.
+ *
+ * @param head_inst the instruction to add
+ * @param exec_info_ptr timing and register dependency info collected for
+ * the instruction while it moved through the pipeline
+ * @param commit true if the instruction committed, false if it was squashed
+ */
+void
+ElasticTrace::addDepTraceRecord(const DynInstPtr &head_inst,
+ InstExecInfo* exec_info_ptr, bool commit)
+{
+ // Create a record to assign dynamic instruction related fields.
+ TraceInfo* new_record = new TraceInfo;
+ // Add to map for sequence number look up to retrieve the TraceInfo pointer
+ traceInfoMap[head_inst->seqNum] = new_record;
+
+ // Assign fields from the instruction
+ new_record->instNum = head_inst->seqNum;
+ new_record->load = head_inst->isLoad();
+ new_record->store = head_inst->isStore();
+ new_record->commit = commit;
+
+ // Assign fields for creating a request in case of a load/store
+ new_record->reqFlags = head_inst->memReqFlags;
+ new_record->addr = head_inst->physEffAddrLow;
+ // Currently the tracing does not support split requests.
+ new_record->size = head_inst->effSize;
+ new_record->pc = head_inst->instAddr();
+
+ // Assign the timing information stored in the execution info object
+ new_record->executeTick = exec_info_ptr->executeTick;
+ new_record->toCommitTick = exec_info_ptr->toCommitTick;
+ new_record->commitTick = curTick();
+
+ // Assign initial values for number of dependents and computational delay
+ // (-1 marks the delay as not yet computed)
+ new_record->numDepts = 0;
+ new_record->compDelay = -1;
+
+ // The physical register dependency set of the first instruction is
+ // empty. Since there are no records in the depTrace at this point, the
+ // case of adding an ROB dependency by using a reverse iterator is not
+ // applicable. Thus, populate the fields of the record corresponding to the
+ // first instruction and return.
+ if (depTrace.empty()) {
+ // Store the record in depTrace.
+ depTrace.push_back(new_record);
+ DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
+ new_record->instNum);
+ return;
+ }
+
+ // Clear register dependencies for squashed loads as they may be dependent
+ // on squashed instructions and we do not add those to the trace.
+ if (head_inst->isLoad() && !commit) {
+ (exec_info_ptr->physRegDepSet).clear();
+ }
+
+ // Assign the register dependencies stored in the execution info object
+ std::set<InstSeqNum>::const_iterator dep_set_it;
+ for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
+ dep_set_it != (exec_info_ptr->physRegDepSet).end();
+ ++dep_set_it) {
+ // Only producers that made it into the trace can be depended upon.
+ auto trace_info_itr = traceInfoMap.find(*dep_set_it);
+ if (trace_info_itr != traceInfoMap.end()) {
+ // The register dependency is valid. Assign it and calculate
+ // computational delay
+ new_record->physRegDepList.push_back(*dep_set_it);
+ DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
+ "%lli\n", new_record->instNum, *dep_set_it);
+ TraceInfo* reg_dep = trace_info_itr->second;
+ reg_dep->numDepts++;
+ compDelayPhysRegDep(reg_dep, new_record);
+ ++numRegDep;
+ } else {
+ // The instruction that this has a register dependency on was
+ // not added to the trace because of one of the following
+ // 1. it was an instruction that had a fault
+ // 2. it was an instruction that was predicated false and
+ // previous register values were restored
+ // 3. it was load/store that did not have a request (e.g. when
+ // the size of the request is zero but this may not be a fault)
+ // In all these cases the instruction is set as executed and is
+ // picked up by the commit probe listener. But a request is not
+ // issued and registers are not written to in these cases.
+ DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
+ "%lli is skipped\n",new_record->instNum, *dep_set_it);
+ }
+ }
+
+ // Check for and assign an ROB dependency in addition to register
+ // dependency before adding the record to the trace.
+ // As stores have to commit in order a store is dependent on the last
+ // committed load/store. This is recorded in the ROB dependency.
+ if (head_inst->isStore()) {
+ // Look up store-after-store order dependency
+ updateCommitOrderDep(new_record, false);
+ // Look up store-after-load order dependency
+ updateCommitOrderDep(new_record, true);
+ }
+
+ // In case a node is dependency-free or its dependency got discarded
+ // because it was outside the window, it is marked ready in the ROB at the
+ // time of issue. A request is sent as soon as possible. To model this, a
+ // node is assigned an issue order dependency on a committed instruction
+ // that completed earlier than it. This is done to avoid the problem of
+ // determining the issue times of such dependency-free nodes during replay
+ // which could lead to too much parallelism, thinking conservatively.
+ if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) {
+ updateIssueOrderDep(new_record);
+ }
+
+ // Store the record in depTrace.
+ depTrace.push_back(new_record);
+ DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
+ (commit ? "committed" : "squashed"), new_record->instNum);
+
+ // To process the number of records specified by depWindowSize in the
+ // forward direction, the depTrace must have twice as many records
+ // to check for dependencies.
+ if (depTrace.size() == 2 * depWindowSize) {
+
+ DPRINTF(ElasticTrace, "Writing out trace...\n");
+
+ // Write out the records which have been processed to the trace
+ // and remove them from the depTrace.
+ writeDepTrace(depWindowSize);
+
+ // After the first window, writeDepTrace() must check for valid
+ // compDelay.
+ firstWin = false;
+ }
+}
+
+/**
+ * Give a store an ROB (commit order) dependency on the most recent load or
+ * store in depTrace that completed before the store was sent.
+ *
+ * At most depWindowSize records are inspected, walking backwards from the
+ * newest record. The search stops at the first match.
+ *
+ * @param new_record record of the store being added to the trace
+ * @param find_load_not_store true to look for a store-after-load
+ * dependency, false to look for a store-after-store dependency
+ */
+void
+ElasticTrace::updateCommitOrderDep(TraceInfo* new_record,
+                                    bool find_load_not_store)
+{
+    assert(new_record->store);
+    // Iterate in reverse direction to search for the last committed
+    // load/store that completed earlier than the new record
+    depTraceRevItr from_itr(depTrace.end());
+    depTraceRevItr until_itr(depTrace.begin());
+    uint32_t num_go_back = 0;
+
+    // The execution time of this store is when it is sent, that is committed
+    Tick execute_tick = curTick();
+
+    // Search for store-after-load or store-after-store order dependency
+    while (num_go_back < depWindowSize && from_itr != until_itr) {
+        // Dereference only after the bounds check above; reading the
+        // iterator once it has reached the start of depTrace is undefined
+        // behaviour.
+        TraceInfo* past_record = *from_itr;
+
+        // Check if the previous inst is a load completed earlier, or a
+        // store sent earlier, by comparing with the execute tick
+        const bool ordered_before = find_load_not_store ?
+            hasLoadCompleted(past_record, execute_tick) :
+            hasStoreCommitted(past_record, execute_tick);
+        if (ordered_before) {
+            // Assign rob dependency and calculate the computational delay
+            assignRobDep(past_record, new_record);
+            ++numOrderDepStores;
+            return;
+        }
+        ++from_itr;
+        ++num_go_back;
+    }
+}
+
+/**
+ * Give a dependency-free record an ROB (issue order) dependency on the most
+ * recent record in depTrace that was sent or completed before it.
+ *
+ * At most depWindowSize records are inspected, walking backwards from the
+ * newest record. The search stops at the first match.
+ *
+ * @param new_record record being added to the trace
+ */
+void
+ElasticTrace::updateIssueOrderDep(TraceInfo* new_record)
+{
+    // Iterate in reverse direction to search for the last committed
+    // record that completed earlier than the new record
+    depTraceRevItr from_itr(depTrace.end());
+    depTraceRevItr until_itr(depTrace.begin());
+
+    uint32_t num_go_back = 0;
+    Tick execute_tick = 0;
+
+    if (new_record->load) {
+        // The execution time of a load is when a request is sent
+        execute_tick = new_record->executeTick;
+        ++numIssueOrderDepLoads;
+    } else if (new_record->store) {
+        // The execution time of a store is when it is sent, i.e. committed
+        execute_tick = curTick();
+        ++numIssueOrderDepStores;
+    } else {
+        // The execution time of a non load/store is when it completes
+        execute_tick = new_record->toCommitTick;
+        ++numIssueOrderDepOther;
+    }
+
+    // We search if this record has an issue order dependency on a past record.
+    // Once we find it, we update both the new record and the record it depends
+    // on and return.
+    while (num_go_back < depWindowSize && from_itr != until_itr) {
+        // Dereference only after the bounds check above; reading the
+        // iterator once it has reached the start of depTrace is undefined
+        // behaviour.
+        TraceInfo* past_record = *from_itr;
+
+        // Check if a previous inst is a load sent earlier, or a store sent
+        // earlier, or a comp inst completed earlier by comparing with execute
+        // tick
+        if (hasLoadBeenSent(past_record, execute_tick) ||
+            hasStoreCommitted(past_record, execute_tick) ||
+            hasCompCompleted(past_record, execute_tick)) {
+            // Assign rob dependency and calculate the computational delay
+            assignRobDep(past_record, new_record);
+            return;
+        }
+        ++from_itr;
+        ++num_go_back;
+    }
+}
+
+/**
+ * Record an ROB dependency of new_record on past_record and update the
+ * bookkeeping that goes with it.
+ *
+ * @param past_record record being depended upon
+ * @param new_record record that gains the dependency
+ */
+void
+ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record)
+{
+    DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
+            new_record->load ? "Load" : (new_record->store ? "Store" :
+                                         "Non load/store"),
+            new_record->instNum, past_record->instNum);
+
+    // The new record now depends on the past record ...
+    new_record->robDepList.push_back(past_record->instNum);
+    // ... which determines (or tightens) its compute delay ...
+    compDelayRob(past_record, new_record);
+    // ... and the past record gained one more dependent.
+    past_record->numDepts++;
+
+    // Keep the stat that logs the largest number of dependents seen.
+    const uint32_t prev_max = (uint32_t)maxNumDependents.value();
+    maxNumDependents = std::max(past_record->numDepts, prev_max);
+}
+
+/**
+ * @return true if past_record is a store whose commit tick is not later
+ * than execute_tick
+ */
+bool
+ElasticTrace::hasStoreCommitted(TraceInfo* past_record,
+                                 Tick execute_tick) const
+{
+    if (!past_record->store) {
+        return false;
+    }
+    return past_record->commitTick <= execute_tick;
+}
+
+/**
+ * @return true if past_record is a committed load whose to-commit tick is
+ * not later than execute_tick
+ */
+bool
+ElasticTrace::hasLoadCompleted(TraceInfo* past_record,
+                                Tick execute_tick) const
+{
+    if (!past_record->load || !past_record->commit) {
+        return false;
+    }
+    return past_record->toCommitTick <= execute_tick;
+}
+
+/**
+ * @return true if past_record is a committed load whose execute tick is not
+ * later than execute_tick
+ */
+bool
+ElasticTrace::hasLoadBeenSent(TraceInfo* past_record,
+                               Tick execute_tick) const
+{
+    // Only committed loads qualify.
+    if (!past_record->load || !past_record->commit) {
+        return false;
+    }
+    // The load counts as sent earlier if it executed no later than this.
+    return past_record->executeTick <= execute_tick;
+}
+
+/**
+ * @return true if past_record is neither a load nor a store and its
+ * to-commit tick is not later than execute_tick
+ */
+bool
+ElasticTrace::hasCompCompleted(TraceInfo* past_record,
+                                Tick execute_tick) const
+{
+    if (past_record->store || past_record->load) {
+        return false;
+    }
+    return past_record->toCommitTick <= execute_tick;
+}
+
+/**
+ * Free and remove the execution info objects of every sequence number from
+ * that of head_inst down to, but excluding, the last cleared sequence
+ * number, then remember head_inst as the last cleared instruction.
+ *
+ * @param head_inst newest instruction whose execution info is released
+ */
+void
+ElasticTrace::clearTempStoreUntil(const DynInstPtr head_inst)
+{
+    const InstSeqNum newest_sn = head_inst->seqNum;
+
+    // Walk the sequence numbers downwards; numbers without an entry in the
+    // temporary store are simply skipped.
+    for (InstSeqNum sn = newest_sn; sn > lastClearedSeqNum; --sn) {
+        auto info_itr = tempStore.find(sn);
+        if (info_itr == tempStore.end()) {
+            continue;
+        }
+        // Release the info object before dropping its map entry.
+        delete info_itr->second;
+        tempStore.erase(info_itr);
+    }
+
+    lastClearedSeqNum = newest_sn;
+}
+
+/**
+ * Compute the computational delay of new_record with respect to an ROB
+ * dependency on past_record, i.e. the gap between the completion tick of
+ * past_record and the execution tick of new_record, and keep the smallest
+ * delay seen so far in new_record.
+ *
+ * @param past_record record being depended upon
+ * @param new_record dependent record whose compDelay is updated
+ */
+void
+ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record)
+{
+    DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n",
+            new_record->instNum, past_record->instNum);
+
+    // Tick at which the dependent node executes as per the modelling of
+    // computation delay.
+    Tick execution_tick = new_record->getExecuteTick();
+
+    // Tick at which the producer counts as complete: a load completes at
+    // toCommitTick for a dependent store and at executeTick otherwise, a
+    // store at commitTick and anything else at toCommitTick.
+    Tick completion_tick;
+    if (past_record->load) {
+        completion_tick = new_record->store ? past_record->toCommitTick
+                                            : past_record->executeTick;
+    } else {
+        completion_tick = past_record->store ? past_record->commitTick
+                                             : past_record->toCommitTick;
+    }
+
+    assert(execution_tick >= completion_tick);
+    int64_t comp_delay = execution_tick - completion_tick;
+
+    DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
+            execution_tick, completion_tick, comp_delay);
+
+    // The delay is taken with respect to the dependency that completes the
+    // latest, i.e. the smallest delay; -1 means no delay was assigned yet.
+    new_record->compDelay = (new_record->compDelay == -1) ?
+        comp_delay : std::min(comp_delay, new_record->compDelay);
+
+    DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
+            new_record->compDelay);
+}
+
+void
+ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record,
+                                  TraceInfo* new_record)
+{
+    // Work out how long new_record waited after the instruction it has a
+    // physical register dependency on, past_record, had completed, and fold
+    // that into new_record->compDelay.
+    DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
+            " %lli.\n", new_record->instNum, past_record->instNum);
+
+    // Tick at which the dependent node is modelled as executing
+    const Tick exec_tick = new_record->getExecuteTick();
+
+    // The producer completes when it writes the register, that is at
+    // toCommitTick. For a store updating a destination register this is
+    // approximated by commitTick instead.
+    const Tick done_tick = past_record->store ? past_record->commitTick
+                                              : past_record->toCommitTick;
+
+    // The dependent must not execute before its dependency completes
+    assert(exec_tick >= done_tick);
+    const int64_t delay = exec_tick - done_tick;
+    DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
+            exec_tick, done_tick, delay);
+
+    // Keep the delay relative to the dependency that completes latest, which
+    // is the smallest delay seen. -1 marks a delay that is not yet assigned.
+    if (new_record->compDelay == -1 || delay < new_record->compDelay)
+        new_record->compDelay = delay;
+
+    DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
+            new_record->compDelay);
+}
+
+Tick
+ElasticTrace::TraceInfo::getExecuteTick() const
+{
+    // A load executes when its request was sent, that is at executeTick.
+    if (load)
+        return executeTick;
+
+    // A store executes when its request was sent, that is at commitTick.
+    if (store)
+        return commitTick;
+
+    // Any other instruction executes when its register value was written,
+    // that is at toCommitTick.
+    return toCommitTick;
+}
+
+void
+ElasticTrace::writeDepTrace(uint32_t num_to_write)
+{
+    // Write the first num_to_write records of depTrace to the data
+    // dependency stream in order, then remove them from depTrace and
+    // traceInfoMap and free them. A record that is written carries:
+    // Instruction sequence number
+    // If instruction was a load
+    // If instruction was a store
+    // Instruction PC
+    // Request flags, addr and size - only for a load or store
+    // Computational delay with respect to last completed dependency
+    // List of order dependencies - optional, repeated
+    // List of physical register RAW dependencies - optional, repeated
+    // Weight of a node equal to no. of filtered nodes before it - optional
+
+    // The loop dereferences one iterator per record, so asking for more
+    // records than depTrace holds would run past the end of the container.
+    assert(num_to_write <= depTrace.size());
+
+    // Nodes filtered out since the last node that was written. The counter
+    // is as wide as num_to_write so it cannot wrap within one call.
+    uint32_t num_filtered_nodes = 0;
+    depTraceItr dep_trace_itr(depTrace.begin());
+    depTraceItr dep_trace_itr_start = dep_trace_itr;
+    while (num_to_write > 0) {
+        TraceInfo* temp_ptr = *dep_trace_itr;
+        // If no node depends on a non load/store node then there is
+        // no reason to track it in the dependency graph. We filter out such
+        // nodes but count them and add a weight field to the subsequent node
+        // that we do include in the trace.
+        if (temp_ptr->numDepts != 0 || temp_ptr->load || temp_ptr->store) {
+
+            DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
+                     "is as follows:\n", temp_ptr->instNum);
+            if (temp_ptr->load || temp_ptr->store) {
+                DPRINTFR(ElasticTrace, "\tis a %s\n",
+                         (temp_ptr->load ? "Load" : "Store"));
+                DPRINTFR(ElasticTrace, "\thas a request with addr %i, size %i,"
+                         " flags %i\n", temp_ptr->addr, temp_ptr->size,
+                         temp_ptr->reqFlags);
+            } else {
+                DPRINTFR(ElasticTrace, "\tis not a load or store\n");
+            }
+            // In the first window a node may have no dependency to measure
+            // a delay against. Use its own execute tick as the delay then.
+            if (firstWin && temp_ptr->compDelay == -1) {
+                if (temp_ptr->load) {
+                    temp_ptr->compDelay = temp_ptr->executeTick;
+                } else if (temp_ptr->store) {
+                    temp_ptr->compDelay = temp_ptr->commitTick;
+                } else {
+                    temp_ptr->compDelay = temp_ptr->toCommitTick;
+                }
+            }
+            // Every node written must have a delay assigned by now
+            assert(temp_ptr->compDelay != -1);
+            DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
+                     temp_ptr->compDelay);
+
+            // Create a protobuf message for the dependency record
+            ProtoMessage::InstDepRecord dep_pkt;
+            dep_pkt.set_seq_num(temp_ptr->instNum);
+            dep_pkt.set_load(temp_ptr->load);
+            dep_pkt.set_store(temp_ptr->store);
+            dep_pkt.set_pc(temp_ptr->pc);
+            if (temp_ptr->load || temp_ptr->store) {
+                dep_pkt.set_flags(temp_ptr->reqFlags);
+                dep_pkt.set_addr(temp_ptr->addr);
+                dep_pkt.set_size(temp_ptr->size);
+            }
+            dep_pkt.set_comp_delay(temp_ptr->compDelay);
+            if (temp_ptr->robDepList.empty()) {
+                DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
+            }
+            // Move the order dependencies into the message, draining the list
+            while (!temp_ptr->robDepList.empty()) {
+                DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n",
+                         temp_ptr->robDepList.front());
+                dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
+                temp_ptr->robDepList.pop_front();
+            }
+            if (temp_ptr->physRegDepList.empty()) {
+                DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
+            }
+            // Move the register dependencies into the message likewise
+            while (!temp_ptr->physRegDepList.empty()) {
+                DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
+                         temp_ptr->physRegDepList.front());
+                dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
+                temp_ptr->physRegDepList.pop_front();
+            }
+            if (num_filtered_nodes != 0) {
+                // Set the weight of this node as the no. of filtered nodes
+                // between this node and the last node that we wrote to output
+                // stream. The weight will be used during replay to model ROB
+                // occupancy of filtered nodes.
+                dep_pkt.set_weight(num_filtered_nodes);
+                num_filtered_nodes = 0;
+            }
+            // Write the message to the protobuf output stream
+            dataTraceStream->write(dep_pkt);
+        } else {
+            // Don't write the node to the trace but note that we have filtered
+            // out a node.
+            ++numFilteredNodes;
+            ++num_filtered_nodes;
+        }
+        // Advance before the record is freed, then drop it from the lookup
+        // map and release its memory
+        dep_trace_itr++;
+        traceInfoMap.erase(temp_ptr->instNum);
+        delete temp_ptr;
+        num_to_write--;
+    }
+    // Remove the now dangling pointers for the processed records in one go
+    depTrace.erase(dep_trace_itr_start, dep_trace_itr);
+}
+
+void
+ElasticTrace::regStats()
+{
+    // Give every statistic of the elastic trace a name, prefixed with the
+    // name of this listener, and a description.
+    using namespace Stats;
+    numRegDep
+        .name(name() + ".numRegDep")
+        .desc("Number of register dependencies recorded during tracing")
+        ;
+
+    numOrderDepStores
+        .name(name() + ".numOrderDepStores")
+        .desc("Number of commit order (rob) dependencies for a store recorded"
+              " on a past load/store during tracing")
+        ;
+
+    numIssueOrderDepLoads
+        .name(name() + ".numIssueOrderDepLoads")
+        .desc("Number of loads that got assigned issue order dependency"
+              " because they were dependency-free")
+        ;
+
+    numIssueOrderDepStores
+        .name(name() + ".numIssueOrderDepStores")
+        .desc("Number of stores that got assigned issue order dependency"
+              " because they were dependency-free")
+        ;
+
+    numIssueOrderDepOther
+        .name(name() + ".numIssueOrderDepOther")
+        .desc("Number of non load/store insts that got assigned issue order"
+              " dependency because they were dependency-free")
+        ;
+
+    numFilteredNodes
+        .name(name() + ".numFilteredNodes")
+        .desc("No. of nodes filtered out before writing the output trace")
+        ;
+
+    // The description used to read "number or dependents"
+    maxNumDependents
+        .name(name() + ".maxNumDependents")
+        .desc("Maximum number of dependents on any instruction")
+        ;
+
+    maxTempStoreSize
+        .name(name() + ".maxTempStoreSize")
+        .desc("Maximum size of the temporary store during the run")
+        ;
+
+    maxPhysRegDepMapSize
+        .name(name() + ".maxPhysRegDepMapSize")
+        .desc("Maximum size of register dependency map")
+        ;
+}
+
+const std::string
+ElasticTrace::name() const
+{
+    // The trace listener has no name of its own, it reports the name held
+    // by its ProbeListenerObject base.
+    const std::string listener_name = ProbeListenerObject::name();
+    return listener_name;
+}
+
+void
+ElasticTrace::flushTraces()
+{
+    // Write to trace all records in the depTrace.
+    writeDepTrace(depTrace.size());
+    // Delete the stream objects. The pointers are cleared as well so that
+    // they are not left dangling and a repeated flush does not free the
+    // streams a second time.
+    delete dataTraceStream;
+    dataTraceStream = nullptr;
+    delete instTraceStream;
+    instTraceStream = nullptr;
+}
+
+ElasticTrace*
+ElasticTraceParams::create()
+{
+    // Build the listener from this parameter object and hand it back to the
+    // caller.
+    ElasticTrace* tracer = new ElasticTrace(this);
+    return tracer;
+}
diff --git a/src/cpu/o3/probe/elastic_trace.hh b/src/cpu/o3/probe/elastic_trace.hh
new file mode 100644
index 000000000..5dd2949e8
--- /dev/null
+++ b/src/cpu/o3/probe/elastic_trace.hh
@@ -0,0 +1,537 @@
+/*
+ * Copyright (c) 2013 - 2015 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Radhika Jagtap
+ * Andreas Hansson
+ * Thomas Grass
+ */
+
+/**
+ * @file This file describes a trace component which is a cpu probe listener
+ * used to generate elastic cpu traces. It registers listeners to probe points
+ * in the fetch, rename, iew and commit stages of the O3CPU. It processes the
+ * dependency graph of the cpu execution and writes out a protobuf trace. It
+ * also generates a protobuf trace of the instruction fetch requests.
+ */
+
+#ifndef __CPU_O3_PROBE_ELASTIC_TRACE_HH__
+#define __CPU_O3_PROBE_ELASTIC_TRACE_HH__
+
+#include <list>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "cpu/o3/dyn_inst.hh"
+#include "cpu/o3/impl.hh"
+#include "mem/request.hh"
+#include "params/ElasticTrace.hh"
+#include "proto/inst_dep_record.pb.h"
+#include "proto/packet.pb.h"
+#include "proto/protoio.hh"
+#include "sim/eventq.hh"
+#include "sim/probe/probe.hh"
+
+/**
+ * The elastic trace is a type of probe listener and listens to probe points
+ * in multiple stages of the O3CPU. The notify method is called on a probe
+ * point typically when an instruction successfully progresses through that
+ * stage.
+ *
+ * As different listener methods mapped to the different probe points execute,
+ * relevant information about the instruction, e.g. timestamps and register
+ * accesses, are captured and stored in temporary data structures. When the
+ * instruction progresses through the commit stage, the timing as well as
+ * dependency information about the instruction is finalised and encapsulated in
+ * a struct called TraceInfo. TraceInfo objects are collected in a list instead
+ * of writing them out to the trace file one at a time. This is required as the
+ * trace is processed in chunks to evaluate order dependencies and computational
+ * delay in case an instruction does not have any register dependencies. By this
+ * we achieve a simpler algorithm during replay because every record in the
+ * trace can be hooked onto a record in its past. The trace is written out as
+ * a protobuf format output file.
+ *
+ * The output trace can be read in and played back by the TraceCPU.
+ */
+class ElasticTrace : public ProbeListenerObject
+{
+
+ public:
+ typedef typename O3CPUImpl::DynInstPtr DynInstPtr;
+ typedef typename std::pair<InstSeqNum, PhysRegIndex> SeqNumRegPair;
+
+ /** Constructor */
+ ElasticTrace(const ElasticTraceParams *params);
+
+ /**
+ * Register the probe listeners, that is, the methods called on a probe
+ * point notify() call.
+ */
+ void regProbeListeners();
+
+ /** Register all listeners. */
+ void regEtraceListeners();
+
+ /** Returns the name of the trace probe listener. */
+ const std::string name() const;
+
+ /**
+ * Process any outstanding trace records, flush them out to the protobuf
+ * output streams and delete the streams at simulation exit.
+ */
+ void flushTraces();
+
+ /**
+ * Take the fields of the request class object that are relevant to create
+ * an instruction fetch request. It creates a protobuf message containing
+ * the request fields and writes it to instTraceStream.
+ *
+ * @param req pointer to the fetch request
+ */
+ void fetchReqTrace(const RequestPtr &req);
+
+ /**
+ * Populate the execute timestamp field in an InstExecInfo object for an
+ * instruction in flight.
+ *
+ * @param dyn_inst pointer to dynamic instruction in flight
+ */
+ void recordExecTick(const DynInstPtr &dyn_inst);
+
+ /**
+ * Populate the timestamp field in an InstExecInfo object for an
+ * instruction in flight when its execution is complete and it is ready
+ * to commit.
+ *
+ * @param dyn_inst pointer to dynamic instruction in flight
+ */
+ void recordToCommTick(const DynInstPtr &dyn_inst);
+
+ /**
+ * Record a Read After Write physical register dependency if there has
+ * been a write to the source register and update the physical register
+ * map. For this look up the physRegDepMap with this instruction as the
+ * writer of its destination register. If the dependency falls outside the
+ * window it is assumed as already complete. Duplicate entries are avoided.
+ *
+ * @param dyn_inst pointer to dynamic instruction in flight
+ */
+ void updateRegDep(const DynInstPtr &dyn_inst);
+
+ /**
+ * When an instruction gets squashed the destination register mapped to it
+ * is freed up in the rename stage. Remove the register entry from the
+ * physRegDepMap as well to avoid dependencies on squashed instructions.
+ *
+ * @param inst_reg_pair pair of inst. sequence no. and the register
+ */
+ void removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair);
+
+ /**
+ * Add an instruction that is at the head of the ROB and is squashed only
+ * if it is a load and a request was sent for it.
+ *
+ * @param head_inst pointer to dynamic instruction to be squashed
+ */
+ void addSquashedInst(const DynInstPtr &head_inst);
+
+ /**
+ * Add an instruction that is at the head of the ROB and is committed.
+ *
+ * @param head_inst pointer to dynamic instruction to be committed
+ */
+ void addCommittedInst(const DynInstPtr &head_inst);
+
+ /** Register statistics for the elastic trace. */
+ void regStats();
+
+ /** Event to trigger registering this listener for all probe points. */
+ EventWrapper<ElasticTrace,
+ &ElasticTrace::regEtraceListeners> regEtraceListenersEvent;
+
+ private:
+ /**
+ * Used for checking the first window for processing and writing of
+ * dependency trace. At the start of the program there can be dependency-
+ * free instructions and such cases are handled differently.
+ */
+ bool firstWin;
+
+ /**
+ * @defgroup InstExecInfo Struct for storing information before an
+ * instruction reaches the commit stage, e.g. execute timestamp.
+ */
+ struct InstExecInfo
+ {
+ /**
+ * @ingroup InstExecInfo
+ * @{
+ */
+ /** Timestamp when instruction was first processed by execute stage */
+ Tick executeTick;
+ /**
+ * Timestamp when instruction execution is completed in execute stage
+ * and instruction is marked as ready to commit
+ */
+ Tick toCommitTick;
+ /**
+ * Set of instruction sequence numbers that this instruction depends on
+ * due to Read After Write data dependency based on physical register.
+ * Being a set, it holds each sequence number at most once.
+ */
+ std::set<InstSeqNum> physRegDepSet;
+ /** @} */
+
+ /**
+ * Constructor. Both timestamps start out as MaxTick and the dependency
+ * set starts out empty. MaxTick presumably marks a timestamp that has
+ * not been recorded yet - TODO confirm against the recording methods.
+ */
+ InstExecInfo()
+ : executeTick(MaxTick),
+ toCommitTick(MaxTick)
+ { }
+ };
+
+ /**
+ * Temporary store of InstExecInfo objects. Later on when an instruction
+ * is processed for commit or retire, if it is chosen to be written to
+ * the output trace then this information is looked up using the instruction
+ * sequence number as the key. If it is not chosen then the entry for it in
+ * the store is cleared.
+ */
+ std::unordered_map<InstSeqNum, InstExecInfo*> tempStore;
+
+ /**
+ * The last cleared instruction sequence number used to free up the memory
+ * allocated in the temporary store.
+ */
+ InstSeqNum lastClearedSeqNum;
+
+ /**
+ * Map for recording the producer of a physical register to check Read
+ * After Write dependencies. The key is the renamed physical register and
+ * the value is the instruction sequence number of its last producer.
+ */
+ std::unordered_map<PhysRegIndex, InstSeqNum> physRegDepMap;
+
+ /**
+ * @defgroup TraceInfo Struct for a record in the instruction dependency
+ * trace. All information required to process and calculate the
+ * computational delay is stored in TraceInfo objects. The memory request
+ * fields for a load or store instruction are also included here. Note
+ * that the structure TraceInfo does not store pointers to children
+ * or parents. The dependency trace is maintained as an ordered collection
+ * of records for writing to the output trace and not as a tree data
+ * structure.
+ */
+ struct TraceInfo
+ {
+ /**
+ * @ingroup TraceInfo
+ * @{
+ */
+ /* Instruction sequence number. */
+ InstSeqNum instNum;
+ /* Tick when instruction was in execute stage. */
+ Tick executeTick;
+ /* Tick when instruction was marked ready and sent to commit stage. */
+ Tick toCommitTick;
+ /* Tick when instruction was committed. */
+ Tick commitTick;
+ /* Whether the instruction was a load, was a store, was committed. */
+ bool load, store, commit;
+ /* Sequence numbers this instruction has an order (rob) dependency on. */
+ std::list<InstSeqNum> robDepList;
+ /* Sequence numbers this instruction has a register RAW dependency on. */
+ std::list<InstSeqNum> physRegDepList;
+ /**
+ * Computational delay with respect to the dependency that completed
+ * last. A value of -1 means that no delay has been assigned yet, for
+ * example because the instruction has no dependencies.
+ */
+ int64_t compDelay;
+ /* Number of dependents, i.e. records that depend on this one. */
+ uint32_t numDepts;
+ /* The instruction PC for a load, store or non load/store. */
+ Addr pc;
+ /* Request flags in case of a load/store instruction */
+ Request::FlagsType reqFlags;
+ /* Request address in case of a load/store instruction */
+ Addr addr;
+ /* Request size in case of a load/store instruction */
+ unsigned size;
+ /** @} */
+
+ /**
+ * Get the execute tick of the instruction.
+ *
+ * @return executeTick for a load, commitTick for a store and
+ * toCommitTick for any other instruction
+ */
+ Tick getExecuteTick() const;
+ };
+
+ /**
+ * The instruction dependency trace containing TraceInfo objects. The
+ * container implemented is sequential as dependencies obey commit
+ * order (program order). For example, if B is dependent on A then B must
+ * be committed after A. Thus records are updated with dependency
+ * information and written to the trace in commit order. This ensures that
+ * when a graph is reconstructed from the trace during replay, all the
+ * dependencies are stored in the graph before the dependent itself is
+ * added. This facilitates creating a tree data structure during replay,
+ * i.e. adding children as records are read from the trace in an efficient
+ * manner.
+ */
+ std::vector<TraceInfo*> depTrace;
+
+ /**
+ * Map where the instruction sequence number is mapped to the pointer to
+ * the TraceInfo object.
+ */
+ std::unordered_map<InstSeqNum, TraceInfo*> traceInfoMap;
+
+ /** Typedef of iterator to the instruction dependency trace. */
+ typedef typename std::vector<TraceInfo*>::iterator depTraceItr;
+
+ /** Typedef of the reverse iterator to the instruction dependency trace. */
+ typedef typename std::reverse_iterator<depTraceItr> depTraceRevItr;
+
+ /**
+ * The maximum distance for a dependency. It is set by a top level
+ * parameter. It must be equal to or greater than the number of
+ * entries in the ROB. This variable is used as the length of the sliding
+ * window for processing the dependency trace.
+ */
+ uint32_t depWindowSize;
+
+ /** Protobuf output stream for data dependency trace */
+ ProtoOutputStream* dataTraceStream;
+
+ /** Protobuf output stream for instruction fetch trace. */
+ ProtoOutputStream* instTraceStream;
+
+ /** Number of instructions after which to enable tracing. */
+ const InstSeqNum startTraceInst;
+
+ /**
+ * Whether the elastic trace listener has been registered for all probes.
+ *
+ * When enabling tracing after a specified number of instructions have
+ * committed, check this to prevent re-registering the listener.
+ */
+ bool allProbesReg;
+
+ /** Pointer to the O3CPU that is this listener's parent a.k.a. manager */
+ FullO3CPU<O3CPUImpl>* cpu;
+
+ /**
+ * Add a record to the dependency trace depTrace which is a sequential
+ * container. A record is inserted per committed instruction and in the same
+ * order as the order in which instructions are committed.
+ *
+ * @param head_inst Pointer to the instruction which is head of the
+ * ROB and ready to commit
+ * @param exec_info_ptr Pointer to InstExecInfo for that instruction
+ * @param commit True if instruction is committed, false if squashed
+ */
+ void addDepTraceRecord(const DynInstPtr &head_inst,
+ InstExecInfo* exec_info_ptr, bool commit);
+
+ /**
+ * Clear entries in the temporary store of execution info objects to free
+ * allocated memory until the present instruction being added to the trace.
+ *
+ * @param head_inst pointer to dynamic instruction
+ */
+ void clearTempStoreUntil(const DynInstPtr head_inst);
+
+ /**
+ * Calculate the computational delay between an instruction and a
+ * subsequent instruction that has an ROB (order) dependency on it
+ *
+ * @param past_record Pointer to instruction
+ *
+ * @param new_record Pointer to subsequent instruction having an ROB
+ * dependency on the instruction pointed to by
+ * past_record
+ */
+ void compDelayRob(TraceInfo* past_record, TraceInfo* new_record);
+
+ /**
+ * Calculate the computational delay between an instruction and a
+ * subsequent instruction that has a Physical Register (data) dependency on
+ * it.
+ *
+ * @param past_record Pointer to instruction
+ *
+ * @param new_record Pointer to subsequent instruction having a Physical
+ * Register dependency on the instruction pointed to
+ * by past_record
+ */
+ void compDelayPhysRegDep(TraceInfo* past_record, TraceInfo* new_record);
+
+ /**
+ * Write out given number of records to the trace starting with the first
+ * record in depTrace and iterating through the trace in sequence. A
+ * record is deleted after it is written.
+ *
+ * @param num_to_write Number of records to write to the trace
+ */
+ void writeDepTrace(uint32_t num_to_write);
+
+ /**
+ * Reverse iterate through the graph, search for a store-after-store or
+ * store-after-load dependency and update the new node's Rob dependency list.
+ *
+ * If a dependency is found, then call the assignRobDep() method that
+ * updates the store with the dependency information. This function is only
+ * called when a new store node is added to the trace.
+ *
+ * @param new_record pointer to new store record
+ * @param find_load_not_store true for searching store-after-load and false
+ * for searching store-after-store dependency
+ */
+ void updateCommitOrderDep(TraceInfo* new_record, bool find_load_not_store);
+
+ /**
+ * Reverse iterate through the graph, search for an issue order dependency
+ * for a new node and update the new node's Rob dependency list.
+ *
+ * If a dependency is found, call the assignRobDep() method that updates
+ * the node with its dependency information. This function is called in
+ * case a new node to be added to the trace is dependency-free or its
+ * dependency got discarded because the dependency was outside the window.
+ *
+ * @param new_record pointer to new record to be added to the trace
+ */
+ void updateIssueOrderDep(TraceInfo* new_record);
+
+ /**
+ * The new_record has an order dependency on a past_record, thus update the
+ * new record's Rob dependency list and increment the number of dependents
+ * of the past record.
+ *
+ * @param past_record pointer to record that new_record has a rob
+ * dependency on
+ * @param new_record pointer to new record
+ */
+ void assignRobDep(TraceInfo* past_record, TraceInfo* new_record);
+
+ /**
+ * Check if past record is a store sent earlier than the execute tick.
+ *
+ * @param past_record pointer to past store
+ * @param execute_tick tick with which to compare past store's commit tick
+ *
+ * @return true if past record is store sent earlier
+ */
+ bool hasStoreCommitted(TraceInfo* past_record, Tick execute_tick) const;
+
+ /**
+ * Check if past record is a load that completed earlier than the execute
+ * tick.
+ *
+ * @param past_record pointer to past load
+ * @param execute_tick tick with which to compare past load's complete
+ * tick
+ *
+ * @return true if past record is load completed earlier
+ */
+ bool hasLoadCompleted(TraceInfo* past_record, Tick execute_tick) const;
+
+ /**
+ * Check if past record is a load sent earlier than the execute tick.
+ *
+ * @param past_record pointer to past load
+ * @param execute_tick tick with which to compare past load's send tick
+ *
+ * @return true if past record is load sent earlier
+ */
+ bool hasLoadBeenSent(TraceInfo* past_record, Tick execute_tick) const;
+
+ /**
+ * Check if past record is a comp node that completed earlier than the
+ * execute tick.
+ *
+ * @param past_record pointer to past comp node
+ * @param execute_tick tick with which to compare past comp node's
+ * completion tick
+ *
+ * @return true if past record is comp completed earlier
+ */
+ bool hasCompCompleted(TraceInfo* past_record, Tick execute_tick) const;
+
+ /** Number of register dependencies recorded during tracing */
+ Stats::Scalar numRegDep;
+
+ /**
+ * Number of stores that got assigned a commit order dependency
+ * on a past load/store.
+ */
+ Stats::Scalar numOrderDepStores;
+
+ /**
+ * Number of load insts that got assigned an issue order dependency
+ * because they were dependency-free.
+ */
+ Stats::Scalar numIssueOrderDepLoads;
+
+ /**
+ * Number of store insts that got assigned an issue order dependency
+ * because they were dependency-free.
+ */
+ Stats::Scalar numIssueOrderDepStores;
+
+ /**
+ * Number of non load/store insts that got assigned an issue order
+ * dependency because they were dependency-free.
+ */
+ Stats::Scalar numIssueOrderDepOther;
+
+ /** Number of filtered nodes */
+ Stats::Scalar numFilteredNodes;
+
+ /** Maximum number of dependents on any instruction */
+ Stats::Scalar maxNumDependents;
+
+ /**
+ * Maximum size of the temporary store mostly useful as a check that it is
+ * not growing
+ */
+ Stats::Scalar maxTempStoreSize;
+
+ /**
+ * Maximum size of the map that holds the last writer to a physical
+ * register.
+ */
+ Stats::Scalar maxPhysRegDepMapSize;
+
+};
+#endif//__CPU_O3_PROBE_ELASTIC_TRACE_HH__
diff --git a/src/proto/SConscript b/src/proto/SConscript
index 292a23639..ef6bc2aca 100644
--- a/src/proto/SConscript
+++ b/src/proto/SConscript
@@ -41,6 +41,7 @@ Import('*')
# Only build if we have protobuf support
if env['HAVE_PROTOBUF']:
+ ProtoBuf('inst_dep_record.proto')
ProtoBuf('packet.proto')
ProtoBuf('inst.proto')
Source('protoio.cc')
diff --git a/src/proto/inst_dep_record.proto b/src/proto/inst_dep_record.proto
new file mode 100644
index 000000000..7035bfc74
--- /dev/null
+++ b/src/proto/inst_dep_record.proto
@@ -0,0 +1,75 @@
+// Copyright (c) 2013 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Radhika Jagtap
+
+// Put all the generated messages in a namespace
+package ProtoMessage;
+
+// Packet header for the o3cpu data dependency trace. The header fields are the
+// identifier describing what object captured the trace, the version of this
+// file format, the tick frequency of the object and the window size used to
+// limit the register dependencies during capture.
+message InstDepRecordHeader {
+ // Identifier describing what object captured the trace
+ required string obj_id = 1;
+ // Version of this file format, 0 unless set otherwise
+ optional uint32 ver = 2 [default = 0];
+ // Tick frequency of the object that captured the trace
+ required uint64 tick_freq = 3;
+ // Window size used to limit the register dependencies during capture
+ required uint32 window_size = 4;
+}
+
+// Packet to encapsulate an instruction in the o3cpu data dependency trace.
+// The required fields include the instruction sequence number, whether it
+// is a load, and whether it is a store. The request related fields are
+// optional, namely address, size and flags. These exist only if the
+// instruction is a load or store. The dependency related information includes
+// a repeated field for order dependencies, a repeated field for register
+// dependencies and the computational delay with respect to the dependency
+// that completed last. A weight field is used to account for committed
+// instructions that were filtered out before writing the trace and is used
+// to estimate ROB occupancy during replay. An optional field is provided for
+// the instruction PC.
+message InstDepRecord {
+ // Instruction sequence number
+ required uint64 seq_num = 1;
+ // Whether the instruction is a load
+ required bool load = 2;
+ // Whether the instruction is a store
+ required bool store = 3;
+ // Request address, size and flags, present only for a load or store
+ optional uint64 addr = 4;
+ optional uint32 size = 5;
+ optional uint32 flags = 6;
+ // Sequence numbers this instruction has an order dependency on
+ repeated uint64 rob_dep = 7;
+ // Computational delay with respect to the dependency that completed last
+ required uint64 comp_delay = 8;
+ // Sequence numbers this instruction has a register dependency on
+ repeated uint64 reg_dep = 9;
+ // Number of committed instructions filtered out before this record, used
+ // to estimate ROB occupancy during replay
+ optional uint32 weight = 10;
+ // Instruction PC
+ optional uint64 pc = 11;
+}
diff --git a/src/proto/packet.proto b/src/proto/packet.proto
index d27599691..c07206742 100644
--- a/src/proto/packet.proto
+++ b/src/proto/packet.proto
@@ -55,6 +55,8 @@ message PacketHeader {
// not, etc. An optional id field is added for generic use to identify
// the packet or the "owner" of the packet. An example of the latter
// is the sequential id of an instruction, or the master id etc.
+// An optional field for PC of the instruction for which this request is made
+// is provided.
message Packet {
required uint64 tick = 1;
required uint32 cmd = 2;
@@ -62,4 +64,5 @@ message Packet {
required uint32 size = 4;
optional uint32 flags = 5;
optional uint64 pkt_id = 6;
+ optional uint64 pc = 7;
}