-rw-r--r-- src/cpu/o3/probe/ElasticTrace.py  |  62
-rw-r--r-- src/cpu/o3/probe/SConscript       |   5
-rw-r--r-- src/cpu/o3/probe/elastic_trace.cc | 939
-rw-r--r-- src/cpu/o3/probe/elastic_trace.hh | 537
-rw-r--r-- src/proto/SConscript              |   1
-rw-r--r-- src/proto/inst_dep_record.proto   |  75
-rw-r--r-- src/proto/packet.proto            |   3
7 files changed, 1622 insertions(+), 0 deletions(-)
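For orientation before the diff itself, the following is a minimal configuration sketch of how the new probe might be attached to an O3 CPU from a gem5 config script. It is not part of the patch: the file names, the window size value, and the attribute name used to parent the probe are illustrative assumptions.

# Hypothetical gem5 config fragment (not part of this patch): attach the new
# ElasticTrace probe to an O3 CPU. Assumes a DerivO3CPU instance named 'cpu'
# already exists in the script and that gem5 was built with protobuf support.
from m5.objects import ElasticTrace

etrace = ElasticTrace(
    # Both trace file names are mandatory; the files are created in the
    # simulation output directory, prefixed with the probe's name.
    instFetchTraceFile="fetchtrace.proto.gz",
    dataDepTraceFile="deptrace.proto.gz",
    # Must be at least the number of ROB entries of the traced CPU; 3x the
    # ROB size is the recommended value. 3 * 192 assumes the default
    # 192-entry ROB and is only illustrative.
    depWindowSize=3 * 192,
    # 0 (the default) starts tracing from the first committed instruction.
    startTraceInst=0,
)

# Making the probe a child of the CPU makes the CPU its manager, which the
# constructor requires to be an O3CPU; the attribute name itself is not
# significant.
cpu.traceListener = etrace

With startTraceInst left at zero the listeners are registered at the start of simulation; a non-zero value defers registration until that many instructions have committed, as the code below shows.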
diff --git a/src/cpu/o3/probe/ElasticTrace.py b/src/cpu/o3/probe/ElasticTrace.py new file mode 100644 index 000000000..fb3093a2c --- /dev/null +++ b/src/cpu/o3/probe/ElasticTrace.py @@ -0,0 +1,62 @@ +# Copyright (c) 2013 - 2015 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Radhika Jagtap +# Andreas Hansson +# Thomas Grass + +from Probe import * + +class ElasticTrace(ProbeListenerObject): + type = 'ElasticTrace' + cxx_header = 'cpu/o3/probe/elastic_trace.hh' + + # Trace files for the following params are created in the output directory. + # User is forced to provide these when an instance of this class is created. + instFetchTraceFile = Param.String(desc="Protobuf trace file name for " \ + "instruction fetch tracing") + dataDepTraceFile = Param.String(desc="Protobuf trace file name for " \ + "data dependency tracing") + # The dependency window size param must be equal to or greater than the + # number of entries in the O3CPU ROB, a typical value is 3 times ROB size + depWindowSize = Param.Unsigned(desc="Instruction window size used for " \ + "recording and processing data " \ + "dependencies") + # The committed instruction count from which to start tracing + startTraceInst = Param.UInt64(0, "The number of committed instructions " \ + "after which to start tracing. 
Default " \ + "zero means start tracing from first " \ + "committed instruction.") + diff --git a/src/cpu/o3/probe/SConscript b/src/cpu/o3/probe/SConscript index c8ab2b53f..54f12e21e 100644 --- a/src/cpu/o3/probe/SConscript +++ b/src/cpu/o3/probe/SConscript @@ -43,3 +43,8 @@ if 'O3CPU' in env['CPU_MODELS']: SimObject('SimpleTrace.py') Source('simple_trace.cc') DebugFlag('SimpleTrace') + + if env['HAVE_PROTOBUF']: + SimObject('ElasticTrace.py') + Source('elastic_trace.cc') + DebugFlag('ElasticTrace') diff --git a/src/cpu/o3/probe/elastic_trace.cc b/src/cpu/o3/probe/elastic_trace.cc new file mode 100644 index 000000000..a7a0403f9 --- /dev/null +++ b/src/cpu/o3/probe/elastic_trace.cc @@ -0,0 +1,939 @@ +/* + * Copyright (c) 2013 - 2015 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Radhika Jagtap + * Andreas Hansson + * Thomas Grass + */ + +#include "cpu/o3/probe/elastic_trace.hh" + +#include "base/callback.hh" +#include "base/output.hh" +#include "base/trace.hh" +#include "cpu/reg_class.hh" +#include "debug/ElasticTrace.hh" +#include "mem/packet.hh" + +ElasticTrace::ElasticTrace(const ElasticTraceParams* params) + : ProbeListenerObject(params), + regEtraceListenersEvent(this), + firstWin(true), + lastClearedSeqNum(0), + depWindowSize(params->depWindowSize), + dataTraceStream(nullptr), + instTraceStream(nullptr), + startTraceInst(params->startTraceInst), + allProbesReg(false) +{ + cpu = dynamic_cast<FullO3CPU<O3CPUImpl>*>(params->manager); + fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\ + "support dependency tracing.\n", name()); + + fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\ + "Recommended size is 3x ROB size in the O3CPU.\n"); + + fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for"\ + "single-threaded workload only", cpu->numThreads, name()); + // Initialize the protobuf output stream + fatal_if(params->instFetchTraceFile == "", "Assign instruction fetch "\ + "trace file path to instFetchTraceFile"); + fatal_if(params->dataDepTraceFile == "", "Assign data dependency "\ + "trace file path to dataDepTraceFile"); + std::string filename = simout.resolve(name() + "." + + params->instFetchTraceFile); + instTraceStream = new ProtoOutputStream(filename); + filename = simout.resolve(name() + "." + params->dataDepTraceFile); + dataTraceStream = new ProtoOutputStream(filename); + // Create a protobuf message for the header and write it to the stream + ProtoMessage::PacketHeader inst_pkt_header; + inst_pkt_header.set_obj_id(name()); + inst_pkt_header.set_tick_freq(SimClock::Frequency); + instTraceStream->write(inst_pkt_header); + // Create a protobuf message for the header and write it to + // the stream + ProtoMessage::InstDepRecordHeader data_rec_header; + data_rec_header.set_obj_id(name()); + data_rec_header.set_tick_freq(SimClock::Frequency); + data_rec_header.set_window_size(depWindowSize); + dataTraceStream->write(data_rec_header); + // Register a callback to flush trace records and close the output streams. + Callback* cb = new MakeCallback<ElasticTrace, + &ElasticTrace::flushTraces>(this); + registerExitCallback(cb); +} + +void +ElasticTrace::regProbeListeners() +{ + inform("@%llu: regProbeListeners() called, startTraceInst = %llu", + curTick(), startTraceInst); + if (startTraceInst == 0) { + // If we want to start tracing from the start of the simulation, + // register all elastic trace probes now. + regEtraceListeners(); + } else { + // Schedule an event to register all elastic trace probes when + // specified no. of instructions are committed. + cpu->comInstEventQueue[(ThreadID)0]->schedule(®EtraceListenersEvent, + startTraceInst); + } +} + +void +ElasticTrace::regEtraceListeners() +{ + assert(!allProbesReg); + inform("@%llu: No. of instructions committed = %llu, registering elastic" + " probe listeners", curTick(), cpu->numSimulatedInsts()); + // Create new listeners: provide method to be called upon a notify() for + // each probe point. 
+ listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this, + "FetchRequest", &ElasticTrace::fetchReqTrace)); + listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, + "Execute", &ElasticTrace::recordExecTick)); + listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, + "ToCommit", &ElasticTrace::recordToCommTick)); + listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, + "Rename", &ElasticTrace::updateRegDep)); + listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this, + "SquashInRename", &ElasticTrace::removeRegDepMapEntry)); + listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, + "Squash", &ElasticTrace::addSquashedInst)); + listeners.push_back(new ProbeListenerArg<ElasticTrace, DynInstPtr>(this, + "Commit", &ElasticTrace::addCommittedInst)); + allProbesReg = true; +} + +void +ElasticTrace::fetchReqTrace(const RequestPtr &req) +{ + + DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n", + (MemCmd::ReadReq), + req->getPC(), req->getVaddr(), req->getPaddr(), + req->getFlags(), req->getSize(), curTick()); + + // Create a protobuf message including the request fields necessary to + // recreate the request in the TraceCPU. + ProtoMessage::Packet inst_fetch_pkt; + inst_fetch_pkt.set_tick(curTick()); + inst_fetch_pkt.set_cmd(MemCmd::ReadReq); + inst_fetch_pkt.set_pc(req->getPC()); + inst_fetch_pkt.set_flags(req->getFlags()); + inst_fetch_pkt.set_addr(req->getPaddr()); + inst_fetch_pkt.set_size(req->getSize()); + // Write the message to the stream. + instTraceStream->write(inst_fetch_pkt); +} + +void +ElasticTrace::recordExecTick(const DynInstPtr &dyn_inst) +{ + + // In a corner case, a retired instruction is propagated backward to the + // IEW instruction queue to handle some side-channel information. But we + // must not process an instruction again. So we test the sequence number + // against the lastClearedSeqNum and skip adding the instruction for such + // corner cases. + if (dyn_inst->seqNum <= lastClearedSeqNum) { + DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \ + has already retired (mostly squashed)", dyn_inst->seqNum); + // Do nothing as program has proceeded and this inst has been + // propagated backwards to handle something. + return; + } + + DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum, + curTick()); + // Either the execution info object will already exist if this + // instruction had a register dependency recorded in the rename probe + // listener before entering execute stage or it will not exist and will + // need to be created here. + InstExecInfo* exec_info_ptr; + auto itr_exec_info = tempStore.find(dyn_inst->seqNum); + if (itr_exec_info != tempStore.end()) { + exec_info_ptr = itr_exec_info->second; + } else { + exec_info_ptr = new InstExecInfo; + tempStore[dyn_inst->seqNum] = exec_info_ptr; + } + + exec_info_ptr->executeTick = curTick(); + maxTempStoreSize = std::max(tempStore.size(), + (std::size_t)maxTempStoreSize.value()); +} + +void +ElasticTrace::recordToCommTick(const DynInstPtr &dyn_inst) +{ + // If tracing has just been enabled then the instruction at this stage of + // execution is far enough that we cannot gather info about its past like + // the tick it started execution. Simply return until we see an instruction + // that is found in the tempStore. 
+ auto itr_exec_info = tempStore.find(dyn_inst->seqNum); + if (itr_exec_info == tempStore.end()) { + DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store," + " skipping.\n", dyn_inst->seqNum); + return; + } + + DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum, + curTick()); + InstExecInfo* exec_info_ptr = itr_exec_info->second; + exec_info_ptr->toCommitTick = curTick(); + +} + +void +ElasticTrace::updateRegDep(const DynInstPtr &dyn_inst) +{ + // Get the sequence number of the instruction + InstSeqNum seq_num = dyn_inst->seqNum; + + assert(dyn_inst->seqNum > lastClearedSeqNum); + + // Since this is the first probe activated in the pipeline, create + // a new execution info object to track this instruction as it + // progresses through the pipeline. + InstExecInfo* exec_info_ptr = new InstExecInfo; + tempStore[seq_num] = exec_info_ptr; + + // Loop through the source registers and look up the dependency map. If + // the source register entry is found in the dependency map, add a + // dependency on the last writer. + int8_t max_regs = dyn_inst->numSrcRegs(); + for (int src_idx = 0; src_idx < max_regs; src_idx++) { + // Get the physical register index of the i'th source register. + PhysRegIndex src_reg = dyn_inst->renamedSrcRegIdx(src_idx); + DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg %i\n", seq_num, + src_reg); + auto itr_last_writer = physRegDepMap.find(src_reg); + if (itr_last_writer != physRegDepMap.end()) { + InstSeqNum last_writer = itr_last_writer->second; + // Additionally the dependency distance is kept less than the window + // size parameter to limit the memory allocation to nodes in the + // graph. If the window were tending to infinite we would have to + // load a large number of node objects during replay. + if (seq_num - last_writer < depWindowSize) { + // Record a physical register dependency. + exec_info_ptr->physRegDepSet.insert(last_writer); + } + } + } + + // Loop through the destination registers of this instruction and update + // the physical register dependency map for last writers to registers. + max_regs = dyn_inst->numDestRegs(); + for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) { + // For data dependency tracking the register must be an int, float or + // CC register and not a Misc register. + TheISA::RegIndex dest_reg = dyn_inst->destRegIdx(dest_idx); + if (regIdxToClass(dest_reg) != MiscRegClass) { + // Get the physical register index of the i'th destination register. + dest_reg = dyn_inst->renamedDestRegIdx(dest_idx); + if (dest_reg != TheISA::ZeroReg) { + DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg %i\n", + seq_num, dest_reg); + physRegDepMap[dest_reg] = seq_num; + } + } + } + maxPhysRegDepMapSize = std::max(physRegDepMap.size(), + (std::size_t)maxPhysRegDepMapSize.value()); +} + +void +ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair) +{ + DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n", + inst_reg_pair.second); + auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second); + if (itr_regdep_map != physRegDepMap.end()) + physRegDepMap.erase(itr_regdep_map); +} + +void +ElasticTrace::addSquashedInst(const DynInstPtr &head_inst) +{ + // If the squashed instruction was squashed before being processed by + // execute stage then it will not be in the temporary store. In this case + // do nothing and return. 
+ auto itr_exec_info = tempStore.find(head_inst->seqNum); + if (itr_exec_info == tempStore.end()) + return; + + // If there is a squashed load for which a read request was + // sent before it got squashed then add it to the trace. + DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n", + head_inst->seqNum); + // Get pointer to the execution info object corresponding to the inst. + InstExecInfo* exec_info_ptr = itr_exec_info->second; + if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick && + exec_info_ptr->toCommitTick != MaxTick && + head_inst->hasRequest() && + head_inst->getFault() == NoFault) { + // Add record to depTrace with commit parameter as false. + addDepTraceRecord(head_inst, exec_info_ptr, false); + } + // As the information contained is no longer needed, remove the execution + // info object from the temporary store. + clearTempStoreUntil(head_inst); +} + +void +ElasticTrace::addCommittedInst(const DynInstPtr &head_inst) +{ + DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n", + head_inst->seqNum); + + // Add the instruction to the depTrace. + if (!head_inst->isNop()) { + + // If tracing has just been enabled then the instruction at this stage + // of execution is far enough that we cannot gather info about its past + // like the tick it started execution. Simply return until we see an + // instruction that is found in the tempStore. + auto itr_temp_store = tempStore.find(head_inst->seqNum); + if (itr_temp_store == tempStore.end()) { + DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp " + "store, skipping.\n", head_inst->seqNum); + return; + } + + // Get pointer to the execution info object corresponding to the inst. + InstExecInfo* exec_info_ptr = itr_temp_store->second; + assert(exec_info_ptr->executeTick != MaxTick); + assert(exec_info_ptr->toCommitTick != MaxTick); + + // Check if the instruction had a fault, if it predicated false and + // thus previous register values were restored or if it was a + // load/store that did not have a request (e.g. when the size of the + // request is zero). In all these cases the instruction is set as + // executed and is picked up by the commit probe listener. But a + // request is not issued and registers are not written. So practically, + // skipping these should not hurt as execution would not stall on them. + // Alternatively, these could be included merely as a compute node in + // the graph. Removing these for now. If correlation accuracy needs to + // be improved in future these can be turned into comp nodes at the + // cost of bigger traces. + if (head_inst->getFault() != NoFault) { + DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so " + "skip adding it to the trace\n", + (head_inst->isMemRef() ? "Load/store" : "Comp inst."), + head_inst->seqNum); + } else if (head_inst->isMemRef() && !head_inst->hasRequest()) { + DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so " + "skip adding it to the trace\n", head_inst->seqNum); + } else if (!head_inst->readPredicate()) { + DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so " + "skip adding it to the trace\n", + (head_inst->isMemRef() ? "Load/store" : "Comp inst."), + head_inst->seqNum); + } else { + // Add record to depTrace with commit parameter as true. + addDepTraceRecord(head_inst, exec_info_ptr, true); + } + } + // As the information contained is no longer needed, remove the execution + // info object from the temporary store. 
+ clearTempStoreUntil(head_inst); +} + +void +ElasticTrace::addDepTraceRecord(const DynInstPtr &head_inst, + InstExecInfo* exec_info_ptr, bool commit) +{ + // Create a record to assign dynamic intruction related fields. + TraceInfo* new_record = new TraceInfo; + // Add to map for sequence number look up to retrieve the TraceInfo pointer + traceInfoMap[head_inst->seqNum] = new_record; + + // Assign fields from the instruction + new_record->instNum = head_inst->seqNum; + new_record->load = head_inst->isLoad(); + new_record->store = head_inst->isStore(); + new_record->commit = commit; + + // Assign fields for creating a request in case of a load/store + new_record->reqFlags = head_inst->memReqFlags; + new_record->addr = head_inst->physEffAddrLow; + // Currently the tracing does not support split requests. + new_record->size = head_inst->effSize; + new_record->pc = head_inst->instAddr(); + + // Assign the timing information stored in the execution info object + new_record->executeTick = exec_info_ptr->executeTick; + new_record->toCommitTick = exec_info_ptr->toCommitTick; + new_record->commitTick = curTick(); + + // Assign initial values for number of dependents and computational delay + new_record->numDepts = 0; + new_record->compDelay = -1; + + // The physical register dependency set of the first instruction is + // empty. Since there are no records in the depTrace at this point, the + // case of adding an ROB dependency by using a reverse iterator is not + // applicable. Thus, populate the fields of the record corresponding to the + // first instruction and return. + if (depTrace.empty()) { + // Store the record in depTrace. + depTrace.push_back(new_record); + DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n", + new_record->instNum); + return; + } + + // Clear register dependencies for squashed loads as they may be dependent + // on squashed instructions and we do not add those to the trace. + if (head_inst->isLoad() && !commit) { + (exec_info_ptr->physRegDepSet).clear(); + } + + // Assign the register dependencies stored in the execution info object + std::set<InstSeqNum>::const_iterator dep_set_it; + for (dep_set_it = (exec_info_ptr->physRegDepSet).begin(); + dep_set_it != (exec_info_ptr->physRegDepSet).end(); + ++dep_set_it) { + auto trace_info_itr = traceInfoMap.find(*dep_set_it); + if (trace_info_itr != traceInfoMap.end()) { + // The register dependency is valid. Assign it and calculate + // computational delay + new_record->physRegDepList.push_back(*dep_set_it); + DPRINTF(ElasticTrace, "Inst %lli has register dependency on " + "%lli\n", new_record->instNum, *dep_set_it); + TraceInfo* reg_dep = trace_info_itr->second; + reg_dep->numDepts++; + compDelayPhysRegDep(reg_dep, new_record); + ++numRegDep; + } else { + // The instruction that this has a register dependency on was + // not added to the trace because of one of the following + // 1. it was an instruction that had a fault + // 2. it was an instruction that was predicated false and + // previous register values were restored + // 3. it was load/store that did not have a request (e.g. when + // the size of the request is zero but this may not be a fault) + // In all these cases the instruction is set as executed and is + // picked up by the commit probe listener. But a request is not + // issued and registers are not written to in these cases. 
+ DPRINTF(ElasticTrace, "Inst %lli has register dependency on " + "%lli is skipped\n",new_record->instNum, *dep_set_it); + } + } + + // Check for and assign an ROB dependency in addition to register + // dependency before adding the record to the trace. + // As stores have to commit in order a store is dependent on the last + // committed load/store. This is recorded in the ROB dependency. + if (head_inst->isStore()) { + // Look up store-after-store order dependency + updateCommitOrderDep(new_record, false); + // Look up store-after-load order dependency + updateCommitOrderDep(new_record, true); + } + + // In case a node is dependency-free or its dependency got discarded + // because it was outside the window, it is marked ready in the ROB at the + // time of issue. A request is sent as soon as possible. To model this, a + // node is assigned an issue order dependency on a committed instruction + // that completed earlier than it. This is done to avoid the problem of + // determining the issue times of such dependency-free nodes during replay + // which could lead to too much parallelism, thinking conservatively. + if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) { + updateIssueOrderDep(new_record); + } + + // Store the record in depTrace. + depTrace.push_back(new_record); + DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n", + (commit ? "committed" : "squashed"), new_record->instNum); + + // To process the number of records specified by depWindowSize in the + // forward direction, the depTrace must have twice as many records + // to check for dependencies. + if (depTrace.size() == 2 * depWindowSize) { + + DPRINTF(ElasticTrace, "Writing out trace...\n"); + + // Write out the records which have been processed to the trace + // and remove them from the depTrace. + writeDepTrace(depWindowSize); + + // After the first window, writeDepTrace() must check for valid + // compDelay. 
+ firstWin = false; + } +} + +void +ElasticTrace::updateCommitOrderDep(TraceInfo* new_record, + bool find_load_not_store) +{ + assert(new_record->store); + // Iterate in reverse direction to search for the last committed + // load/store that completed earlier than the new record + depTraceRevItr from_itr(depTrace.end()); + depTraceRevItr until_itr(depTrace.begin()); + TraceInfo* past_record = *from_itr; + uint32_t num_go_back = 0; + + // The execution time of this store is when it is sent, that is committed + Tick execute_tick = curTick(); + // Search for store-after-load or store-after-store order dependency + while (num_go_back < depWindowSize && from_itr != until_itr) { + if (find_load_not_store) { + // Check if previous inst is a load completed earlier by comparing + // with execute tick + if (hasLoadCompleted(past_record, execute_tick)) { + // Assign rob dependency and calculate the computational delay + assignRobDep(past_record, new_record); + ++numOrderDepStores; + return; + } + } else { + // Check if previous inst is a store sent earlier by comparing with + // execute tick + if (hasStoreCommitted(past_record, execute_tick)) { + // Assign rob dependency and calculate the computational delay + assignRobDep(past_record, new_record); + ++numOrderDepStores; + return; + } + } + ++from_itr; + past_record = *from_itr; + ++num_go_back; + } +} + +void +ElasticTrace::updateIssueOrderDep(TraceInfo* new_record) +{ + // Interate in reverse direction to search for the last committed + // record that completed earlier than the new record + depTraceRevItr from_itr(depTrace.end()); + depTraceRevItr until_itr(depTrace.begin()); + TraceInfo* past_record = *from_itr; + + uint32_t num_go_back = 0; + Tick execute_tick = 0; + + if (new_record->load) { + // The execution time of a load is when a request is sent + execute_tick = new_record->executeTick; + ++numIssueOrderDepLoads; + } else if (new_record->store) { + // The execution time of a store is when it is sent, i.e. committed + execute_tick = curTick(); + ++numIssueOrderDepStores; + } else { + // The execution time of a non load/store is when it completes + execute_tick = new_record->toCommitTick; + ++numIssueOrderDepOther; + } + + // We search if this record has an issue order dependency on a past record. + // Once we find it, we update both the new record and the record it depends + // on and return. + while (num_go_back < depWindowSize && from_itr != until_itr) { + // Check if a previous inst is a load sent earlier, or a store sent + // earlier, or a comp inst completed earlier by comparing with execute + // tick + if (hasLoadBeenSent(past_record, execute_tick) || + hasStoreCommitted(past_record, execute_tick) || + hasCompCompleted(past_record, execute_tick)) { + // Assign rob dependency and calculate the computational delay + assignRobDep(past_record, new_record); + return; + } + ++from_itr; + past_record = *from_itr; + ++num_go_back; + } +} + +void +ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) { + DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n", + new_record->load ? "Load" : (new_record->store ? 
"Store" : + "Non load/store"), + new_record->instNum, past_record->instNum); + + // Add dependency on past record + new_record->robDepList.push_back(past_record->instNum); + // Update new_record's compute delay with respect to the past record + compDelayRob(past_record, new_record); + // Increment number of dependents of the past record + ++(past_record->numDepts); + // Update stat to log max number of dependents + maxNumDependents = std::max(past_record->numDepts, + (uint32_t)maxNumDependents.value()); +} + +bool +ElasticTrace::hasStoreCommitted(TraceInfo* past_record, + Tick execute_tick) const +{ + return (past_record->store && past_record->commitTick <= execute_tick); +} + +bool +ElasticTrace::hasLoadCompleted(TraceInfo* past_record, + Tick execute_tick) const +{ + return(past_record->load && past_record->commit && + past_record->toCommitTick <= execute_tick); +} + +bool +ElasticTrace::hasLoadBeenSent(TraceInfo* past_record, + Tick execute_tick) const +{ + // Check if previous inst is a load sent earlier than this + return (past_record->load && past_record->commit && + past_record->executeTick <= execute_tick); +} + +bool +ElasticTrace::hasCompCompleted(TraceInfo* past_record, + Tick execute_tick) const +{ + return(!past_record->store && !past_record->load && + past_record->toCommitTick <= execute_tick); +} + +void +ElasticTrace::clearTempStoreUntil(const DynInstPtr head_inst) +{ + // Clear from temp store starting with the execution info object + // corresponding the head_inst and continue clearing by decrementing the + // sequence number until the last cleared sequence number. + InstSeqNum temp_sn = (head_inst->seqNum); + while (temp_sn > lastClearedSeqNum) { + auto itr_exec_info = tempStore.find(temp_sn); + if (itr_exec_info != tempStore.end()) { + InstExecInfo* exec_info_ptr = itr_exec_info->second; + // Free allocated memory for the info object + delete exec_info_ptr; + // Remove entry from temporary store + tempStore.erase(itr_exec_info); + } + temp_sn--; + } + // Update the last cleared sequence number to that of the head_inst + lastClearedSeqNum = head_inst->seqNum; +} + +void +ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record) +{ + // The computation delay is the delay between the completion tick of the + // inst. pointed to by past_record and the execution tick of its dependent + // inst. pointed to by new_record. + int64_t comp_delay = -1; + Tick execution_tick = 0, completion_tick = 0; + + DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n", + new_record->instNum, past_record->instNum); + + // Get the tick when the node is executed as per the modelling of + // computation delay + execution_tick = new_record->getExecuteTick(); + + if (past_record->load) { + if (new_record->store) { + completion_tick = past_record->toCommitTick; + } else { + completion_tick = past_record->executeTick; + } + } else if (past_record->store) { + completion_tick = past_record->commitTick; + } else { + completion_tick = past_record->toCommitTick; + } + assert(execution_tick >= completion_tick); + comp_delay = execution_tick - completion_tick; + + DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n", + execution_tick, completion_tick, comp_delay); + + // Assign the computational delay with respect to the dependency which + // completes the latest. 
+ if (new_record->compDelay == -1) + new_record->compDelay = comp_delay; + else + new_record->compDelay = std::min(comp_delay, new_record->compDelay); + DPRINTF(ElasticTrace, "Final computational delay = %lli.\n", + new_record->compDelay); +} + +void +ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record, + TraceInfo* new_record) +{ + // The computation delay is the delay between the completion tick of the + // inst. pointed to by past_record and the execution tick of its dependent + // inst. pointed to by new_record. + int64_t comp_delay = -1; + Tick execution_tick = 0, completion_tick = 0; + + DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num" + " %lli.\n", new_record->instNum, past_record->instNum); + + // Get the tick when the node is executed as per the modelling of + // computation delay + execution_tick = new_record->getExecuteTick(); + + // When there is a physical register dependency on an instruction, the + // completion tick of that instruction is when it wrote to the register, + // that is toCommitTick. In case, of a store updating a destination + // register, this is approximated to commitTick instead + if (past_record->store) { + completion_tick = past_record->commitTick; + } else { + completion_tick = past_record->toCommitTick; + } + assert(execution_tick >= completion_tick); + comp_delay = execution_tick - completion_tick; + DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n", + execution_tick, completion_tick, comp_delay); + + // Assign the computational delay with respect to the dependency which + // completes the latest. + if (new_record->compDelay == -1) + new_record->compDelay = comp_delay; + else + new_record->compDelay = std::min(comp_delay, new_record->compDelay); + DPRINTF(ElasticTrace, "Final computational delay = %lli.\n", + new_record->compDelay); +} + +Tick +ElasticTrace::TraceInfo::getExecuteTick() const +{ + if (load) { + // Execution tick for a load instruction is when the request was sent, + // that is executeTick. + return executeTick; + } else if (store) { + // Execution tick for a store instruction is when the request was sent, + // that is commitTick. + return commitTick; + } else { + // Execution tick for a non load/store instruction is when the register + // value was written to, that is commitTick. + return toCommitTick; + } +} + +void +ElasticTrace::writeDepTrace(uint32_t num_to_write) +{ + // Write the trace with fields as follows: + // Instruction sequence number + // If instruction was a load + // If instruction was a store + // If instruction has addr + // If instruction has size + // If instruction has flags + // List of order dependencies - optional, repeated + // Computational delay with respect to last completed dependency + // List of physical register RAW dependencies - optional, repeated + // Weight of a node equal to no. of filtered nodes before it - optional + uint16_t num_filtered_nodes = 0; + depTraceItr dep_trace_itr(depTrace.begin()); + depTraceItr dep_trace_itr_start = dep_trace_itr; + while (num_to_write > 0) { + TraceInfo* temp_ptr = *dep_trace_itr; + // If no node dependends on a non load/store node then there is + // no reason to track it in the dependency graph. We filter out such + // nodes but count them and add a weight field to the subsequent node + // that we do include in the trace. + if (temp_ptr->numDepts != 0 || temp_ptr->load || temp_ptr->store) { + + DPRINTFR(ElasticTrace, "Instruction with seq. 
num %lli " + "is as follows:\n", temp_ptr->instNum); + if (temp_ptr->load || temp_ptr->store) { + DPRINTFR(ElasticTrace, "\tis a %s\n", + (temp_ptr->load ? "Load" : "Store")); + DPRINTFR(ElasticTrace, "\thas a request with addr %i, size %i," + " flags %i\n", temp_ptr->addr, temp_ptr->size, + temp_ptr->reqFlags); + } else { + DPRINTFR(ElasticTrace, "\tis not a load or store\n"); + } + if (firstWin && temp_ptr->compDelay == -1) { + if (temp_ptr->load) { + temp_ptr->compDelay = temp_ptr->executeTick; + } else if (temp_ptr->store) { + temp_ptr->compDelay = temp_ptr->commitTick; + } else { + temp_ptr->compDelay = temp_ptr->toCommitTick; + } + } + assert(temp_ptr->compDelay != -1); + DPRINTFR(ElasticTrace, "\thas computational delay %lli\n", + temp_ptr->compDelay); + + // Create a protobuf message for the dependency record + ProtoMessage::InstDepRecord dep_pkt; + dep_pkt.set_seq_num(temp_ptr->instNum); + dep_pkt.set_load(temp_ptr->load); + dep_pkt.set_store(temp_ptr->store); + dep_pkt.set_pc(temp_ptr->pc); + if (temp_ptr->load || temp_ptr->store) { + dep_pkt.set_flags(temp_ptr->reqFlags); + dep_pkt.set_addr(temp_ptr->addr); + dep_pkt.set_size(temp_ptr->size); + } + dep_pkt.set_comp_delay(temp_ptr->compDelay); + if (temp_ptr->robDepList.empty()) { + DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n"); + } + while (!temp_ptr->robDepList.empty()) { + DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n", + temp_ptr->robDepList.front()); + dep_pkt.add_rob_dep(temp_ptr->robDepList.front()); + temp_ptr->robDepList.pop_front(); + } + if (temp_ptr->physRegDepList.empty()) { + DPRINTFR(ElasticTrace, "\thas no register dependencies\n"); + } + while (!temp_ptr->physRegDepList.empty()) { + DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n", + temp_ptr->physRegDepList.front()); + dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front()); + temp_ptr->physRegDepList.pop_front(); + } + if (num_filtered_nodes != 0) { + // Set the weight of this node as the no. of filtered nodes + // between this node and the last node that we wrote to output + // stream. The weight will be used during replay to model ROB + // occupancy of filtered nodes. + dep_pkt.set_weight(num_filtered_nodes); + num_filtered_nodes = 0; + } + // Write the message to the protobuf output stream + dataTraceStream->write(dep_pkt); + } else { + // Don't write the node to the trace but note that we have filtered + // out a node. 
+ ++numFilteredNodes; + ++num_filtered_nodes; + } + dep_trace_itr++; + traceInfoMap.erase(temp_ptr->instNum); + delete temp_ptr; + num_to_write--; + } + depTrace.erase(dep_trace_itr_start, dep_trace_itr); +} + +void +ElasticTrace::regStats() { + using namespace Stats; + numRegDep + .name(name() + ".numRegDep") + .desc("Number of register dependencies recorded during tracing") + ; + + numOrderDepStores + .name(name() + ".numOrderDepStores") + .desc("Number of commit order (rob) dependencies for a store recorded" + " on a past load/store during tracing") + ; + + numIssueOrderDepLoads + .name(name() + ".numIssueOrderDepLoads") + .desc("Number of loads that got assigned issue order dependency" + " because they were dependency-free") + ; + + numIssueOrderDepStores + .name(name() + ".numIssueOrderDepStores") + .desc("Number of stores that got assigned issue order dependency" + " because they were dependency-free") + ; + + numIssueOrderDepOther + .name(name() + ".numIssueOrderDepOther") + .desc("Number of non load/store insts that got assigned issue order" + " dependency because they were dependency-free") + ; + + numFilteredNodes + .name(name() + ".numFilteredNodes") + .desc("No. of nodes filtered out before writing the output trace") + ; + + maxNumDependents + .name(name() + ".maxNumDependents") + .desc("Maximum number or dependents on any instruction") + ; + + maxTempStoreSize + .name(name() + ".maxTempStoreSize") + .desc("Maximum size of the temporary store during the run") + ; + + maxPhysRegDepMapSize + .name(name() + ".maxPhysRegDepMapSize") + .desc("Maximum size of register dependency map") + ; +} + +const std::string +ElasticTrace::name() const +{ + return ProbeListenerObject::name(); +} + +void +ElasticTrace::flushTraces() +{ + // Write to trace all records in the depTrace. + writeDepTrace(depTrace.size()); + // Delete the stream objects + delete dataTraceStream; + delete instTraceStream; +} + +ElasticTrace* +ElasticTraceParams::create() +{ + return new ElasticTrace(this); +} diff --git a/src/cpu/o3/probe/elastic_trace.hh b/src/cpu/o3/probe/elastic_trace.hh new file mode 100644 index 000000000..5dd2949e8 --- /dev/null +++ b/src/cpu/o3/probe/elastic_trace.hh @@ -0,0 +1,537 @@ +/* + * Copyright (c) 2013 - 2015 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Radhika Jagtap + * Andreas Hansson + * Thomas Grass + */ + +/** + * @file This file describes a trace component which is a cpu probe listener + * used to generate elastic cpu traces. It registers listeners to probe points + * in the fetch, rename, iew and commit stages of the O3CPU. It processes the + * dependency graph of the cpu execution and writes out a protobuf trace. It + * also generates a protobuf trace of the instruction fetch requests. + */ + +#ifndef __CPU_O3_PROBE_ELASTIC_TRACE_HH__ +#define __CPU_O3_PROBE_ELASTIC_TRACE_HH__ + +#include <set> +#include <unordered_map> +#include <utility> + +#include "cpu/o3/dyn_inst.hh" +#include "cpu/o3/impl.hh" +#include "mem/request.hh" +#include "params/ElasticTrace.hh" +#include "proto/inst_dep_record.pb.h" +#include "proto/packet.pb.h" +#include "proto/protoio.hh" +#include "sim/eventq.hh" +#include "sim/probe/probe.hh" + +/** + * The elastic trace is a type of probe listener and listens to probe points + * in multiple stages of the O3CPU. The notify method is called on a probe + * point typically when an instruction successfully progresses through that + * stage. + * + * As different listener methods mapped to the different probe points execute, + * relevant information about the instruction, e.g. timestamps and register + * accesses, are captured and stored in temporary data structures. When the + * instruction progresses through the commit stage, the timing as well as + * dependency information about the instruction is finalised and encapsulated in + * a struct called TraceInfo. TraceInfo objects are collected in a list instead + * of writing them out to the trace file one a time. This is required as the + * trace is processed in chunks to evaluate order dependencies and computational + * delay in case an instruction does not have any register dependencies. By this + * we achieve a simpler algorithm during replay because every record in the + * trace can be hooked onto a record in its past. 
The trace is written out as + * a protobuf format output file. + * + * The output trace can be read in and played back by the TraceCPU. + */ +class ElasticTrace : public ProbeListenerObject +{ + + public: + typedef typename O3CPUImpl::DynInstPtr DynInstPtr; + typedef typename std::pair<InstSeqNum, PhysRegIndex> SeqNumRegPair; + + /** Constructor */ + ElasticTrace(const ElasticTraceParams *params); + + /** + * Register the probe listeners that is the methods called on a probe point + * notify() call. + */ + void regProbeListeners(); + + /** Register all listeners. */ + void regEtraceListeners(); + + /** Returns the name of the trace probe listener. */ + const std::string name() const; + + /** + * Process any outstanding trace records, flush them out to the protobuf + * output streams and delete the streams at simulation exit. + */ + void flushTraces(); + + /** + * Take the fields of the request class object that are relevant to create + * an instruction fetch request. It creates a protobuf message containing + * the request fields and writes it to instTraceStream. + * + * @param req pointer to the fetch request + */ + void fetchReqTrace(const RequestPtr &req); + + /** + * Populate the execute timestamp field in an InstExecInfo object for an + * instruction in flight. + * + * @param dyn_inst pointer to dynamic instruction in flight + */ + void recordExecTick(const DynInstPtr &dyn_inst); + + /** + * Populate the timestamp field in an InstExecInfo object for an + * instruction in flight when it is execution is complete and it is ready + * to commit. + * + * @param dyn_inst pointer to dynamic instruction in flight + */ + void recordToCommTick(const DynInstPtr &dyn_inst); + + /** + * Record a Read After Write physical register dependency if there has + * been a write to the source register and update the physical register + * map. For this look up the physRegDepMap with this instruction as the + * writer of its destination register. If the dependency falls outside the + * window it is assumed as already complete. Duplicate entries are avoided. + * + * @param dyn_inst pointer to dynamic instruction in flight + */ + void updateRegDep(const DynInstPtr &dyn_inst); + + /** + * When an instruction gets squashed the destination register mapped to it + * is freed up in the rename stage. Remove the register entry from the + * physRegDepMap as well to avoid dependencies on squashed instructions. + * + * @param inst_reg_pair pair of inst. sequence no. and the register + */ + void removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair); + + /** + * Add an instruction that is at the head of the ROB and is squashed only + * if it is a load and a request was sent for it. + * + * @param head_inst pointer to dynamic instruction to be squashed + */ + void addSquashedInst(const DynInstPtr &head_inst); + + /** + * Add an instruction that is at the head of the ROB and is committed. + * + * @param head_inst pointer to dynamic instruction to be committed + */ + void addCommittedInst(const DynInstPtr &head_inst); + + /** Register statistics for the elastic trace. */ + void regStats(); + + /** Event to trigger registering this listener for all probe points. */ + EventWrapper<ElasticTrace, + &ElasticTrace::regEtraceListeners> regEtraceListenersEvent; + + private: + /** + * Used for checking the first window for processing and writing of + * dependency trace. At the start of the program there can be dependency- + * free instructions and such cases are handled differently. 
+ */ + bool firstWin; + + /** + * @defgroup InstExecInfo Struct for storing information before an + * instruction reaches the commit stage, e.g. execute timestamp. + */ + struct InstExecInfo + { + /** + * @ingroup InstExecInfo + * @{ + */ + /** Timestamp when instruction was first processed by execute stage */ + Tick executeTick; + /** + * Timestamp when instruction execution is completed in execute stage + * and instruction is marked as ready to commit + */ + Tick toCommitTick; + /** + * Set of instruction sequence numbers that this instruction depends on + * due to Read After Write data dependency based on physical register. + */ + std::set<InstSeqNum> physRegDepSet; + /** @} */ + + /** Constructor */ + InstExecInfo() + : executeTick(MaxTick), + toCommitTick(MaxTick) + { } + }; + + /** + * Temporary store of InstExecInfo objects. Later on when an instruction + * is processed for commit or retire, if it is chosen to be written to + * the output trace then this information is looked up using the instruction + * sequence number as the key. If it is not chosen then the entry for it in + * the store is cleared. + */ + std::unordered_map<InstSeqNum, InstExecInfo*> tempStore; + + /** + * The last cleared instruction sequence number used to free up the memory + * allocated in the temporary store. + */ + InstSeqNum lastClearedSeqNum; + + /** + * Map for recording the producer of a physical register to check Read + * After Write dependencies. The key is the renamed physical register and + * the value is the instruction sequence number of its last producer. + */ + std::unordered_map<PhysRegIndex, InstSeqNum> physRegDepMap; + + /** + * @defgroup TraceInfo Struct for a record in the instruction dependency + * trace. All information required to process and calculate the + * computational delay is stored in TraceInfo objects. The memory request + * fields for a load or store instruction are also included here. Note + * that the structure TraceInfo does not store pointers to children + * or parents. The dependency trace is maintained as an ordered collection + * of records for writing to the output trace and not as a tree data + * structure. + */ + struct TraceInfo + { + /** + * @ingroup TraceInfo + * @{ + */ + /* Instruction sequence number. */ + InstSeqNum instNum; + /* Tick when instruction was in execute stage. */ + Tick executeTick; + /* Tick when instruction was marked ready and sent to commit stage. */ + Tick toCommitTick; + /* Tick when instruction was committed. */ + Tick commitTick; + /* If instruction was a load, a store, committed. */ + bool load, store, commit; + /* List of order dependencies. */ + std::list<InstSeqNum> robDepList; + /* List of physical register RAW dependencies. */ + std::list<InstSeqNum> physRegDepList; + /** + * Computational delay after the last dependent inst. completed. + * A value of -1 which means instruction has no dependencies. + */ + int64_t compDelay; + /* Number of dependents. */ + uint32_t numDepts; + /* The instruction PC for a load, store or non load/store. */ + Addr pc; + /* Request flags in case of a load/store instruction */ + Request::FlagsType reqFlags; + /* Request address in case of a load/store instruction */ + Addr addr; + /* Request size in case of a load/store instruction */ + unsigned size; + /** @} */ + + /** + * Get the execute tick of the instruction. + * + * @return Tick when instruction was executed + */ + Tick getExecuteTick() const; + }; + + /** + * The instruction dependency trace containing TraceInfo objects. 
The + * container implemented is sequential as dependencies obey commit + * order (program order). For example, if B is dependent on A then B must + * be committed after A. Thus records are updated with dependency + * information and written to the trace in commit order. This ensures that + * when a graph is reconstructed from the trace during replay, all the + * dependencies are stored in the graph before the dependent itself is + * added. This facilitates creating a tree data structure during replay, + * i.e. adding children as records are read from the trace in an efficient + * manner. + */ + std::vector<TraceInfo*> depTrace; + + /** + * Map where the instruction sequence number is mapped to the pointer to + * the TraceInfo object. + */ + std::unordered_map<InstSeqNum, TraceInfo*> traceInfoMap; + + /** Typedef of iterator to the instruction dependency trace. */ + typedef typename std::vector<TraceInfo*>::iterator depTraceItr; + + /** Typedef of the reverse iterator to the instruction dependency trace. */ + typedef typename std::reverse_iterator<depTraceItr> depTraceRevItr; + + /** + * The maximum distance for a dependency and is set by a top level + * level parameter. It must be equal to or greater than the number of + * entries in the ROB. This variable is used as the length of the sliding + * window for processing the dependency trace. + */ + uint32_t depWindowSize; + + /** Protobuf output stream for data dependency trace */ + ProtoOutputStream* dataTraceStream; + + /** Protobuf output stream for instruction fetch trace. */ + ProtoOutputStream* instTraceStream; + + /** Number of instructions after which to enable tracing. */ + const InstSeqNum startTraceInst; + + /** + * Whther the elastic trace listener has been registered for all probes. + * + * When enabling tracing after a specified number of instructions have + * committed, check this to prevent re-registering the listener. + */ + bool allProbesReg; + + /** Pointer to the O3CPU that is this listener's parent a.k.a. manager */ + FullO3CPU<O3CPUImpl>* cpu; + + /** + * Add a record to the dependency trace depTrace which is a sequential + * container. A record is inserted per committed instruction and in the same + * order as the order in which instructions are committed. + * + * @param head_inst Pointer to the instruction which is head of the + * ROB and ready to commit + * @param exec_info_ptr Pointer to InstExecInfo for that instruction + * @param commit True if instruction is committed, false if squashed + */ + void addDepTraceRecord(const DynInstPtr &head_inst, + InstExecInfo* exec_info_ptr, bool commit); + + /** + * Clear entries in the temporary store of execution info objects to free + * allocated memory until the present instruction being added to the trace. + * + * @param head_inst pointer to dynamic instruction + */ + void clearTempStoreUntil(const DynInstPtr head_inst); + + /** + * Calculate the computational delay between an instruction and a + * subsequent instruction that has an ROB (order) dependency on it + * + * @param past_record Pointer to instruction + * + * @param new_record Pointer to subsequent instruction having an ROB + * dependency on the instruction pointed to by + * past_record + */ + void compDelayRob(TraceInfo* past_record, TraceInfo* new_record); + + /** + * Calculate the computational delay between an instruction and a + * subsequent instruction that has a Physical Register (data) dependency on + * it. 
+ * + * @param past_record Pointer to instruction + * + * @param new_record Pointer to subsequent instruction having a Physical + * Register dependency on the instruction pointed to + * by past_record + */ + void compDelayPhysRegDep(TraceInfo* past_record, TraceInfo* new_record); + + /** + * Write out given number of records to the trace starting with the first + * record in depTrace and iterating through the trace in sequence. A + * record is deleted after it is written. + * + * @param num_to_write Number of records to write to the trace + */ + void writeDepTrace(uint32_t num_to_write); + + /** + * Reverse iterate through the graph, search for a store-after-store or + * store-after-load dependency and update the new node's Rob dependency list. + * + * If a dependency is found, then call the assignRobDep() method that + * updates the store with the dependency information. This function is only + * called when a new store node is added to the trace. + * + * @param new_record pointer to new store record + * @param find_load_not_store true for searching store-after-load and false + * for searching store-after-store dependency + */ + void updateCommitOrderDep(TraceInfo* new_record, bool find_load_not_store); + + /** + * Reverse iterate through the graph, search for an issue order dependency + * for a new node and update the new node's Rob dependency list. + * + * If a dependency is found, call the assignRobDep() method that updates + * the node with its dependency information. This function is called in + * case a new node to be added to the trace is dependency-free or its + * dependency got discarded because the dependency was outside the window. + * + * @param new_record pointer to new record to be added to the trace + */ + void updateIssueOrderDep(TraceInfo* new_record); + + /** + * The new_record has an order dependency on a past_record, thus update the + * new record's Rob dependency list and increment the number of dependents + * of the past record. + * + * @param new_record pointer to new record + * @param past_record pointer to record that new_record has a rob + * dependency on + */ + void assignRobDep(TraceInfo* past_record, TraceInfo* new_record); + + /** + * Check if past record is a store sent earlier than the execute tick. + * + * @param past_record pointer to past store + * @param execute_tick tick with which to compare past store's commit tick + * + * @return true if past record is store sent earlier + */ + bool hasStoreCommitted(TraceInfo* past_record, Tick execute_tick) const; + + /** + * Check if past record is a load that completed earlier than the execute + * tick. + * + * @param past_record pointer to past load + * @param execute_tick tick with which to compare past load's complete + * tick + * + * @return true if past record is load completed earlier + */ + bool hasLoadCompleted(TraceInfo* past_record, Tick execute_tick) const; + + /** + * Check if past record is a load sent earlier than the execute tick. + * + * @param past_record pointer to past load + * @param execute_tick tick with which to compare past load's send tick + * + * @return true if past record is load sent earlier + */ + bool hasLoadBeenSent(TraceInfo* past_record, Tick execute_tick) const; + + /** + * Check if past record is a comp node that completed earlier than the + * execute tick. 
+ *
+ * @param past_record pointer to past comp node
+ * @param execute_tick tick with which to compare past comp node's
+ * completion tick
+ *
+ * @return true if the past comp node completed earlier
+ */
+ bool hasCompCompleted(TraceInfo* past_record, Tick execute_tick) const;
+
+ /** Number of register dependencies recorded during tracing */
+ Stats::Scalar numRegDep;
+
+ /**
+ * Number of stores that got assigned a commit order dependency
+ * on a past load/store.
+ */
+ Stats::Scalar numOrderDepStores;
+
+ /**
+ * Number of load insts that got assigned an issue order dependency
+ * because they were dependency-free.
+ */
+ Stats::Scalar numIssueOrderDepLoads;
+
+ /**
+ * Number of store insts that got assigned an issue order dependency
+ * because they were dependency-free.
+ */
+ Stats::Scalar numIssueOrderDepStores;
+
+ /**
+ * Number of non-load/store insts that got assigned an issue order
+ * dependency because they were dependency-free.
+ */
+ Stats::Scalar numIssueOrderDepOther;
+
+ /** Number of filtered nodes */
+ Stats::Scalar numFilteredNodes;
+
+ /** Maximum number of dependents on any instruction */
+ Stats::Scalar maxNumDependents;
+
+ /**
+ * Maximum size of the temporary store, mostly useful as a check that it
+ * is not growing without bound.
+ */
+ Stats::Scalar maxTempStoreSize;
+
+ /**
+ * Maximum size of the map that holds the last writer to a physical
+ * register.
+ */
+ Stats::Scalar maxPhysRegDepMapSize;
+
+};
+#endif // __CPU_O3_PROBE_ELASTIC_TRACE_HH__
diff --git a/src/proto/SConscript b/src/proto/SConscript
index 292a23639..ef6bc2aca 100644
--- a/src/proto/SConscript
+++ b/src/proto/SConscript
@@ -41,6 +41,7 @@ Import('*')
 
 # Only build if we have protobuf support
 if env['HAVE_PROTOBUF']:
+ ProtoBuf('inst_dep_record.proto')
 ProtoBuf('packet.proto')
 ProtoBuf('inst.proto')
 Source('protoio.cc')
diff --git a/src/proto/inst_dep_record.proto b/src/proto/inst_dep_record.proto
new file mode 100644
index 000000000..7035bfc74
--- /dev/null
+++ b/src/proto/inst_dep_record.proto
@@ -0,0 +1,75 @@
+// Copyright (c) 2013 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder. You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Radhika Jagtap
+
+// Put all the generated messages in a namespace
+package ProtoMessage;
+
+// Header message for the o3cpu data dependency trace. The header fields are
+// the identifier describing what object captured the trace, the version of
+// this file format, the tick frequency of the object and the window size
+// used to limit the register dependencies during capture.
+message InstDepRecordHeader {
+ required string obj_id = 1;
+ optional uint32 ver = 2 [default = 0];
+ required uint64 tick_freq = 3;
+ required uint32 window_size = 4;
+}
+
+// Message to encapsulate an instruction in the o3cpu data dependency trace.
+// The required fields include the instruction sequence number, whether it
+// is a load, and whether it is a store. The request related fields are
+// optional, namely address, size and flags. These exist only if the
+// instruction is a load or store. The dependency related information
+// includes a repeated field for order dependencies, a repeated field for
+// register dependencies and the computational delay with respect to the
+// dependency that completed last. A weight field accounts for committed
+// instructions that were filtered out before writing the trace and is used
+// to estimate ROB occupancy during replay. An optional field is provided
+// for the instruction PC.
+message InstDepRecord {
+ required uint64 seq_num = 1;
+ required bool load = 2;
+ required bool store = 3;
+ optional uint64 addr = 4;
+ optional uint32 size = 5;
+ optional uint32 flags = 6;
+ repeated uint64 rob_dep = 7;
+ required uint64 comp_delay = 8;
+ repeated uint64 reg_dep = 9;
+ optional uint32 weight = 10;
+ optional uint64 pc = 11;
+}
diff --git a/src/proto/packet.proto b/src/proto/packet.proto
index d27599691..c07206742 100644
--- a/src/proto/packet.proto
+++ b/src/proto/packet.proto
@@ -55,6 +55,8 @@ message PacketHeader {
 // not, etc. An optional id field is added for generic use to identify
 // the packet or the "owner" of the packet. An example of the latter
 // is the sequential id of an instruction, or the master id etc.
+// An optional field is provided for the PC of the instruction for which
+// this request was made.
 message Packet {
 required uint64 tick = 1;
 required uint32 cmd = 2;
@@ -62,4 +64,5 @@
 required uint32 size = 4;
 optional uint32 flags = 5;
 optional uint64 pkt_id = 6;
+ optional uint64 pc = 7;
 }
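
Note on the capture side: the following is a minimal, illustrative sketch of how one dependency record in the format defined by inst_dep_record.proto above could be serialized. It is not the elastic_trace.cc implementation; the helper function, its parameters and the include path "proto/protoio.hh" are assumptions, and it only assumes gem5's ProtoOutputStream wrapper (built from protoio.cc above) exposes a write(const google::protobuf::Message&) method.

// Sketch only: serialize one InstDepRecord through a protobuf output stream.
// Field names follow inst_dep_record.proto above; the function signature is
// illustrative and not part of the gem5 API.

#include <cstdint>
#include <vector>

#include "proto/inst_dep_record.pb.h"   // generated by protoc from the file above
#include "proto/protoio.hh"             // assumed header declaring ProtoOutputStream

void
writeDepRecordSketch(ProtoOutputStream &os, uint64_t seq_num, bool load,
                     bool store, uint64_t comp_delay,
                     const std::vector<uint64_t> &rob_deps,
                     const std::vector<uint64_t> &reg_deps)
{
    ProtoMessage::InstDepRecord record;

    // Required fields: sequence number, load/store flags and the
    // computational delay relative to the dependency that completed last.
    record.set_seq_num(seq_num);
    record.set_load(load);
    record.set_store(store);
    record.set_comp_delay(comp_delay);

    // Repeated fields: order (ROB) and register dependencies, given as the
    // sequence numbers of the instructions depended upon.
    for (uint64_t dep : rob_deps)
        record.add_rob_dep(dep);
    for (uint64_t dep : reg_deps)
        record.add_reg_dep(dep);

    // Optional fields such as addr, size, flags, weight and pc would be set
    // here when applicable, e.g. record.set_pc(pc).

    os.write(record);
}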
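On the replay/analysis side, a trace in this format can be consumed outside the simulator. The sketch below assumes a ProtoInputStream counterpart to ProtoOutputStream with a bool read(google::protobuf::Message&) method that returns false at the end of the stream, and that the header message is written first, as described in the comment on InstDepRecordHeader; it simply tallies a few statistics from the records.

// Sketch only: stand-alone reader for the data dependency trace.

#include <cstdint>
#include <iostream>

#include "proto/inst_dep_record.pb.h"
#include "proto/protoio.hh"   // assumed header declaring ProtoInputStream

int
main(int argc, char **argv)
{
    if (argc != 2) {
        std::cerr << "usage: " << argv[0] << " <dep_trace_file>\n";
        return 1;
    }

    ProtoInputStream is(argv[1]);

    // The first message in the stream is the header: capturing object id,
    // format version, tick frequency and the capture window size.
    ProtoMessage::InstDepRecordHeader header;
    if (!is.read(header)) {
        std::cerr << "failed to read trace header\n";
        return 1;
    }
    std::cout << "trace from " << header.obj_id()
              << ", window size " << header.window_size() << "\n";

    // Each subsequent message is one committed instruction.
    ProtoMessage::InstDepRecord record;
    uint64_t num_records = 0, num_mem = 0, num_reg_deps = 0;
    while (is.read(record)) {
        ++num_records;
        if (record.load() || record.store())
            ++num_mem;
        num_reg_deps += record.reg_dep_size();
        // Clear repeated fields before reuse (harmless if read() clears).
        record.Clear();
    }

    std::cout << num_records << " records, " << num_mem
              << " memory records, " << num_reg_deps
              << " register dependencies\n";
    return 0;
}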