diff options
Diffstat (limited to 'src/cpu/o3/probe/elastic_trace.hh')
-rw-r--r-- | src/cpu/o3/probe/elastic_trace.hh | 537 |
1 files changed, 537 insertions, 0 deletions
diff --git a/src/cpu/o3/probe/elastic_trace.hh b/src/cpu/o3/probe/elastic_trace.hh new file mode 100644 index 000000000..5dd2949e8 --- /dev/null +++ b/src/cpu/o3/probe/elastic_trace.hh @@ -0,0 +1,537 @@ +/* + * Copyright (c) 2013 - 2015 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Radhika Jagtap + * Andreas Hansson + * Thomas Grass + */ + +/** + * @file This file describes a trace component which is a cpu probe listener + * used to generate elastic cpu traces. It registers listeners to probe points + * in the fetch, rename, iew and commit stages of the O3CPU. It processes the + * dependency graph of the cpu execution and writes out a protobuf trace. It + * also generates a protobuf trace of the instruction fetch requests. + */ + +#ifndef __CPU_O3_PROBE_ELASTIC_TRACE_HH__ +#define __CPU_O3_PROBE_ELASTIC_TRACE_HH__ + +#include <set> +#include <unordered_map> +#include <utility> + +#include "cpu/o3/dyn_inst.hh" +#include "cpu/o3/impl.hh" +#include "mem/request.hh" +#include "params/ElasticTrace.hh" +#include "proto/inst_dep_record.pb.h" +#include "proto/packet.pb.h" +#include "proto/protoio.hh" +#include "sim/eventq.hh" +#include "sim/probe/probe.hh" + +/** + * The elastic trace is a type of probe listener and listens to probe points + * in multiple stages of the O3CPU. The notify method is called on a probe + * point typically when an instruction successfully progresses through that + * stage. + * + * As different listener methods mapped to the different probe points execute, + * relevant information about the instruction, e.g. timestamps and register + * accesses, are captured and stored in temporary data structures. When the + * instruction progresses through the commit stage, the timing as well as + * dependency information about the instruction is finalised and encapsulated in + * a struct called TraceInfo. TraceInfo objects are collected in a list instead + * of writing them out to the trace file one a time. This is required as the + * trace is processed in chunks to evaluate order dependencies and computational + * delay in case an instruction does not have any register dependencies. By this + * we achieve a simpler algorithm during replay because every record in the + * trace can be hooked onto a record in its past. The trace is written out as + * a protobuf format output file. + * + * The output trace can be read in and played back by the TraceCPU. + */ +class ElasticTrace : public ProbeListenerObject +{ + + public: + typedef typename O3CPUImpl::DynInstPtr DynInstPtr; + typedef typename std::pair<InstSeqNum, PhysRegIndex> SeqNumRegPair; + + /** Constructor */ + ElasticTrace(const ElasticTraceParams *params); + + /** + * Register the probe listeners that is the methods called on a probe point + * notify() call. + */ + void regProbeListeners(); + + /** Register all listeners. */ + void regEtraceListeners(); + + /** Returns the name of the trace probe listener. */ + const std::string name() const; + + /** + * Process any outstanding trace records, flush them out to the protobuf + * output streams and delete the streams at simulation exit. + */ + void flushTraces(); + + /** + * Take the fields of the request class object that are relevant to create + * an instruction fetch request. It creates a protobuf message containing + * the request fields and writes it to instTraceStream. + * + * @param req pointer to the fetch request + */ + void fetchReqTrace(const RequestPtr &req); + + /** + * Populate the execute timestamp field in an InstExecInfo object for an + * instruction in flight. + * + * @param dyn_inst pointer to dynamic instruction in flight + */ + void recordExecTick(const DynInstPtr &dyn_inst); + + /** + * Populate the timestamp field in an InstExecInfo object for an + * instruction in flight when it is execution is complete and it is ready + * to commit. + * + * @param dyn_inst pointer to dynamic instruction in flight + */ + void recordToCommTick(const DynInstPtr &dyn_inst); + + /** + * Record a Read After Write physical register dependency if there has + * been a write to the source register and update the physical register + * map. For this look up the physRegDepMap with this instruction as the + * writer of its destination register. If the dependency falls outside the + * window it is assumed as already complete. Duplicate entries are avoided. + * + * @param dyn_inst pointer to dynamic instruction in flight + */ + void updateRegDep(const DynInstPtr &dyn_inst); + + /** + * When an instruction gets squashed the destination register mapped to it + * is freed up in the rename stage. Remove the register entry from the + * physRegDepMap as well to avoid dependencies on squashed instructions. + * + * @param inst_reg_pair pair of inst. sequence no. and the register + */ + void removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair); + + /** + * Add an instruction that is at the head of the ROB and is squashed only + * if it is a load and a request was sent for it. + * + * @param head_inst pointer to dynamic instruction to be squashed + */ + void addSquashedInst(const DynInstPtr &head_inst); + + /** + * Add an instruction that is at the head of the ROB and is committed. + * + * @param head_inst pointer to dynamic instruction to be committed + */ + void addCommittedInst(const DynInstPtr &head_inst); + + /** Register statistics for the elastic trace. */ + void regStats(); + + /** Event to trigger registering this listener for all probe points. */ + EventWrapper<ElasticTrace, + &ElasticTrace::regEtraceListeners> regEtraceListenersEvent; + + private: + /** + * Used for checking the first window for processing and writing of + * dependency trace. At the start of the program there can be dependency- + * free instructions and such cases are handled differently. + */ + bool firstWin; + + /** + * @defgroup InstExecInfo Struct for storing information before an + * instruction reaches the commit stage, e.g. execute timestamp. + */ + struct InstExecInfo + { + /** + * @ingroup InstExecInfo + * @{ + */ + /** Timestamp when instruction was first processed by execute stage */ + Tick executeTick; + /** + * Timestamp when instruction execution is completed in execute stage + * and instruction is marked as ready to commit + */ + Tick toCommitTick; + /** + * Set of instruction sequence numbers that this instruction depends on + * due to Read After Write data dependency based on physical register. + */ + std::set<InstSeqNum> physRegDepSet; + /** @} */ + + /** Constructor */ + InstExecInfo() + : executeTick(MaxTick), + toCommitTick(MaxTick) + { } + }; + + /** + * Temporary store of InstExecInfo objects. Later on when an instruction + * is processed for commit or retire, if it is chosen to be written to + * the output trace then this information is looked up using the instruction + * sequence number as the key. If it is not chosen then the entry for it in + * the store is cleared. + */ + std::unordered_map<InstSeqNum, InstExecInfo*> tempStore; + + /** + * The last cleared instruction sequence number used to free up the memory + * allocated in the temporary store. + */ + InstSeqNum lastClearedSeqNum; + + /** + * Map for recording the producer of a physical register to check Read + * After Write dependencies. The key is the renamed physical register and + * the value is the instruction sequence number of its last producer. + */ + std::unordered_map<PhysRegIndex, InstSeqNum> physRegDepMap; + + /** + * @defgroup TraceInfo Struct for a record in the instruction dependency + * trace. All information required to process and calculate the + * computational delay is stored in TraceInfo objects. The memory request + * fields for a load or store instruction are also included here. Note + * that the structure TraceInfo does not store pointers to children + * or parents. The dependency trace is maintained as an ordered collection + * of records for writing to the output trace and not as a tree data + * structure. + */ + struct TraceInfo + { + /** + * @ingroup TraceInfo + * @{ + */ + /* Instruction sequence number. */ + InstSeqNum instNum; + /* Tick when instruction was in execute stage. */ + Tick executeTick; + /* Tick when instruction was marked ready and sent to commit stage. */ + Tick toCommitTick; + /* Tick when instruction was committed. */ + Tick commitTick; + /* If instruction was a load, a store, committed. */ + bool load, store, commit; + /* List of order dependencies. */ + std::list<InstSeqNum> robDepList; + /* List of physical register RAW dependencies. */ + std::list<InstSeqNum> physRegDepList; + /** + * Computational delay after the last dependent inst. completed. + * A value of -1 which means instruction has no dependencies. + */ + int64_t compDelay; + /* Number of dependents. */ + uint32_t numDepts; + /* The instruction PC for a load, store or non load/store. */ + Addr pc; + /* Request flags in case of a load/store instruction */ + Request::FlagsType reqFlags; + /* Request address in case of a load/store instruction */ + Addr addr; + /* Request size in case of a load/store instruction */ + unsigned size; + /** @} */ + + /** + * Get the execute tick of the instruction. + * + * @return Tick when instruction was executed + */ + Tick getExecuteTick() const; + }; + + /** + * The instruction dependency trace containing TraceInfo objects. The + * container implemented is sequential as dependencies obey commit + * order (program order). For example, if B is dependent on A then B must + * be committed after A. Thus records are updated with dependency + * information and written to the trace in commit order. This ensures that + * when a graph is reconstructed from the trace during replay, all the + * dependencies are stored in the graph before the dependent itself is + * added. This facilitates creating a tree data structure during replay, + * i.e. adding children as records are read from the trace in an efficient + * manner. + */ + std::vector<TraceInfo*> depTrace; + + /** + * Map where the instruction sequence number is mapped to the pointer to + * the TraceInfo object. + */ + std::unordered_map<InstSeqNum, TraceInfo*> traceInfoMap; + + /** Typedef of iterator to the instruction dependency trace. */ + typedef typename std::vector<TraceInfo*>::iterator depTraceItr; + + /** Typedef of the reverse iterator to the instruction dependency trace. */ + typedef typename std::reverse_iterator<depTraceItr> depTraceRevItr; + + /** + * The maximum distance for a dependency and is set by a top level + * level parameter. It must be equal to or greater than the number of + * entries in the ROB. This variable is used as the length of the sliding + * window for processing the dependency trace. + */ + uint32_t depWindowSize; + + /** Protobuf output stream for data dependency trace */ + ProtoOutputStream* dataTraceStream; + + /** Protobuf output stream for instruction fetch trace. */ + ProtoOutputStream* instTraceStream; + + /** Number of instructions after which to enable tracing. */ + const InstSeqNum startTraceInst; + + /** + * Whther the elastic trace listener has been registered for all probes. + * + * When enabling tracing after a specified number of instructions have + * committed, check this to prevent re-registering the listener. + */ + bool allProbesReg; + + /** Pointer to the O3CPU that is this listener's parent a.k.a. manager */ + FullO3CPU<O3CPUImpl>* cpu; + + /** + * Add a record to the dependency trace depTrace which is a sequential + * container. A record is inserted per committed instruction and in the same + * order as the order in which instructions are committed. + * + * @param head_inst Pointer to the instruction which is head of the + * ROB and ready to commit + * @param exec_info_ptr Pointer to InstExecInfo for that instruction + * @param commit True if instruction is committed, false if squashed + */ + void addDepTraceRecord(const DynInstPtr &head_inst, + InstExecInfo* exec_info_ptr, bool commit); + + /** + * Clear entries in the temporary store of execution info objects to free + * allocated memory until the present instruction being added to the trace. + * + * @param head_inst pointer to dynamic instruction + */ + void clearTempStoreUntil(const DynInstPtr head_inst); + + /** + * Calculate the computational delay between an instruction and a + * subsequent instruction that has an ROB (order) dependency on it + * + * @param past_record Pointer to instruction + * + * @param new_record Pointer to subsequent instruction having an ROB + * dependency on the instruction pointed to by + * past_record + */ + void compDelayRob(TraceInfo* past_record, TraceInfo* new_record); + + /** + * Calculate the computational delay between an instruction and a + * subsequent instruction that has a Physical Register (data) dependency on + * it. + * + * @param past_record Pointer to instruction + * + * @param new_record Pointer to subsequent instruction having a Physical + * Register dependency on the instruction pointed to + * by past_record + */ + void compDelayPhysRegDep(TraceInfo* past_record, TraceInfo* new_record); + + /** + * Write out given number of records to the trace starting with the first + * record in depTrace and iterating through the trace in sequence. A + * record is deleted after it is written. + * + * @param num_to_write Number of records to write to the trace + */ + void writeDepTrace(uint32_t num_to_write); + + /** + * Reverse iterate through the graph, search for a store-after-store or + * store-after-load dependency and update the new node's Rob dependency list. + * + * If a dependency is found, then call the assignRobDep() method that + * updates the store with the dependency information. This function is only + * called when a new store node is added to the trace. + * + * @param new_record pointer to new store record + * @param find_load_not_store true for searching store-after-load and false + * for searching store-after-store dependency + */ + void updateCommitOrderDep(TraceInfo* new_record, bool find_load_not_store); + + /** + * Reverse iterate through the graph, search for an issue order dependency + * for a new node and update the new node's Rob dependency list. + * + * If a dependency is found, call the assignRobDep() method that updates + * the node with its dependency information. This function is called in + * case a new node to be added to the trace is dependency-free or its + * dependency got discarded because the dependency was outside the window. + * + * @param new_record pointer to new record to be added to the trace + */ + void updateIssueOrderDep(TraceInfo* new_record); + + /** + * The new_record has an order dependency on a past_record, thus update the + * new record's Rob dependency list and increment the number of dependents + * of the past record. + * + * @param new_record pointer to new record + * @param past_record pointer to record that new_record has a rob + * dependency on + */ + void assignRobDep(TraceInfo* past_record, TraceInfo* new_record); + + /** + * Check if past record is a store sent earlier than the execute tick. + * + * @param past_record pointer to past store + * @param execute_tick tick with which to compare past store's commit tick + * + * @return true if past record is store sent earlier + */ + bool hasStoreCommitted(TraceInfo* past_record, Tick execute_tick) const; + + /** + * Check if past record is a load that completed earlier than the execute + * tick. + * + * @param past_record pointer to past load + * @param execute_tick tick with which to compare past load's complete + * tick + * + * @return true if past record is load completed earlier + */ + bool hasLoadCompleted(TraceInfo* past_record, Tick execute_tick) const; + + /** + * Check if past record is a load sent earlier than the execute tick. + * + * @param past_record pointer to past load + * @param execute_tick tick with which to compare past load's send tick + * + * @return true if past record is load sent earlier + */ + bool hasLoadBeenSent(TraceInfo* past_record, Tick execute_tick) const; + + /** + * Check if past record is a comp node that completed earlier than the + * execute tick. + * + * @param past_record pointer to past comp node + * @param execute_tick tick with which to compare past comp node's + * completion tick + * + * @return true if past record is comp completed earlier + */ + bool hasCompCompleted(TraceInfo* past_record, Tick execute_tick) const; + + /** Number of register dependencies recorded during tracing */ + Stats::Scalar numRegDep; + + /** + * Number of stores that got assigned a commit order dependency + * on a past load/store. + */ + Stats::Scalar numOrderDepStores; + + /** + * Number of load insts that got assigned an issue order dependency + * because they were dependency-free. + */ + Stats::Scalar numIssueOrderDepLoads; + + /** + * Number of store insts that got assigned an issue order dependency + * because they were dependency-free. + */ + Stats::Scalar numIssueOrderDepStores; + + /** + * Number of non load/store insts that got assigned an issue order + * dependency because they were dependency-free. + */ + Stats::Scalar numIssueOrderDepOther; + + /** Number of filtered nodes */ + Stats::Scalar numFilteredNodes; + + /** Maximum number of dependents on any instruction */ + Stats::Scalar maxNumDependents; + + /** + * Maximum size of the temporary store mostly useful as a check that it is + * not growing + */ + Stats::Scalar maxTempStoreSize; + + /** + * Maximum size of the map that holds the last writer to a physical + * register. + * */ + Stats::Scalar maxPhysRegDepMapSize; + +}; +#endif//__CPU_O3_PROBE_ELASTIC_TRACE_HH__ |