summaryrefslogtreecommitdiff
path: root/src/mem/ruby/profiler/Profiler.hh
blob: 2961a81d1eb4770b7a2f35740d5fe277b5ed219a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
/*
 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
   This file has been modified by Kevin Moore and Dan Nussbaum of the
   Scalable Systems Research Group at Sun Microsystems Laboratories
   (http://research.sun.com/scalable/) to support the Adaptive
   Transactional Memory Test Platform (ATMTP).

   Please send email to atmtp-interest@sun.com with feedback, questions, or
   to request future announcements about ATMTP.

   ----------------------------------------------------------------------

   File modification date: 2008-02-23

   ----------------------------------------------------------------------
*/

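/*
 * Profiler.hh
 *
 * Description: Declares the Profiler class, a Consumer that collects
 * statistics for the Ruby memory system, and its stream output operator.
 *
 * $Id$
 *
 */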

#ifndef PROFILER_H
#define PROFILER_H

#include "Global.hh"
#include "GenericMachineType.hh"
#include "RubyConfig.hh"
#include "Histogram.hh"
#include "Consumer.hh"
#include "AccessModeType.hh"
#include "AccessType.hh"
#include "NodeID.hh"
#include "MachineID.hh"
#include "PrefetchBit.hh"
#include "Address.hh"
#include "Set.hh"
#include "CacheRequestType.hh"
#include "GenericRequestType.hh"
//#include "XactProfiler.hh" //gem5:Arka for decommissioning of log_tm

// Forward declarations: this header only uses these types through
// references (CacheMsg) and pointers (CacheProfiler, AddressProfiler), so
// their full definitions are not needed here.
class CacheMsg;
class CacheProfiler;
class AddressProfiler;

// Forward declaration of the Map class template; the Profiler class below
// only declares pointers to Map instantiations.
template <class KEY_TYPE, class VALUE_TYPE> class Map;

// Profiler: statistics-gathering class for the Ruby memory system.
//
// The class derives from Consumer and exposes a large set of profile*(),
// count*() and *Sample() entry points. The ones defined inline in this header
// either add a sample to a Histogram member or increment a counter member;
// the rest are only declared here and are defined outside this header
// (presumably in Profiler.cc -- confirm).
//
// Copying is disabled by declaring the copy constructor and the assignment
// operator private (see the private section).
class Profiler : public Consumer {
public:
  // Constructors
  Profiler();

  // Destructor
  ~Profiler();

  // Public Methods
  // NOTE(review): wakeup() is presumably the Consumer callback -- confirm
  // against Consumer.hh.
  void wakeup();

  // Configuration of the periodic statistics output.
  void setPeriodicStatsFile(const string& filename);
  void setPeriodicStatsInterval(integer_t period);

  // NOTE(review): takes a non-const char*, unlike setPeriodicStatsFile();
  // changing it would require updating the out-of-view definition as well.
  void setXactVisualizerFile(char* filename);

  // Reporting. printShortStats() is printStats() with short_stats == true.
  void printStats(ostream& out, bool short_stats=false);
  void printShortStats(ostream& out) { printStats(out, true); }
  void printTraceStats(ostream& out) const;
  void clearStats();
  void printConfig(ostream& out) const;
  void printResourceUsage(ostream& out) const;

  // Accessors for the two AddressProfiler members. They return the stored
  // raw pointers; ownership is not visible from this header.
  AddressProfiler* getAddressProfiler() { return m_address_profiler_ptr; }
  AddressProfiler* getInstructionProfiler() { return m_inst_profiler_ptr; }
  //XactProfiler*    getXactProfiler() { return m_xact_profiler_ptr;} //gem5:Arka for decommissioning of log_tm

  // Per-request statistics samples. addSecondaryStatSample() is overloaded
  // on the request type (GenericRequestType vs. CacheRequestType).
  void addPrimaryStatSample(const CacheMsg& msg, NodeID id);
  void addSecondaryStatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id);
  void addSecondaryStatSample(CacheRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id);
  void addAddressTraceSample(const CacheMsg& msg, NodeID id);

  void profileRequest(const string& requestStr);
  void profileSharing(const Address& addr, AccessType type, NodeID requestor, const Set& sharers, const Set& owner);

  void profileMulticastRetry(const Address& addr, int count);

  void profileFilterAction(int action);

  void profileConflictingRequests(const Address& addr);
  // The next three each add one sample to the matching Histogram member.
  void profileOutstandingRequest(int outstanding) { m_outstanding_requests.add(outstanding); }
  void profileOutstandingPersistentRequest(int outstanding) { m_outstanding_persistent_requests.add(outstanding); }
  void profileAverageLatencyEstimate(int latency) { m_average_latency_estimate.add(latency); }

  // Increment the BA unicast / broadcast counters by one.
  void countBAUnicast() { m_num_BA_unicasts++; }
  void countBABroadcast() { m_num_BA_broadcasts++; }

  void recordPrediction(bool wasGood, bool wasPredicted);

  void startTransaction(int cpu);
  void endTransaction(int cpu);
  void profilePFWait(Time waitTime);

  void controllerBusy(MachineID machID);
  void bankBusy();
  void missLatency(Time t, CacheRequestType type, GenericMachineType respondingMach);
  void swPrefetchLatency(Time t, CacheRequestType type, GenericMachineType respondingMach);
  // Usage samples: each call adds 'num' to the corresponding Histogram.
  void stopTableUsageSample(int num) { m_stopTableProfile.add(num); }
  void L1tbeUsageSample(int num) { m_L1tbeProfile.add(num); }
  void L2tbeUsageSample(int num) { m_L2tbeProfile.add(num); }
  void sequencerRequests(int num) { m_sequencer_requests.add(num); }
  // Records two samples per call: 'size' into m_store_buffer_size and
  // 'blocks' into m_store_buffer_blocks.
  void storeBuffer(int size, int blocks) { m_store_buffer_size.add(size); m_store_buffer_blocks.add(blocks);}

  void profileGetXMaskPrediction(const Set& pred_set);
  void profileGetSMaskPrediction(const Set& pred_set);
  void profileTrainingMask(const Set& pred_set);
  void profileTransition(const string& component, NodeID id, NodeID version, Address addr,
                         const string& state, const string& event,
                         const string& next_state, const string& note);
  void profileMsgDelay(int virtualNetwork, int delayCycles);

  // Called by the stream output operator declared after this class.
  void print(ostream& out) const;

  int64 getTotalInstructionsExecuted() const;
  int64 getTotalTransactionsExecuted() const;

  //---- begin Transactional Memory CODE
  // The forwarders inside this #if 0 region are compiled out: they call
  // getXactProfiler(), which is commented out earlier in this class.
  #if 0  //gem5:Arka for decommissioning of log_tm
  void profileTransCycles(int proc, int cycles) { getXactProfiler()->profileTransCycles(proc, cycles);}
  void profileNonTransCycles(int proc, int cycles) { getXactProfiler()->profileNonTransCycles(proc, cycles);}
  void profileStallTransCycles(int proc, int cycles) { getXactProfiler()->profileStallTransCycles(proc, cycles); }
  void profileStallNonTransCycles(int proc, int cycles) { getXactProfiler()->profileStallNonTransCycles(proc, cycles); }
  void profileAbortingTransCycles(int proc, int cycles) { getXactProfiler()->profileAbortingTransCycles(proc, cycles); }
  void profileCommitingTransCycles(int proc, int cycles) { getXactProfiler()->profileCommitingTransCycles(proc, cycles); }
  void profileBarrierCycles(int proc, int cycles) { getXactProfiler()->profileBarrierCycles(proc, cycles);}
  void profileBackoffTransCycles(int proc, int cycles) { getXactProfiler()->profileBackoffTransCycles(proc, cycles); }
  void profileGoodTransCycles(int proc, int cycles) {getXactProfiler()->profileGoodTransCycles(proc, cycles); }

  #endif //gem5:Arka TODO clean up the rest of these functions as well
  // Transactional-memory hooks that are still declared (not compiled out).
  void profileTransaction(int size, int logSize, int readS, int writeS, int overflow_readS, int overflow_writeS, int retries, int cycles, bool nacked, int loadMisses, int storeMisses, int instrCount, int xid);
  void profileBeginTransaction(NodeID id, int tid, int xid, int thread, Address pc, bool isOpen);
  void profileCommitTransaction(NodeID id, int tid, int xid, int thread, Address pc, bool isOpen);
  void profileLoadTransaction(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc);
  void profileLoad(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc);
  void profileStoreTransaction(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc);
  void profileStore(NodeID id, int tid, int xid, int thread, Address addr, Address logicalAddress, Address pc);
  void profileLoadOverflow(NodeID id, int tid, int xid, int thread, Address addr, bool l1_overflow);
  void profileStoreOverflow(NodeID id, int tid, int xid, int thread, Address addr, bool l1_overflow);
  void profileNack(NodeID id, int tid, int xid, int thread, int nacking_thread, NodeID nackedBy, Address addr, Address logicalAddress, Address pc, uint64 seq_ts, uint64 nack_ts, bool possibleCycle);
  void profileExposedConflict(NodeID id, int xid, int thread, Address addr, Address pc);
  void profileTransWB();
  void profileExtraWB();
  void profileInferredAbort();
  void profileAbortTransaction(NodeID id, int tid, int xid, int thread, int delay, int abortingThread, int abortingProc, Address addr, Address pc);
  void profileExceptionStart(bool xact, NodeID proc_no, int thread, int val, int trap_level, uinteger_t pc, uinteger_t npc);
  void profileExceptionDone(bool xact, NodeID proc_no, int thread, int val, int trap_level, uinteger_t pc, uinteger_t npc, uinteger_t tpc, uinteger_t tnpc);
  void profileTimerInterrupt(NodeID id,
                             uinteger_t tick, uinteger_t tick_cmpr,
                             uinteger_t stick, uinteger_t stick_cmpr,
                             int trap_level,
                             uinteger_t pc, uinteger_t npc,
                             uinteger_t pstate, int pil);

  void profileAbortDelayConstants(int handlerStartupDelay, int handlerPerBlockDelay);
  void profileXactChange(int procs, int cycles);
  void profileReadSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread);
  void profileWriteSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread);
  void profileRemoteReadSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread);
  void profileRemoteWriteSet(Address addr, bool bf_filter_result, bool perfect_filter_result, NodeID id, int thread);


  void profileReadFilterBitsSet(int xid, int bits, bool isCommit);
  void profileWriteFilterBitsSet(int xid, int bits, bool isCommit);

  void printTransactionState(bool can_skip);

  void watchpointsFalsePositiveTrigger();
  void watchpointsTrueTrigger();

  void profileTransactionLogOverflow(NodeID id, Address addr, Address pc);
  void profileTransactionCacheOverflow(NodeID id, Address addr, Address pc);
  void profileGetCPS(NodeID id, uint32 cps, Address pc);
  void profileTransactionTCC(NodeID id, Address pc);
  void profileTransactionUnsupInst(NodeID id, Address pc);
  void profileTransactionSaveInst(NodeID id, Address pc);
  void profileTransactionRestoreInst(NodeID id, Address pc);

  //---- end Transactional Memory CODE

  void rubyWatch(int proc);
  bool watchAddress(Address addr);

  // return Ruby's start time (the value stored in m_ruby_start)
  Time getRubyStartTime(){
    return m_ruby_start;
  }

  // added for MemoryControl:
  // Each hook has a similarly named m_mem* counter in the private section;
  // how the counters are updated is defined outside this header.
  void profileMemReq(int bank);
  void profileMemBankBusy();
  void profileMemBusBusy();
  void profileMemTfawBusy();
  void profileMemReadWriteBusy();
  void profileMemDataBusBusy();
  void profileMemRefresh();
  void profileMemRead();
  void profileMemWrite();
  void profileMemWaitCycles(int cycles);
  void profileMemInputQ(int cycles);
  void profileMemBankQ(int cycles);
  void profileMemArbWait(int cycles);
  void profileMemRandBusy();
  void profileMemNotOld();

private:
  // Private Methods
  void addL2StatSample(GenericRequestType requestType, AccessModeType type, int msgSize, PrefetchBit pfBit, NodeID id);
  void addL1DStatSample(const CacheMsg& msg, NodeID id);
  void addL1IStatSample(const CacheMsg& msg, NodeID id);

  // Maps a CacheRequestType onto a GenericRequestType.
  GenericRequestType CacheRequestType_to_GenericRequestType(const CacheRequestType& type);

  // Private copy constructor and assignment operator
  // Declared private so that Profiler objects cannot be copied from outside
  // the class (presumably left undefined -- confirm in the implementation).
  Profiler(const Profiler& obj);
  Profiler& operator=(const Profiler& obj);

  // Data Members (m_ prefix)
  // NOTE: non-static data members are initialized in declaration order, so
  // reordering the members below can change constructor behavior.
  CacheProfiler* m_L1D_cache_profiler_ptr;
  CacheProfiler* m_L1I_cache_profiler_ptr;
  CacheProfiler* m_L2_cache_profiler_ptr;
  AddressProfiler* m_address_profiler_ptr;  // returned by getAddressProfiler()
  AddressProfiler* m_inst_profiler_ptr;     // returned by getInstructionProfiler()

//  XactProfiler*   m_xact_profiler_ptr; // gem5:Arka for decommissioning of log_tm

  Vector<int64> m_instructions_executed_at_start;
  Vector<int64> m_cycles_executed_at_start;

  // Periodic-stats and transaction-visualizer output state; see
  // setPeriodicStatsFile(), setPeriodicStatsInterval() and
  // setXactVisualizerFile().
  ostream* m_periodic_output_file_ptr;
  integer_t m_stats_period;
  std::fstream m_xact_visualizer;
  std::ostream *m_xact_visualizer_ptr;

  Time m_ruby_start;  // returned by getRubyStartTime()
  time_t m_real_time_start_time;

  int m_num_BA_unicasts;    // incremented by countBAUnicast()
  int m_num_BA_broadcasts;  // incremented by countBABroadcast()

  Vector<integer_t> m_perProcTotalMisses;
  Vector<integer_t> m_perProcUserMisses;
  Vector<integer_t> m_perProcSupervisorMisses;
  Vector<integer_t> m_perProcStartTransaction;
  Vector<integer_t> m_perProcEndTransaction;
  Vector < Vector < integer_t > > m_busyControllerCount;
  integer_t m_busyBankCount;
  Histogram m_multicast_retry_histogram;

  Histogram m_L1tbeProfile;      // fed by L1tbeUsageSample()
  Histogram m_L2tbeProfile;      // fed by L2tbeUsageSample()
  Histogram m_stopTableProfile;  // fed by stopTableUsageSample()

  Histogram m_filter_action_histogram;
  Histogram m_tbeProfile;

  Histogram m_sequencer_requests;   // fed by sequencerRequests()
  Histogram m_store_buffer_size;    // fed by storeBuffer()
  Histogram m_store_buffer_blocks;  // fed by storeBuffer()
  Histogram m_read_sharing_histogram;
  Histogram m_write_sharing_histogram;
  Histogram m_all_sharing_histogram;
  int64 m_cache_to_cache;
  int64 m_memory_to_cache;

  Histogram m_prefetchWaitHistogram;

  Vector<Histogram> m_missLatencyHistograms;
  Vector<Histogram> m_machLatencyHistograms;
  Histogram m_L2MissLatencyHistogram;
  Histogram m_allMissLatencyHistogram;

  Histogram  m_allSWPrefetchLatencyHistogram;
  Histogram  m_SWPrefetchL2MissLatencyHistogram;
  Vector<Histogram> m_SWPrefetchLatencyHistograms;
  Vector<Histogram> m_SWPrefetchMachLatencyHistograms;

  Histogram m_delayedCyclesHistogram;
  Histogram m_delayedCyclesNonPFHistogram;
  Vector<Histogram> m_delayedCyclesVCHistograms;

  int m_predictions;
  int m_predictionOpportunities;
  int m_goodPredictions;

  Histogram m_gets_mask_prediction;
  Histogram m_getx_mask_prediction;
  Histogram m_explicit_training_mask;

  // For profiling possibly conflicting requests
  Map<Address, Time>* m_conflicting_map_ptr;
  Histogram m_conflicting_histogram;

  Histogram m_outstanding_requests;             // fed by profileOutstandingRequest()
  Histogram m_outstanding_persistent_requests;  // fed by profileOutstandingPersistentRequest()

  Histogram m_average_latency_estimate;  // fed by profileAverageLatencyEstimate()

  //---- begin Transactional Memory CODE
  Map <int, int>* m_procsInXactMap_ptr;

  Histogram m_xactCycles;
  Histogram m_xactLogs;
  Histogram m_xactReads;
  Histogram m_xactWrites;
  Histogram m_xactOverflowReads;
  Histogram m_xactOverflowWrites;
  Histogram m_xactOverflowTotalReads;
  Histogram m_xactOverflowTotalWrites;
  Histogram m_xactSizes;
  Histogram m_xactRetries;
  Histogram m_abortDelays;
  Histogram m_xactLoadMisses;
  Histogram m_xactStoreMisses;
  Histogram m_xactInstrCount;
  int m_xactNacked;
  int m_transactionAborts;
  int m_transWBs;
  int m_extraWBs;
  // NOTE(review): "Starup" looks like a typo for "Startup"; renaming it would
  // also require updating every use outside this header.
  int m_abortStarupDelay;
  int m_abortPerBlockDelay;
  int m_inferredAborts;
  Map <int, int>* m_nackXIDMap_ptr;
  // pairs of XIDs involved in NACKs
  Map<int, Map<int, int> * > * m_nackXIDPairMap_ptr;
  Map <Address, int>* m_nackPCMap_ptr;
  Map <int, int>* m_xactExceptionMap_ptr;
  Map <int, int>* m_abortIDMap_ptr;
  Map <int, int>* m_commitIDMap_ptr;
  Map <int, int>* m_xactRetryIDMap_ptr;
  Map <int, int>* m_xactCyclesIDMap_ptr;
  Map <int, int>* m_xactReadSetIDMap_ptr;
  Map <int, int>* m_xactWriteSetIDMap_ptr;
  Map <int, int>* m_xactLoadMissIDMap_ptr;
  Map <int, int>* m_xactStoreMissIDMap_ptr;
  Map <int, integer_t> *m_xactInstrCountIDMap_ptr;
  Map <Address, int>* m_abortPCMap_ptr;
  Map <Address, int>* m_abortAddressMap_ptr;
  Map <Address, int>* m_readSetMatch_ptr;
  Map <Address, int>* m_readSetNoMatch_ptr;
  Map <Address, int>* m_writeSetMatch_ptr;
  Map <Address, int>* m_writeSetNoMatch_ptr;
  Map <Address, int>* m_remoteReadSetMatch_ptr;
  Map <Address, int>* m_remoteReadSetNoMatch_ptr;
  Map <Address, int>* m_remoteWriteSetMatch_ptr;
  Map <Address, int>* m_remoteWriteSetNoMatch_ptr;
  long long int m_readSetEmptyChecks;
  long long int m_readSetMatch;
  long long int m_readSetNoMatch;
  long long int m_writeSetEmptyChecks;
  long long int m_writeSetMatch;
  long long int m_writeSetNoMatch;
  // Histograms keyed by an int (presumably the transaction id passed to
  // profileReadFilterBitsSet()/profileWriteFilterBitsSet() -- confirm).
  Map<int, Histogram> * m_xactReadFilterBitsSetOnCommit;
  Map<int, Histogram> * m_xactReadFilterBitsSetOnAbort;
  Map<int, Histogram> * m_xactWriteFilterBitsSetOnCommit;
  Map<int, Histogram> * m_xactWriteFilterBitsSetOnAbort;

  unsigned int m_watchpointsFalsePositiveTrigger;
  unsigned int m_watchpointsTrueTrigger;

  int m_transactionUnsupInsts;
  int m_transactionSaveRestAborts;

  int m_transactionLogOverflows;
  int m_transactionCacheOverflows;

  //---- end Transactional Memory CODE

  Map<Address, int>* m_watch_address_list_ptr;
  // counts all initiated cache request including PUTs
  int m_requests;
  Map <string, int>* m_requestProfileMap_ptr;

  Time m_xact_visualizer_last;

  // added for MemoryControl:
  // Counters named after the profileMem*() hooks in the public section.
  long long int m_memReq;
  long long int m_memBankBusy;
  long long int m_memBusBusy;
  long long int m_memTfawBusy;
  long long int m_memReadWriteBusy;
  long long int m_memDataBusBusy;
  long long int m_memRefresh;
  long long int m_memRead;
  long long int m_memWrite;
  long long int m_memWaitCycles;
  long long int m_memInputQ;
  long long int m_memBankQ;
  long long int m_memArbWait;
  long long int m_memRandBusy;
  long long int m_memNotOld;
  Vector<long long int> m_memBankCount;

};

// Stream-insertion operator for Profiler (declaration)
ostream& operator<<(ostream& out, const Profiler& obj);

// ******************* Definitions *******************

// Stream-insertion operator for Profiler (definition).
// Delegates the formatting to Profiler::print(), then flushes the stream
// and returns it so that insertions can be chained.
extern inline
ostream& operator<<(ostream& out, const Profiler& obj)
{
  obj.print(out);
  return out << flush;
}

#endif //PROFILER_H