From 51d38a4b79d54a97d438ecdba5ce94d6e9172eef Mon Sep 17 00:00:00 2001
From: Andreas Sandberg <andreas.sandberg@arm.com>
Date: Fri, 22 Apr 2016 22:26:56 +0100
Subject: stats: Add beta support for HDF5 stat dumps

This changeset adds support for stat dumps in the HDF5 file
format. HDF5 is a binary data format that represents data in a
file-system-like balanced tree. It has native support for
N-dimensional arrays and binary data (e.g., frame buffers).

It has the following benefits over traditional text stat files:

  * Efficient storage of time series (multiple stat dumps)

  * Fast lookup of stats

  * Plenty of existing tooling (e.g., Python libraries and graphical
    viewers)

  * File format can be used to store frame buffers together with
    normal stats.

Drawbacks:

  * Large startup cost (single stat dump larger than text equivalent)

  * Stat dumps are slower than text

Known limitations:

  * Distributions and histograms aren't supported.

HDF5 stat output can be enabled using the 'h5' URL scheme when
overriding the stat file name on gem5's command line. The following
parameters are supported:

  * chunking (unsigned): Number of time steps to pre-allocate
    (default: 10)

  * desc (bool): Output stat descriptions (default: True)

  * formulas (bool): Output derived stats (default: True)

Example gem5 command line:

./build/ARM/gem5.opt \
  --stats-file="h5://stats.h5?desc=False;formulas=False" \
  configs/example/fs.py

Example Python stat consumer that computes IPC:
  import h5py

  f = h5py.File('stats.h5', 'r')
  group = f['/system/cpu']
  for i, c in zip(group['committedInsts'], group['numCycles']):
      print i, c, i / c

Change-Id: I351c6cbff2fb7bef9012f47876ba227ed288975b
Signed-off-by: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/8121
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Ciro Santilli <ciro.santilli@arm.com>
---
 src/python/m5/stats/__init__.py | 43 ++++++++++++++++++++++++++++++++++++++++-
 src/python/pybind11/stats.cc    | 11 ++++++++++-
 2 files changed, 52 insertions(+), 2 deletions(-)

(limited to 'src/python')

diff --git a/src/python/m5/stats/__init__.py b/src/python/m5/stats/__init__.py
index 14801447d..bca311da5 100644
--- a/src/python/m5/stats/__init__.py
+++ b/src/python/m5/stats/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2017, 2019 Arm Limited
+# Copyright (c) 2017-2019 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -119,13 +119,54 @@ def _textFactory(fn, desc=True):
 
     return _m5.stats.initText(fn, desc)
 
+@_url_factory
+def _hdf5Factory(fn, chunking=10, desc=True, formulas=True):
+    """Output stats in HDF5 format.
+
+    The HDF5 file format is a structured binary file format. It has
+    the following benefits over traditional text stat files:
+
+      * Efficient storage of time series (multiple stat dumps)
+      * Fast lookup of stats
+      * Plenty of existing tooling (e.g., Python libraries and graphical
+        viewers)
+      * File format can be used to store frame buffers together with
+        normal stats.
+
+    There are some drawbacks compared to the default text format:
+      * Large startup cost (single stat dump larger than text equivalent)
+      * Stat dumps are slower than text
+
+
+    Known limitations:
+      * Distributions and histograms are currently unsupported.
+      * No support for forking.
+
+
+    Parameters:
+      * chunking (unsigned): Number of time steps to pre-allocate (default: 10)
+      * desc (bool): Output stat descriptions (default: True)
+      * formulas (bool): Output derived stats (default: True)
+
+    Example:
+      h5://stats.h5?desc=False;chunking=100;formulas=False
+
+    """
+
+    if hasattr(_m5.stats, "initHDF5"):
+        return _m5.stats.initHDF5(fn, chunking, desc, formulas)
+    else:
+        fatal("HDF5 support not enabled at compile time")
+
 factories = {
     # Default to the text factory if we're given a naked path
     "" : _textFactory,
     "file" : _textFactory,
     "text" : _textFactory,
+    "h5" : _hdf5Factory,
 }
 
+
 def addStatVisitor(url):
     """Add a stat visitor specified using a URL string
 
diff --git a/src/python/pybind11/stats.cc b/src/python/pybind11/stats.cc
index 1302c7cc5..190c78d52 100644
--- a/src/python/pybind11/stats.cc
+++ b/src/python/pybind11/stats.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2019 ARM Limited
+ * Copyright (c) 2017-2019 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -41,14 +41,20 @@
  *          Andreas Sandberg
  */
 
+#include "config/use_hdf5.hh"
+
 #include "pybind11/pybind11.h"
 #include "pybind11/stl.h"
 
 #include "base/statistics.hh"
 #include "base/stats/text.hh"
+#if USE_HDF5
+#include "base/stats/hdf5.hh"
+#endif
 #include "sim/stat_control.hh"
 #include "sim/stat_register.hh"
 
+
 namespace py = pybind11;
 
 namespace Stats {
@@ -77,6 +83,9 @@ pybind_init_stats(py::module &m_native)
     m
         .def("initSimStats", &Stats::initSimStats)
         .def("initText", &Stats::initText, py::return_value_policy::reference)
+#if USE_HDF5
+        .def("initHDF5", &Stats::initHDF5)
+#endif
         .def("registerPythonStatsHandlers",
              &Stats::registerPythonStatsHandlers)
         .def("schedStatEvent", &Stats::schedStatEvent)
-- 
cgit v1.2.3