diff options
author | Andrew Bardsley <Andrew.Bardsley@arm.com> | 2014-07-23 16:09:04 -0500 |
---|---|---|
committer | Andrew Bardsley <Andrew.Bardsley@arm.com> | 2014-07-23 16:09:04 -0500 |
commit | 0e8a90f06bd3db00f700891a33458353478cce76 (patch) | |
tree | 50742efcc18254a36e80029b522139e8bd601dc2 /src | |
parent | 040fa23d01109c68d194d2517df777844e4e2f13 (diff) | |
download | gem5-0e8a90f06bd3db00f700891a33458353478cce76.tar.xz |
cpu: `Minor' in-order CPU model
This patch contains a new CPU model named `Minor'. Minor models a
four-stage in-order execution pipeline (fetch lines, decompose lines into
macroops, decompose macroops into microops, execute).
The model was developed to support the ARM ISA but should be adaptable
to support all the remaining gem5 ISAs. It currently also works for
Alpha, and regression tests are included for ARM and Alpha (including
Linux boot).
Documentation for the model can be found in src/doc/inside-minor.doxygen and
its internal operations can be visualised using the Minorview tool
utils/minorview.py.
Minor was designed to be fairly simple and not to engage in a lot of
instruction annotation. As such, it currently has very few gathered
stats and may lack other gem5 features.
Minor is faster than the o3 model. Sample results:
Benchmark      |   Stat host_seconds (s)
---------------+--------v--------v--------
(on ARM, opt)  | simple | o3     | minor
               | timing | timing | timing
---------------+--------+--------+--------
10.linux-boot  |    169 |   1883 |   1075
10.mcf         |    117 |    967 |    491
20.parser      |    668 |   6315 |   3146
30.eon         |    542 |   3413 |   2414
40.perlbmk     |   2339 |  20905 |  11532
50.vortex      |    122 |   1094 |    588
60.bzip2       |   2045 |  18061 |   9662
70.twolf       |    207 |   2736 |   1036
Diffstat (limited to 'src')
44 files changed, 13521 insertions, 1 deletions
diff --git a/src/base/trace.hh b/src/base/trace.hh index dbeffdc8b..eb0ab9dae 100644 --- a/src/base/trace.hh +++ b/src/base/trace.hh @@ -72,6 +72,20 @@ struct StringWrap inline const std::string &name() { return Trace::DefaultName; } +// Interface for things with names. (cf. SimObject but without other +// functionality). This is useful when using DPRINTF +class Named +{ + protected: + const std::string _name; + + public: + Named(const std::string &name_) : _name(name_) { } + + public: + const std::string &name() const { return _name; } +}; + // // DPRINTF is a debugging trace facility that allows one to // selectively enable tracing statements. To use DPRINTF, there must diff --git a/src/cpu/SConscript b/src/cpu/SConscript index ca9c6a791..1ea92114a 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -106,6 +106,7 @@ SimObject('ExeTracer.py') SimObject('IntelTrace.py') SimObject('IntrControl.py') SimObject('NativeTrace.py') +SimObject('TimingExpr.py') Source('activity.cc') Source('base.cc') @@ -123,6 +124,7 @@ Source('static_inst.cc') Source('simple_thread.cc') Source('thread_context.cc') Source('thread_state.cc') +Source('timing_expr.cc') if env['TARGET_ISA'] == 'sparc': SimObject('LegionTrace.py') diff --git a/src/cpu/TimingExpr.py b/src/cpu/TimingExpr.py new file mode 100644 index 000000000..6a9d6f95c --- /dev/null +++ b/src/cpu/TimingExpr.py @@ -0,0 +1,176 @@ +# Copyright (c) 2013-2014 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. 
You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Andrew Bardsley + +from m5.params import * +from m5.SimObject import SimObject + +# These classes define an expression language over uint64_t with only +# a few operators. 
This can be used to form expressions for the extra +# delay required in variable execution time instructions. +# +# Expressions, in evaluation, will have access to the ThreadContext and +# a StaticInst + +class TimingExpr(SimObject): + type = 'TimingExpr' + cxx_header = 'cpu/timing_expr.hh' + abstract = True; + +class TimingExprLiteral(TimingExpr): + """Literal 64 bit unsigned value""" + type = 'TimingExprLiteral' + cxx_header = 'cpu/timing_expr.hh' + + value = Param.UInt64("literal value") + + def set_params(self, value): + self.value = value + return self + +class TimingExpr0(TimingExprLiteral): + """Convenient 0""" + value = 0 + +class TimingExprSrcReg(TimingExpr): + """Find the source register number from the current inst""" + type = 'TimingExprSrcReg' + cxx_header = 'cpu/timing_expr.hh' + + # index = Param.Unsigned("index into inst src regs") + index = Param.Unsigned("index into inst src regs") + + def set_params(self, index): + self.index = index + return self + +class TimingExprReadIntReg(TimingExpr): + """Read an architectural register""" + type = 'TimingExprReadIntReg' + cxx_header = 'cpu/timing_expr.hh' + + reg = Param.TimingExpr("register raw index to read") + + def set_params(self, reg): + self.reg = reg + return self + +class TimingExprLet(TimingExpr): + """Block of declarations""" + type = 'TimingExprLet' + cxx_header = 'cpu/timing_expr.hh' + + defns = VectorParam.TimingExpr("expressions for bindings") + expr = Param.TimingExpr("body expression") + + def set_params(self, defns, expr): + self.defns = defns + self.expr = expr + return self + +class TimingExprRef(TimingExpr): + """Value of a bound sub-expression""" + type = 'TimingExprRef' + cxx_header = 'cpu/timing_expr.hh' + + index = Param.Unsigned("expression index") + + def set_params(self, index): + self.index = index + return self + +class TimingExprOp(Enum): + vals = [ + 'timingExprAdd', 'timingExprSub', + 'timingExprUMul', 'timingExprUDiv', + 'timingExprSMul', 'timingExprSDiv', + 
'timingExprUCeilDiv', # Unsigned divide rounding up + 'timingExprEqual', 'timingExprNotEqual', + 'timingExprULessThan', + 'timingExprUGreaterThan', + 'timingExprSLessThan', + 'timingExprSGreaterThan', + 'timingExprInvert', + 'timingExprNot', + 'timingExprAnd', + 'timingExprOr', + 'timingExprSizeInBits', + 'timingExprSignExtend32To64', + 'timingExprAbs' + ] + +class TimingExprUn(TimingExpr): + """Unary operator""" + type = 'TimingExprUn' + cxx_header = 'cpu/timing_expr.hh' + + op = Param.TimingExprOp("operator") + arg = Param.TimingExpr("expression") + + def set_params(self, op, arg): + self.op = op + self.arg = arg + return self + +class TimingExprBin(TimingExpr): + """Binary operator""" + type = 'TimingExprBin' + cxx_header = 'cpu/timing_expr.hh' + + op = Param.TimingExprOp("operator") + left = Param.TimingExpr("LHS expression") + right = Param.TimingExpr("RHS expression") + + def set_params(self, op, left, right): + self.op = op + self.left = left + self.right = right + return self + +class TimingExprIf(TimingExpr): + """If-then-else operator""" + type = 'TimingExprIf' + cxx_header = 'cpu/timing_expr.hh' + + cond = Param.TimingExpr("condition expression") + trueExpr = Param.TimingExpr("true expression") + falseExpr = Param.TimingExpr("false expression") + + def set_params(self, cond, trueExpr, falseExpr): + self.cond = cond + self.trueExpr = trueExpr + self.falseExpr = falseExpr + return self diff --git a/src/cpu/minor/MinorCPU.py b/src/cpu/minor/MinorCPU.py new file mode 100644 index 000000000..07953cf5a --- /dev/null +++ b/src/cpu/minor/MinorCPU.py @@ -0,0 +1,274 @@ +# Copyright (c) 2012-2014 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. 
You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2007 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Gabe Black +# Nathan Binkert +# Andrew Bardsley + +from m5.defines import buildEnv +from m5.params import * +from m5.proxy import * +from m5.SimObject import SimObject +from BaseCPU import BaseCPU +from DummyChecker import DummyChecker +from BranchPredictor import BranchPredictor +from TimingExpr import TimingExpr + +from FuncUnit import OpClass + +class MinorOpClass(SimObject): + """Boxing of OpClass to get around build problems and provide a hook for + future additions to OpClass checks""" + + type = 'MinorOpClass' + cxx_header = "cpu/minor/func_unit.hh" + + opClass = Param.OpClass("op class to match") + +class MinorOpClassSet(SimObject): + """A set of matchable op classes""" + + type = 'MinorOpClassSet' + cxx_header = "cpu/minor/func_unit.hh" + + opClasses = VectorParam.MinorOpClass([], "op classes to be matched." + " An empty list means any class") + +class MinorFUTiming(SimObject): + type = 'MinorFUTiming' + cxx_header = "cpu/minor/func_unit.hh" + + mask = Param.UInt64(0, "mask for testing ExtMachInst") + match = Param.UInt64(0, "match value for testing ExtMachInst:" + " (ext_mach_inst & mask) == match") + suppress = Param.Bool(False, "if true, this inst. is not executed by" + " this FU") + extraCommitLat = Param.Cycles(0, "extra cycles to stall commit for" + " this inst.") + extraCommitLatExpr = Param.TimingExpr(NULL, "extra cycles as a" + " run-time evaluated expression") + extraAssumedLat = Param.Cycles(0, "extra cycles to add to scoreboard" + " retire time for this insts dest registers once it leaves the" + " functional unit. For mem refs, if this is 0, the result's time" + " is marked as unpredictable and no forwarding can take place.") + srcRegsRelativeLats = VectorParam.Cycles("the maximum number of cycles" + " after inst. issue that each src reg can be available for this" + " inst. to issue") + opClasses = Param.MinorOpClassSet(MinorOpClassSet(), + "op classes to be considered for this decode. 
An empty set means any" + " class") + description = Param.String('', "description string of the decoding/inst." + " class") + +def minorMakeOpClassSet(op_classes): + """Make a MinorOpClassSet from a list of OpClass enum value strings""" + def boxOpClass(op_class): + return MinorOpClass(opClass=op_class) + + return MinorOpClassSet(opClasses=map(boxOpClass, op_classes)) + +class MinorFU(SimObject): + type = 'MinorFU' + cxx_header = "cpu/minor/func_unit.hh" + + opClasses = Param.MinorOpClassSet(MinorOpClassSet(), "type of operations" + " allowed on this functional unit") + opLat = Param.Cycles(1, "latency in cycles") + issueLat = Param.Cycles(1, "cycles until another instruction can be" + " issued") + timings = VectorParam.MinorFUTiming([], "extra decoding rules") + + cantForwardFromFUIndices = VectorParam.Unsigned([], + "list of FU indices from which this FU can't receive and early" + " (forwarded) result") + +class MinorFUPool(SimObject): + type = 'MinorFUPool' + cxx_header = "cpu/minor/func_unit.hh" + + funcUnits = VectorParam.MinorFU("functional units") + +class MinorDefaultIntFU(MinorFU): + opClasses = minorMakeOpClassSet(['IntAlu']) + timings = [MinorFUTiming(description="Int", + srcRegsRelativeLats=[2])] + opLat = 3 + +class MinorDefaultIntMulFU(MinorFU): + opClasses = minorMakeOpClassSet(['IntMult']) + timings = [MinorFUTiming(description='Mul', + srcRegsRelativeLats=[0])] + opLat = 3 + +class MinorDefaultIntDivFU(MinorFU): + opClasses = minorMakeOpClassSet(['IntDiv']) + issueLat = 9 + opLat = 9 + +class MinorDefaultFloatSimdFU(MinorFU): + opClasses = minorMakeOpClassSet([ + 'FloatAdd', 'FloatCmp', 'FloatCvt', 'FloatMult', 'FloatDiv', + 'FloatSqrt', + 'SimdAdd', 'SimdAddAcc', 'SimdAlu', 'SimdCmp', 'SimdCvt', + 'SimdMisc', 'SimdMult', 'SimdMultAcc', 'SimdShift', 'SimdShiftAcc', + 'SimdSqrt', 'SimdFloatAdd', 'SimdFloatAlu', 'SimdFloatCmp', + 'SimdFloatCvt', 'SimdFloatDiv', 'SimdFloatMisc', 'SimdFloatMult', + 'SimdFloatMultAcc', 'SimdFloatSqrt']) + timings = 
[MinorFUTiming(description='FloatSimd', + srcRegsRelativeLats=[2])] + opLat = 6 + +class MinorDefaultMemFU(MinorFU): + opClasses = minorMakeOpClassSet(['MemRead', 'MemWrite']) + timings = [MinorFUTiming(description='Mem', + srcRegsRelativeLats=[1], extraAssumedLat=2)] + opLat = 1 + +class MinorDefaultMiscFU(MinorFU): + opClasses = minorMakeOpClassSet(['IprAccess', 'InstPrefetch']) + opLat = 1 + +class MinorDefaultFUPool(MinorFUPool): + funcUnits = [MinorDefaultIntFU(), MinorDefaultIntFU(), + MinorDefaultIntMulFU(), MinorDefaultIntDivFU(), + MinorDefaultFloatSimdFU(), MinorDefaultMemFU(), + MinorDefaultMiscFU()] + +class MinorCPU(BaseCPU): + type = 'MinorCPU' + cxx_header = "cpu/minor/cpu.hh" + + @classmethod + def memory_mode(cls): + return 'timing' + + @classmethod + def require_caches(cls): + return True + + @classmethod + def support_take_over(cls): + return True + + fetch1FetchLimit = Param.Unsigned(1, + "Number of line fetches allowable in flight at once") + fetch1LineSnapWidth = Param.Unsigned(0, + "Fetch1 'line' fetch snap size in bytes" + " (0 means use system cache line size)") + fetch1LineWidth = Param.Unsigned(0, + "Fetch1 maximum fetch size in bytes (0 means use system cache" + " line size)") + fetch1ToFetch2ForwardDelay = Param.Cycles(1, + "Forward cycle delay from Fetch1 to Fetch2 (1 means next cycle)") + fetch1ToFetch2BackwardDelay = Param.Cycles(1, + "Backward cycle delay from Fetch2 to Fetch1 for branch prediction" + " signalling (0 means in the same cycle, 1 mean the next cycle)") + + fetch2InputBufferSize = Param.Unsigned(2, + "Size of input buffer to Fetch2 in cycles-worth of insts.") + fetch2ToDecodeForwardDelay = Param.Cycles(1, + "Forward cycle delay from Fetch2 to Decode (1 means next cycle)") + fetch2CycleInput = Param.Bool(True, + "Allow Fetch2 to cross input lines to generate full output each" + " cycle") + + decodeInputBufferSize = Param.Unsigned(3, + "Size of input buffer to Decode in cycles-worth of insts.") + 
decodeToExecuteForwardDelay = Param.Cycles(1, + "Forward cycle delay from Decode to Execute (1 means next cycle)") + decodeInputWidth = Param.Unsigned(2, + "Width (in instructions) of input to Decode (and implicitly" + " Decode's own width)") + decodeCycleInput = Param.Bool(True, + "Allow Decode to pack instructions from more than one input cycle" + " to fill its output each cycle") + + executeInputWidth = Param.Unsigned(2, + "Width (in instructions) of input to Execute") + executeCycleInput = Param.Bool(True, + "Allow Execute to use instructions from more than one input cycle" + " each cycle") + executeIssueLimit = Param.Unsigned(2, + "Number of issuable instructions in Execute each cycle") + executeMemoryIssueLimit = Param.Unsigned(1, + "Number of issuable memory instructions in Execute each cycle") + executeCommitLimit = Param.Unsigned(2, + "Number of committable instructions in Execute each cycle") + executeMemoryCommitLimit = Param.Unsigned(1, + "Number of committable memory references in Execute each cycle") + executeInputBufferSize = Param.Unsigned(7, + "Size of input buffer to Execute in cycles-worth of insts.") + executeMemoryWidth = Param.Unsigned(0, + "Width (and snap) in bytes of the data memory interface. 
(0 mean use" + " the system cacheLineSize)") + executeMaxAccessesInMemory = Param.Unsigned(2, + "Maximum number of concurrent accesses allowed to the memory system" + " from the dcache port") + executeLSQMaxStoreBufferStoresPerCycle = Param.Unsigned(2, + "Maximum number of stores that the store buffer can issue per cycle") + executeLSQRequestsQueueSize = Param.Unsigned(1, + "Size of LSQ requests queue (address translation queue)") + executeLSQTransfersQueueSize = Param.Unsigned(2, + "Size of LSQ transfers queue (memory transaction queue)") + executeLSQStoreBufferSize = Param.Unsigned(5, + "Size of LSQ store buffer") + executeBranchDelay = Param.Cycles(1, + "Delay from Execute deciding to branch and Fetch1 reacting" + " (1 means next cycle)") + + executeFuncUnits = Param.MinorFUPool(MinorDefaultFUPool(), + "FUlines for this processor") + + executeSetTraceTimeOnCommit = Param.Bool(True, + "Set inst. trace times to be commit times") + executeSetTraceTimeOnIssue = Param.Bool(False, + "Set inst. 
trace times to be issue times") + + executeAllowEarlyMemoryIssue = Param.Bool(True, + "Allow mem refs to be issued to the LSQ before reaching the head of" + " the in flight insts queue") + + enableIdling = Param.Bool(True, + "Enable cycle skipping when the processor is idle\n"); + + branchPred = Param.BranchPredictor(BranchPredictor( + numThreads = Parent.numThreads), "Branch Predictor") + + def addCheckerCpu(self): + print "Checker not yet supported by MinorCPU" + exit(1) diff --git a/src/cpu/minor/SConscript b/src/cpu/minor/SConscript new file mode 100644 index 000000000..2234f9a8d --- /dev/null +++ b/src/cpu/minor/SConscript @@ -0,0 +1,73 @@ +# -*- mode:python -*- + +# Copyright (c) 2013-2014 ARM Limited +# All rights reserved +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Nathan Binkert +# Andrew Bardsley + +Import('*') + +if 'MinorCPU' in env['CPU_MODELS']: + SimObject('MinorCPU.py') + + Source('activity.cc') + Source('cpu.cc') + Source('decode.cc') + Source('dyn_inst.cc') + Source('execute.cc') + Source('fetch1.cc') + Source('fetch2.cc') + Source('func_unit.cc') + Source('lsq.cc') + Source('pipe_data.cc') + Source('pipeline.cc') + Source('scoreboard.cc') + Source('stats.cc') + + DebugFlag('MinorCPU', 'Minor CPU-level events') + DebugFlag('MinorExecute', 'Minor Execute stage') + DebugFlag('MinorInterrupt', 'Minor interrupt handling') + DebugFlag('MinorMem', 'Minor memory accesses') + DebugFlag('MinorScoreboard', 'Minor Execute register scoreboard') + DebugFlag('MinorTrace', 'MinorTrace cycle-by-cycle state trace') + DebugFlag('MinorTiming', 'Extra timing for instructions') + + CompoundFlag('Minor', [ + 'MinorCPU', 'MinorExecute', 'MinorInterrupt', 'MinorMem', + 'MinorScoreboard']) diff --git a/src/cpu/minor/SConsopts b/src/cpu/minor/SConsopts new file mode 100644 index 000000000..68c420779 --- /dev/null +++ b/src/cpu/minor/SConsopts @@ -0,0 +1,45 @@ +# -*- mode:python -*- + +# Copyright (c) 2012-2014 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Andrew Bardsley + +Import('*') + +CpuModel('MinorCPU', 'minor_cpu_exec.cc', + '#include "cpu/minor/exec_context.hh"', + { 'CPU_exec_context': 'Minor::ExecContext' }, + default=True) diff --git a/src/cpu/minor/activity.cc b/src/cpu/minor/activity.cc new file mode 100644 index 000000000..8e322d3e7 --- /dev/null +++ b/src/cpu/minor/activity.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +#include <sstream> + +#include "cpu/minor/activity.hh" +#include "cpu/minor/trace.hh" + +namespace Minor +{ + +void +MinorActivityRecorder::minorTrace() const +{ + std::ostringstream stages; + unsigned int num_stages = getNumStages(); + + unsigned int stage_index = 0; + while (stage_index < num_stages) { + stages << (getStageActive(stage_index) ? '1' : 'E'); + + stage_index++; + if (stage_index != num_stages) + stages << ','; + } + + MINORTRACE("activity=%d stages=%s\n", getActivityCount(), stages.str()); +} + +} diff --git a/src/cpu/minor/activity.hh b/src/cpu/minor/activity.hh new file mode 100644 index 000000000..e38c476c0 --- /dev/null +++ b/src/cpu/minor/activity.hh @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * ActivityRecoder from cpu/activity.h wrapped to provide evaluate and + * minorTrace. 
+ */ + +#ifndef __CPU_MINOR_ACTIVITY_HH__ +#define __CPU_MINOR_ACTIVITY_HH__ + +#include "cpu/activity.hh" + +namespace Minor +{ + +/** ActivityRecorder with a Ticked interface */ +class MinorActivityRecorder : public ActivityRecorder +{ + public: + /** Ticked interface */ + void evaluate() { advance(); } + void minorTrace() const; + + public: + MinorActivityRecorder(const std::string &name, int num_stages, + int longest_latency) : + ActivityRecorder(name, num_stages, longest_latency, 0) + { } +}; + +} + +#endif /* __CPU_MINOR_ACTIVITY_HH__ */ diff --git a/src/cpu/minor/buffers.hh b/src/cpu/minor/buffers.hh new file mode 100644 index 000000000..f4ae91a70 --- /dev/null +++ b/src/cpu/minor/buffers.hh @@ -0,0 +1,653 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * Classes for buffer, queue and FIFO behaviour. + */ + +#ifndef __CPU_MINOR_BUFFERS_HH__ +#define __CPU_MINOR_BUFFERS_HH__ + +#include <iostream> +#include <queue> +#include <sstream> + +#include "cpu/minor/trace.hh" +#include "cpu/activity.hh" +#include "cpu/timebuf.hh" + +namespace Minor +{ + +/** Interface class for data with reporting/tracing facilities. 
This + * interface doesn't actually have to be used as other classes which need + * this interface use templating rather than inheritance but it's provided + * here to document the interface needed by those classes. */ +class ReportIF +{ + public: + /** Print the data in a format suitable to be the value in "name=value" + * trace lines */ + virtual void reportData(std::ostream &os) const = 0; + + virtual ~ReportIF() { } +}; + +/** Interface class for data with 'bubble' values. This interface doesn't + * actually have to be used as other classes which need this interface use + * templating rather than inheritance but it's provided here to document + * the interface needed by those classes. */ +class BubbleIF +{ + public: + virtual bool isBubble() const = 0; +}; + +/** ...ReportTraits are trait classes with the same functionality as + * ReportIF, but with elements explicitly passed into the report... + * functions. */ + +/** Allow a template using ReportTraits to call report... functions of + * ReportIF-bearing elements themselves */ +template <typename ElemType> /* ElemType should implement ReportIF */ +class ReportTraitsAdaptor +{ + public: + static void + reportData(std::ostream &os, const ElemType &elem) + { elem.reportData(os); } +}; + +/** A similar adaptor but for elements held by pointer + * ElemType should implement ReportIF */ +template <typename PtrType> +class ReportTraitsPtrAdaptor +{ + public: + static void + reportData(std::ostream &os, const PtrType &elem) + { elem->reportData(os); } +}; + +/** ...
BubbleTraits are trait classes to add BubbleIF interface + * functionality to templates which process elements which don't necessarily + * implement BubbleIF themselves */ + +/** Default behaviour, no bubbles */ +template <typename ElemType> +class NoBubbleTraits +{ + public: + static bool isBubble(const ElemType &) { return false; } + static ElemType bubble() { assert(false); } +}; + +/** Pass on call to the element */ +template <typename ElemType> +class BubbleTraitsAdaptor +{ + public: + static bool isBubble(const ElemType &elem) + { return elem.isBubble(); } + + static ElemType bubble() { return ElemType::bubble(); } +}; + +/** Pass on call to the element where the element is a pointer */ +template <typename PtrType, typename ElemType> +class BubbleTraitsPtrAdaptor +{ + public: + static bool isBubble(const PtrType &elem) + { return elem->isBubble(); } + + static PtrType bubble() { return ElemType::bubble(); } +}; + +/** TimeBuffer with MinorTrace and Named interfaces */ +template <typename ElemType, + typename ReportTraits = ReportTraitsAdaptor<ElemType>, + typename BubbleTraits = BubbleTraitsAdaptor<ElemType> > +class MinorBuffer : public Named, public TimeBuffer<ElemType> +{ + protected: + /** The range of elements that should appear in trace lines */ + int reportLeft, reportRight; + + /** Name to use for the data in a MinorTrace line */ + std::string dataName; + + public: + MinorBuffer(const std::string &name, + const std::string &data_name, + int num_past, int num_future, + int report_left = -1, int report_right = -1) : + Named(name), TimeBuffer<ElemType>(num_past, num_future), + reportLeft(report_left), reportRight(report_right), + dataName(data_name) + { } + + public: + /* Is this buffer full of only bubbles */ + bool + empty() const + { + bool ret = true; + + for (int i = -this->past; i <= this->future; i++) { + if (!BubbleTraits::isBubble((*this)[i])) + ret = false; + } + + return ret; + } + + /** Report buffer states from 'slot' 'from' to 'to'. 
For example 0,-1 + * will produce two slices with current (just assigned) and last (one + * advance() old) slices with the current (0) one on the left. + * Reverse the numbers to change the order of slices */ + void + minorTrace() const + { + std::ostringstream data; + + int step = (reportLeft > reportRight ? -1 : 1); + int end = reportRight + step; + int i = reportLeft; + + while (i != end) { + const ElemType &datum = (*this)[i]; + + ReportTraits::reportData(data, datum); + i += step; + if (i != end) + data << ','; + } + + MINORTRACE("%s=%s\n", dataName, data.str()); + } +}; + +/** Wraps a MinorBuffer with Input/Output interfaces to ensure that units + * within the model can only see the right end of buffers between them. */ +template <typename Data> +class Latch +{ + public: + typedef MinorBuffer<Data> Buffer; + + protected: + /** Delays, in cycles, writing data into the latch and seeing it on the + * latched wires */ + Cycles delay; + + Buffer buffer; + + public: + /** forward/backwardDelay specify the delay from input to output in each + * direction. These arguments *must* be >= 1 */ + Latch(const std::string &name, + const std::string &data_name, + Cycles delay_ = Cycles(1), + bool report_backwards = false) : + delay(delay_), + buffer(name, data_name, delay_, 0, (report_backwards ? -delay_ : 0), + (report_backwards ? 0 : -delay_)) + { } + + public: + /** Encapsulate wires on either input or output of the latch. + * forward/backward correspond to data direction relative to the + * pipeline. Latched and Immediate specify delay for backward data. 
+ * Immediate data is available to earlier stages *during* the cycle it + * is written */ + class Input + { + public: + typename Buffer::wire inputWire; + + public: + Input(typename Buffer::wire input_wire) : + inputWire(input_wire) + { } + }; + + class Output + { + public: + typename Buffer::wire outputWire; + + public: + Output(typename Buffer::wire output_wire) : + outputWire(output_wire) + { } + }; + + bool empty() const { return buffer.empty(); } + + /** An interface to just the input of the buffer */ + Input input() { return Input(buffer.getWire(0)); } + + /** An interface to just the output of the buffer */ + Output output() { return Output(buffer.getWire(-delay)); } + + void minorTrace() const { buffer.minorTrace(); } + + void evaluate() { buffer.advance(); } +}; + +/** A pipeline simulating class that will stall (not advance when advance() + * is called) if a non-bubble value lies at the far end of the pipeline. + * The user can clear the stall before calling advance to unstall the + * pipeline. 
*/ +template <typename ElemType, + typename ReportTraits, + typename BubbleTraits = BubbleTraitsAdaptor<ElemType> > +class SelfStallingPipeline : public MinorBuffer<ElemType, ReportTraits> +{ + protected: + /** Wire at the input end of the pipeline (for convenience) */ + typename TimeBuffer<ElemType>::wire pushWire; + /** Wire at the output end of the pipeline (for convenience) */ + typename TimeBuffer<ElemType>::wire popWire; + + public: + /** If true, advance will not advance the pipeline */ + bool stalled; + + /** The number of slots with non-bubbles in them */ + unsigned int occupancy; + + public: + SelfStallingPipeline(const std::string &name, + const std::string &data_name, + unsigned depth) : + MinorBuffer<ElemType, ReportTraits> + (name, data_name, depth, 0, -1, -depth), + pushWire(this->getWire(0)), + popWire(this->getWire(-depth)), + stalled(false), + occupancy(0) + { + assert(depth > 0); + + /* Write explicit bubbles to get around the case where the default + * constructor for the element type isn't good enough */ + for (unsigned i = 0; i <= depth; i++) + (*this)[-i] = BubbleTraits::bubble(); + } + + public: + /** Write an element to the back of the pipeline. This doesn't cause + * the pipeline to advance until advance is called. Pushing twice + * without advance-ing will just cause an overwrite of the last push's + * data. */ + void push(ElemType &elem) + { + assert(!alreadyPushed()); + *pushWire = elem; + if (!BubbleTraits::isBubble(elem)) + occupancy++; + } + + /** Peek at the end element of the pipe */ + ElemType &front() { return *popWire; } + + const ElemType &front() const { return *popWire; } + + /** Have we already pushed onto this pipe without advancing */ + bool alreadyPushed() { return !BubbleTraits::isBubble(*pushWire); } + + /** There's data (not a bubble) at the end of the pipe */ + bool isPopable() { return !BubbleTraits::isBubble(front()); } + + /** Try to advance the pipeline. If we're stalled, don't advance. 
If + * we're not stalled, advance then check to see if we become stalled + * (a non-bubble at the end of the pipe) */ + void + advance() + { + bool data_at_end = isPopable(); + + if (!stalled) { + TimeBuffer<ElemType>::advance(); + /* If there was data at the end of the pipe that has now been + * advanced out of the pipe, we've lost data */ + if (data_at_end) + occupancy--; + /* Is there data at the end of the pipe now? */ + stalled = isPopable(); + /* Insert a bubble into the empty input slot to make sure that + * element is correct in the case where the default constructor + * for ElemType doesn't produce a bubble */ + ElemType bubble = BubbleTraits::bubble(); + *pushWire = bubble; + } + } +}; + +/** Base class for space reservation requestable objects */ +class Reservable +{ + public: + /** Can a slot be reserved? */ + virtual bool canReserve() const = 0; + + /** Reserve a slot in whatever structure this is attached to */ + virtual void reserve() = 0; + + /** Free a reserved slot */ + virtual void freeReservation() = 0; +}; + +/** Wrapper for a queue type to act as a pipeline stage input queue. + * Handles capacity management, bubble value suppression and provides + * reporting. + * + * In an ideal world, ElemType would be derived from ReportIF and BubbleIF, + * but here we use traits and allow the Adaptors ReportTraitsAdaptor and + * BubbleTraitsAdaptor to work on data which *does* directly implement + * those interfaces. 
*/ +template <typename ElemType, + typename ReportTraits = ReportTraitsAdaptor<ElemType>, + typename BubbleTraits = BubbleTraitsAdaptor<ElemType> > +class Queue : public Named, public Reservable +{ + private: + std::deque<ElemType> queue; + + /** Number of slots currently reserved for future (reservation + * respecting) pushes */ + unsigned int numReservedSlots; + + /** Need this here as queues usually don't have a limited capacity */ + unsigned int capacity; + + /** Name to use for the data in MinorTrace */ + std::string dataName; + + public: + Queue(const std::string &name, const std::string &data_name, + unsigned int capacity_) : + Named(name), + numReservedSlots(0), + capacity(capacity_), + dataName(data_name) + { } + + virtual ~Queue() { } + + public: + /** Push an element into the buffer if it isn't a bubble. Bubbles are + * just discarded. It is assumed that any push into a queue with + * reserved space intends to take that space */ + void + push(ElemType &data) + { + if (!BubbleTraits::isBubble(data)) { + freeReservation(); + queue.push_back(data); + + if (queue.size() > capacity) { + warn("%s: No space to push data into queue of capacity" + " %u, pushing anyway\n", name(), capacity); + } + + } + } + + /** Clear all allocated space. Be careful how this is used */ + void clearReservedSpace() { numReservedSlots = 0; } + + /** Clear a single reserved slot */ + void freeReservation() + { + if (numReservedSlots != 0) + numReservedSlots--; + } + + /** Reserve space in the queue for future pushes. Enquiries about space + * in the queue using unreservedRemainingSpace will only tell about + * space which is not full and not reserved.
*/ + void + reserve() + { + /* Check reservable space */ + if (unreservedRemainingSpace() == 0) + warn("%s: No space is reservable in queue", name()); + + numReservedSlots++; + } + + bool canReserve() const { return unreservedRemainingSpace() != 0; } + + /** Number of slots available in an empty buffer */ + unsigned int totalSpace() const { return capacity; } + + /** Number of slots already occupied in this buffer */ + unsigned int occupiedSpace() const { return queue.size(); } + + /** Number of slots which are reserved. */ + unsigned int reservedSpace() const { return numReservedSlots; } + + /** Number of slots yet to fill in this buffer. This doesn't include + * reservation. */ + unsigned int + remainingSpace() const + { + int ret = capacity - queue.size(); + + return (ret < 0 ? 0 : ret); + } + + /** Like remainingSpace but does not count reserved spaces */ + unsigned int + unreservedRemainingSpace() const + { + int ret = capacity - (queue.size() + numReservedSlots); + + return (ret < 0 ? 0 : ret); + } + + /** Head value. Like std::queue::front */ + ElemType &front() { return queue.front(); } + + const ElemType &front() const { return queue.front(); } + + /** Pop the head item. Like std::queue::pop */ + void pop() { queue.pop_front(); } + + /** Is the queue empty? */ + bool empty() const { return queue.empty(); } + + void + minorTrace() const + { + std::ostringstream data; + /* If we become over-full, totalSpace() can actually be smaller than + * occupiedSpace(). Handle this */ + unsigned int num_total = (occupiedSpace() > totalSpace() ? 
+ occupiedSpace() : totalSpace()); + + unsigned int num_reserved = reservedSpace(); + unsigned int num_occupied = occupiedSpace(); + + int num_printed = 1; + /* Bodge to rotate queue to report elements */ + while (num_printed <= num_occupied) { + ReportTraits::reportData(data, queue[num_printed - 1]); + num_printed++; + + if (num_printed <= num_total) + data << ','; + } + + int num_printed_reserved = 1; + /* Show reserved slots */ + while (num_printed_reserved <= num_reserved && + num_printed <= num_total) + { + data << 'R'; + num_printed_reserved++; + num_printed++; + + if (num_printed <= num_total) + data << ','; + } + + /* And finally pad with empty slots (if there are any) */ + while (num_printed <= num_total) { + num_printed++; + + if (num_printed <= num_total) + data << ','; + } + + MINORTRACE("%s=%s\n", dataName, data.str()); + } +}; + +/** Like a Queue but with a restricted interface and a setTail function + * which, when the queue is empty, just takes a reference to the pushed + * item as the single element. Calling pushTail will push that element + * onto the queue. 
+ * + * The purpose of this class is to allow the faster operation of queues of + * items which usually don't get deeper than one item and for which the copy + * associated with a push is expensive enough to want to avoid + * + * The intended use case is the input buffer for pipeline stages, hence the + * class name */ +template <typename ElemType, + typename ReportTraits = ReportTraitsAdaptor<ElemType>, + typename BubbleTraits = BubbleTraitsAdaptor<ElemType> > +class InputBuffer : public Reservable +{ + protected: + /** Underlying queue */ + mutable Queue<ElemType, ReportTraits, BubbleTraits> queue; + + /** Pointer to the single element (if not NULL) */ + mutable ElemType *elementPtr; + + public: + InputBuffer(const std::string &name, const std::string &data_name, + unsigned int capacity_) : + queue(name, data_name, capacity_), + elementPtr(NULL) + { } + + public: + /** Set the tail of the queue, this is like push but needs + * to be followed by pushTail for the new tail to make its + * way into the queue proper */ + void + setTail(ElemType &new_element) + { + assert(!elementPtr); + if (!BubbleTraits::isBubble(new_element)) { + if (queue.empty()) + elementPtr = &new_element; + else + queue.push(new_element); + } + } + + /** No single element or queue entries */ + bool empty() const { return !elementPtr && queue.empty(); } + + /** Return the element, or the front of the queue */ + const ElemType &front() const + { return (elementPtr ? *elementPtr : queue.front()); } + + ElemType &front() + { return (elementPtr ? *elementPtr : queue.front()); } + + /** Pop either the head, or if none, the head of the queue */ + void + pop() + { + if (elementPtr) { + /* A popped element was expected to be pushed into queue + * and so take a reserved space */ + elementPtr = NULL; + queue.freeReservation(); + } else { + queue.pop(); + } + } + + /** Push the single element (if any) into the queue proper. 
If the + * element's reference points to a transient object, remember to + * always do this before the end of that object's life */ + void + pushTail() const + { + if (elementPtr) + queue.push(*elementPtr); + elementPtr = NULL; + } + + /** Report elements */ + void + minorTrace() const + { + pushTail(); + queue.minorTrace(); + } + + /** Reservable interface, passed on to queue */ + bool canReserve() const { return queue.canReserve(); } + void reserve() { queue.reserve(); } + void freeReservation() { queue.freeReservation(); } + + /** Like remainingSpace but does not count reserved spaces */ + unsigned int + unreservedRemainingSpace() + { + pushTail(); + return queue.unreservedRemainingSpace(); + } +}; + +} + +#endif /* __CPU_MINOR_BUFFERS_HH__ */ diff --git a/src/cpu/minor/cpu.cc b/src/cpu/minor/cpu.cc new file mode 100644 index 000000000..f7007f6ff --- /dev/null +++ b/src/cpu/minor/cpu.cc @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2012-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +#include "arch/utility.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/dyn_inst.hh" +#include "cpu/minor/fetch1.hh" +#include "cpu/minor/pipeline.hh" +#include "debug/Drain.hh" +#include "debug/MinorCPU.hh" +#include "debug/Quiesce.hh" + +MinorCPU::MinorCPU(MinorCPUParams *params) : + BaseCPU(params), + drainManager(NULL) +{ + /* This is only written for one thread at the moment */ + Minor::MinorThread *thread; + + if (FullSystem) { + thread = new Minor::MinorThread(this, 0, params->system, params->itb, + params->dtb, params->isa[0]); + } else { + /* thread_id 0 */ + thread = new Minor::MinorThread(this, 0, params->system, + params->workload[0], params->itb, params->dtb, params->isa[0]); + } + + threads.push_back(thread); + threadActivateEvents.push_back(new ThreadActivateEvent(*this, 0)); + + thread->setStatus(ThreadContext::Halted); + + ThreadContext *tc = thread->getTC(); + + if (params->checker) { + fatal("The Minor model doesn't support checking (yet)\n"); + } + + threadContexts.push_back(tc); + + Minor::MinorDynInst::init(); + + pipeline = new Minor::Pipeline(*this, *params); + activityRecorder = pipeline->getActivityRecorder(); +} + +MinorCPU::~MinorCPU() +{ + delete pipeline; + + for (ThreadID thread_id = 0; thread_id < threads.size(); thread_id++) { + delete threads[thread_id]; + delete threadActivateEvents[thread_id]; + } +} + +void +MinorCPU::init() +{ + BaseCPU::init(); + + if (!params()->switched_out && + system->getMemoryMode() != Enums::timing) + { + fatal("The Minor CPU requires the memory system to be in " + "'timing' mode.\n"); + } + + /* Initialise the ThreadContext's memory proxies */ + for (ThreadID thread_id = 0; thread_id < threads.size(); thread_id++) { + ThreadContext *tc = getContext(thread_id); + + tc->initMemProxies(tc); + } + + /* Initialise CPUs (== threads in the ISA) */ + if (FullSystem && !params()->switched_out) { + for (ThreadID thread_id = 0; thread_id < threads.size(); thread_id++) 
+ { + ThreadContext *tc = getContext(thread_id); + + /* Initialize CPU, including PC */ + TheISA::initCPU(tc, cpuId()); + } + } +} + +/** Stats interface from SimObject (by way of BaseCPU) */ +void +MinorCPU::regStats() +{ + BaseCPU::regStats(); + stats.regStats(name(), *this); + pipeline->regStats(); +} + +void +MinorCPU::serializeThread(std::ostream &os, ThreadID thread_id) +{ + threads[thread_id]->serialize(os); +} + +void +MinorCPU::unserializeThread(Checkpoint *cp, const std::string &section, + ThreadID thread_id) +{ + if (thread_id != 0) + fatal("Trying to load more than one thread into a MinorCPU\n"); + + threads[thread_id]->unserialize(cp, section); +} + +void +MinorCPU::serialize(std::ostream &os) +{ + pipeline->serialize(os); + BaseCPU::serialize(os); +} + +void +MinorCPU::unserialize(Checkpoint *cp, const std::string &section) +{ + pipeline->unserialize(cp, section); + BaseCPU::unserialize(cp, section); +} + +Addr +MinorCPU::dbg_vtophys(Addr addr) +{ + /* Note that this gives you the translation for thread 0 */ + panic("No implementation for vtophy\n"); + + return 0; +} + +void +MinorCPU::wakeup() +{ + DPRINTF(Drain, "MinorCPU wakeup\n"); + + for (auto i = threads.begin(); i != threads.end(); i ++) { + if ((*i)->status() == ThreadContext::Suspended) + (*i)->activate(); + } + + DPRINTF(Drain,"Suspended Processor awoke\n"); +} + +void +MinorCPU::startup() +{ + DPRINTF(MinorCPU, "MinorCPU startup\n"); + + BaseCPU::startup(); + + for (auto i = threads.begin(); i != threads.end(); i ++) + (*i)->startup(); +} + +unsigned int +MinorCPU::drain(DrainManager *drain_manager) +{ + DPRINTF(Drain, "MinorCPU drain\n"); + + drainManager = drain_manager; + + /* Need to suspend all threads and wait for Execute to idle.
+ * Tell Fetch1 not to fetch */ + unsigned int ret = pipeline->drain(drain_manager); + + if (ret == 0) + DPRINTF(Drain, "MinorCPU drained\n"); + else + DPRINTF(Drain, "MinorCPU not finished draining\n"); + + return ret; +} + +void +MinorCPU::signalDrainDone() +{ + DPRINTF(Drain, "MinorCPU drain done\n"); + setDrainState(Drainable::Drained); + drainManager->signalDrainDone(); + drainManager = NULL; +} + +void +MinorCPU::drainResume() +{ + assert(getDrainState() == Drainable::Drained || + getDrainState() == Drainable::Running); + + if (switchedOut()) { + DPRINTF(Drain, "drainResume while switched out. Ignoring\n"); + return; + } + + DPRINTF(Drain, "MinorCPU drainResume\n"); + + if (!system->isTimingMode()) { + fatal("The Minor CPU requires the memory system to be in " + "'timing' mode.\n"); + } + + wakeup(); + pipeline->drainResume(); + + setDrainState(Drainable::Running); +} + +void +MinorCPU::memWriteback() +{ + DPRINTF(Drain, "MinorCPU memWriteback\n"); +} + +void +MinorCPU::switchOut() +{ + DPRINTF(MinorCPU, "MinorCPU switchOut\n"); + + assert(!switchedOut()); + BaseCPU::switchOut(); + + /* Check that the CPU is drained? */ + activityRecorder->reset(); +} + +void +MinorCPU::takeOverFrom(BaseCPU *old_cpu) +{ + DPRINTF(MinorCPU, "MinorCPU takeOverFrom\n"); + + BaseCPU::takeOverFrom(old_cpu); + + /* Don't think I need to do anything here */ +} + +void +MinorCPU::activateContext(ThreadID thread_id, Cycles delay) +{ + DPRINTF(MinorCPU, "ActivateContext thread: %d delay: %d\n", + thread_id, delay); + + if (!threadActivateEvents[thread_id]->scheduled()) { + schedule(threadActivateEvents[thread_id], clockEdge(delay)); + } +} + +void +MinorCPU::ThreadActivateEvent::process() +{ + DPRINTFS(MinorCPU, (&cpu), "Activating thread: %d\n", thread_id); + + /* Do some cycle accounting. 
lastStopped is reset to stop the + * wakeup call on the pipeline from adding the quiesce period + * to BaseCPU::numCycles */ + cpu.stats.quiesceCycles += cpu.pipeline->cyclesSinceLastStopped(); + cpu.pipeline->resetLastStopped(); + + /* Wake up the thread, wakeup the pipeline tick */ + cpu.threads[thread_id]->activate(); + cpu.wakeupOnEvent(Minor::Pipeline::CPUStageId); + cpu.pipeline->wakeupFetch(); +} + +void +MinorCPU::suspendContext(ThreadID thread_id) +{ + DPRINTF(MinorCPU, "SuspendContext %d\n", thread_id); + + threads[thread_id]->suspend(); +} + +void +MinorCPU::wakeupOnEvent(unsigned int stage_id) +{ + DPRINTF(Quiesce, "Event wakeup from stage %d\n", stage_id); + + /* Mark that some activity has taken place and start the pipeline */ + activityRecorder->activateStage(stage_id); + pipeline->start(); +} + +MinorCPU * +MinorCPUParams::create() +{ + numThreads = 1; + if (!FullSystem && workload.size() != 1) + panic("only one workload allowed"); + return new MinorCPU(this); +} + +MasterPort &MinorCPU::getInstPort() +{ + return pipeline->getInstPort(); +} + +MasterPort &MinorCPU::getDataPort() +{ + return pipeline->getDataPort(); +} + +Counter +MinorCPU::totalInsts() const +{ + Counter ret = 0; + + for (auto i = threads.begin(); i != threads.end(); i ++) + ret += (*i)->numInst; + + return ret; +} + +Counter +MinorCPU::totalOps() const +{ + Counter ret = 0; + + for (auto i = threads.begin(); i != threads.end(); i ++) + ret += (*i)->numOp; + + return ret; +} diff --git a/src/cpu/minor/cpu.hh b/src/cpu/minor/cpu.hh new file mode 100644 index 000000000..80f41b5d2 --- /dev/null +++ b/src/cpu/minor/cpu.hh @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2012-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the 
software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * Top level definition of the Minor in-order CPU model + */ + +#ifndef __CPU_MINOR_CPU_HH__ +#define __CPU_MINOR_CPU_HH__ + +#include "cpu/minor/activity.hh" +#include "cpu/minor/stats.hh" +#include "cpu/base.hh" +#include "cpu/simple_thread.hh" +#include "params/MinorCPU.hh" + +namespace Minor +{ +/** Forward declared to break the cyclic inclusion dependencies between + * pipeline and cpu */ +class Pipeline; + +/** Minor will use the SimpleThread state for now */ +typedef SimpleThread MinorThread; +}; + +/** + * MinorCPU is an in-order CPU model with four fixed pipeline stages: + * + * Fetch1 - fetches lines from memory + * Fetch2 - decomposes lines into macro-op instructions + * Decode - decomposes macro-ops into micro-ops + * Execute - executes those micro-ops + * + * This pipeline is carried in the MinorCPU::pipeline object. + * The exec_context interface is not carried by MinorCPU but by + * Minor::ExecContext objects + * created by Minor::Execute. + */ +class MinorCPU : public BaseCPU +{ + protected: + /** Event for delayed wakeup of a thread */ + class ThreadActivateEvent : public Event + { + public: + MinorCPU &cpu; + ThreadID thread_id; + + ThreadActivateEvent(MinorCPU &cpu_, ThreadID thread_id_) : + cpu(cpu_), thread_id(thread_id_) + { } + + void process(); + }; + + /** Events to wakeup each thread */ + std::vector<ThreadActivateEvent *> threadActivateEvents; + + /** pipeline is a container for the clockable pipeline stage objects. + * Elements of pipeline call TheISA to implement the model. */ + Minor::Pipeline *pipeline; + + public: + /** Activity recording for pipeline. This belongs to Pipeline but + * stages will access it through the CPU as the MinorCPU object + * actually mediates idling behaviour */ + Minor::MinorActivityRecorder *activityRecorder; + + /** These are thread state-representing objects for this CPU. 
If + * you need a ThreadContext for *any* reason, use + * threads[threadId]->getTC() */ + std::vector<Minor::MinorThread *> threads; + + public: + /** Provide a non-protected base class for Minor's Ports as derived + * classes are created by Fetch1 and Execute */ + class MinorCPUPort : public MasterPort + { + public: + /** The enclosing cpu */ + MinorCPU &cpu; + + public: + MinorCPUPort(const std::string& name_, MinorCPU &cpu_) + : MasterPort(name_, &cpu_), cpu(cpu_) + { } + + protected: + /** Snooping a coherence request, do nothing. */ + virtual void recvTimingSnoopReq(PacketPtr pkt) { } + }; + + /** The DrainManager passed into drain that needs be signalled when + * draining is complete */ + DrainManager *drainManager; + + protected: + /** Return a reference to the data port. */ + MasterPort &getDataPort(); + + /** Return a reference to the instruction port. */ + MasterPort &getInstPort(); + + public: + MinorCPU(MinorCPUParams *params); + + ~MinorCPU(); + + public: + /** Starting, waking and initialisation */ + void init(); + void startup(); + void wakeup(); + + Addr dbg_vtophys(Addr addr); + + /** Processor-specific statistics */ + Minor::MinorStats stats; + + /** Stats interface from SimObject (by way of BaseCPU) */ + void regStats(); + + /** Simple inst count interface from BaseCPU */ + Counter totalInsts() const; + Counter totalOps() const; + + void serializeThread(std::ostream &os, ThreadID thread_id); + void unserializeThread(Checkpoint *cp, const std::string §ion, + ThreadID thread_id); + + /** Serialize pipeline data */ + void serialize(std::ostream &os); + void unserialize(Checkpoint *cp, const std::string §ion); + + /** Drain interface */ + unsigned int drain(DrainManager *drain_manager); + void drainResume(); + /** Signal from Pipeline that MinorCPU should signal the DrainManager + * that a drain is complete and set its drainState */ + void signalDrainDone(); + void memWriteback(); + + /** Switching interface from BaseCPU */ + void switchOut(); + void 
takeOverFrom(BaseCPU *old_cpu); + + /** Thread activation interface from BaseCPU. */ + void activateContext(ThreadID thread_id, Cycles delay); + void suspendContext(ThreadID thread_id); + + /** Interface for stages to signal that they have become active after + * a callback or eventq event where the pipeline itself may have + * already been idled. The stage argument should be from the + * enumeration Pipeline::StageId */ + void wakeupOnEvent(unsigned int stage_id); +}; + +#endif /* __CPU_MINOR_CPU_HH__ */ diff --git a/src/cpu/minor/decode.cc b/src/cpu/minor/decode.cc new file mode 100644 index 000000000..e380f0d2d --- /dev/null +++ b/src/cpu/minor/decode.cc @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +#include "cpu/minor/decode.hh" +#include "cpu/minor/pipeline.hh" +#include "debug/Decode.hh" + +namespace Minor +{ + +Decode::Decode(const std::string &name, + MinorCPU &cpu_, + MinorCPUParams ¶ms, + Latch<ForwardInstData>::Output inp_, + Latch<ForwardInstData>::Input out_, + Reservable &next_stage_input_buffer) : + Named(name), + cpu(cpu_), + inp(inp_), + out(out_), + nextStageReserve(next_stage_input_buffer), + outputWidth(params.executeInputWidth), + processMoreThanOneInput(params.decodeCycleInput), + inputBuffer(name + ".inputBuffer", "insts", params.decodeInputBufferSize), + inputIndex(0), + inMacroop(false), + execSeqNum(InstId::firstExecSeqNum) +{ + if (outputWidth < 1) + fatal("%s: executeInputWidth must be >= 1 (%d)\n", name, outputWidth); + + if (params.decodeInputBufferSize < 1) { + fatal("%s: decodeInputBufferSize must be >= 1 (%d)\n", name, + params.decodeInputBufferSize); + } +} + +const ForwardInstData * +Decode::getInput() +{ + /* Get insts from the inputBuffer to work with */ + if (!inputBuffer.empty()) { + const ForwardInstData &head = inputBuffer.front(); + + return (head.isBubble() ? NULL : &(inputBuffer.front())); + } else { + return NULL; + } +} + +void +Decode::popInput() +{ + if (!inputBuffer.empty()) + inputBuffer.pop(); + + inputIndex = 0; + inMacroop = false; +} + +#if TRACING_ON +/** Add the tracing data to an instruction. 
This originates in + * decode because this is the first place that execSeqNums are known + * (these are used as the 'FetchSeq' in tracing data) */ +static void +dynInstAddTracing(MinorDynInstPtr inst, StaticInstPtr static_inst, + MinorCPU &cpu) +{ + inst->traceData = cpu.getTracer()->getInstRecord(curTick(), + cpu.getContext(inst->id.threadId), + inst->staticInst, inst->pc, static_inst); + + /* Use the execSeqNum as the fetch sequence number as this most closely + * matches the other processor models' idea of fetch sequence */ + if (inst->traceData) + inst->traceData->setFetchSeq(inst->id.execSeqNum); +} +#endif + +void +Decode::evaluate() +{ + inputBuffer.setTail(*inp.outputWire); + ForwardInstData &insts_out = *out.inputWire; + + assert(insts_out.isBubble()); + + blocked = false; + + if (!nextStageReserve.canReserve()) { + blocked = true; + } else { + const ForwardInstData *insts_in = getInput(); + + unsigned int output_index = 0; + + /* Pack instructions into the output while we can. This may involve + * using more than one input line */ + while (insts_in && + inputIndex < insts_in->width() && /* Still more input */ + output_index < outputWidth /* Still more output to fill */) + { + MinorDynInstPtr inst = insts_in->insts[inputIndex]; + + if (inst->isBubble()) { + /* Skip */ + inputIndex++; + inMacroop = false; + } else { + StaticInstPtr static_inst = inst->staticInst; + /* Static inst of a macro-op above the output_inst */ + StaticInstPtr parent_static_inst = NULL; + MinorDynInstPtr output_inst = inst; + + if (inst->isFault()) { + DPRINTF(Decode, "Fault being passed: %d\n", + inst->fault->name()); + + inputIndex++; + inMacroop = false; + } else if (static_inst->isMacroop()) { + /* Generate a new micro-op */ + StaticInstPtr static_micro_inst; + + /* Set up PC for the next micro-op emitted */ + if (!inMacroop) { + microopPC = inst->pc; + inMacroop = true; + } + + /* Get the micro-op static instruction from the + * static_inst. 
*/ + static_micro_inst = + static_inst->fetchMicroop(microopPC.microPC()); + + output_inst = new MinorDynInst(inst->id); + output_inst->pc = microopPC; + output_inst->staticInst = static_micro_inst; + output_inst->fault = NoFault; + + /* Allow a predicted next address only on the last + * microop */ + if (static_micro_inst->isLastMicroop()) { + output_inst->predictedTaken = inst->predictedTaken; + output_inst->predictedTarget = inst->predictedTarget; + } + + DPRINTF(Decode, "Microop decomposition inputIndex:" + " %d output_index: %d lastMicroop: %s microopPC:" + " %d.%d inst: %d\n", + inputIndex, output_index, + (static_micro_inst->isLastMicroop() ? + "true" : "false"), + microopPC.instAddr(), microopPC.microPC(), + *output_inst); + + /* Acknowledge that the static_inst isn't mine, it's my + * parent macro-op's */ + parent_static_inst = static_inst; + + static_micro_inst->advancePC(microopPC); + + /* Step input if this is the last micro-op */ + if (static_micro_inst->isLastMicroop()) { + inputIndex++; + inMacroop = false; + } + } else { + /* Doesn't need decomposing, pass on instruction */ + DPRINTF(Decode, "Passing on inst: %s inputIndex:" + " %d output_index: %d\n", + *output_inst, inputIndex, output_index); + + parent_static_inst = static_inst; + + /* Step input */ + inputIndex++; + inMacroop = false; + } + + /* Set execSeqNum of output_inst */ + output_inst->id.execSeqNum = execSeqNum; + /* Add tracing */ +#if TRACING_ON + dynInstAddTracing(output_inst, parent_static_inst, cpu); +#endif + + /* Step to next sequence number */ + execSeqNum++; + + /* Correctly size the output before writing */ + if(output_index == 0) insts_out.resize(outputWidth); + /* Push into output */ + insts_out.insts[output_index] = output_inst; + output_index++; + } + + /* Have we finished with the input? 
*/ + if (inputIndex == insts_in->width()) { + /* If we have just been producing micro-ops, we *must* have + * got to the end of that for inputIndex to be pushed past + * insts_in->width() */ + assert(!inMacroop); + popInput(); + insts_in = NULL; + + if (processMoreThanOneInput) { + DPRINTF(Decode, "Wrapping\n"); + insts_in = getInput(); + } + } + } + + /* The rest of the output (if any) should already have been packed + * with bubble instructions by insts_out's initialisation + * + * for (; output_index < outputWidth; output_index++) + * assert(insts_out.insts[output_index]->isBubble()); + */ + } + + /* If we generated output, reserve space for the result in the next stage + * and mark the stage as being active this cycle */ + if (!insts_out.isBubble()) { + /* Note activity of following buffer */ + cpu.activityRecorder->activity(); + nextStageReserve.reserve(); + } + + /* If we still have input to process and somewhere to put it, + * mark stage as active */ + if (getInput() && nextStageReserve.canReserve()) + cpu.activityRecorder->activateStage(Pipeline::DecodeStageId); + + /* Make sure the input (if any left) is pushed */ + inputBuffer.pushTail(); +} + +bool +Decode::isDrained() +{ + return inputBuffer.empty() && (*inp.outputWire).isBubble(); +} + +void +Decode::minorTrace() const +{ + std::ostringstream data; + + if (blocked) + data << 'B'; + else + (*out.inputWire).reportData(data); + + MINORTRACE("insts=%s\n", data.str()); + inputBuffer.minorTrace(); +} + +} diff --git a/src/cpu/minor/decode.hh b/src/cpu/minor/decode.hh new file mode 100644 index 000000000..fcc18fd44 --- /dev/null +++ b/src/cpu/minor/decode.hh @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the 
functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * Decode collects macro-ops from Fetch2 and splits them into micro-ops + * passed to Execute. 
+ */ + +#ifndef __CPU_MINOR_DECODE_HH__ +#define __CPU_MINOR_DECODE_HH__ + +#include "cpu/minor/buffers.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/dyn_inst.hh" +#include "cpu/minor/pipe_data.hh" + +namespace Minor +{ + +/* Decode takes instructions from Fetch2 and decomposes them into micro-ops + * to feed to Execute. It generates a new sequence number for each + * instruction: execSeqNum. + */ +class Decode : public Named +{ + protected: + /** Pointer back to the containing CPU */ + MinorCPU &cpu; + + /** Input port carrying macro instructions from Fetch2 */ + Latch<ForwardInstData>::Output inp; + /** Output port carrying micro-op decomposed instructions to Execute */ + Latch<ForwardInstData>::Input out; + + /** Interface to reserve space in the next stage */ + Reservable &nextStageReserve; + + /** Width of output of this stage/input of next in instructions */ + unsigned int outputWidth; + + /** If true, more than one input word can be processed each cycle if + * there is room in the output to contain its processed data */ + bool processMoreThanOneInput; + + public: + /* Public for Pipeline to be able to pass it to Fetch2 */ + InputBuffer<ForwardInstData> inputBuffer; + + protected: + /** Data members after this line are cycle-to-cycle state */ + + /** Index into the inputBuffer's head marking the start of unhandled + * instructions */ + unsigned int inputIndex; + + /** True when we're in the process of decomposing a micro-op and + * microopPC will be valid. This is only the case when there isn't + * sufficient space in Executes input buffer to take the whole of a + * decomposed instruction and some of that instructions micro-ops must + * be generated in a later cycle */ + bool inMacroop; + TheISA::PCState microopPC; + + /** Source of execSeqNums to number instructions. */ + InstSeqNum execSeqNum; + + /** Blocked indication for report */ + bool blocked; + + protected: + /** Get a piece of data to work on, or 0 if there is no data. 
*/ + const ForwardInstData *getInput(); + + /** Pop an element off the input buffer, if there are any */ + void popInput(); + + public: + Decode(const std::string &name, + MinorCPU &cpu_, + MinorCPUParams ¶ms, + Latch<ForwardInstData>::Output inp_, + Latch<ForwardInstData>::Input out_, + Reservable &next_stage_input_buffer); + + public: + /** Pass on input/buffer data to the output if you can */ + void evaluate(); + + void minorTrace() const; + + /** Is this stage drained? For Decoed, draining is initiated by + * Execute halting Fetch1 causing Fetch2 to naturally drain + * into Decode and on to Execute which is responsible for + * actually killing instructions */ + bool isDrained(); +}; + +} + +#endif /* __CPU_MINOR_DECODE_HH__ */ diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc new file mode 100644 index 000000000..ab08e6b4a --- /dev/null +++ b/src/cpu/minor/dyn_inst.cc @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +#include <iomanip> +#include <sstream> + +#include "arch/isa.hh" +#include "arch/registers.hh" +#include "cpu/minor/dyn_inst.hh" +#include "cpu/minor/trace.hh" +#include "cpu/base.hh" +#include "cpu/reg_class.hh" +#include "debug/MinorExecute.hh" +#include "enums/OpClass.hh" + +namespace Minor +{ + +std::ostream & +operator <<(std::ostream &os, const InstId &id) +{ + os << id.threadId << '/' << id.streamSeqNum << '.' 
+ << id.predictionSeqNum << '/' << id.lineSeqNum; + + /* Not all structures have fetch and exec sequence numbers */ + if (id.fetchSeqNum != 0) { + os << '/' << id.fetchSeqNum; + if (id.execSeqNum != 0) + os << '.' << id.execSeqNum; + } + + return os; +} + +MinorDynInstPtr MinorDynInst::bubbleInst = NULL; + +void +MinorDynInst::init() +{ + if (!bubbleInst) { + bubbleInst = new MinorDynInst(); + assert(bubbleInst->isBubble()); + /* Make bubbleInst immortal */ + bubbleInst->incref(); + } +} + +bool +MinorDynInst::isLastOpInInst() const +{ + assert(staticInst); + return !(staticInst->isMicroop() && !staticInst->isLastMicroop()); +} + +bool +MinorDynInst::isNoCostInst() const +{ + return isInst() && staticInst->opClass() == No_OpClass; +} + +void +MinorDynInst::reportData(std::ostream &os) const +{ + if (isBubble()) + os << "-"; + else if (isFault()) + os << "F;" << id; + else + os << id; +} + +std::ostream & +operator <<(std::ostream &os, const MinorDynInst &inst) +{ + os << inst.id << " pc: 0x" + << std::hex << inst.pc.instAddr() << std::dec << " ("; + + if (inst.isFault()) + os << "fault: \"" << inst.fault->name() << '"'; + else if (inst.staticInst) + os << inst.staticInst->getName(); + else + os << "bubble"; + + os << ')'; + + return os; +} + +/** Print a register in the form r<n>, f<n>, m<n>(<name>), z for integer, + * float, misc and zero registers given an 'architectural register number' */ +static void +printRegName(std::ostream &os, TheISA::RegIndex reg) +{ + RegClass reg_class = regIdxToClass(reg); + + switch (reg_class) + { + case MiscRegClass: + { + TheISA::RegIndex misc_reg = reg - TheISA::Misc_Reg_Base; + + /* This is an ugly test because not all archs. 
have miscRegName */ +#if THE_ISA == ARM_ISA + os << 'm' << misc_reg << '(' << TheISA::miscRegName[misc_reg] << + ')'; +#else + os << 'n' << misc_reg; +#endif + } + break; + case FloatRegClass: + os << 'f' << static_cast<unsigned int>(reg - TheISA::FP_Reg_Base); + break; + case IntRegClass: + if (reg == TheISA::ZeroReg) { + os << 'z'; + } else { + os << 'r' << static_cast<unsigned int>(reg); + } + break; + case CCRegClass: + os << 'c' << static_cast<unsigned int>(reg - TheISA::CC_Reg_Base); + } +} + +void +MinorDynInst::minorTraceInst(const Named &named_object) const +{ + if (isFault()) { + MINORINST(&named_object, "id=F;%s addr=0x%x fault=\"%s\"\n", + id, pc.instAddr(), fault->name()); + } else { + unsigned int num_src_regs = staticInst->numSrcRegs(); + unsigned int num_dest_regs = staticInst->numDestRegs(); + + std::ostringstream regs_str; + + /* Format lists of src and dest registers for microops and + * 'full' instructions */ + if (!staticInst->isMacroop()) { + regs_str << " srcRegs="; + + unsigned int src_reg = 0; + while (src_reg < num_src_regs) { + printRegName(regs_str, staticInst->srcRegIdx(src_reg)); + + src_reg++; + if (src_reg != num_src_regs) + regs_str << ','; + } + + regs_str << " destRegs="; + + unsigned int dest_reg = 0; + while (dest_reg < num_dest_regs) { + printRegName(regs_str, staticInst->destRegIdx(dest_reg)); + + dest_reg++; + if (dest_reg != num_dest_regs) + regs_str << ','; + } + +#if THE_ISA == ARM_ISA + regs_str << " extMachInst=" << std::hex << std::setw(16) + << std::setfill('0') << staticInst->machInst << std::dec; +#endif + } + + std::ostringstream flags; + staticInst->printFlags(flags, " "); + + MINORINST(&named_object, "id=%s addr=0x%x inst=\"%s\" class=%s" + " flags=\"%s\"%s%s\n", + id, pc.instAddr(), + (staticInst->opClass() == No_OpClass ? + "(invalid)" : staticInst->disassemble(0,NULL)), + Enums::OpClassStrings[staticInst->opClass()], + flags.str(), + regs_str.str(), + (predictedTaken ? 
" predictedTaken" : "")); + } +} + +MinorDynInst::~MinorDynInst() +{ + if (traceData) + delete traceData; +} + +} diff --git a/src/cpu/minor/dyn_inst.hh b/src/cpu/minor/dyn_inst.hh new file mode 100644 index 000000000..a30d68819 --- /dev/null +++ b/src/cpu/minor/dyn_inst.hh @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * The dynamic instruction and instruction/line id (sequence numbers) + * definition for Minor. A spirited attempt is made here to not carry too + * much on this structure. + */ + +#ifndef __CPU_MINOR_DYN_INST_HH__ +#define __CPU_MINOR_DYN_INST_HH__ + +#include <iostream> + +#include "base/refcnt.hh" +#include "cpu/minor/buffers.hh" +#include "cpu/inst_seq.hh" +#include "cpu/static_inst.hh" +#include "cpu/timing_expr.hh" +#include "sim/faults.hh" + +namespace Minor +{ + +class MinorDynInst; + +/** MinorDynInsts are currently reference counted. */ +typedef RefCountingPtr<MinorDynInst> MinorDynInstPtr; + +/** Id for lines and instructions. This includes all the relevant sequence + * numbers and thread ids for all stages of execution. */ +class InstId +{ + public: + /** First sequence numbers to use in initialisation of the pipeline and + * to be expected on the first line/instruction issued */ + static const InstSeqNum firstStreamSeqNum = 1; + static const InstSeqNum firstPredictionSeqNum = 1; + static const InstSeqNum firstLineSeqNum = 1; + static const InstSeqNum firstFetchSeqNum = 1; + static const InstSeqNum firstExecSeqNum = 1; + + public: + /** The thread to which this line/instruction belongs */ + ThreadID threadId; + + /** The 'stream' this instruction belongs to. 
Streams are interrupted + * (and sequence numbers increased) when Execute finds it wants to + * change the stream of instructions due to a branch. */ + InstSeqNum streamSeqNum; + + /** The predicted qualifier to stream, attached by Fetch2 as a + * consequence of branch prediction */ + InstSeqNum predictionSeqNum; + + /** Line sequence number. This is the sequence number of the fetched + * line from which this instruction was fetched */ + InstSeqNum lineSeqNum; + + /** Fetch sequence number. This is 0 for bubbles and an ascending + * sequence for the stream of all fetched instructions */ + InstSeqNum fetchSeqNum; + + /** 'Execute' sequence number. These are assigned after micro-op + * decomposition and form an ascending sequence (starting with 1) for + * post-micro-op decomposed instructions. */ + InstSeqNum execSeqNum; + + public: + /** Very boring default constructor */ + InstId( + ThreadID thread_id = 0, InstSeqNum stream_seq_num = 0, + InstSeqNum prediction_seq_num = 0, InstSeqNum line_seq_num = 0, + InstSeqNum fetch_seq_num = 0, InstSeqNum exec_seq_num = 0) : + threadId(thread_id), streamSeqNum(stream_seq_num), + predictionSeqNum(prediction_seq_num), lineSeqNum(line_seq_num), + fetchSeqNum(fetch_seq_num), execSeqNum(exec_seq_num) + { } + + public: + /* Equal if the thread and last set sequence number matches */ + bool + operator== (const InstId &rhs) + { + /* If any of fetch and exec sequence number are not set + * they need to be 0, so a straight comparison is still + * fine */ + bool ret = (threadId == rhs.threadId && + lineSeqNum == rhs.lineSeqNum && + fetchSeqNum == rhs.fetchSeqNum && + execSeqNum == rhs.execSeqNum); + + /* Stream and prediction *must* match if these are the same id */ + if (ret) { + assert(streamSeqNum == rhs.streamSeqNum && + predictionSeqNum == rhs.predictionSeqNum); + } + + return ret; + } +}; + +/** Print this id in the usual slash-separated format expected by + * MinorTrace */ +std::ostream &operator <<(std::ostream &os, const InstId 
&id); + +class MinorDynInst; + +/** Print a short reference to this instruction. '-' for a bubble and a + * series of '/' separated sequence numbers for other instructions. The + * sequence numbers will be in the order: stream, prediction, line, fetch, + * exec with exec absent if it is 0. This is used by MinorTrace. */ +std::ostream &operator <<(std::ostream &os, const MinorDynInst &inst); + +/** Dynamic instruction for Minor. + * MinorDynInst implements the BubbleIF interface + * Has two separate notions of sequence number for pre/post-micro-op + * decomposition: fetchSeqNum and execSeqNum */ +class MinorDynInst : public RefCounted +{ + private: + /** A prototypical bubble instruction. You must call MinorDynInst::init + * to initialise this */ + static MinorDynInstPtr bubbleInst; + + public: + StaticInstPtr staticInst; + + InstId id; + + /** Trace information for this instruction's execution */ + Trace::InstRecord *traceData; + + /** The fetch address of this instruction */ + TheISA::PCState pc; + + /** This is actually a fault masquerading as an instruction */ + Fault fault; + + /** Tried to predict the destination of this inst (if a control + * instruction or a sys call) */ + bool triedToPredict; + + /** This instruction was predicted to change control flow and + * the following instructions will have a newer predictionSeqNum */ + bool predictedTaken; + + /** Predicted branch target */ + TheISA::PCState predictedTarget; + + /** Fields only set during execution */ + + /** FU this instruction is issued to */ + unsigned int fuIndex; + + /** This instruction is in the LSQ, not a functional unit */ + bool inLSQ; + + /** The instruction has been sent to the store buffer */ + bool inStoreBuffer; + + /** Can this instruction be executed out of order. 
In this model, + * this only happens with mem refs that need to be issued early + * to allow other instructions to fill the fetch delay */ + bool canEarlyIssue; + + /** execSeqNum of the latest inst on which this inst depends. + * This can be used as a sanity check for dependency ordering + * where slightly out of order execution is required (notably + * initiateAcc for memory ops) */ + InstSeqNum instToWaitFor; + + /** Extra delay at the end of the pipeline */ + Cycles extraCommitDelay; + TimingExpr *extraCommitDelayExpr; + + /** Once issued, extraCommitDelay becomes minimumCommitCycle + * to account for delay in absolute time */ + Cycles minimumCommitCycle; + + /** Flat register indices so that, when clearing the scoreboard, we + * have the same register indices as when the instruction was marked + * up */ + TheISA::RegIndex flatDestRegIdx[TheISA::MaxInstDestRegs]; + + /** Effective address as set by ExecContext::setEA */ + Addr ea; + + public: + MinorDynInst(InstId id_ = InstId(), Fault fault_ = NoFault) : + staticInst(NULL), id(id_), traceData(NULL), + pc(TheISA::PCState(0)), fault(fault_), + triedToPredict(false), predictedTaken(false), + fuIndex(0), inLSQ(false), inStoreBuffer(false), + canEarlyIssue(false), + instToWaitFor(0), extraCommitDelay(Cycles(0)), + extraCommitDelayExpr(NULL), minimumCommitCycle(Cycles(0)), + ea(0) + { } + + public: + /** The BubbleIF interface. 
*/ + bool isBubble() const { return id.fetchSeqNum == 0; } + + /** There is a single bubble inst */ + static MinorDynInstPtr bubble() { return bubbleInst; } + + /** Is this a fault rather than instruction */ + bool isFault() const { return fault != NoFault; } + + /** Is this a real instruction */ + bool isInst() const { return !isBubble() && !isFault(); } + + /** Is this a real mem ref instruction */ + bool isMemRef() const { return isInst() && staticInst->isMemRef(); } + + /** Is this an instruction that can be executed `for free' and + * needn't spend time in an FU */ + bool isNoCostInst() const; + + /** Assuming this is not a fault, is this instruction either + * a whole instruction or the last microop from a macroop */ + bool isLastOpInInst() const; + + /** Initialise the class */ + static void init(); + + /** Print (possibly verbose) instruction information for + * MinorTrace using the given Named object's name */ + void minorTraceInst(const Named &named_object) const; + + /** ReportIF interface */ + void reportData(std::ostream &os) const; + + ~MinorDynInst(); +}; + +/** Print a summary of the instruction */ +std::ostream &operator <<(std::ostream &os, const MinorDynInst &inst); + +} + +#endif /* __CPU_MINOR_DYN_INST_HH__ */ diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh new file mode 100644 index 000000000..df909a95c --- /dev/null +++ b/src/cpu/minor/exec_context.hh @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2011-2014 ARM Limited + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Steve Reinhardt + * Dave Greene + * Nathan Binkert + * Andrew Bardsley + */ + +/** + * @file + * + * ExecContext bears the exec_context interface for Minor. + */ + +#ifndef __CPU_MINOR_EXEC_CONTEXT_HH__ +#define __CPU_MINOR_EXEC_CONTEXT_HH__ + +#include "cpu/minor/execute.hh" +#include "cpu/minor/pipeline.hh" +#include "cpu/base.hh" +#include "cpu/simple_thread.hh" +#include "debug/MinorExecute.hh" + +namespace Minor +{ + +/* Forward declaration of Execute */ +class Execute; + +/** ExecContext bears the exec_context interface for Minor. This nicely + * separates that interface from other classes such as Pipeline, MinorCPU + * and DynMinorInst and makes it easier to see what state is accessed by it. + */ +class ExecContext +{ + public: + MinorCPU &cpu; + + /** ThreadState object, provides all the architectural state. */ + SimpleThread &thread; + + /** The execute stage so we can peek at its contents. */ + Execute &execute; + + /** Instruction for the benefit of memory operations and for PC */ + MinorDynInstPtr inst; + + ExecContext ( + MinorCPU &cpu_, + SimpleThread &thread_, Execute &execute_, + MinorDynInstPtr inst_) : + cpu(cpu_), + thread(thread_), + execute(execute_), + inst(inst_) + { + DPRINTF(MinorExecute, "ExecContext setting PC: %s\n", inst->pc); + pcState(inst->pc); + setPredicate(true); + thread.setIntReg(TheISA::ZeroReg, 0); +#if THE_ISA == ALPHA_ISA + thread.setFloatReg(TheISA::ZeroReg, 0.0); +#endif + } + + Fault + readMem(Addr addr, uint8_t *data, unsigned int size, + unsigned int flags) + { + execute.getLSQ().pushRequest(inst, true /* load */, data, + size, addr, flags, NULL); + return NoFault; + } + + Fault + writeMem(uint8_t *data, unsigned int size, Addr addr, + unsigned int flags, uint64_t *res) + { + execute.getLSQ().pushRequest(inst, false /* store */, data, + size, addr, flags, res); + return NoFault; + } + + uint64_t + readIntRegOperand(const StaticInst *si, int idx) + { + return thread.readIntReg(si->srcRegIdx(idx)); + } + 
+ TheISA::FloatReg + readFloatRegOperand(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Reg_Base; + return thread.readFloatReg(reg_idx); + } + + TheISA::FloatRegBits + readFloatRegOperandBits(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Reg_Base; + return thread.readFloatRegBits(reg_idx); + } + + void + setIntRegOperand(const StaticInst *si, int idx, uint64_t val) + { + thread.setIntReg(si->destRegIdx(idx), val); + } + + void + setFloatRegOperand(const StaticInst *si, int idx, + TheISA::FloatReg val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Reg_Base; + thread.setFloatReg(reg_idx, val); + } + + void + setFloatRegOperandBits(const StaticInst *si, int idx, + TheISA::FloatRegBits val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Reg_Base; + thread.setFloatRegBits(reg_idx, val); + } + + bool + readPredicate() + { + return thread.readPredicate(); + } + + void + setPredicate(bool val) + { + thread.setPredicate(val); + } + + TheISA::PCState + pcState() + { + return thread.pcState(); + } + + void + pcState(const TheISA::PCState &val) + { + thread.pcState(val); + } + + TheISA::MiscReg + readMiscRegNoEffect(int misc_reg) + { + return thread.readMiscRegNoEffect(misc_reg); + } + + TheISA::MiscReg + readMiscReg(int misc_reg) + { + return thread.readMiscReg(misc_reg); + } + + void + setMiscReg(int misc_reg, const TheISA::MiscReg &val) + { + thread.setMiscReg(misc_reg, val); + } + + TheISA::MiscReg + readMiscRegOperand(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::Misc_Reg_Base; + return thread.readMiscReg(reg_idx); + } + + void + setMiscRegOperand(const StaticInst *si, int idx, + const TheISA::MiscReg &val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::Misc_Reg_Base; + return thread.setMiscReg(reg_idx, val); + } + + Fault + hwrei() + { +#if THE_ISA == ALPHA_ISA + return thread.hwrei(); +#else + return NoFault; +#endif + } + + bool + simPalCheck(int 
palFunc) + { +#if THE_ISA == ALPHA_ISA + return thread.simPalCheck(palFunc); +#else + return false; +#endif + } + + void + syscall(int64_t callnum) + { + if (FullSystem) + panic("Syscall emulation isn't available in FS mode.\n"); + + thread.syscall(callnum); + } + + ThreadContext *tcBase() { return thread.getTC(); } + + /* @todo, should make stCondFailures persistent somewhere */ + unsigned int readStCondFailures() { return 0; } + unsigned int + setStCondFailures(unsigned int st_cond_failures) + { + return 0; + } + + int contextId() { return thread.contextId(); } + /* ISA-specific (or at least currently ISA singleton) functions */ + + /* X86: TLB twiddling */ + void + demapPage(Addr vaddr, uint64_t asn) + { + thread.getITBPtr()->demapPage(vaddr, asn); + thread.getDTBPtr()->demapPage(vaddr, asn); + } + + TheISA::CCReg + readCCRegOperand(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::CC_Reg_Base; + return thread.readCCReg(reg_idx); + } + + void + setCCRegOperand(const StaticInst *si, int idx, TheISA::CCReg val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::CC_Reg_Base; + thread.setCCReg(reg_idx, val); + } + + void + demapInstPage(Addr vaddr, uint64_t asn) + { + thread.getITBPtr()->demapPage(vaddr, asn); + } + + void + demapDataPage(Addr vaddr, uint64_t asn) + { + thread.getDTBPtr()->demapPage(vaddr, asn); + } + + /* ALPHA/POWER: Effective address storage */ + void setEA(Addr &ea) + { + inst->ea = ea; + } + + BaseCPU *getCpuPtr() { return &cpu; } + + /* POWER: Effective address storage */ + Addr getEA() + { + return inst->ea; + } + + /* MIPS: other thread register reading/writing */ + uint64_t + readRegOtherThread(unsigned idx, ThreadID tid = InvalidThreadID) + { + SimpleThread *other_thread = (tid == InvalidThreadID + ? 
&thread : cpu.threads[tid]); + + if (idx < TheISA::FP_Reg_Base) { /* Integer */ + return other_thread->readIntReg(idx); + } else if (idx < TheISA::Misc_Reg_Base) { /* Float */ + return other_thread->readFloatRegBits(idx + - TheISA::FP_Reg_Base); + } else { /* Misc */ + return other_thread->readMiscReg(idx + - TheISA::Misc_Reg_Base); + } + } + + void + setRegOtherThread(unsigned idx, const TheISA::MiscReg &val, + ThreadID tid = InvalidThreadID) + { + SimpleThread *other_thread = (tid == InvalidThreadID + ? &thread : cpu.threads[tid]); + + if (idx < TheISA::FP_Reg_Base) { /* Integer */ + return other_thread->setIntReg(idx, val); + } else if (idx < TheISA::Misc_Reg_Base) { /* Float */ + return other_thread->setFloatRegBits(idx + - TheISA::FP_Reg_Base, val); + } else { /* Misc */ + return other_thread->setMiscReg(idx + - TheISA::Misc_Reg_Base, val); + } + } +}; + +} + +#endif /* __CPU_MINOR_EXEC_CONTEXT_HH__ */ diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc new file mode 100644 index 000000000..2a009a154 --- /dev/null +++ b/src/cpu/minor/execute.cc @@ -0,0 +1,1736 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +#include "arch/locked_mem.hh" +#include "arch/registers.hh" +#include "arch/utility.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/exec_context.hh" +#include "cpu/minor/execute.hh" +#include "cpu/minor/fetch1.hh" +#include "cpu/minor/lsq.hh" +#include "cpu/op_class.hh" +#include "debug/Activity.hh" +#include "debug/Branch.hh" +#include "debug/Drain.hh" +#include "debug/MinorExecute.hh" +#include "debug/MinorInterrupt.hh" +#include "debug/MinorMem.hh" +#include "debug/MinorTrace.hh" +#include "debug/PCEvent.hh" + +namespace Minor +{ + +Execute::Execute(const std::string &name_, + MinorCPU &cpu_, + MinorCPUParams ¶ms, + Latch<ForwardInstData>::Output inp_, + Latch<BranchData>::Input out_) : + Named(name_), + inp(inp_), + out(out_), + cpu(cpu_), + issueLimit(params.executeIssueLimit), + memoryIssueLimit(params.executeMemoryIssueLimit), + commitLimit(params.executeCommitLimit), + memoryCommitLimit(params.executeMemoryCommitLimit), + processMoreThanOneInput(params.executeCycleInput), + fuDescriptions(*params.executeFuncUnits), + numFuncUnits(fuDescriptions.funcUnits.size()), + setTraceTimeOnCommit(params.executeSetTraceTimeOnCommit), + setTraceTimeOnIssue(params.executeSetTraceTimeOnIssue), + allowEarlyMemIssue(params.executeAllowEarlyMemoryIssue), + noCostFUIndex(fuDescriptions.funcUnits.size() + 1), + lsq(name_ + ".lsq", name_ + ".dcache_port", + cpu_, *this, + params.executeMaxAccessesInMemory, + params.executeMemoryWidth, + params.executeLSQRequestsQueueSize, + params.executeLSQTransfersQueueSize, + params.executeLSQStoreBufferSize, + params.executeLSQMaxStoreBufferStoresPerCycle), + scoreboard(name_ + ".scoreboard"), + inputBuffer(name_ + ".inputBuffer", "insts", + params.executeInputBufferSize), + inputIndex(0), + lastCommitWasEndOfMacroop(true), + instsBeingCommitted(params.executeCommitLimit), + streamSeqNum(InstId::firstStreamSeqNum), + lastPredictionSeqNum(InstId::firstPredictionSeqNum), + drainState(NotDraining) 
+{ + if (commitLimit < 1) { + fatal("%s: executeCommitLimit must be >= 1 (%d)\n", name_, + commitLimit); + } + + if (issueLimit < 1) { + fatal("%s: executeCommitLimit must be >= 1 (%d)\n", name_, + issueLimit); + } + + if (memoryIssueLimit < 1) { + fatal("%s: executeMemoryIssueLimit must be >= 1 (%d)\n", name_, + memoryIssueLimit); + } + + if (memoryCommitLimit > commitLimit) { + fatal("%s: executeMemoryCommitLimit (%d) must be <=" + " executeCommitLimit (%d)\n", + name_, memoryCommitLimit, commitLimit); + } + + if (params.executeInputBufferSize < 1) { + fatal("%s: executeInputBufferSize must be >= 1 (%d)\n", name_, + params.executeInputBufferSize); + } + + if (params.executeInputBufferSize < 1) { + fatal("%s: executeInputBufferSize must be >= 1 (%d)\n", name_, + params.executeInputBufferSize); + } + + /* This should be large enough to count all the in-FU instructions + * which need to be accounted for in the inFlightInsts + * queue */ + unsigned int total_slots = 0; + + /* Make FUPipelines for each MinorFU */ + for (unsigned int i = 0; i < numFuncUnits; i++) { + std::ostringstream fu_name; + MinorFU *fu_description = fuDescriptions.funcUnits[i]; + + /* Note the total number of instruction slots (for sizing + * the inFlightInst queue) and the maximum latency of any FU + * (for sizing the activity recorder) */ + total_slots += fu_description->opLat; + + fu_name << name_ << ".fu." 
<< i; + + FUPipeline *fu = new FUPipeline(fu_name.str(), *fu_description, cpu); + + funcUnits.push_back(fu); + } + + /** Check that there is a functional unit for all operation classes */ + for (int op_class = No_OpClass + 1; op_class < Num_OpClass; op_class++) { + bool found_fu = false; + unsigned int fu_index = 0; + + while (fu_index < numFuncUnits && !found_fu) + { + if (funcUnits[fu_index]->provides( + static_cast<OpClass>(op_class))) + { + found_fu = true; + } + fu_index++; + } + + if (!found_fu) { + warn("No functional unit for OpClass %s\n", + Enums::OpClassStrings[op_class]); + } + } + + inFlightInsts = new Queue<QueuedInst, + ReportTraitsAdaptor<QueuedInst> >( + name_ + ".inFlightInsts", "insts", total_slots); + + inFUMemInsts = new Queue<QueuedInst, + ReportTraitsAdaptor<QueuedInst> >( + name_ + ".inFUMemInsts", "insts", total_slots); +} + +const ForwardInstData * +Execute::getInput() +{ + /* Get a line from the inputBuffer to work with */ + if (!inputBuffer.empty()) { + const ForwardInstData &head = inputBuffer.front(); + + return (head.isBubble() ? NULL : &(inputBuffer.front())); + } else { + return NULL; + } +} + +void +Execute::popInput() +{ + if (!inputBuffer.empty()) + inputBuffer.pop(); + + inputIndex = 0; +} + +void +Execute::tryToBranch(MinorDynInstPtr inst, Fault fault, BranchData &branch) +{ + ThreadContext *thread = cpu.getContext(inst->id.threadId); + const TheISA::PCState &pc_before = inst->pc; + TheISA::PCState target = thread->pcState(); + + /* Force a branch for SerializeAfter instructions at the end of micro-op + * sequence when we're not suspended */ + bool force_branch = thread->status() != ThreadContext::Suspended && + !inst->isFault() && + inst->isLastOpInInst() && + (inst->staticInst->isSerializeAfter() || + inst->staticInst->isIprAccess()); + + DPRINTF(Branch, "tryToBranch before: %s after: %s%s\n", + pc_before, target, (force_branch ? " (forcing)" : "")); + + /* Will we change the PC to something other than the next instruction? 
*/ + bool must_branch = pc_before != target || + fault != NoFault || + force_branch; + + /* The reason for the branch data we're about to generate, set below */ + BranchData::Reason reason = BranchData::NoBranch; + + if (fault == NoFault) + { + TheISA::advancePC(target, inst->staticInst); + thread->pcState(target); + + DPRINTF(Branch, "Advancing current PC from: %s to: %s\n", + pc_before, target); + } + + if (inst->predictedTaken && !force_branch) { + /* Predicted to branch */ + if (!must_branch) { + /* No branch was taken, change stream to get us back to the + * intended PC value */ + DPRINTF(Branch, "Predicted a branch from 0x%x to 0x%x but" + " none happened inst: %s\n", + inst->pc.instAddr(), inst->predictedTarget.instAddr(), *inst); + + reason = BranchData::BadlyPredictedBranch; + } else if (inst->predictedTarget == target) { + /* Branch prediction got the right target, kill the branch and + * carry on. + * Note that this information to the branch predictor might get + * overwritten by a "real" branch during this cycle */ + DPRINTF(Branch, "Predicted a branch from 0x%x to 0x%x correctly" + " inst: %s\n", + inst->pc.instAddr(), inst->predictedTarget.instAddr(), *inst); + + reason = BranchData::CorrectlyPredictedBranch; + } else { + /* Branch prediction got the wrong target */ + DPRINTF(Branch, "Predicted a branch from 0x%x to 0x%x" + " but got the wrong target (actual: 0x%x) inst: %s\n", + inst->pc.instAddr(), inst->predictedTarget.instAddr(), + target.instAddr() *inst); + + reason = BranchData::BadlyPredictedBranchTarget; + } + } else if (must_branch) { + /* Unpredicted branch */ + DPRINTF(Branch, "Unpredicted branch from 0x%x to 0x%x inst: %s\n", + inst->pc.instAddr(), target.instAddr(), *inst); + + reason = BranchData::UnpredictedBranch; + } else { + /* No branch at all */ + reason = BranchData::NoBranch; + } + + updateBranchData(reason, inst, target, branch); +} + +void +Execute::updateBranchData( + BranchData::Reason reason, + MinorDynInstPtr inst, const 
TheISA::PCState &target, + BranchData &branch) +{ + if (reason != BranchData::NoBranch) { + /* Bump up the stream sequence number on a real branch*/ + if (BranchData::isStreamChange(reason)) + streamSeqNum++; + + /* Branches (even mis-predictions) don't change the predictionSeqNum, + * just the streamSeqNum */ + branch = BranchData(reason, streamSeqNum, + /* Maintaining predictionSeqNum if there's no inst is just a + * courtesy and looks better on minorview */ + (inst->isBubble() ? lastPredictionSeqNum + : inst->id.predictionSeqNum), + target, inst); + + DPRINTF(Branch, "Branch data signalled: %s\n", branch); + } +} + +void +Execute::handleMemResponse(MinorDynInstPtr inst, + LSQ::LSQRequestPtr response, BranchData &branch, Fault &fault) +{ + ThreadID thread_id = inst->id.threadId; + ThreadContext *thread = cpu.getContext(thread_id); + + ExecContext context(cpu, *cpu.threads[thread_id], *this, inst); + + PacketPtr packet = response->packet; + + bool is_load = inst->staticInst->isLoad(); + bool is_store = inst->staticInst->isStore(); + bool is_prefetch = inst->staticInst->isDataPrefetch(); + + /* If true, the trace's predicate value will be taken from the exec + * context predicate, otherwise, it will be set to false */ + bool use_context_predicate = true; + + if (response->fault != NoFault) { + /* Invoke memory faults. 
*/ + DPRINTF(MinorMem, "Completing fault from DTLB access: %s\n", + response->fault->name()); + + if (inst->staticInst->isPrefetch()) { + DPRINTF(MinorMem, "Not taking fault on prefetch: %s\n", + response->fault->name()); + + /* Don't assign to fault */ + } else { + /* Take the fault raised during the TLB/memory access */ + fault = response->fault; + + fault->invoke(thread, inst->staticInst); + } + } else if (!packet) { + DPRINTF(MinorMem, "Completing failed request inst: %s\n", + *inst); + use_context_predicate = false; + } else if (packet->isError()) { + DPRINTF(MinorMem, "Trying to commit error response: %s\n", + *inst); + + fatal("Received error response packet for inst: %s\n", *inst); + } else if (is_store || is_load || is_prefetch) { + assert(packet); + + DPRINTF(MinorMem, "Memory response inst: %s addr: 0x%x size: %d\n", + *inst, packet->getAddr(), packet->getSize()); + + if (is_load && packet->getSize() > 0) { + DPRINTF(MinorMem, "Memory data[0]: 0x%x\n", + static_cast<unsigned int>(packet->getPtr<uint8_t>()[0])); + } + + /* Complete the memory access instruction */ + fault = inst->staticInst->completeAcc(packet, &context, + inst->traceData); + + if (fault != NoFault) { + /* Invoke fault created by instruction completion */ + DPRINTF(MinorMem, "Fault in memory completeAcc: %s\n", + fault->name()); + fault->invoke(thread, inst->staticInst); + } else { + /* Stores need to be pushed into the store buffer to finish + * them off */ + if (response->needsToBeSentToStoreBuffer()) + lsq.sendStoreToStoreBuffer(response); + } + } else { + fatal("There should only ever be reads, " + "writes or faults at this point\n"); + } + + lsq.popResponse(response); + + if (inst->traceData) { + inst->traceData->setPredicate((use_context_predicate ? 
+ context.readPredicate() : false)); + } + + doInstCommitAccounting(inst); + + /* Generate output to account for branches */ + tryToBranch(inst, fault, branch); +} + +bool +Execute::isInterrupted(ThreadID thread_id) const +{ + return cpu.checkInterrupts(cpu.getContext(thread_id)); +} + +bool +Execute::takeInterrupt(ThreadID thread_id, BranchData &branch) +{ + DPRINTF(MinorInterrupt, "Considering interrupt status from PC: %s\n", + cpu.getContext(thread_id)->pcState()); + + Fault interrupt = cpu.getInterruptController()->getInterrupt + (cpu.getContext(thread_id)); + + if (interrupt != NoFault) { + /* The interrupt *must* set pcState */ + cpu.getInterruptController()->updateIntrInfo + (cpu.getContext(thread_id)); + interrupt->invoke(cpu.getContext(thread_id)); + + assert(!lsq.accessesInFlight()); + + DPRINTF(MinorInterrupt, "Invoking interrupt: %s to PC: %s\n", + interrupt->name(), cpu.getContext(thread_id)->pcState()); + + /* Assume that an interrupt *must* cause a branch. Assert this? */ + + updateBranchData(BranchData::Interrupt, MinorDynInst::bubble(), + cpu.getContext(thread_id)->pcState(), branch); + } + + return interrupt != NoFault; +} + +bool +Execute::executeMemRefInst(MinorDynInstPtr inst, BranchData &branch, + bool &passed_predicate, Fault &fault) +{ + bool issued = false; + + /* Set to true if the mem op. 
is issued and sent to the mem system */ + passed_predicate = false; + + if (!lsq.canRequest()) { + /* Not acting on instruction yet as the memory + * queues are full */ + issued = false; + } else { + ThreadContext *thread = cpu.getContext(inst->id.threadId); + TheISA::PCState old_pc = thread->pcState(); + + ExecContext context(cpu, *cpu.threads[inst->id.threadId], + *this, inst); + + DPRINTF(MinorExecute, "Initiating memRef inst: %s\n", *inst); + + Fault init_fault = inst->staticInst->initiateAcc(&context, + inst->traceData); + + if (init_fault != NoFault) { + DPRINTF(MinorExecute, "Fault on memory inst: %s" + " initiateAcc: %s\n", *inst, init_fault->name()); + fault = init_fault; + } else { + /* Only set this if the instruction passed its + * predicate */ + passed_predicate = context.readPredicate(); + + /* Set predicate in tracing */ + if (inst->traceData) + inst->traceData->setPredicate(passed_predicate); + + /* If the instruction didn't pass its predicate (and so will not + * progress from here) Try to branch to correct and branch + * mis-prediction. 
*/ + if (!passed_predicate) { + /* Leave it up to commit to handle the fault */ + lsq.pushFailedRequest(inst); + } + } + + /* Restore thread PC */ + thread->pcState(old_pc); + issued = true; + } + + return issued; +} + +/** Increment a cyclic buffer index for indices [0, cycle_size-1] */ +inline unsigned int +cyclicIndexInc(unsigned int index, unsigned int cycle_size) +{ + unsigned int ret = index + 1; + + if (ret == cycle_size) + ret = 0; + + return ret; +} + +/** Decrement a cyclic buffer index for indices [0, cycle_size-1] */ +inline unsigned int +cyclicIndexDec(unsigned int index, unsigned int cycle_size) +{ + int ret = index - 1; + + if (ret < 0) + ret = cycle_size - 1; + + return ret; +} + +unsigned int +Execute::issue(bool only_issue_microops) +{ + const ForwardInstData *insts_in = getInput(); + + /* Early termination if we have no instructions */ + if (!insts_in) + return 0; + + /* Start from the first FU */ + unsigned int fu_index = 0; + + /* Remains true while instructions are still being issued. If any + * instruction fails to issue, this is set to false and we exit issue. + * This strictly enforces in-order issue. For other issue behaviours, + * a more complicated test in the outer while loop below is needed. */ + bool issued = true; + + /* Number of insts issues this cycle to check for issueLimit */ + unsigned num_insts_issued = 0; + + /* Number of memory ops issues this cycle to check for memoryIssueLimit */ + unsigned num_mem_insts_issued = 0; + + /* Number of instructions discarded this cycle in order to enforce a + * discardLimit. @todo, add that parameter? 
*/ + unsigned num_insts_discarded = 0; + + do { + MinorDynInstPtr inst = insts_in->insts[inputIndex]; + ThreadID thread_id = inst->id.threadId; + Fault fault = inst->fault; + bool discarded = false; + bool issued_mem_ref = false; + + if (inst->isBubble()) { + /* Skip */ + issued = true; + } else if (cpu.getContext(thread_id)->status() == + ThreadContext::Suspended) + { + DPRINTF(MinorExecute, "Not issuing inst: %s from suspended" + " thread\n", *inst); + + issued = false; + } else if (inst->id.streamSeqNum != streamSeqNum) { + DPRINTF(MinorExecute, "Discarding inst: %s as its stream" + " state was unexpected, expected: %d\n", + *inst, streamSeqNum); + issued = true; + discarded = true; + } else if (fault == NoFault && only_issue_microops && + /* Is this anything other than a non-first microop */ + (!inst->staticInst->isMicroop() || + !inst->staticInst->isFirstMicroop())) + { + DPRINTF(MinorExecute, "Not issuing new non-microop inst: %s\n", + *inst); + + issued = false; + } else { + /* Try and issue an instruction into an FU, assume we didn't and + * fix that in the loop */ + issued = false; + + /* Try FU from 0 each instruction */ + fu_index = 0; + + /* Try and issue a single instruction stepping through the + * available FUs */ + do { + FUPipeline *fu = funcUnits[fu_index]; + + DPRINTF(MinorExecute, "Trying to issue inst: %s to FU: %d\n", + *inst, fu_index); + + /* Does the examined fu have the OpClass-related capability + * needed to execute this instruction? Faults can always + * issue to any FU but probably should just 'live' in the + * inFlightInsts queue rather than having an FU. */ + bool fu_is_capable = (!inst->isFault() ? + fu->provides(inst->staticInst->opClass()) : true); + + if (inst->isNoCostInst()) { + /* Issue free insts. 
to a fake numbered FU */ + fu_index = noCostFUIndex; + + /* And start the countdown on activity to allow + * this instruction to get to the end of its FU */ + cpu.activityRecorder->activity(); + + /* Mark the destinations for this instruction as + * busy */ + scoreboard.markupInstDests(inst, cpu.curCycle() + + Cycles(0), cpu.getContext(thread_id), false); + + inst->fuIndex = noCostFUIndex; + inst->extraCommitDelay = Cycles(0); + inst->extraCommitDelayExpr = NULL; + + /* Push the instruction onto the inFlight queue so + * it can be committed in order */ + QueuedInst fu_inst(inst); + inFlightInsts->push(fu_inst); + + issued = true; + + } else if (!fu_is_capable || fu->alreadyPushed()) { + /* Skip */ + if (!fu_is_capable) { + DPRINTF(MinorExecute, "Can't issue as FU: %d isn't" + " capable\n", fu_index); + } else { + DPRINTF(MinorExecute, "Can't issue as FU: %d is" + " already busy\n", fu_index); + } + } else if (fu->stalled) { + DPRINTF(MinorExecute, "Can't issue inst: %s into FU: %d," + " it's stalled\n", + *inst, fu_index); + } else if (!fu->canInsert()) { + DPRINTF(MinorExecute, "Can't issue inst: %s to busy FU" + " for another: %d cycles\n", + *inst, fu->cyclesBeforeInsert()); + } else { + MinorFUTiming *timing = (!inst->isFault() ? + fu->findTiming(inst->staticInst) : NULL); + + const std::vector<Cycles> *src_latencies = + (timing ? 
&(timing->srcRegsRelativeLats) + : NULL); + + const std::vector<bool> *cant_forward_from_fu_indices = + &(fu->cantForwardFromFUIndices); + + if (timing && timing->suppress) { + DPRINTF(MinorExecute, "Can't issue inst: %s as extra" + " decoding is suppressing it\n", + *inst); + } else if (!scoreboard.canInstIssue(inst, src_latencies, + cant_forward_from_fu_indices, + cpu.curCycle(), cpu.getContext(thread_id))) + { + DPRINTF(MinorExecute, "Can't issue inst: %s yet\n", + *inst); + } else { + /* Can insert the instruction into this FU */ + DPRINTF(MinorExecute, "Issuing inst: %s" + " into FU %d\n", *inst, + fu_index); + + Cycles extra_dest_retire_lat = Cycles(0); + TimingExpr *extra_dest_retire_lat_expr = NULL; + Cycles extra_assumed_lat = Cycles(0); + + /* Add the extraCommitDelay and extraAssumeLat to + * the FU pipeline timings */ + if (timing) { + extra_dest_retire_lat = + timing->extraCommitLat; + extra_dest_retire_lat_expr = + timing->extraCommitLatExpr; + extra_assumed_lat = + timing->extraAssumedLat; + } + + bool issued_mem_ref = inst->isMemRef(); + + QueuedInst fu_inst(inst); + + /* Decorate the inst with FU details */ + inst->fuIndex = fu_index; + inst->extraCommitDelay = extra_dest_retire_lat; + inst->extraCommitDelayExpr = + extra_dest_retire_lat_expr; + + if (issued_mem_ref) { + /* Remember which instruction this memory op + * depends on so that initiateAcc can be called + * early */ + if (allowEarlyMemIssue) { + inst->instToWaitFor = + scoreboard.execSeqNumToWaitFor(inst, + cpu.getContext(thread_id)); + + if (lsq.getLastMemBarrier() > + inst->instToWaitFor) + { + DPRINTF(MinorExecute, "A barrier will" + " cause a delay in mem ref issue of" + " inst: %s until after inst" + " %d(exec)\n", *inst, + lsq.getLastMemBarrier()); + + inst->instToWaitFor = + lsq.getLastMemBarrier(); + } else { + DPRINTF(MinorExecute, "Memory ref inst:" + " %s must wait for inst %d(exec)" + " before issuing\n", + *inst, inst->instToWaitFor); + } + + inst->canEarlyIssue = true; + } + 
/* Also queue this instruction in the memory ref + * queue to ensure in-order issue to the LSQ */ + DPRINTF(MinorExecute, "Pushing mem inst: %s\n", + *inst); + inFUMemInsts->push(fu_inst); + } + + /* Issue to FU */ + fu->push(fu_inst); + /* And start the countdown on activity to allow + * this instruction to get to the end of its FU */ + cpu.activityRecorder->activity(); + + /* Mark the destinations for this instruction as + * busy */ + scoreboard.markupInstDests(inst, cpu.curCycle() + + fu->description.opLat + + extra_dest_retire_lat + + extra_assumed_lat, + cpu.getContext(thread_id), + issued_mem_ref && extra_assumed_lat == Cycles(0)); + + /* Push the instruction onto the inFlight queue so + * it can be committed in order */ + inFlightInsts->push(fu_inst); + + issued = true; + } + } + + fu_index++; + } while (fu_index != numFuncUnits && !issued); + + if (!issued) + DPRINTF(MinorExecute, "Didn't issue inst: %s\n", *inst); + } + + if (issued) { + /* Generate MinorTrace's MinorInst lines. Do this at commit + * to allow better instruction annotation? 
*/ + if (DTRACE(MinorTrace) && !inst->isBubble()) + inst->minorTraceInst(*this); + + /* Mark up barriers in the LSQ */ + if (!discarded && inst->isInst() && + inst->staticInst->isMemBarrier()) + { + DPRINTF(MinorMem, "Issuing memory barrier inst: %s\n", *inst); + lsq.issuedMemBarrierInst(inst); + } + + if (inst->traceData && setTraceTimeOnIssue) { + inst->traceData->setWhen(curTick()); + } + + if (issued_mem_ref) + num_mem_insts_issued++; + + if (discarded) { + num_insts_discarded++; + } else { + num_insts_issued++; + + if (num_insts_issued == issueLimit) + DPRINTF(MinorExecute, "Reached inst issue limit\n"); + } + + inputIndex++; + DPRINTF(MinorExecute, "Stepping to next inst inputIndex: %d\n", + inputIndex); + } + + /* Got to the end of a line */ + if (inputIndex == insts_in->width()) { + popInput(); + /* Set insts_in to null to force us to leave the surrounding + * loop */ + insts_in = NULL; + + if (processMoreThanOneInput) { + DPRINTF(MinorExecute, "Wrapping\n"); + insts_in = getInput(); + } + } + } while (insts_in && inputIndex < insts_in->width() && + /* We still have instructions */ + fu_index != numFuncUnits && /* Not visited all FUs */ + issued && /* We've not yet failed to issue an instruction */ + num_insts_issued != issueLimit && /* Still allowed to issue */ + num_mem_insts_issued != memoryIssueLimit); + + return num_insts_issued; +} +/** Service the system PC event queue for thread context 0, repeating for + * as long as a service call changes the instruction address of the thread. + * @return true if at least one service call changed the PC (that is, more + * than one pass over the queue was needed), false otherwise */ +bool +Execute::tryPCEvents() +{ + ThreadContext *thread = cpu.getContext(0); /* THREAD only context 0 is serviced */ + unsigned int num_pc_event_checks = 0; + + /* Handle PC events on instructions: keep servicing the queue until a + * pass leaves the instruction address unchanged */ + Addr oldPC; + do { + oldPC = thread->instAddr(); + cpu.system->pcEventQueue.service(thread); + num_pc_event_checks++; + } while (oldPC != thread->instAddr()); + + if (num_pc_event_checks > 1) { + DPRINTF(PCEvent, "Acting on PC Event to PC: %s\n", + thread->pcState()); + } + + return num_pc_event_checks > 1; /* A second pass only happens after a PC change */ +} +/** Do the counting for an instruction that has just been committed. + * Bumps the per-thread and per-CPU instruction counts (only for a + * non-microop or the last microop of a macroop) and op counts (always), + * services the instruction count event queues and, when tracing, sets + * the trace CP sequence number from the thread op count. + * Must not be called with a fault instruction (asserted). + * NOTE(review): cpu.system->totalNumInsts is incremented once per op + * rather than only at instruction boundaries -- confirm that is intended. + * @param inst the committed instruction */ +void +Execute::doInstCommitAccounting(MinorDynInstPtr inst) +{ + assert(!inst->isFault()); + + MinorThread *thread = cpu.threads[inst->id.threadId]; + 
+ /* Increment the many and various inst and op counts in the + * thread and system */ + if (!inst->staticInst->isMicroop() || inst->staticInst->isLastMicroop()) + { + thread->numInst++; + thread->numInsts++; + cpu.stats.numInsts++; + } + thread->numOp++; + thread->numOps++; + cpu.stats.numOps++; + cpu.system->totalNumInsts++; + + /* Act on events related to instruction counts */ + cpu.comInstEventQueue[inst->id.threadId]->serviceEvents(thread->numInst); + cpu.system->instEventQueue.serviceEvents(cpu.system->totalNumInsts); + + /* Set the CP SeqNum to the numOps commit number */ + if (inst->traceData) + inst->traceData->setCPSeq(thread->numOp); +} + +bool +Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue, + BranchData &branch, Fault &fault, bool &committed, + bool &completed_mem_issue) +{ + ThreadID thread_id = inst->id.threadId; + ThreadContext *thread = cpu.getContext(thread_id); + + bool completed_inst = true; + fault = NoFault; + + /* Is the thread for this instruction suspended? In that case, just + * stall as long as there are no pending interrupts */ + if (thread->status() == ThreadContext::Suspended && + !isInterrupted(thread_id)) + { + DPRINTF(MinorExecute, "Not committing inst from suspended thread" + " inst: %s\n", *inst); + completed_inst = false; + } else if (inst->isFault()) { + ExecContext context(cpu, *cpu.threads[thread_id], *this, inst); + + DPRINTF(MinorExecute, "Fault inst reached Execute: %s\n", + inst->fault->name()); + + fault = inst->fault; + inst->fault->invoke(thread, NULL); + + tryToBranch(inst, fault, branch); + } else if (inst->staticInst->isMemRef()) { + /* Memory accesses are executed in two parts: + * executeMemRefInst -- calculates the EA and issues the access + * to memory. This is done here. + * handleMemResponse -- handles the response packet, done by + * Execute::commit + * + * While the memory access is in its FU, the EA is being + * calculated. 
At the end of the FU, when it is ready to + * 'commit' (in this function), the access is presented to the + * memory queues. When a response comes back from memory, + * Execute::commit will commit it. + */ + bool predicate_passed = false; + bool completed_mem_inst = executeMemRefInst(inst, branch, + predicate_passed, fault); + + if (completed_mem_inst && fault != NoFault) { + if (early_memory_issue) { + DPRINTF(MinorExecute, "Fault in early executing inst: %s\n", + fault->name()); + /* Don't execute the fault, just stall the instruction + * until it gets to the head of inFlightInsts */ + inst->canEarlyIssue = false; + /* Not completed as we'll come here again to pick up + * the fault when we get to the end of the FU */ + completed_inst = false; + } else { + DPRINTF(MinorExecute, "Fault in execute: %s\n", + fault->name()); + fault->invoke(thread, NULL); + + tryToBranch(inst, fault, branch); + completed_inst = true; + } + } else { + completed_inst = completed_mem_inst; + } + completed_mem_issue = completed_inst; + } else if (inst->isInst() && inst->staticInst->isMemBarrier() && + !lsq.canPushIntoStoreBuffer()) + { + DPRINTF(MinorExecute, "Can't commit data barrier inst: %s yet as" + " there isn't space in the store buffer\n", *inst); + + completed_inst = false; + } else { + ExecContext context(cpu, *cpu.threads[thread_id], *this, inst); + + DPRINTF(MinorExecute, "Committing inst: %s\n", *inst); + + fault = inst->staticInst->execute(&context, + inst->traceData); + + /* Set the predicate for tracing and dump */ + if (inst->traceData) + inst->traceData->setPredicate(context.readPredicate()); + + committed = true; + + if (fault != NoFault) { + DPRINTF(MinorExecute, "Fault in execute of inst: %s fault: %s\n", + *inst, fault->name()); + fault->invoke(thread, inst->staticInst); + } + + doInstCommitAccounting(inst); + tryToBranch(inst, fault, branch); + } + + if (completed_inst) { + /* Keep a copy of this instruction's predictionSeqNum just in case + * we need to issue a 
branch without an instruction (such as an + * interrupt) */ + lastPredictionSeqNum = inst->id.predictionSeqNum; + + /* Check to see if this instruction suspended the current thread. */ + if (!inst->isFault() && + thread->status() == ThreadContext::Suspended && + branch.isBubble() && /* It didn't branch too */ + !isInterrupted(thread_id)) /* Don't suspend if we have + interrupts */ + { + TheISA::PCState resume_pc = cpu.getContext(0)->pcState(); + + assert(resume_pc.microPC() == 0); + + DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute" + " inst: %s\n", inst->id.threadId, *inst); + + cpu.stats.numFetchSuspends++; + + updateBranchData(BranchData::SuspendThread, inst, resume_pc, + branch); + } + } + + return completed_inst; +} + +void +Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) +{ + Fault fault = NoFault; + Cycles now = cpu.curCycle(); + + /** + * Try and execute as many instructions from the end of FU pipelines as + * possible. This *doesn't* include actually advancing the pipelines. + * + * We do this by looping on the front of the inFlightInsts queue for as + * long as we can find the desired instruction at the end of the + * functional unit it was issued to without seeing a branch or a fault. + * In this function, these terms are used: + * complete -- The instruction has finished its passage through + * its functional unit and its fate has been decided + * (committed, discarded, issued to the memory system) + * commit -- The instruction is complete(d), not discarded and has + * its effects applied to the CPU state + * discard(ed) -- The instruction is complete but not committed + * as its streamSeqNum disagrees with the current + * Execute::streamSeqNum + * + * Commits are also possible from two other places: + * + * 1) Responses returning from the LSQ + * 2) Mem ops issued to the LSQ ('committed' from the FUs) earlier + * than their position in the inFlightInsts queue, but after all + * their dependencies are resolved. 
+ */ + + /* Has an instruction been completed? Once this becomes false, we stop + * trying to complete instructions. */ + bool completed_inst = true; + + /* Number of insts committed this cycle to check against commitLimit */ + unsigned int num_insts_committed = 0; + + /* Number of memory access instructions committed to check against + * memCommitLimit */ + unsigned int num_mem_refs_committed = 0; + + if (only_commit_microops && !inFlightInsts->empty()) { + DPRINTF(MinorInterrupt, "Only commit microops %s %d\n", + *(inFlightInsts->front().inst), + lastCommitWasEndOfMacroop); + } + + while (!inFlightInsts->empty() && /* Some more instructions to process */ + !branch.isStreamChange() && /* No real branch */ + fault == NoFault && /* No faults */ + completed_inst && /* Still finding instructions to execute */ + num_insts_committed != commitLimit /* Not reached commit limit */ + ) + { + if (only_commit_microops) { + DPRINTF(MinorInterrupt, "Committing tail of insts before" + " interrupt: %s\n", + *(inFlightInsts->front().inst)); + } + + QueuedInst *head_inflight_inst = &(inFlightInsts->front()); + + InstSeqNum head_exec_seq_num = + head_inflight_inst->inst->id.execSeqNum; + + /* The instruction we actually process if completed_inst + * remains true to the end of the loop body. + * Start by considering the the head of the in flight insts queue */ + MinorDynInstPtr inst = head_inflight_inst->inst; + + bool committed_inst = false; + bool discard_inst = false; + bool completed_mem_ref = false; + bool issued_mem_ref = false; + bool early_memory_issue = false; + + /* Must set this again to go around the loop */ + completed_inst = false; + + /* If we're just completing a macroop before an interrupt or drain, + * can we stil commit another microop (rather than a memory response) + * without crosing into the next full instruction? 
*/ + bool can_commit_insts = !inFlightInsts->empty() && + !(only_commit_microops && lastCommitWasEndOfMacroop); + + /* Can we find a mem response for this inst */ + LSQ::LSQRequestPtr mem_response = + (inst->inLSQ ? lsq.findResponse(inst) : NULL); + + DPRINTF(MinorExecute, "Trying to commit canCommitInsts: %d\n", + can_commit_insts); + + /* Test for PC events after every instruction */ + if (isInbetweenInsts() && tryPCEvents()) { + ThreadContext *thread = cpu.getContext(0); + + /* Branch as there was a change in PC */ + updateBranchData(BranchData::UnpredictedBranch, + MinorDynInst::bubble(), thread->pcState(), branch); + } else if (mem_response && + num_mem_refs_committed < memoryCommitLimit) + { + /* Try to commit from the memory responses next */ + discard_inst = inst->id.streamSeqNum != streamSeqNum || + discard; + + DPRINTF(MinorExecute, "Trying to commit mem response: %s\n", + *inst); + + /* Complete or discard the response */ + if (discard_inst) { + DPRINTF(MinorExecute, "Discarding mem inst: %s as its" + " stream state was unexpected, expected: %d\n", + *inst, streamSeqNum); + + lsq.popResponse(mem_response); + } else { + handleMemResponse(inst, mem_response, branch, fault); + committed_inst = true; + } + + completed_mem_ref = true; + completed_inst = true; + } else if (can_commit_insts) { + /* If true, this instruction will, subject to timing tweaks, + * be considered for completion. try_to_commit flattens + * the `if' tree a bit and allows other tests for inst + * commit to be inserted here. */ + bool try_to_commit = false; + + /* Try and issue memory ops early if they: + * - Can push a request into the LSQ + * - Have reached the end of their FUs + * - Have had all their dependencies satisfied + * - Are from the right stream + * + * For any other case, leave it to the normal instruction + * issue below to handle them. 
+ */ + if (!inFUMemInsts->empty() && lsq.canRequest()) { + DPRINTF(MinorExecute, "Trying to commit from mem FUs\n"); + + const MinorDynInstPtr head_mem_ref_inst = + inFUMemInsts->front().inst; + FUPipeline *fu = funcUnits[head_mem_ref_inst->fuIndex]; + const MinorDynInstPtr &fu_inst = fu->front().inst; + + /* Use this, possibly out of order, inst as the one + * to 'commit'/send to the LSQ */ + if (!fu_inst->isBubble() && + !fu_inst->inLSQ && + fu_inst->canEarlyIssue && + streamSeqNum == fu_inst->id.streamSeqNum && + head_exec_seq_num > fu_inst->instToWaitFor) + { + DPRINTF(MinorExecute, "Issuing mem ref early" + " inst: %s instToWaitFor: %d\n", + *(fu_inst), fu_inst->instToWaitFor); + + inst = fu_inst; + try_to_commit = true; + early_memory_issue = true; + completed_inst = true; + } + } + + /* Try and commit FU-less insts */ + if (!completed_inst && inst->isNoCostInst()) { + DPRINTF(MinorExecute, "Committing no cost inst: %s", *inst); + + try_to_commit = true; + completed_inst = true; + } + + /* Try to issue from the ends of FUs and the inFlightInsts + * queue */ + if (!completed_inst && !inst->inLSQ) { + DPRINTF(MinorExecute, "Trying to commit from FUs\n"); + + /* Try to commit from a functional unit */ + /* Is the head inst of the expected inst's FU actually the + * expected inst? 
*/ + QueuedInst &fu_inst = + funcUnits[inst->fuIndex]->front(); + InstSeqNum fu_inst_seq_num = fu_inst.inst->id.execSeqNum; + + if (fu_inst.inst->isBubble()) { + /* No instruction ready */ + completed_inst = false; + } else if (fu_inst_seq_num != head_exec_seq_num) { + /* Past instruction: we must have already executed it + * in the same cycle and so the head inst isn't + * actually at the end of its pipeline + * Future instruction: handled above and only for + * mem refs on their way to the LSQ */ + } else /* if (fu_inst_seq_num == head_exec_seq_num) */ { + /* All instructions can be committed if they have the + * right execSeqNum and there are no in-flight + * mem insts before us */ + try_to_commit = true; + completed_inst = true; + } + } + + if (try_to_commit) { + discard_inst = inst->id.streamSeqNum != streamSeqNum || + discard; + + /* Is this instruction discardable as its streamSeqNum + * doesn't match? */ + if (!discard_inst) { + /* Try to commit or discard a non-memory instruction. 
+ * Memory ops are actually 'committed' from this FUs + * and 'issued' into the memory system so we need to + * account for them later (commit_was_mem_issue gets + * set) */ + if (inst->extraCommitDelayExpr) { + DPRINTF(MinorExecute, "Evaluating expression for" + " extra commit delay inst: %s\n", *inst); + + ThreadContext *thread = + cpu.getContext(inst->id.threadId); + + TimingExprEvalContext context(inst->staticInst, + thread, NULL); + + uint64_t extra_delay = inst->extraCommitDelayExpr-> + eval(context); + + DPRINTF(MinorExecute, "Extra commit delay expr" + " result: %d\n", extra_delay); + + if (extra_delay < 128) { + inst->extraCommitDelay += Cycles(extra_delay); + } else { + DPRINTF(MinorExecute, "Extra commit delay was" + " very long: %d\n", extra_delay); + } + inst->extraCommitDelayExpr = NULL; + } + + /* Move the extraCommitDelay from the instruction + * into the minimumCommitCycle */ + if (inst->extraCommitDelay != Cycles(0)) { + inst->minimumCommitCycle = cpu.curCycle() + + inst->extraCommitDelay; + inst->extraCommitDelay = Cycles(0); + } + + /* @todo Think about making lastMemBarrier be + * MAX_UINT_64 to avoid using 0 as a marker value */ + if (!inst->isFault() && inst->isMemRef() && + lsq.getLastMemBarrier() < + inst->id.execSeqNum && + lsq.getLastMemBarrier() != 0) + { + DPRINTF(MinorExecute, "Not committing inst: %s yet" + " as there are incomplete barriers in flight\n", + *inst); + completed_inst = false; + } else if (inst->minimumCommitCycle > now) { + DPRINTF(MinorExecute, "Not committing inst: %s yet" + " as it wants to be stalled for %d more cycles\n", + *inst, inst->minimumCommitCycle - now); + completed_inst = false; + } else { + completed_inst = commitInst(inst, + early_memory_issue, branch, fault, + committed_inst, issued_mem_ref); + } + } else { + /* Discard instruction */ + completed_inst = true; + } + + if (completed_inst) { + /* Allow the pipeline to advance. 
If the FU head + * instruction wasn't the inFlightInsts head + * but had already been committed, it would have + * unstalled the pipeline before here */ + if (inst->fuIndex != noCostFUIndex) + funcUnits[inst->fuIndex]->stalled = false; + } + } + } else { + DPRINTF(MinorExecute, "No instructions to commit\n"); + completed_inst = false; + } + + /* All discardable instructions must also be 'completed' by now */ + assert(!(discard_inst && !completed_inst)); + + /* Instruction committed but was discarded due to streamSeqNum + * mismatch */ + if (discard_inst) { + DPRINTF(MinorExecute, "Discarding inst: %s as its stream" + " state was unexpected, expected: %d\n", + *inst, streamSeqNum); + + if (fault == NoFault) + cpu.stats.numDiscardedOps++; + } + + /* Mark the mem inst as being in the LSQ */ + if (issued_mem_ref) { + inst->fuIndex = 0; + inst->inLSQ = true; + } + + /* Pop issued (to LSQ) and discarded mem refs from the inFUMemInsts + * as they've *definitely* exited the FUs */ + if (completed_inst && inst->isMemRef()) { + /* The MemRef could have been discarded from the FU or the memory + * queue, so just check an FU instruction */ + if (!inFUMemInsts->empty() && + inFUMemInsts->front().inst == inst) + { + inFUMemInsts->pop(); + } + } + + if (completed_inst && !(issued_mem_ref && fault == NoFault)) { + /* Note that this includes discarded insts */ + DPRINTF(MinorExecute, "Completed inst: %s\n", *inst); + + /* Got to the end of a full instruction? 
*/ + lastCommitWasEndOfMacroop = inst->isFault() || + inst->isLastOpInInst(); + + /* lastPredictionSeqNum is kept as a convenience to prevent its + * value from changing too much on the minorview display */ + lastPredictionSeqNum = inst->id.predictionSeqNum; + + /* Finished with the inst, remove it from the inst queue and + * clear its dependencies */ + inFlightInsts->pop(); + + /* Complete barriers in the LSQ/move to store buffer */ + if (inst->isInst() && inst->staticInst->isMemBarrier()) { + DPRINTF(MinorMem, "Completing memory barrier" + " inst: %s committed: %d\n", *inst, committed_inst); + lsq.completeMemBarrierInst(inst, committed_inst); + } + + scoreboard.clearInstDests(inst, inst->isMemRef()); + } + + /* Handle per-cycle instruction counting */ + if (committed_inst) { + bool is_no_cost_inst = inst->isNoCostInst(); + + /* Don't show no cost instructions as having taken a commit + * slot */ + if (DTRACE(MinorTrace) && !is_no_cost_inst) + instsBeingCommitted.insts[num_insts_committed] = inst; + + if (!is_no_cost_inst) + num_insts_committed++; + + if (num_insts_committed == commitLimit) + DPRINTF(MinorExecute, "Reached inst commit limit\n"); + + /* Re-set the time of the instruction if that's required for + * tracing */ + if (inst->traceData) { + if (setTraceTimeOnCommit) + inst->traceData->setWhen(curTick()); + inst->traceData->dump(); + } + + if (completed_mem_ref) + num_mem_refs_committed++; + + if (num_mem_refs_committed == memoryCommitLimit) + DPRINTF(MinorExecute, "Reached mem ref commit limit\n"); + } + } +} +/** Is Execute sitting between whole instructions? True when the last + * completed instruction was a fault or the last op of its instruction + * (lastCommitWasEndOfMacroop) and the LSQ reports no accesses in flight. + * commit uses this to gate PC event checks; evaluate uses it to decide + * when an interrupt can be taken and when a drain can move on from + * DrainCurrentInst. + * @return true if it is safe to act between instructions */ +bool +Execute::isInbetweenInsts() const +{ + return lastCommitWasEndOfMacroop && + !lsq.accessesInFlight(); +} +/** Run one cycle of the Execute stage. In order: pick up the input from + * the input buffer, step the LSQ, take a pending interrupt if between + * instructions, otherwise commit and then issue (following the drain + * state machine when draining), advance every FU pipeline and finally + * ask the CPU for another tick if any work may remain */ +void +Execute::evaluate() +{ + inputBuffer.setTail(*inp.outputWire); + BranchData &branch = *out.inputWire; + + const ForwardInstData *insts_in = getInput(); + + /* Do all the cycle-wise activities for dcachePort here to potentially + * free up input spaces in the LSQ's requests queue */ + lsq.step(); + + /* Has an interrupt been 
signalled? This may not be acted on + * straighaway so this is different from took_interrupt below */ + bool interrupted = false; + /* If there was an interrupt signalled, was it acted on now? */ + bool took_interrupt = false; + + if (cpu.getInterruptController()) { + /* This is here because it seems that after drainResume the + * interrupt controller isn't always set */ + interrupted = drainState == NotDraining && isInterrupted(0); + } else { + DPRINTF(MinorInterrupt, "No interrupt controller\n"); + } + + unsigned int num_issued = 0; + + if (DTRACE(MinorTrace)) { + /* Empty the instsBeingCommitted for MinorTrace */ + instsBeingCommitted.bubbleFill(); + } + + /* THREAD threadId on isInterrupted */ + /* Act on interrupts */ + if (interrupted && isInbetweenInsts()) { + took_interrupt = takeInterrupt(0, branch); + /* Clear interrupted if no interrupt was actually waiting */ + interrupted = took_interrupt; + } + + if (took_interrupt) { + /* Do no commit/issue this cycle */ + } else if (!branch.isBubble()) { + /* It's important that this is here to carry Fetch1 wakeups to Fetch1 + * without overwriting them */ + DPRINTF(MinorInterrupt, "Execute skipping a cycle to allow old" + " branch to complete\n"); + } else { + if (interrupted) { + if (inFlightInsts->empty()) { + DPRINTF(MinorInterrupt, "Waiting but no insts\n"); + } else { + DPRINTF(MinorInterrupt, "Waiting for end of inst before" + " signalling interrupt\n"); + } + } + + /* commit can set stalled flags observable to issue and so *must* be + * called first */ + if (drainState != NotDraining) { + if (drainState == DrainCurrentInst) { + /* Commit only micro-ops, don't kill anything else */ + commit(true, false, branch); + + if (isInbetweenInsts()) + setDrainState(DrainHaltFetch); + + /* Discard any generated branch */ + branch = BranchData::bubble(); + } else if (drainState == DrainAllInsts) { + /* Kill all instructions */ + while (getInput()) + popInput(); + commit(false, true, branch); + } + } else { + /* Commit 
micro-ops only if interrupted. Otherwise, commit + * anything you like */ + commit(interrupted, false, branch); + } + + /* This will issue merrily even when interrupted in the sure and + * certain knowledge that the interrupt with change the stream */ + if (insts_in) + num_issued = issue(false); + } + + /* Halt fetch, but don't do it until we have the current instruction in + * the bag */ + if (drainState == DrainHaltFetch) { + updateBranchData(BranchData::HaltFetch, MinorDynInst::bubble(), + TheISA::PCState(0), branch); + + cpu.wakeupOnEvent(Pipeline::ExecuteStageId); + setDrainState(DrainAllInsts); + } + + MinorDynInstPtr next_issuable_inst = NULL; + bool can_issue_next = false; + + /* Find the next issuable instruction and see if it can be issued */ + if (getInput()) { + MinorDynInstPtr inst = getInput()->insts[inputIndex]; + + if (inst->isFault()) { + can_issue_next = true; + } else if (!inst->isBubble()) { + if (cpu.getContext(inst->id.threadId)->status() != + ThreadContext::Suspended) + { + next_issuable_inst = inst; + } + } + } + + bool becoming_stalled = true; + + /* Advance the pipelines and note whether they still need to be + * advanced */ + for (unsigned int i = 0; i < numFuncUnits; i++) { + FUPipeline *fu = funcUnits[i]; + + fu->advance(); + + /* If we need to go again, the pipeline will have been left or set + * to be unstalled */ + if (fu->occupancy != 0 && !fu->stalled) + becoming_stalled = false; + + /* Could we possibly issue the next instruction? 
This is quite + * an expensive test */ + if (next_issuable_inst && !fu->stalled && + scoreboard.canInstIssue(next_issuable_inst, + NULL, NULL, cpu.curCycle() + Cycles(1), + cpu.getContext(next_issuable_inst->id.threadId)) && + fu->provides(next_issuable_inst->staticInst->opClass())) + { + can_issue_next = true; + } + } + + bool head_inst_might_commit = false; + + /* Could the head in flight insts be committed */ + if (!inFlightInsts->empty()) { + const QueuedInst &head_inst = inFlightInsts->front(); + + if (head_inst.inst->isNoCostInst()) { + head_inst_might_commit = true; + } else { + FUPipeline *fu = funcUnits[head_inst.inst->fuIndex]; + + /* Head inst is committable: either it is at the stalled end of + * its FU or a memory response for it is waiting in the LSQ */ + if ((fu->stalled && + fu->front().inst->id == head_inst.inst->id) || + lsq.findResponse(head_inst.inst)) + { + head_inst_might_commit = true; + } + } + } + /* NOTE(review): the trace fragments below are inconsistent about their + * leading space and one reads -can issued- rather than -can issue-; + * they are runtime text and are left unchanged here */ + DPRINTF(Activity, "Need to tick num issued insts: %s%s%s%s%s%s\n", + (num_issued != 0 ? " (issued some insts)" : ""), + (becoming_stalled ? " (becoming stalled)" : "(not becoming stalled)"), + (can_issue_next ? " (can issued next inst)" : ""), + (head_inst_might_commit ? "(head inst might commit)" : ""), + (lsq.needsToTick() ? " (LSQ needs to tick)" : ""), + (interrupted ? 
" (interrupted)" : "")); + + bool need_to_tick = + num_issued != 0 || /* Issued some insts this cycle */ + !becoming_stalled || /* Some FU pipelines can still move */ + can_issue_next || /* Can still issue a new inst */ + head_inst_might_commit || /* Could possibly commit the next inst */ + lsq.needsToTick() || /* Must step the dcache port */ + interrupted; /* There are pending interrupts */ + + if (!need_to_tick) { + DPRINTF(Activity, "The next cycle might be skippable as there are no" + " advanceable FUs\n"); + } + + /* Wake up if we need to tick again */ + if (need_to_tick) + cpu.wakeupOnEvent(Pipeline::ExecuteStageId); + + /* Note activity of following buffer */ + if (!branch.isBubble()) + cpu.activityRecorder->activity(); + + /* Make sure the input (if any left) is pushed */ + inputBuffer.pushTail(); +} +/** Wake up Fetch1 by sending it a branch to the current PC of thread + * context 0. The branch is built in a local BranchData and then written + * straight onto the output wire, and Execute asks to be ticked again. + * Asserts that the current PC has a micro PC of zero. + * @param reason the BranchData reason to attach to the wakeup branch */ +void +Execute::wakeupFetch(BranchData::Reason reason) +{ + BranchData branch; + assert(branch.isBubble()); + + /* THREAD thread id */ + ThreadContext *thread = cpu.getContext(0); + + /* Force a branch to the current PC (which should be the next inst.) to + * wake up Fetch1 */ + if (!branch.isStreamChange() /* No real branch already happened */) { + DPRINTF(MinorInterrupt, "Waking up Fetch (via Execute) by issuing" + " a branch: %s\n", thread->pcState()); + + assert(thread->pcState().microPC() == 0); + + updateBranchData(reason, + MinorDynInst::bubble(), thread->pcState(), branch); + } else { /* NOTE(review): branch is a fresh local asserted to be a bubble above, so presumably this arm cannot be reached -- confirm */ + DPRINTF(MinorInterrupt, "Already branching, no need for wakeup\n"); + } + + *out.inputWire = branch; + + /* Make sure we get ticked */ + cpu.wakeupOnEvent(Pipeline::ExecuteStageId); +} +/** Emit the MinorTrace report for Execute: the instructions being + * committed, the LSQ, the input buffer and the scoreboard, then one + * MINORTRACE line with a comma separated per-FU stall string (1 for a + * stalled FU, E otherwise), followed by each FU pipeline and the two + * in-flight instruction queues */ +void +Execute::minorTrace() const +{ + std::ostringstream insts; + std::ostringstream stalled; + + instsBeingCommitted.reportData(insts); + lsq.minorTrace(); + inputBuffer.minorTrace(); + scoreboard.minorTrace(); + + /* Report functional unit stalling in one string */ + unsigned int i = 0; + while (i < numFuncUnits) + { + stalled << (funcUnits[i]->stalled ? 
'1' : 'E'); + i++; + if (i != numFuncUnits) + stalled << ','; + } + + MINORTRACE("insts=%s inputIndex=%d streamSeqNum=%d" + " stalled=%s drainState=%d isInbetweenInsts=%d\n", + insts.str(), inputIndex, streamSeqNum, stalled.str(), drainState, + isInbetweenInsts()); + + std::for_each(funcUnits.begin(), funcUnits.end(), + std::mem_fun(&FUPipeline::minorTrace)); + /* NOTE(review): std::mem_fun was deprecated in C++11 and removed in + * C++17; std::mem_fn is the replacement if the build standard moves */ + inFlightInsts->minorTrace(); + inFUMemInsts->minorTrace(); +} +/** Leave the drained state: go back to NotDraining, send Fetch1 a + * WakeupFetch branch and make sure Execute itself is ticked */ +void +Execute::drainResume() +{ + DPRINTF(Drain, "MinorExecute drainResume\n"); + + setDrainState(NotDraining); + + /* Wakeup fetch and keep the pipeline running until that branch takes + * effect */ + wakeupFetch(BranchData::WakeupFetch); + cpu.wakeupOnEvent(Pipeline::ExecuteStageId); +} +/** Print a DrainState by name. A value outside the four named states is + * printed as Drain- followed by its integer value */ +std::ostream &operator <<(std::ostream &os, Execute::DrainState state) +{ + switch (state) + { + case Execute::NotDraining: + os << "NotDraining"; + break; + case Execute::DrainCurrentInst: + os << "DrainCurrentInst"; + break; + case Execute::DrainHaltFetch: + os << "DrainHaltFetch"; + break; + case Execute::DrainAllInsts: + os << "DrainAllInsts"; + break; + default: + os << "Drain-" << static_cast<int>(state); + break; + } + + return os; +} +/** Set drainState, tracing the new state under the Drain debug flag. + * @param state the state to move to */ +void +Execute::setDrainState(DrainState state) +{ + DPRINTF(Drain, "setDrainState: %s\n", state); + drainState = state; +} +/** Start draining. If not already draining, wake Execute up and enter + * DrainHaltFetch when the last commit ended an instruction, otherwise + * DrainCurrentInst. A drain that is already in progress is left alone. + * @return 0 if Execute is already drained, 1 otherwise */ +unsigned int +Execute::drain() +{ + DPRINTF(Drain, "MinorExecute drain\n"); + + if (drainState == NotDraining) { + cpu.wakeupOnEvent(Pipeline::ExecuteStageId); + + /* Go to DrainCurrentInst if we're not between operations + * this should probably test the LSQ as well. Or maybe + * just always go to DrainCurrentInst anyway */ + if (lastCommitWasEndOfMacroop) + setDrainState(DrainHaltFetch); + else + setDrainState(DrainCurrentInst); + } + + return (isDrained() ? 
0 : 1); +} +/** Has Execute finished draining? True only in the DrainAllInsts state + * with an empty input buffer, an empty in-flight instruction queue and + * a drained LSQ. Only state held by Execute itself is examined */ +bool +Execute::isDrained() +{ + return drainState == DrainAllInsts && + inputBuffer.empty() && + inFlightInsts->empty() && + lsq.isDrained(); +} +/** Delete the functional unit pipelines and the in-flight queue. + * NOTE(review): inFUMemInsts is a raw pointer member used alongside + * inFlightInsts but is not deleted here; if the constructor (not visible + * in this part of the file) allocates it, it is leaked -- confirm */ +Execute::~Execute() +{ + for (unsigned int i = 0; i < numFuncUnits; i++) + delete funcUnits[i]; + + delete inFlightInsts; +} +/** Does inst belong to the stream Execute is currently executing? + * @return true if the streamSeqNum of inst equals Execute::streamSeqNum */ +bool +Execute::instIsRightStream(MinorDynInstPtr inst) +{ + return inst->id.streamSeqNum == streamSeqNum; +} +/** Is inst the instruction at the head of the in-flight queue? + * @return true if the queue is not empty and the id of its head equals + * the id of inst, false otherwise (including for an empty queue) */ +bool +Execute::instIsHeadInst(MinorDynInstPtr inst) +{ + bool ret = false; + + if (!inFlightInsts->empty()) + ret = inFlightInsts->front().inst->id == inst->id; + + return ret; +} +/** Return the data cache port, which is held by the LSQ */ +MinorCPU::MinorCPUPort & +Execute::getDcachePort() +{ + return lsq.getDcachePort(); +} + +} diff --git a/src/cpu/minor/execute.hh b/src/cpu/minor/execute.hh new file mode 100644 index 000000000..8cd026534 --- /dev/null +++ b/src/cpu/minor/execute.hh @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * All the fun of executing instructions from Decode and sending branch/new + * instruction stream info. to Fetch1. 
+ */ + +#ifndef __CPU_MINOR_EXECUTE_HH__ +#define __CPU_MINOR_EXECUTE_HH__ + +#include "cpu/minor/buffers.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/func_unit.hh" +#include "cpu/minor/lsq.hh" +#include "cpu/minor/pipe_data.hh" +#include "cpu/minor/scoreboard.hh" + +namespace Minor +{ + +/** Execute stage. Everything apart from fetching and decoding instructions. + * The LSQ lives here too. */ +class Execute : public Named +{ + protected: + /** Input port carrying instructions from Decode */ + Latch<ForwardInstData>::Output inp; + + /** Output port (the Input end of a BranchData latch) carrying stream + * changes to Fetch1 */ + Latch<BranchData>::Input out; + + /** Pointer back to the containing CPU */ + MinorCPU &cpu; + + /** Number of instructions that can be issued per cycle */ + unsigned int issueLimit; + + /** Number of memory ops that can be issued per cycle */ + unsigned int memoryIssueLimit; + + /** Number of instructions that can be committed per cycle */ + unsigned int commitLimit; + + /** Number of memory instructions that can be committed per cycle */ + unsigned int memoryCommitLimit; + + /** If true, more than one input line can be processed each cycle if + * there is room to execute more instructions than taken from the first + * line */ + bool processMoreThanOneInput; + + /** Descriptions of the functional units we want to generate */ + MinorFUPool &fuDescriptions; + + /** Number of functional units to produce */ + unsigned int numFuncUnits; + + /** Longest latency of any FU, useful for setting up the activity + * recorder */ + Cycles longestFuLatency; + + /** Modify instruction trace times on commit */ + bool setTraceTimeOnCommit; + + /** Modify instruction trace times on issue */ + bool setTraceTimeOnIssue; + + /** Allow mem refs to leave their FUs before reaching the head + * of the in flight insts queue if their dependencies are met */ + bool allowEarlyMemIssue; + + /** The FU index of the non-existent costless FU for instructions + * which pass the MinorDynInst::isNoCostInst 
test */ + unsigned int noCostFUIndex; + + /** The load/store queue. It provides the Dcache port that Execute + * passes on to the CPU. Execute owns this */ + LSQ lsq; + + /** Scoreboard of instruction dependencies */ + Scoreboard scoreboard; + + /** The execution functional units */ + std::vector<FUPipeline *> funcUnits; + + public: /* Public for Pipeline to be able to pass it to Decode */ + InputBuffer<ForwardInstData> inputBuffer; + + protected: + /** Stage cycle-by-cycle state */ + + /** State that drain passes through (in order). On a drain request, + * Execute transitions into either DrainCurrentInst (if between + * microops) or DrainHaltFetch. + * + * Note that Execute doesn't actually have a 'Drained' state, only + * an indication that it's currently draining and isDrained that can't + * tell if there are insts still in the pipeline leading up to + * Execute */ + enum DrainState + { + NotDraining, /* Not draining, possibly running */ + DrainCurrentInst, /* Draining to end of inst/macroop */ + DrainHaltFetch, /* Halting Fetch after completing current inst */ + DrainAllInsts /* Discarding all remaining insts */ + }; + + /** In-order instructions either in FUs or the LSQ */ + Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFlightInsts; + + /** Memory ref instructions still in the FUs */ + Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFUMemInsts; + + /** Index that we've completed up to in getInput data. We can + * popInput when this equals getInput()->width() */ + unsigned int inputIndex; + + /** The last commit was the end of a full instruction so an interrupt + * can safely happen */ + bool lastCommitWasEndOfMacroop; + + /** Structure for reporting insts currently being processed/retired + * for MinorTrace */ + ForwardInstData instsBeingCommitted; + + /** Source of sequence number for instruction streams. Increment this and + * pass to fetch whenever an instruction stream needs to be changed. + * For any more complicated behaviour (e.g. speculation) there'll need + * to be another plan. 
THREAD, need one for each thread */ + InstSeqNum streamSeqNum; + + /** A prediction number for use where one isn't available from an + * instruction. This is harvested from committed instructions. + * This isn't really needed as the streamSeqNum will change on + * a branch, but it minimises disruption in stream identification */ + InstSeqNum lastPredictionSeqNum; + + /** State progression for draining NotDraining -> ... -> DrainAllInsts */ + DrainState drainState; + + protected: + friend std::ostream &operator <<(std::ostream &os, DrainState state); + + /** Get a piece of data to work on from the inputBuffer, or 0 if there + * is no data. */ + const ForwardInstData *getInput(); + + /** Pop an element off the input buffer, if there are any */ + void popInput(); + + /** Generate Branch data based (into branch) on an observed (or not) + * change in PC while executing an instruction. + * Also handles branch prediction information within the inst. */ + void tryToBranch(MinorDynInstPtr inst, Fault fault, BranchData &branch); + + /** Actually create a branch to communicate to Fetch1/Fetch2 and, + * if that is a stream-changing branch update the streamSeqNum */ + void updateBranchData(BranchData::Reason reason, + MinorDynInstPtr inst, const TheISA::PCState &target, + BranchData &branch); + + /** Handle extracting mem ref responses from the memory queues and + * completing the associated instructions. + * Fault is an output and will contain any fault caused (and already + * invoked by the function) + * Sets branch to any branch generated by the instruction. */ + void handleMemResponse(MinorDynInstPtr inst, + LSQ::LSQRequestPtr response, BranchData &branch, + Fault &fault); + + /** Execute a memory reference instruction. This calls initiateAcc on + * the instruction which will then call writeMem or readMem to issue a + * memory access to the LSQ. + * Returns true if the instruction was executed rather than stalled + * because of a lack of LSQ resources and false otherwise. 
+ * branch is set to any branch raised by the instruction. + * failed_predicate is set to false if the instruction passed its + * predicate and so will access memory or true if the instruction + * *failed* its predicate and is now complete. + * fault is set if any non-NoFault fault is raised. + * Any faults raised are actually invoke-d by this function. */ + bool executeMemRefInst(MinorDynInstPtr inst, BranchData &branch, + bool &failed_predicate, Fault &fault); + + /** Has an interrupt been raised */ + bool isInterrupted(ThreadID thread_id) const; + + /** Are we between instructions? Can we be interrupted? */ + bool isInbetweenInsts() const; + + /** Act on an interrupt. Returns true if an interrupt was actually + * signalled and invoked */ + bool takeInterrupt(ThreadID thread_id, BranchData &branch); + + /** Try and issue instructions from the inputBuffer */ + unsigned int issue(bool only_issue_microops); + + /** Try to act on PC-related events. Returns true if any were + * executed */ + bool tryPCEvents(); + + /** Do the stats handling and instruction count and PC event events + * related to the new instruction/op counts */ + void doInstCommitAccounting(MinorDynInstPtr inst); + + /** Commit a single instruction. Returns true if the instruction being + * examined was completed (fully executed, discarded, or initiated a + * memory access), false if there is still some processing to do. + * fu_index is the index of the functional unit this instruction is + * being executed in into for funcUnits + * If early_memory_issue is true then this is an early execution + * of a mem ref and so faults will not be processed. 
+ * If the return value is true: + * fault is set if a fault happened, + * branch is set to indicate any branch that occurs + * committed is set to true if this instruction is committed + * (and so needs to be traced and accounted for) + * completed_mem_issue is set if the instruction was a + * memory access that was issued */ + bool commitInst(MinorDynInstPtr inst, bool early_memory_issue, + BranchData &branch, Fault &fault, bool &committed, + bool &completed_mem_issue); + + /** Try and commit instructions from the ends of the functional unit + * pipelines. + * If only_commit_microops is true then only commit up to the + * end of the current full instruction. + * If discard is true then discard all instructions rather than + * committing. + * branch is set to any branch raised during commit. */ + void commit(bool only_commit_microops, bool discard, BranchData &branch); + + /** Set the drain state (with useful debugging messages) */ + void setDrainState(DrainState state); + + public: + Execute(const std::string &name_, + MinorCPU &cpu_, + MinorCPUParams &params, + Latch<ForwardInstData>::Output inp_, + Latch<BranchData>::Input out_); + + ~Execute(); + + public: + + /** Cause Execute to issue an UnpredictedBranch (or WakeupFetch if + * that was passed as the reason) to Fetch1 to wake the + * system up (using the PC from the thread context). */ + void wakeupFetch(BranchData::Reason reason = + BranchData::UnpredictedBranch); + + /** Returns the DcachePort owned by this Execute to pass upwards */ + MinorCPU::MinorCPUPort &getDcachePort(); + + /** To allow ExecContext to find the LSQ */ + LSQ &getLSQ() { return lsq; } + + /** Does the given instruction have the right stream sequence number + * to be committed? 
*/ + bool instIsRightStream(MinorDynInstPtr inst); + + /** Returns true if the given instruction is at the head of the + * inFlightInsts instruction queue */ + bool instIsHeadInst(MinorDynInstPtr inst); + + /** Pass on input/buffer data to the output if you can */ + void evaluate(); + + void minorTrace() const; + + /** After thread suspension, has Execute been drained of in-flight + * instructions and memory accesses. */ + bool isDrained(); + + /** Like the drain interface on SimObject */ + unsigned int drain(); + void drainResume(); +}; + +} + +#endif /* __CPU_MINOR_EXECUTE_HH__ */ diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc new file mode 100644 index 000000000..45dc5eddc --- /dev/null +++ b/src/cpu/minor/fetch1.cc @@ -0,0 +1,676 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +#include <cstring> +#include <iomanip> +#include <sstream> + +#include "base/cast.hh" +#include "cpu/minor/fetch1.hh" +#include "cpu/minor/pipeline.hh" +#include "debug/Drain.hh" +#include "debug/Fetch.hh" +#include "debug/MinorTrace.hh" + +namespace Minor +{ + +/* Build the Fetch1 stage from its latches and the fetch1* parameters. + * A fetch1LineSnapWidth or fetch1LineWidth of 0 is replaced by the CPU's + * cache line size before the parameters are checked */ +Fetch1::Fetch1(const std::string &name_, + MinorCPU &cpu_, + MinorCPUParams &params, + Latch<BranchData>::Output inp_, + Latch<ForwardLineData>::Input out_, + Latch<BranchData>::Output prediction_, + Reservable &next_stage_input_buffer) : + Named(name_), + cpu(cpu_), + inp(inp_), + out(out_), + prediction(prediction_), + nextStageReserve(next_stage_input_buffer), + icachePort(name_ + ".icache_port", *this, cpu_), + lineSnap(params.fetch1LineSnapWidth), + maxLineWidth(params.fetch1LineWidth), + fetchLimit(params.fetch1FetchLimit), + state(FetchWaitingForPC), + pc(0), + streamSeqNum(InstId::firstStreamSeqNum), + predictionSeqNum(InstId::firstPredictionSeqNum), + blocked(false), + requests(name_ + ".requests", "lines", params.fetch1FetchLimit), + transfers(name_ + ".transfers", "lines", params.fetch1FetchLimit), + icacheState(IcacheRunning), + lineSeqNum(InstId::firstLineSeqNum), + numFetchesInMemorySystem(0), + numFetchesInITLB(0) +{ + if (lineSnap == 0) { + lineSnap = cpu.cacheLineSize(); + DPRINTF(Fetch, "lineSnap set to cache line size of: %d\n", + lineSnap); + } + + if (maxLineWidth == 0) { + maxLineWidth = cpu.cacheLineSize(); + DPRINTF(Fetch, "maxLineWidth set to cache line size of: %d\n", + maxLineWidth); + } + + /* These assertions should be copied to the Python config. 
as well */ + if ((lineSnap % sizeof(TheISA::MachInst)) != 0) { + fatal("%s: fetch1LineSnapWidth must be a multiple " + "of sizeof(TheISA::MachInst) (%d)\n", name_, + sizeof(TheISA::MachInst)); + } + + if (!(maxLineWidth >= lineSnap && + (maxLineWidth % sizeof(TheISA::MachInst)) == 0)) + { + fatal("%s: fetch1LineWidth must be a multiple of" + " sizeof(TheISA::MachInst)" + " (%d), and >= fetch1LineSnapWidth (%d)\n", + name_, sizeof(TheISA::MachInst), lineSnap); + } + + if (fetchLimit < 1) { + fatal("%s: fetch1FetchLimit must be >= 1 (%d)\n", name_, + fetchLimit); + } +} + +void +Fetch1::fetchLine() +{ + /* If line_offset != 0, a request is pushed for the remainder of the + * line. + * NOTE(review): line_offset is computed from the already masked + * aligned_pc, so it looks like it is always 0 when lineSnap is a + * power of two -- confirm */ + /* Use a lower, lineSnap aligned address for the fetch. + * NOTE(review): the mask presumes lineSnap is a power of two, which + * the constructor's checks do not enforce -- confirm */ + Addr aligned_pc = pc.instAddr() & ~((Addr) lineSnap - 1); + unsigned int line_offset = aligned_pc % lineSnap; + unsigned int request_size = maxLineWidth - line_offset; + + /* Fill in the line's id */ + InstId request_id(0 /* thread */, + streamSeqNum, predictionSeqNum, + lineSeqNum); + + FetchRequestPtr request = new FetchRequest(*this, request_id, pc); + + DPRINTF(Fetch, "Inserting fetch into the fetch queue " + "%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n", + request_id, aligned_pc, pc, line_offset, request_size); + + request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0); + request->request.setVirt(0 /* asid */, + aligned_pc, request_size, Request::INST_FETCH, cpu.instMasterId(), + /* I've no idea why we need the PC, but give it */ + pc.instAddr()); + + DPRINTF(Fetch, "Submitting ITLB request\n"); + numFetchesInITLB++; + + request->state = FetchRequest::InTranslation; + + /* Reserve space in the queues upstream of requests for results */ + transfers.reserve(); + requests.push(request); + + /* Submit the translation request. 
The response will come + * through finish/markDelayed on this request as it bears + * the Translation interface */ + cpu.threads[request->id.threadId]->itb->translateTiming( + &request->request, + cpu.getContext(request->id.threadId), + request, BaseTLB::Execute); + + lineSeqNum++; + + /* Step the PC for the next line onto the line aligned next address. + * Note that as instructions can span lines, this PC is only a + * reliable 'new' PC if the next line has a new stream sequence number. */ +#if THE_ISA == ALPHA_ISA + /* Restore the low bits of the PC used as address space flags */ + Addr pc_low_bits = pc.instAddr() & + ((Addr) (1 << sizeof(TheISA::MachInst)) - 1); + + pc.set(aligned_pc + request_size + pc_low_bits); +#else + pc.set(aligned_pc + request_size); +#endif +} + +std::ostream & +operator <<(std::ostream &os, Fetch1::IcacheState state) +{ + switch (state) { + case Fetch1::IcacheRunning: + os << "IcacheRunning"; + break; + case Fetch1::IcacheNeedsRetry: + os << "IcacheNeedsRetry"; + break; + default: + os << "IcacheState-" << static_cast<int>(state); + break; + } + return os; +} + +void +Fetch1::FetchRequest::makePacket() +{ + /* Make the necessary packet for a memory transaction */ + packet = new Packet(&request, MemCmd::ReadReq); + packet->allocate(); + + /* This FetchRequest becomes SenderState to allow the response to be + * identified */ + packet->pushSenderState(this); +} + +void +Fetch1::FetchRequest::finish( + Fault fault_, RequestPtr request_, ThreadContext *tc, BaseTLB::Mode mode) +{ + fault = fault_; + + state = Translated; + fetch.handleTLBResponse(this); + + /* Let's try and wake up the processor for the next cycle */ + fetch.cpu.wakeupOnEvent(Pipeline::Fetch1StageId); +} + +void +Fetch1::handleTLBResponse(FetchRequestPtr response) +{ + numFetchesInITLB--; + + if (response->fault != NoFault) { + DPRINTF(Fetch, "Fault in address ITLB translation: %s, " + "paddr: 0x%x, vaddr: 0x%x\n", + response->fault->name(), + (response->request.hasPaddr() ? 
response->request.getPaddr() : 0), + response->request.getVaddr()); + + if (DTRACE(MinorTrace)) + minorTraceResponseLine(name(), response); + } else { + DPRINTF(Fetch, "Got ITLB response\n"); + } + + response->state = FetchRequest::Translated; + + tryToSendToTransfers(response); +} + +Fetch1::FetchRequest::~FetchRequest() +{ + if (packet) + delete packet; +} + +void +Fetch1::tryToSendToTransfers(FetchRequestPtr request) +{ + if (!requests.empty() && requests.front() != request) { + DPRINTF(Fetch, "Fetch not at front of requests queue, can't" + " issue to memory\n"); + return; + } + + if (request->state == FetchRequest::InTranslation) { + DPRINTF(Fetch, "Fetch still in translation, not issuing to" + " memory\n"); + return; + } + + if (request->isDiscardable() || request->fault != NoFault) { + /* Discarded and faulting requests carry on through transfers + * as Complete/packet == NULL */ + + request->state = FetchRequest::Complete; + moveFromRequestsToTransfers(request); + + /* Wake up the pipeline next cycle as there will be no event + * for this queue->queue transfer */ + cpu.wakeupOnEvent(Pipeline::Fetch1StageId); + } else if (request->state == FetchRequest::Translated) { + if (!request->packet) + request->makePacket(); + + /* Ensure that the packet won't delete the request */ + assert(request->packet->needsResponse()); + + if (tryToSend(request)) + moveFromRequestsToTransfers(request); + } else { + DPRINTF(Fetch, "Not advancing line fetch\n"); + } +} + +void +Fetch1::moveFromRequestsToTransfers(FetchRequestPtr request) +{ + assert(!requests.empty() && requests.front() == request); + + requests.pop(); + transfers.push(request); +} + +bool +Fetch1::tryToSend(FetchRequestPtr request) +{ + bool ret = false; + + if (icachePort.sendTimingReq(request->packet)) { + /* Invalidate the fetch_requests packet so we don't + * accidentally fail to deallocate it (or use it!) 
+ * later by overwriting it */ + request->packet = NULL; + request->state = FetchRequest::RequestIssuing; + numFetchesInMemorySystem++; + + ret = true; + + DPRINTF(Fetch, "Issued fetch request to memory: %s\n", + request->id); + } else { + /* Needs to be resent, wait for that */ + icacheState = IcacheNeedsRetry; + + DPRINTF(Fetch, "Line fetch needs to retry: %s\n", + request->id); + } + + return ret; +} + +void +Fetch1::stepQueues() +{ + IcacheState old_icache_state = icacheState; + + switch (icacheState) { + case IcacheRunning: + /* Move ITLB results on to the memory system */ + if (!requests.empty()) { + tryToSendToTransfers(requests.front()); + } + break; + case IcacheNeedsRetry: + break; + } + + if (icacheState != old_icache_state) { + DPRINTF(Fetch, "Step in state %s moving to state %s\n", + old_icache_state, icacheState); + } +} + +void +Fetch1::popAndDiscard(FetchQueue &queue) +{ + if (!queue.empty()) { + delete queue.front(); + queue.pop(); + } +} + +unsigned int +Fetch1::numInFlightFetches() +{ + return requests.occupiedSpace() + + transfers.occupiedSpace(); +} + +/** Print the appropriate MinorLine line for a fetch response */ +void +Fetch1::minorTraceResponseLine(const std::string &name, + Fetch1::FetchRequestPtr response) const +{ + Request &request M5_VAR_USED = response->request; + + if (response->packet && response->packet->isError()) { + MINORLINE(this, "id=F;%s vaddr=0x%x fault=\"error packet\"\n", + response->id, request.getVaddr()); + } else if (response->fault != NoFault) { + MINORLINE(this, "id=F;%s vaddr=0x%x fault=\"%s\"\n", + response->id, request.getVaddr(), response->fault->name()); + } else { + MINORLINE(this, "id=%s size=%d vaddr=0x%x paddr=0x%x\n", + response->id, request.getSize(), + request.getVaddr(), request.getPaddr()); + } +} + +bool +Fetch1::recvTimingResp(PacketPtr response) +{ + DPRINTF(Fetch, "recvTimingResp %d\n", numFetchesInMemorySystem); + + /* Only push the response if we didn't change stream? 
No, all responses + * should hit the responses queue. It's the job of 'step' to throw them + * away. */ + FetchRequestPtr fetch_request = safe_cast<FetchRequestPtr> + (response->popSenderState()); + + /* Fixup packet in fetch_request as this may have changed */ + assert(!fetch_request->packet); + fetch_request->packet = response; + + numFetchesInMemorySystem--; + fetch_request->state = FetchRequest::Complete; + + if (DTRACE(MinorTrace)) + minorTraceResponseLine(name(), fetch_request); + + if (response->isError()) { + DPRINTF(Fetch, "Received error response packet: %s\n", + fetch_request->id); + } + + /* We go to idle even if there are more things to do on the queues as + * it's the job of step to actually step us on to the next transaction */ + + /* Let's try and wake up the processor for the next cycle to move on + * queues */ + cpu.wakeupOnEvent(Pipeline::Fetch1StageId); + + /* Never busy */ + return true; +} + +void +Fetch1::recvRetry() +{ + DPRINTF(Fetch, "recvRetry\n"); + assert(icacheState == IcacheNeedsRetry); + assert(!requests.empty()); + + FetchRequestPtr retryRequest = requests.front(); + + icacheState = IcacheRunning; + + if (tryToSend(retryRequest)) + moveFromRequestsToTransfers(retryRequest); +} + +std::ostream & +operator <<(std::ostream &os, Fetch1::FetchState state) +{ + switch (state) { + case Fetch1::FetchHalted: + os << "FetchHalted"; + break; + case Fetch1::FetchWaitingForPC: + os << "FetchWaitingForPC"; + break; + case Fetch1::FetchRunning: + os << "FetchRunning"; + break; + default: + os << "FetchState-" << static_cast<int>(state); + break; + } + return os; +} + +void +Fetch1::changeStream(const BranchData &branch) +{ + updateExpectedSeqNums(branch); + + /* Start fetching again if we were stopped */ + switch (branch.reason) { + case BranchData::SuspendThread: + DPRINTF(Fetch, "Suspending fetch: %s\n", branch); + state = FetchWaitingForPC; + break; + case BranchData::HaltFetch: + DPRINTF(Fetch, "Halting fetch\n"); + state = FetchHalted; + 
break; + default: + DPRINTF(Fetch, "Changing stream on branch: %s\n", branch); + state = FetchRunning; + break; + } + pc = branch.target; +} + +void +Fetch1::updateExpectedSeqNums(const BranchData &branch) +{ + DPRINTF(Fetch, "Updating streamSeqNum from: %d to %d," + " predictionSeqNum from: %d to %d\n", + streamSeqNum, branch.newStreamSeqNum, + predictionSeqNum, branch.newPredictionSeqNum); + + /* Change the stream */ + streamSeqNum = branch.newStreamSeqNum; + /* Update the prediction. Note that it's possible for this to + * actually set the prediction to an *older* value if new + * predictions have been discarded by execute */ + predictionSeqNum = branch.newPredictionSeqNum; +} + +void +Fetch1::processResponse(Fetch1::FetchRequestPtr response, + ForwardLineData &line) +{ + PacketPtr packet = response->packet; + + /* Pass the prefetch abort (if any) on to Fetch2 in a ForwardLineData + * structure */ + line.setFault(response->fault); + /* Make sequence numbers valid in return */ + line.id = response->id; + /* Set PC to virtual address */ + line.pc = response->pc; + /* Set the lineBase, which is a sizeof(MachInst) aligned address <= + * pc.instAddr() */ + line.lineBaseAddr = response->request.getVaddr(); + + if (response->fault != NoFault) { + /* Stop fetching if there was a fault */ + /* Should probably try to flush the queues as well, but we + * can't be sure that this fault will actually reach Execute, and we + * can't (currently) selectively remove this stream from the queues */ + DPRINTF(Fetch, "Stopping line fetch because of fault: %s\n", + response->fault->name()); + state = Fetch1::FetchWaitingForPC; + } else { + line.adoptPacketData(packet); + /* Null the response's packet to prevent the response from trying to + * deallocate the packet */ + response->packet = NULL; + } +} + +void +Fetch1::evaluate() +{ + const BranchData &execute_branch = *inp.outputWire; + const BranchData &fetch2_branch = *prediction.outputWire; + ForwardLineData &line_out = 
*out.inputWire; + + assert(line_out.isBubble()); + + blocked = !nextStageReserve.canReserve(); + + /* Are we changing stream? Look to the Execute branches first, then + * to predicted changes of stream from Fetch2 */ + /* @todo, find better way to express ignoring branch predictions */ + if (execute_branch.isStreamChange() && + execute_branch.reason != BranchData::BranchPrediction) + { + if (state == FetchHalted) { + if (execute_branch.reason == BranchData::WakeupFetch) { + DPRINTF(Fetch, "Waking up fetch: %s\n", execute_branch); + changeStream(execute_branch); + } else { + DPRINTF(Fetch, "Halted, ignoring branch: %s\n", + execute_branch); + } + } else { + changeStream(execute_branch); + } + + if (!fetch2_branch.isBubble()) { + DPRINTF(Fetch, "Ignoring simultaneous prediction: %s\n", + fetch2_branch); + } + + /* The streamSeqNum tagging in request/response ->req should handle + * discarding those requests when we get to them. */ + } else if (state != FetchHalted && fetch2_branch.isStreamChange()) { + /* Handle branch predictions by changing the instruction source + * if we're still processing the same stream (as set by streamSeqNum) + * as the one of the prediction. + */ + if (fetch2_branch.newStreamSeqNum != streamSeqNum) { + DPRINTF(Fetch, "Not changing stream on prediction: %s," + " streamSeqNum mismatch\n", + fetch2_branch); + } else { + changeStream(fetch2_branch); + } + } + + /* Can we fetch? 
*/ + /* The bare minimum requirements for initiating a fetch */ + /* THREAD need to handle multiple threads */ + if (state == FetchRunning && /* We are actually fetching */ + !blocked && /* Space in the Fetch2 inputBuffer */ + /* The thread we're going to fetch for (thread 0), is active */ + cpu.getContext(0)->status() == ThreadContext::Active && + numInFlightFetches() < fetchLimit) + { + fetchLine(); + /* Take up a slot in the fetch queue */ + nextStageReserve.reserve(); + } + + /* Halting shouldn't prevent fetches in flight from being processed */ + /* Step fetches through the icachePort queues and memory system */ + stepQueues(); + + /* As we've thrown away early lines, if there is a line, it must + * be from the right stream */ + if (!transfers.empty() && + transfers.front()->isComplete()) + { + Fetch1::FetchRequestPtr response = transfers.front(); + + if (response->isDiscardable()) { + nextStageReserve.freeReservation(); + + DPRINTF(Fetch, "Discarding translated fetch at it's for" + " an old stream\n"); + + /* Wake up next cycle just in case there was some other + * action to do */ + cpu.wakeupOnEvent(Pipeline::Fetch1StageId); + } else { + DPRINTF(Fetch, "Processing fetched line: %s\n", + response->id); + + processResponse(response, line_out); + } + + popAndDiscard(transfers); + } + + /* If we generated output, mark the stage as being active + * to encourage that output on to the next stage */ + if (!line_out.isBubble()) + cpu.activityRecorder->activity(); + + /* Fetch1 has no inputBuffer so the only activity we can have is to + * generate a line output (tested just above) or to initiate a memory + * fetch which will signal activity when it returns/needs stepping + * between queues */ +} + +bool +Fetch1::isDrained() +{ + /* One %s per argument: halted flag, in-flight marker, output marker */ + DPRINTF(Drain, "isDrained %s %s%s\n", + state == FetchHalted, + (numInFlightFetches() == 0 ? "" : "inFlightFetches "), + ((*out.inputWire).isBubble() ? 
"" : "outputtingLine")); + + return state == FetchHalted && + numInFlightFetches() == 0 && + (*out.inputWire).isBubble(); +} + +void +Fetch1::FetchRequest::reportData(std::ostream &os) const +{ + os << id; +} + +bool Fetch1::FetchRequest::isDiscardable() const +{ + /* Can't discard lines in TLB/memory */ + return state != InTranslation && state != RequestIssuing && + (id.streamSeqNum != fetch.streamSeqNum || + id.predictionSeqNum != fetch.predictionSeqNum); +} + +void +Fetch1::minorTrace() const +{ + std::ostringstream data; + + if (blocked) + data << 'B'; + else + (*out.inputWire).reportData(data); + + MINORTRACE("state=%s icacheState=%s in_tlb_mem=%s/%s" + " streamSeqNum=%d lines=%s\n", state, icacheState, + numFetchesInITLB, numFetchesInMemorySystem, + streamSeqNum, data.str()); + requests.minorTrace(); + transfers.minorTrace(); +} + +} diff --git a/src/cpu/minor/fetch1.hh b/src/cpu/minor/fetch1.hh new file mode 100644 index 000000000..29a63d1f1 --- /dev/null +++ b/src/cpu/minor/fetch1.hh @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * Fetch1 is responsible for fetching "lines" from memory and passing + * them to Fetch2 + */ + +#ifndef __CPU_MINOR_FETCH1_HH__ +#define __CPU_MINOR_FETCH1_HH__ + +#include "cpu/minor/buffers.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/pipe_data.hh" +#include "cpu/base.hh" +#include "mem/packet.hh" + +namespace Minor +{ + +/** A stage responsible for fetching "lines" from memory and passing + * them to Fetch2 */ +class Fetch1 : public Named +{ + protected: + /** Exposable fetch port */ + class IcachePort : public MinorCPU::MinorCPUPort + { + protected: + /** My owner */ + Fetch1 &fetch; + + public: + IcachePort(std::string name, Fetch1 &fetch_, MinorCPU &cpu) : + MinorCPU::MinorCPUPort(name, cpu), fetch(fetch_) + { } + + protected: + bool recvTimingResp(PacketPtr pkt) + { return fetch.recvTimingResp(pkt); } + + void recvRetry() { fetch.recvRetry(); } + }; + + /** Memory access queuing. + * + * A request can be submitted by pushing it onto the requests queue after + * issuing an ITLB lookup (state becomes InTranslation) with a + * FetchSenderState senderState containing the current lineSeqNum and + * stream/predictionSeqNum. + * + * Translated packets (state becomes Translated) are then passed to the + * memory system and the transfers queue (state becomes RequestIssuing). + * Retries are handled by leaving the packet on the requests queue and + * changing icacheState to IcacheNeedsRetry. + * + * Responses from the memory system alter the request object (state + * becomes Complete). Responses can be picked up from the head of the + * transfers queue to pass on to Fetch2. */ + + /** Structure to hold SenderState info through + * translation and memory accesses. 
+ */ + class FetchRequest : + public BaseTLB::Translation, /* For TLB lookups */ + public Packet::SenderState /* For packing into a Packet */ + { + protected: + /** Owning fetch unit */ + Fetch1 &fetch; + + public: + /** Progress of this request through address translation and + * memory */ + enum FetchRequestState + { + NotIssued, /* Just been made */ + InTranslation, /* Issued to ITLB, must wait for reply */ + Translated, /* Translation complete */ + RequestIssuing, /* Issued to memory, must wait for response */ + Complete /* Complete. Either a fault, or a fetched line */ + }; + + FetchRequestState state; + + /** Identity of the line that this request will generate */ + InstId id; + + /** FetchRequests carry packets while they're in the requests and + * transfers queues. When a Packet returns from the memory + * system, its request needs to have its packet updated as this may + * have changed in flight */ + PacketPtr packet; + + /** The underlying request that this fetch represents */ + Request request; + + /** PC to fixup with line address */ + TheISA::PCState pc; + + /** Fill in a fault if one happens during fetch, check this by + * picking apart the response packet */ + Fault fault; + + /** Make a packet to use with the memory transaction */ + void makePacket(); + + /** Report interface */ + void reportData(std::ostream &os) const; + + /** Is this line out of date with the current stream/prediction + * sequence and can it be discarded without orphaning in flight + * TLB lookups/memory accesses? */ + bool isDiscardable() const; + + /** Is this a complete read line or fault */ + bool isComplete() const { return state == Complete; } + + protected: + /** BaseTLB::Translation interface */ + + /** Interface for ITLB responses. We can handle delay, so don't + * do anything */ + void markDelayed() { } + + /** Interface for ITLB responses. 
Populates self and then passes + * the request on to Fetch1's handleTLBResponse member + * function */ + void finish(Fault fault_, RequestPtr request_, ThreadContext *tc, + BaseTLB::Mode mode); + + public: + FetchRequest(Fetch1 &fetch_, InstId id_, TheISA::PCState pc_) : + SenderState(), + fetch(fetch_), + state(NotIssued), + id(id_), + packet(NULL), + request(), + pc(pc_), + fault(NoFault) + { } + + ~FetchRequest(); + }; + + typedef FetchRequest *FetchRequestPtr; + + protected: + /** Construction-assigned data members */ + + /** Pointer back to the containing CPU */ + MinorCPU &cpu; + + /** Input port carrying branch requests from Execute */ + Latch<BranchData>::Output inp; + /** Output port carrying read lines to Fetch2 */ + Latch<ForwardLineData>::Input out; + /** Input port carrying branch predictions from Fetch2 */ + Latch<BranchData>::Output prediction; + + /** Interface to reserve space in the next stage */ + Reservable &nextStageReserve; + + /** IcachePort to pass to the CPU. Fetch1 is the only module that uses + * it. */ + IcachePort icachePort; + + /** Line snap size in bytes. All fetches clip to make their ends not + * extend beyond this limit. Setting this to the machine L1 cache line + * length will result in fetches never crossing line boundaries. */ + unsigned int lineSnap; + + /** Maximum fetch width in bytes. Setting this (and lineSnap) to the + * machine L1 cache line length will result in fetches of whole cache + * lines. Setting this to sizeof(MachInst) will result in fetches of + * single instructions (except near the end of lineSnap lines) */ + unsigned int maxLineWidth; + + /** Maximum number of fetches allowed in flight (in queues or memory) */ + unsigned int fetchLimit; + + protected: + /** Cycle-by-cycle state */ + + /** State of memory access for head instruction fetch */ + enum FetchState + { + FetchHalted, /* Not fetching, waiting to be woken by transition + to FetchWaitingForPC. 
The PC is not valid in this state */ + FetchWaitingForPC, /* Not fetching, waiting for stream change. + This doesn't stop issued fetches from being returned and + processed or for branches to change the state to FetchRunning. */ + FetchRunning /* Try to fetch, when possible */ + }; + + /** Stage cycle-by-cycle state */ + + FetchState state; + + /** Fetch PC value. This is updated by branches from Execute, branch + * prediction targets from Fetch2 and by incrementing it as we fetch + * lines subsequent to those two sources. */ + TheISA::PCState pc; + + /** Stream sequence number. This changes on request from Execute and is + * used to tag instructions by the fetch stream to which they belong. + * Execute originates new stream sequence numbers. */ + InstSeqNum streamSeqNum; + + /** Prediction sequence number. This changes when requests from Execute + * or Fetch2 ask for a change of fetch address and is used to tag lines + * by the prediction to which they belong. Fetch2 originates + * prediction sequence numbers. */ + InstSeqNum predictionSeqNum; + + /** The sequence number expected for the next returned cache line. The + * responses queue should be ordered and so, if the front of that queue + * has a lower lineSeqNum than this, lines need to be discarded. If it + * has a higher lineSeqNum, our line hasn't appeared yet */ + InstSeqNum expectedLineSeqNum; + + /** Blocked indication for report */ + bool blocked; + + /** State of the icache port: either free to step the queues or + * waiting to retry a rejected request */ + enum IcacheState + { + IcacheRunning, /* Default. 
Step icache queues when possible */ + IcacheNeedsRetry /* Request rejected, will be asked to retry */ + }; + + typedef Queue<FetchRequestPtr, + ReportTraitsPtrAdaptor<FetchRequestPtr>, + NoBubbleTraits<FetchRequestPtr> > + FetchQueue; + + /** Queue of address translated requests from Fetch1 */ + FetchQueue requests; + + /** Queue of in-memory system requests and responses */ + FetchQueue transfers; + + /** Retry state of icachePort */ + IcacheState icacheState; + + /** Sequence number for line fetch used for ordering lines to flush */ + InstSeqNum lineSeqNum; + + /** Count of the number of fetches which have left the transfers queue + * and are in the 'wild' in the memory system. Try not to rely on + * this value, it's better to code without knowledge of the number + * of outstanding accesses */ + unsigned int numFetchesInMemorySystem; + /** Number of requests inside the ITLB rather than in the queues. + * All requests so located *must* have reserved space in the + * transfers queue */ + unsigned int numFetchesInITLB; + + protected: + friend std::ostream &operator <<(std::ostream &os, + Fetch1::FetchState state); + + /** Start fetching from a new address. */ + void changeStream(const BranchData &branch); + + /** Update streamSeqNum and predictionSeqNum from the given branch (and + * assume these have changed and discard (on delivery) all lines in + * flight) */ + void updateExpectedSeqNums(const BranchData &branch); + + /** Convert a response to a ForwardLineData */ + void processResponse(FetchRequestPtr response, + ForwardLineData &line); + + friend std::ostream &operator <<(std::ostream &os, + IcacheState state); + + /** Insert a line fetch into the requests. This can be a partial + * line request where the given address has a non-0 offset into a + * line. */ + void fetchLine(); + + /** Try and issue a fetch for a translated request at the + * head of the requests queue. 
Also tries to move the request + * between queues */ + void tryToSendToTransfers(FetchRequestPtr request); + + /** Try to send (or resend) a memory request's next/only packet to + * the memory system. Returns true if the fetch was successfully + * sent to memory */ + bool tryToSend(FetchRequestPtr request); + + /** Move a request between queues */ + void moveFromRequestsToTransfers(FetchRequestPtr request); + + /** Step requests along between requests and transfers queues */ + void stepQueues(); + + /** Pop a request from the given queue and correctly deallocate and + * discard it. */ + void popAndDiscard(FetchQueue &queue); + + /** Handle pushing a TLB response onto the right queue */ + void handleTLBResponse(FetchRequestPtr response); + + /** Returns the total of queue occupancy, in-ITLB and + * in-memory system fetches */ + unsigned int numInFlightFetches(); + + /** Print the appropriate MinorLine line for a fetch response */ + void minorTraceResponseLine(const std::string &name, + FetchRequestPtr response) const; + + /** Memory interface */ + virtual bool recvTimingResp(PacketPtr pkt); + virtual void recvRetry(); + + public: + Fetch1(const std::string &name_, + MinorCPU &cpu_, + MinorCPUParams &params, + Latch<BranchData>::Output inp_, + Latch<ForwardLineData>::Input out_, + Latch<BranchData>::Output prediction_, + Reservable &next_stage_input_buffer); + + public: + /** Returns the IcachePort owned by this Fetch1 */ + MinorCPU::MinorCPUPort &getIcachePort() { return icachePort; } + + /** Pass on input/buffer data to the output if you can */ + void evaluate(); + + void minorTrace() const; + + /** Is this stage drained? 
For Fetch1, draining is initiated by + * Execute signalling a branch with the reason HaltFetch */ + bool isDrained(); +}; + +} + +#endif /* __CPU_MINOR_FETCH1_HH__ */ diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc new file mode 100644 index 000000000..4827b75fc --- /dev/null +++ b/src/cpu/minor/fetch2.cc @@ -0,0 +1,543 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +#include <string> + +#include "arch/decoder.hh" +#include "arch/utility.hh" +#include "cpu/minor/fetch2.hh" +#include "cpu/minor/pipeline.hh" +#include "cpu/pred/bpred_unit.hh" +#include "debug/Branch.hh" +#include "debug/Fetch.hh" +#include "debug/MinorTrace.hh" + +namespace Minor +{ + +Fetch2::Fetch2(const std::string &name, + MinorCPU &cpu_, + MinorCPUParams &params, + Latch<ForwardLineData>::Output inp_, + Latch<BranchData>::Output branchInp_, + Latch<BranchData>::Input predictionOut_, + Latch<ForwardInstData>::Input out_, + Reservable &next_stage_input_buffer) : + Named(name), + cpu(cpu_), + inp(inp_), + branchInp(branchInp_), + predictionOut(predictionOut_), + out(out_), + nextStageReserve(next_stage_input_buffer), + outputWidth(params.decodeInputWidth), + processMoreThanOneInput(params.fetch2CycleInput), + branchPredictor(*params.branchPred), + inputBuffer(name + ".inputBuffer", "lines", params.fetch2InputBufferSize), + inputIndex(0), + pc(TheISA::PCState(0)), + havePC(false), + lastStreamSeqNum(InstId::firstStreamSeqNum), + fetchSeqNum(InstId::firstFetchSeqNum), + expectedStreamSeqNum(InstId::firstStreamSeqNum), + predictionSeqNum(InstId::firstPredictionSeqNum) +{ + if (outputWidth < 1) + fatal("%s: decodeInputWidth must be >= 1 (%d)\n", name, outputWidth); + + if (params.fetch2InputBufferSize < 1) { + fatal("%s: fetch2InputBufferSize must be >= 1 (%d)\n", name, + 
params.fetch2InputBufferSize); + } +} + +const ForwardLineData * +Fetch2::getInput() +{ + /* Get a line from the inputBuffer to work with */ + if (!inputBuffer.empty()) { + return &(inputBuffer.front()); + } else { + return NULL; + } +} + +void +Fetch2::popInput() +{ + if (!inputBuffer.empty()) { + inputBuffer.front().freeLine(); + inputBuffer.pop(); + } + + inputIndex = 0; +} + +void +Fetch2::dumpAllInput() +{ + DPRINTF(Fetch, "Dumping whole input buffer\n"); + while (!inputBuffer.empty()) + popInput(); + + inputIndex = 0; +} + +void +Fetch2::updateBranchPrediction(const BranchData &branch) +{ + MinorDynInstPtr inst = branch.inst; + + /* Don't even consider instructions we didn't try to predict or faults */ + if (inst->isFault() || !inst->triedToPredict) + return; + + switch (branch.reason) { + case BranchData::NoBranch: + /* No data to update */ + break; + case BranchData::Interrupt: + /* Never try to predict interrupts */ + break; + case BranchData::SuspendThread: + /* Don't need to act on suspends */ + break; + case BranchData::WakeupFetch: + /* Don't need to act on wakeups, no instruction tied to action. */ + break; + case BranchData::HaltFetch: + /* Don't need to act on fetch halts */ + break; + case BranchData::BranchPrediction: + /* Shouldn't happen. 
Fetch2 is the only source of + * BranchPredictions */ + break; + case BranchData::UnpredictedBranch: + /* Unpredicted branch or barrier */ + DPRINTF(Branch, "Unpredicted branch seen inst: %s\n", *inst); + branchPredictor.squash(inst->id.fetchSeqNum, + branch.target, true, inst->id.threadId); + break; + case BranchData::CorrectlyPredictedBranch: + /* Predicted taken, was taken */ + DPRINTF(Branch, "Branch predicted correctly inst: %s\n", *inst); + branchPredictor.update(inst->id.fetchSeqNum, + inst->id.threadId); + break; + case BranchData::BadlyPredictedBranch: + /* Predicted taken, not taken */ + DPRINTF(Branch, "Branch mis-predicted inst: %s\n", *inst); + branchPredictor.squash(inst->id.fetchSeqNum, + branch.target /* Not used */, false, inst->id.threadId); + break; + case BranchData::BadlyPredictedBranchTarget: + /* Predicted taken, was taken but to a different target */ + DPRINTF(Branch, "Branch mis-predicted target inst: %s target: %s\n", + *inst, branch.target); + branchPredictor.squash(inst->id.fetchSeqNum, + branch.target, true, inst->id.threadId); + break; + } +} + +void +Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch) +{ + TheISA::PCState inst_pc = inst->pc; + + assert(!inst->predictedTaken); + + /* Skip non-control/sys call instructions */ + if (inst->staticInst->isControl() || + inst->staticInst->isSyscall()) + { + /* Tried to predict */ + inst->triedToPredict = true; + + DPRINTF(Branch, "Trying to predict for inst: %s\n", *inst); + + if (branchPredictor.predict(inst->staticInst, + inst->id.fetchSeqNum, inst_pc, + inst->id.threadId)) + { + inst->predictedTaken = true; + inst->predictedTarget = inst_pc; + branch.target = inst_pc; + } + } else { + DPRINTF(Branch, "Not attempting prediction for inst: %s\n", *inst); + } + + /* If we predict taken, set branch and update sequence numbers */ + if (inst->predictedTaken) { + /* Update the predictionSeqNum and remember the streamSeqNum that it + * was associated with */ + expectedStreamSeqNum = 
inst->id.streamSeqNum; + + BranchData new_branch = BranchData(BranchData::BranchPrediction, + inst->id.streamSeqNum, predictionSeqNum + 1, + inst->predictedTarget, inst); + + /* Mark with a new prediction number by the stream number of the + * instruction causing the prediction */ + predictionSeqNum++; + branch = new_branch; + + DPRINTF(Branch, "Branch predicted taken inst: %s target: %s" + " new predictionSeqNum: %d\n", + *inst, inst->predictedTarget, predictionSeqNum); + } +} + +void +Fetch2::evaluate() +{ + inputBuffer.setTail(*inp.outputWire); + ForwardInstData &insts_out = *out.inputWire; + BranchData prediction; + BranchData &branch_inp = *branchInp.outputWire; + + assert(insts_out.isBubble()); + + blocked = false; + + /* React to branches from Execute to update local branch prediction + * structures */ + updateBranchPrediction(branch_inp); + + /* If a branch arrives, don't try and do anything about it. Only + * react to your own predictions */ + if (branch_inp.isStreamChange()) { + DPRINTF(Fetch, "Dumping all input as a stream changing branch" + " has arrived\n"); + dumpAllInput(); + havePC = false; + } + + /* Even when blocked, clear out input lines with the wrong + * prediction sequence number */ + { + const ForwardLineData *line_in = getInput(); + + while (line_in && + expectedStreamSeqNum == line_in->id.streamSeqNum && + predictionSeqNum != line_in->id.predictionSeqNum) + { + DPRINTF(Fetch, "Discarding line %s" + " due to predictionSeqNum mismatch (expected: %d)\n", + line_in->id, predictionSeqNum); + + popInput(); + havePC = false; + + if (processMoreThanOneInput) { + DPRINTF(Fetch, "Wrapping\n"); + line_in = getInput(); + } else { + line_in = NULL; + } + } + } + + if (!nextStageReserve.canReserve()) { + blocked = true; + } else { + const ForwardLineData *line_in = getInput(); + + unsigned int output_index = 0; + + /* Pack instructions into the output while we can. This may involve + * using more than one input line. 
Note that lineWidth will be 0 + * for faulting lines */ + while (line_in && + (line_in->isFault() || + inputIndex < line_in->lineWidth) && /* More input */ + output_index < outputWidth && /* More output to fill */ + prediction.isBubble() /* No predicted branch */) + { + ThreadContext *thread = cpu.getContext(line_in->id.threadId); + TheISA::Decoder *decoder = thread->getDecoderPtr(); + + /* Discard line due to prediction sequence number being wrong but + * without the streamSeqNum number having changed */ + bool discard_line = + expectedStreamSeqNum == line_in->id.streamSeqNum && + predictionSeqNum != line_in->id.predictionSeqNum; + + /* Set the PC if the stream changes. Setting havePC to false in + * a previous cycle handles all other change of flow of control + * issues */ + bool set_pc = lastStreamSeqNum != line_in->id.streamSeqNum; + + if (!discard_line && (!havePC || set_pc)) { + /* Set the inputIndex to be the MachInst-aligned offset + * from lineBaseAddr of the new PC value */ + inputIndex = + (line_in->pc.instAddr() & BaseCPU::PCMask) - + line_in->lineBaseAddr; + DPRINTF(Fetch, "Setting new PC value: %s inputIndex: 0x%x" + " lineBaseAddr: 0x%x lineWidth: 0x%x\n", + line_in->pc, inputIndex, line_in->lineBaseAddr, + line_in->lineWidth); + pc = line_in->pc; + havePC = true; + decoder->reset(); + } + + /* The generated instruction. 
Leave as NULL if no instruction + * is to be packed into the output */ + MinorDynInstPtr dyn_inst = NULL; + + if (discard_line) { + /* Rest of line was from an older prediction in the same + * stream */ + DPRINTF(Fetch, "Discarding line %s (from inputIndex: %d)" + " due to predictionSeqNum mismatch (expected: %d)\n", + line_in->id, inputIndex, predictionSeqNum); + } else if (line_in->isFault()) { + /* Pack a fault as a MinorDynInst with ->fault set */ + + /* Make a new instruction and pick up the line, stream, + * prediction, thread ids from the incoming line */ + dyn_inst = new MinorDynInst(line_in->id); + + /* Fetch and prediction sequence numbers originate here */ + dyn_inst->id.fetchSeqNum = fetchSeqNum; + dyn_inst->id.predictionSeqNum = predictionSeqNum; + /* To complete the set, test that exec sequence number has + * not been set */ + assert(dyn_inst->id.execSeqNum == 0); + + dyn_inst->pc = pc; + + /* Pack a faulting instruction but allow other + * instructions to be generated. (Fetch2 makes no + * immediate judgement about streamSeqNum) */ + dyn_inst->fault = line_in->fault; + DPRINTF(Fetch, "Fault being passed output_index: " + "%d: %s\n", output_index, dyn_inst->fault->name()); + } else { + uint8_t *line = line_in->line; + + TheISA::MachInst inst_word; + /* The instruction is wholly in the line, can just + * assign */ + inst_word = TheISA::gtoh( + *(reinterpret_cast<TheISA::MachInst *> + (line + inputIndex))); + + if (!decoder->instReady()) { + decoder->moreBytes(pc, + line_in->lineBaseAddr + inputIndex, inst_word); + DPRINTF(Fetch, "Offering MachInst to decoder" + " addr: 0x%x\n", line_in->lineBaseAddr + inputIndex); + } + + /* Maybe make the above a loop to accommodate ISAs with + * instructions longer than sizeof(MachInst) */ + + if (decoder->instReady()) { + /* Make a new instruction and pick up the line, stream, + * prediction, thread ids from the incoming line */ + dyn_inst = new MinorDynInst(line_in->id); + + /* Fetch and prediction sequence numbers 
originate here */ + dyn_inst->id.fetchSeqNum = fetchSeqNum; + dyn_inst->id.predictionSeqNum = predictionSeqNum; + /* To complete the set, test that exec sequence number + * has not been set */ + assert(dyn_inst->id.execSeqNum == 0); + + /* Note that the decoder can update the given PC. + * Remember not to assign it until *after* calling + * decode */ + StaticInstPtr decoded_inst = decoder->decode(pc); + dyn_inst->staticInst = decoded_inst; + + dyn_inst->pc = pc; + + DPRINTF(Fetch, "Instruction extracted from line %s" + " lineWidth: %d output_index: %d inputIndex: %d" + " pc: %s inst: %s\n", + line_in->id, + line_in->lineWidth, output_index, inputIndex, + pc, *dyn_inst); + +#if THE_ISA == X86_ISA || THE_ISA == ARM_ISA + /* In SE mode, it's possible to branch to a microop when + * replaying faults such as page faults (or simply + * intra-microcode branches in X86). Unfortunately, + * as Minor has micro-op decomposition in a separate + * pipeline stage from instruction decomposition, the + * following advancePC (which may follow a branch with + * microPC() != 0) *must* see a fresh macroop. This + * kludge should be improved with an addition to PCState + * but I offer it in this form for the moment + * + * X86 can branch within microops so we need to deal with + * the case that, after a branch, the first un-advanced PC + * may be pointing to a microop other than 0. 
Once + * advanced, however, the microop number *must* be 0 */ + pc.upc(0); + pc.nupc(1); +#endif + + /* Advance PC for the next instruction */ + TheISA::advancePC(pc, decoded_inst); + + /* Predict any branches and issue a branch if + * necessary */ + predictBranch(dyn_inst, prediction); + } else { + DPRINTF(Fetch, "Inst not ready yet\n"); + } + + /* Step on the pointer into the line if there's no + * complete instruction waiting */ + if (decoder->needMoreBytes()) { + inputIndex += sizeof(TheISA::MachInst); + + DPRINTF(Fetch, "Updated inputIndex value PC: %s" + " inputIndex: 0x%x lineBaseAddr: 0x%x lineWidth: 0x%x\n", + line_in->pc, inputIndex, line_in->lineBaseAddr, + line_in->lineWidth); + } + } + + if (dyn_inst) { + /* Step to next sequence number */ + fetchSeqNum++; + + /* Correctly size the output before writing */ + if (output_index == 0) + insts_out.resize(outputWidth); + /* Pack the generated dynamic instruction into the output */ + insts_out.insts[output_index] = dyn_inst; + output_index++; + + /* Output MinorTrace instruction info for + * pre-microop decomposition macroops */ + if (DTRACE(MinorTrace) && !dyn_inst->isFault() && + dyn_inst->staticInst->isMacroop()) + { + dyn_inst->minorTraceInst(*this); + } + } + + /* Remember the streamSeqNum of this line so we can tell when + * we change stream */ + lastStreamSeqNum = line_in->id.streamSeqNum; + + /* Asked to discard line or there was a branch or fault */ + if (!prediction.isBubble() || /* The remains of a + line with a prediction in it */ + line_in->isFault() /* A line which is just a fault */) + { + DPRINTF(Fetch, "Discarding all input on branch/fault\n"); + dumpAllInput(); + havePC = false; + line_in = NULL; + } else if (discard_line) { + /* Just discard one line, ones behind it may have new + * stream sequence numbers. 
There's a DPRINTF above + * for this event */ + popInput(); + havePC = false; + line_in = NULL; + } else if (inputIndex == line_in->lineWidth) { + /* Got to end of a line, pop the line but keep PC + * in case this is a line-wrapping inst. */ + popInput(); + line_in = NULL; + } + + if (!line_in && processMoreThanOneInput) { + DPRINTF(Fetch, "Wrapping\n"); + line_in = getInput(); + } + } + + /* The rest of the output (if any) should already have been packed + * with bubble instructions by insts_out's initialisation */ + } + + /** Pass the prediction (a bubble if none was made) to the prediction output wire */ + *predictionOut.inputWire = prediction; + + /* If we generated output, reserve space for the result in the next stage + * and mark the stage as being active this cycle */ + if (!insts_out.isBubble()) { + /* Note activity of following buffer */ + cpu.activityRecorder->activity(); + nextStageReserve.reserve(); + } + + /* If we still have input to process and somewhere to put it, + * mark stage as active */ + if (getInput() && nextStageReserve.canReserve()) + cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId); + + /* Make sure the input (if any left) is pushed */ + inputBuffer.pushTail(); +} + +bool +Fetch2::isDrained() +{ + return inputBuffer.empty() && + (*inp.outputWire).isBubble() && + (*predictionOut.inputWire).isBubble(); +} + +void +Fetch2::minorTrace() const +{ + std::ostringstream data; + + if (blocked) + data << 'B'; + else + (*out.inputWire).reportData(data); + + MINORTRACE("inputIndex=%d havePC=%d predictionSeqNum=%d insts=%s\n", + inputIndex, havePC, predictionSeqNum, data.str()); + inputBuffer.minorTrace(); +} + +} diff --git a/src/cpu/minor/fetch2.hh b/src/cpu/minor/fetch2.hh new file mode 100644 index 000000000..2fc38b377 --- /dev/null +++ b/src/cpu/minor/fetch2.hh @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be 
construed as granting a 
license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * Fetch2 receives lines of data from Fetch1, separates them into + * instructions and passes them to Decode + */ + +#ifndef __CPU_MINOR_FETCH2_HH__ +#define __CPU_MINOR_FETCH2_HH__ + +#include "cpu/minor/buffers.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/pipe_data.hh" +#include "cpu/pred/bpred_unit.hh" +#include "params/MinorCPU.hh" + +namespace Minor +{ + +/** This stage receives lines of data from Fetch1, separates them into + * instructions and passes them to Decode */ +class Fetch2 : public Named +{ + protected: + /** Reference back to the containing CPU */ + MinorCPU &cpu; + + /** Input port carrying lines from Fetch1 */ + Latch<ForwardLineData>::Output inp; + + /** Input port carrying branches from Execute. This is a snoop of the + * data provided to F1. */ + Latch<BranchData>::Output branchInp; + + /** Output port carrying predictions back to Fetch1 */ + Latch<BranchData>::Input predictionOut; + + /** Output port carrying instructions into Decode */ + Latch<ForwardInstData>::Input out; + + /** Interface to reserve space in the next stage */ + Reservable &nextStageReserve; + + /** Width of output of this stage/input of next in instructions */ + unsigned int outputWidth; + + /** If true, more than one input word can be processed each cycle if + * there is room in the output to contain its processed data */ + bool processMoreThanOneInput; + + /** Branch predictor passed from Python configuration */ + BPredUnit &branchPredictor; + + public: + /* Public so that Pipeline can pass it to Fetch1 */ + InputBuffer<ForwardLineData> inputBuffer; + + protected: + /** Data members after this line are cycle-to-cycle state */ + + /** Index into an incompletely processed input line that instructions + * are to be extracted from */ + unsigned int inputIndex; + + /** Remembered program counter value. Between contiguous lines, this + * is just updated with advancePC. 
For lines following changes of + * stream, a new PC must be loaded and havePC be set. + * havePC is needed to accommodate instructions which span across + * lines meaning that Fetch2 and the decoder need to remember a PC + * value and a partially-offered instruction from the previous line */ + TheISA::PCState pc; + + /** PC is currently valid. Initially false, gets set to true when a + * change-of-stream line is received and false again when lines are + * discarded for any reason */ + bool havePC; + + /** Stream sequence number of the last seen line used to identify changes + * of instruction stream */ + InstSeqNum lastStreamSeqNum; + + /** Fetch2 is the source of fetch sequence numbers. These represent the + * sequence that instructions were extracted from fetched lines. */ + InstSeqNum fetchSeqNum; + + /** Stream sequence number remembered from last time the predictionSeqNum + * changed. Lines should only be discarded when their predictionSeqNums + * disagree with Fetch2::predictionSeqNum *and* they are from the same + * stream that bore that prediction number */ + InstSeqNum expectedStreamSeqNum; + + /** Fetch2 is the source of prediction sequence numbers. These represent + * predicted changes of control flow sources from branch prediction in + * Fetch2. */ + InstSeqNum predictionSeqNum; + + /** Blocked indication for report */ + bool blocked; + + protected: + /** Get a piece of data to work on from the inputBuffer, or 0 if there + * is no data. */ + const ForwardLineData *getInput(); + + /** Pop an element off the input buffer, if there are any */ + void popInput(); + + /** Dump the whole contents of the input buffer. Useful after a + * prediction changes control flow */ + void dumpAllInput(); + + /** Update local branch prediction structures from feedback from + * Execute. */ + void updateBranchPrediction(const BranchData &branch); + + /** Predicts branches for the given instruction. Updates the + * instruction's predicted... 
fields and also the branch which + * carries the prediction to Fetch1 */ + void predictBranch(MinorDynInstPtr inst, BranchData &branch); + + public: + /** Construct the stage, binding it to its CPU, its parameters, the + * latches it reads from and writes to, and the reservable input + * buffer of the next stage */ + Fetch2(const std::string &name, + MinorCPU &cpu_, + MinorCPUParams &params, + Latch<ForwardLineData>::Output inp_, + Latch<BranchData>::Output branchInp_, + Latch<BranchData>::Input predictionOut_, + Latch<ForwardInstData>::Input out_, + Reservable &next_stage_input_buffer); + + public: + /** Pass on input/buffer data to the output if you can */ + void evaluate(); + + void minorTrace() const; + + /** Is this stage drained? For Fetch2, draining is initiated by + * Execute halting Fetch1 causing Fetch2 to naturally drain. + * Branch predictions are ignored by Fetch1 during halt */ + bool isDrained(); +}; + +} + +#endif /* __CPU_MINOR_FETCH2_HH__ */ diff --git a/src/cpu/minor/func_unit.cc b/src/cpu/minor/func_unit.cc new file mode 100644 index 000000000..1a75c4aa8 --- /dev/null +++ b/src/cpu/minor/func_unit.cc @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +#include <iomanip> +#include <sstream> +#include <typeinfo> + +#include "cpu/minor/func_unit.hh" +#include "debug/MinorTiming.hh" +#include "enums/OpClass.hh" + +MinorOpClass * +MinorOpClassParams::create() +{ + return new MinorOpClass(this); +} + +MinorOpClassSet * +MinorOpClassSetParams::create() +{ + return new MinorOpClassSet(this); +} + +MinorFUTiming * +MinorFUTimingParams::create() +{ + return new MinorFUTiming(this); +} + +MinorFU * +MinorFUParams::create() +{ + return new MinorFU(this); +} + +MinorFUPool * +MinorFUPoolParams::create() +{ + return new MinorFUPool(this); +} + +MinorOpClassSet::MinorOpClassSet(const MinorOpClassSetParams *params) : + SimObject(params), + opClasses(params->opClasses), + /* Initialise to true for an empty list so that 'fully capable' is + * the default */ + capabilityList(Num_OpClasses, (opClasses.empty() ? true : false)) +{ + for (unsigned int i = 0; i < opClasses.size(); i++) + capabilityList[opClasses[i]->opClass] = true; +} + +MinorFUTiming::MinorFUTiming( + const MinorFUTimingParams *params) : + SimObject(params), + mask(params->mask), + match(params->match), + description(params->description), + suppress(params->suppress), + extraCommitLat(params->extraCommitLat), + extraCommitLatExpr(params->extraCommitLatExpr), + extraAssumedLat(params->extraAssumedLat), + srcRegsRelativeLats(params->srcRegsRelativeLats), + opClasses(params->opClasses) +{ } + +namespace Minor +{ + +void +QueuedInst::reportData(std::ostream &os) const +{ + inst->reportData(os); +} + +FUPipeline::FUPipeline(const std::string &name, const MinorFU &description_, + ClockedObject &timeSource_) : + FUPipelineBase(name, "insts", description_.opLat), + description(description_), + timeSource(timeSource_), + nextInsertCycle(Cycles(0)) +{ + /* Issue latencies are set to 1 in calls to addCapability here. 
+ * Issue latencies are associated with the pipeline as a whole, + * rather than instruction classes in Minor */ + + /* All pipelines should be able to execute No_OpClass instructions */ + addCapability(No_OpClass, description.opLat, 1); + + /* Add the capabilities listed in the MinorFU for this functional unit */ + for (unsigned int i = 0; i < description.opClasses->opClasses.size(); + i++) + { + addCapability(description.opClasses->opClasses[i]->opClass, + description.opLat, 1); + } + + for (unsigned int i = 0; i < description.timings.size(); i++) { + MinorFUTiming &timing = *(description.timings[i]); + + if (DTRACE(MinorTiming)) { + std::ostringstream lats; + + unsigned int num_lats = timing.srcRegsRelativeLats.size(); + unsigned int j = 0; + while (j < num_lats) { + lats << timing.srcRegsRelativeLats[j]; + + j++; + if (j != num_lats) + lats << ','; + } + + DPRINTFS(MinorTiming, static_cast<Named *>(this), + "Adding extra timing decode pattern %d to FU" + " mask: %016x match: %016x srcRegLatencies: %s\n", + i, timing.mask, timing.match, lats.str()); + } + } + + const std::vector<unsigned> &cant_forward = + description.cantForwardFromFUIndices; + + /* Setup the bit vector cantForward... 
with the set indices + * specified in the parameters */ + for (auto i = cant_forward.begin(); i != cant_forward.end(); ++i) { + /* Only ever grow the bit vector: the indices are not required to be + * sorted, and resizing down for a smaller index would discard bits + * already set for larger ones */ + if (*i >= cantForwardFromFUIndices.size()) + cantForwardFromFUIndices.resize((*i) + 1, false); + cantForwardFromFUIndices[*i] = true; + } +} + +Cycles +FUPipeline::cyclesBeforeInsert() +{ + /* A nextInsertCycle of 0 means no issue delay is pending */ + if (nextInsertCycle == 0 || timeSource.curCycle() > nextInsertCycle) + return Cycles(0); + else + return nextInsertCycle - timeSource.curCycle(); +} + +bool +FUPipeline::canInsert() const +{ + return nextInsertCycle == 0 || timeSource.curCycle() >= nextInsertCycle; +} + +void +FUPipeline::advance() +{ + bool was_stalled = stalled; + + /* If an instruction was pushed into the pipeline, set the delay before + * the next instruction can follow */ + if (alreadyPushed()) { + if (nextInsertCycle <= timeSource.curCycle()) { + nextInsertCycle = timeSource.curCycle() + description.issueLat; + } + } else if (was_stalled && nextInsertCycle != 0) { + /* Don't count stalled cycles as part of the issue latency */ + ++nextInsertCycle; + } + FUPipelineBase::advance(); +} + +MinorFUTiming * +FUPipeline::findTiming(StaticInstPtr inst) +{ +#if THE_ISA == ARM_ISA + /* This should work for any ISA with a POD mach_inst */ + TheISA::ExtMachInst mach_inst = inst->machInst; +#else + /* Just allow extra decode based on op classes */ + uint64_t mach_inst = 0; +#endif + + const std::vector<MinorFUTiming *> &timings = + description.timings; + unsigned int num_timings = timings.size(); + + for (unsigned int i = 0; i < num_timings; i++) { + MinorFUTiming &timing = *timings[i]; + + if (timing.provides(inst->opClass()) && + (mach_inst & timing.mask) == timing.match) + { + DPRINTFS(MinorTiming, static_cast<Named *>(this), + "Found extra timing 
match (pattern %d '%s')" + " %s %16x (type %s)\n", + i, timing.description, inst->disassemble(0), mach_inst, + typeid(*inst).name()); + + return &timing; + } + } + + if (num_timings != 0) { + DPRINTFS(MinorTiming, static_cast<Named *>(this), + "No extra timing info. 
found for inst: %s" + " mach_inst: %16x\n", + inst->disassemble(0), mach_inst); + } + + return NULL; +} + +} diff --git a/src/cpu/minor/func_unit.hh b/src/cpu/minor/func_unit.hh new file mode 100644 index 000000000..34da579b6 --- /dev/null +++ b/src/cpu/minor/func_unit.hh @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * Execute function unit descriptions and pipeline implementations. + */ + +#ifndef __CPU_MINOR_FUNC_UNIT_HH__ +#define __CPU_MINOR_FUNC_UNIT_HH__ + +#include "cpu/minor/buffers.hh" +#include "cpu/minor/dyn_inst.hh" +#include "cpu/func_unit.hh" +#include "cpu/timing_expr.hh" +#include "params/MinorFU.hh" +#include "params/MinorFUPool.hh" +#include "params/MinorOpClass.hh" +#include "params/MinorOpClassSet.hh" +#include "sim/clocked_object.hh" + +/** Boxing for MinorOpClass to get around a build problem with C++11 but + * also allow for future additions to op class checking */ +class MinorOpClass : public SimObject +{ + public: + OpClass opClass; + + public: + MinorOpClass(const MinorOpClassParams *params) : + SimObject(params), + opClass(params->opClass) + { } +}; + +/** Wrapper for a matchable set of op classes */ +class MinorOpClassSet : public SimObject +{ + public: + std::vector<MinorOpClass *> opClasses; + + /** Convenience packing of opClasses into a bit vector for easier + * testing */ + std::vector<bool> capabilityList; + + public: + MinorOpClassSet(const MinorOpClassSetParams *params); + + public: + /** Does this set support the given op class */ + bool provides(OpClass op_class) { return capabilityList[op_class]; } +}; + +/** Extra timing capability to allow individual ops to have their source + * register dependency latencies tweaked based on the ExtMachInst of 
the + * source instruction. + */ +class MinorFUTiming: public SimObject +{ + public: + /** Mask off the ExtMachInst of an instruction before comparing with + * match. A timing applies when (ExtMachInst & mask) == match */ + uint64_t mask; + uint64_t match; + + /** Textual description of the decode's purpose */ + std::string description; + + /** If true, instructions matching this mask/match should *not* be + * issued in this FU */ + bool suppress; + + /** Extra latency that the instruction should spend at the end of + * the pipeline */ + Cycles extraCommitLat; + TimingExpr *extraCommitLatExpr; + + /** Extra delay that results should show in the scoreboard after + * leaving the pipeline. If set to Cycles(0) for memory references, + * an 'unpredictable' return time will be set in the scoreboard + * blocking following dependent instructions from issuing */ + Cycles extraAssumedLat; + + /** Cycle offsets from the scoreboard delivery times of register values + * for each of this instruction's source registers (in srcRegs order). + * The offsets are subtracted from the scoreboard returnCycle times. + * For example, for an instruction type with 3 source registers, + * [2, 1, 2] will allow the instruction to issue up to 2 cycles early + * for dependencies on the 1st and 3rd register and up to 1 cycle early + * on the 2nd. */ + std::vector<Cycles> srcRegsRelativeLats; + + /** Extra opClasses check (after the FU one) */ + MinorOpClassSet *opClasses; + + public: + MinorFUTiming(const MinorFUTimingParams *params); + + public: + /** Does the extra decode in this object support the given op class */ + bool provides(OpClass op_class) { return opClasses->provides(op_class); } +}; + +/** A functional unit that can execute any of opClasses operations with a + * single op(eration)Lat(ency) and issueLat(ency) associated with the unit + * rather than each operation (as in src/FuncUnit). 
+ * + * This is very similar to cpu/func_unit but replicated here to allow + * the Minor functional units to change without having to disturb the common + * definition. + */ +class MinorFU : public SimObject +{ + public: + MinorOpClassSet *opClasses; + + /** Delay from issuing the operation, to it reaching the + * end of the associated pipeline */ + Cycles opLat; + + /** Delay after issuing an operation before the next + * operation can be issued */ + Cycles issueLat; + + /** FUs which this pipeline can't receive a forwarded (i.e. relative + * latency != 0) result from */ + std::vector<unsigned int> cantForwardFromFUIndices; + + /** Extra timing info to give timings to individual ops */ + std::vector<MinorFUTiming *> timings; + + public: + MinorFU(const MinorFUParams *params) : + SimObject(params), + opClasses(params->opClasses), + opLat(params->opLat), + issueLat(params->issueLat), + cantForwardFromFUIndices(params->cantForwardFromFUIndices), + timings(params->timings) + { } +}; + +/** A collection of MinorFUs */ +class MinorFUPool : public SimObject +{ + public: + std::vector<MinorFU *> funcUnits; + + public: + MinorFUPool(const MinorFUPoolParams *params) : + SimObject(params), + funcUnits(params->funcUnits) + { } +}; + +namespace Minor +{ + +/** Container class to box instructions in the FUs to make those + * queues have correct bubble behaviour when stepped */ +class QueuedInst +{ + public: + MinorDynInstPtr inst; + + public: + QueuedInst(MinorDynInstPtr inst_ = MinorDynInst::bubble()) : + inst(inst_) + { } + + public: + /** Report and bubble interfaces */ + void reportData(std::ostream &os) const; + bool isBubble() const { return inst->isBubble(); } + + static QueuedInst bubble() + { return QueuedInst(MinorDynInst::bubble()); } +}; + +/** Functional units have pipelines which stall when an inst gets to + * their ends allowing Execute::commit to pick up timing-completed insts + * when it feels like it */ +typedef SelfStallingPipeline<QueuedInst, + 
ReportTraitsAdaptor<QueuedInst> > FUPipelineBase; + +/** A functional unit configured from a MinorFU object */ +class FUPipeline : public FUPipelineBase, public FuncUnit +{ + public: + /** Functional unit description that this pipeline implements */ + const MinorFU &description; + + /** An FUPipeline needs access to curCycle, use this timing source */ + ClockedObject &timeSource; + + /** Set of operation classes supported by this FU */ + std::bitset<Num_OpClasses> capabilityList; + + /** FUs which this pipeline can't receive a forwarded (i.e. relative + * latency != 0) result from */ + std::vector<bool> cantForwardFromFUIndices; + + public: + /** When can a new instruction be inserted into the pipeline? This is + * an absolute cycle time unless it is 0, in which case an + * instruction can be pushed straight away */ + Cycles nextInsertCycle; + + public: + FUPipeline(const std::string &name, const MinorFU &description_, + ClockedObject &timeSource_); + + public: + /** How many cycles must elapse from curCycle before insertion into + * the pipeline is allowed */ + Cycles cyclesBeforeInsert(); + + /** Can an instruction be inserted now? */ + bool canInsert() const; + + /** Find the extra timing information for this instruction. Returns + * NULL if no decode info. is found */ + MinorFUTiming *findTiming(StaticInstPtr inst); + + /** Step the pipeline. Allow multiple steps? 
*/ + void advance(); +}; + +} + +#endif /* __CPU_MINOR_FUNC_UNIT_HH__ */ diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc new file mode 100644 index 000000000..c5e38c78d --- /dev/null +++ b/src/cpu/minor/lsq.cc @@ -0,0 +1,1614 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +#include <iomanip> +#include <sstream> + +#include "arch/locked_mem.hh" +#include "arch/mmapped_ipr.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/exec_context.hh" +#include "cpu/minor/execute.hh" +#include "cpu/minor/lsq.hh" +#include "cpu/minor/pipeline.hh" +#include "debug/Activity.hh" +#include "debug/MinorMem.hh" + +namespace Minor +{ + +/** Returns the offset of addr into an aligned block of size block_size. + * The offset is computed with a bit mask, which only yields the correct + * result when block_size is a power of two */ +static Addr +addrBlockOffset(Addr addr, unsigned int block_size) +{ + return addr & (block_size - 1); +} + +/** Returns true if the given [addr .. 
addr+size-1] transfer needs to be + * fragmented across a block size of block_size */ +static bool +transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size) +{ + return (addrBlockOffset(addr, block_size) + size) > block_size; +} + +LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_, + PacketDataPtr data_, uint64_t *res_) : + SenderState(), + port(port_), + inst(inst_), + isLoad(isLoad_), + data(data_), + packet(NULL), + request(), + fault(NoFault), + res(res_), + skipped(false), + issuedToMemory(false), + state(NotIssued) +{ } + +/** Classify how much of request 2's byte range [req2_addr, req2_addr + + * req2_size) is covered by request 1's range: none of it, all of it, or + * only part of it */ +LSQ::AddrRangeCoverage +LSQ::LSQRequest::containsAddrRangeOf( + Addr req1_addr, unsigned int req1_size, + Addr req2_addr, unsigned int req2_size) +{ + /* 'end' here means the address of the byte just past the request + * blocks */ + Addr req2_end_addr = req2_addr + req2_size; + Addr req1_end_addr = req1_addr + req1_size; + + AddrRangeCoverage ret; + + /* The end addresses are exclusive, so ranges that merely touch + * (one starts exactly where the other ends) share no bytes and must + * be reported as not overlapping */ + if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr) + ret = NoAddrRangeCoverage; + else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr) + ret = FullAddrRangeCoverage; + else + ret = PartialAddrRangeCoverage; + + return ret; +} + +LSQ::AddrRangeCoverage +LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request) +{ + return containsAddrRangeOf(request.getPaddr(), request.getSize(), + other_request->request.getPaddr(), other_request->request.getSize()); +} + +bool +LSQ::LSQRequest::isBarrier() +{ + return inst->isInst() && inst->staticInst->isMemBarrier(); +} + +bool +LSQ::LSQRequest::needsToBeSentToStoreBuffer() +{ + return state == StoreToStoreBuffer; +} + +void 
+LSQ::LSQRequest::setState(LSQRequestState new_state) +{ + DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:" + " %s\n", state, new_state, *inst); + state = new_state; +} + +bool +LSQ::LSQRequest::isComplete() const +{ + /* @todo, There is currently only one 'completed' state. 
This + * may not be a good choice */ + return state == Complete; +} + +void +LSQ::LSQRequest::reportData(std::ostream &os) const +{ + os << (isLoad ? 'R' : 'W') << ';'; + inst->reportData(os); + os << ';' << state; +} + +std::ostream & +operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage) +{ + switch (coverage) { + case LSQ::PartialAddrRangeCoverage: + os << "PartialAddrRangeCoverage"; + break; + case LSQ::FullAddrRangeCoverage: + os << "FullAddrRangeCoverage"; + break; + case LSQ::NoAddrRangeCoverage: + os << "NoAddrRangeCoverage"; + break; + default: + os << "AddrRangeCoverage-" << static_cast<int>(coverage); + break; + } + return os; +} + +std::ostream & +operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state) +{ + switch (state) { + case LSQ::LSQRequest::NotIssued: + os << "NotIssued"; + break; + case LSQ::LSQRequest::InTranslation: + os << "InTranslation"; + break; + case LSQ::LSQRequest::Translated: + os << "Translated"; + break; + case LSQ::LSQRequest::Failed: + os << "Failed"; + break; + case LSQ::LSQRequest::RequestIssuing: + os << "RequestIssuing"; + break; + case LSQ::LSQRequest::StoreToStoreBuffer: + os << "StoreToStoreBuffer"; + break; + case LSQ::LSQRequest::StoreInStoreBuffer: + os << "StoreInStoreBuffer"; + break; + case LSQ::LSQRequest::StoreBufferIssuing: + os << "StoreBufferIssuing"; + break; + case LSQ::LSQRequest::RequestNeedsRetry: + os << "RequestNeedsRetry"; + break; + case LSQ::LSQRequest::StoreBufferNeedsRetry: + os << "StoreBufferNeedsRetry"; + break; + case LSQ::LSQRequest::Complete: + os << "Complete"; + break; + default: + os << "LSQRequestState-" << static_cast<int>(state); + break; + } + return os; +} + +void +LSQ::clearMemBarrier(MinorDynInstPtr inst) +{ + bool is_last_barrier = inst->id.execSeqNum >= lastMemBarrier; + + DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n", + (is_last_barrier ? 
"last" : "a"), *inst); + + if (is_last_barrier) + lastMemBarrier = 0; +} + +void +LSQ::SingleDataRequest::finish(Fault fault_, RequestPtr request_, + ThreadContext *tc, BaseTLB::Mode mode) +{ + fault = fault_; + + port.numAccessesInDTLB--; + + DPRINTFS(MinorMem, (&port), "Received translation response for" + " request: %s\n", *inst); + + makePacket(); + + setState(Translated); + port.tryToSendToTransfers(this); + + /* Let's try and wake up the processor for the next cycle */ + port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); +} + +void +LSQ::SingleDataRequest::startAddrTranslation() +{ + ThreadContext *thread = port.cpu.getContext( + inst->id.threadId); + + port.numAccessesInDTLB++; + + setState(LSQ::LSQRequest::InTranslation); + + DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n"); + /* Submit the translation request. The response will come through + * finish/markDelayed on the LSQRequest as it bears the Translation + * interface */ + thread->getDTBPtr()->translateTiming( + &request, thread, this, (isLoad ? 
BaseTLB::Read : BaseTLB::Write)); +} + +void +LSQ::SingleDataRequest::retireResponse(PacketPtr packet_) +{ + DPRINTFS(MinorMem, (&port), "Retiring packet\n"); + packet = packet_; + packetInFlight = false; + setState(Complete); +} + +void +LSQ::SplitDataRequest::finish(Fault fault_, RequestPtr request_, + ThreadContext *tc, BaseTLB::Mode mode) +{ + fault = fault_; + + port.numAccessesInDTLB--; + + unsigned int M5_VAR_USED expected_fragment_index = + numTranslatedFragments; + + numInTranslationFragments--; + numTranslatedFragments++; + + DPRINTFS(MinorMem, (&port), "Received translation response for fragment" + " %d of request: %s\n", expected_fragment_index, *inst); + + assert(request_ == fragmentRequests[expected_fragment_index]); + + /* Wake up next cycle to get things going again in case the + * tryToSendToTransfers does take */ + port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); + + if (fault != NoFault) { + /* tryToSendToTransfers will handle the fault */ + + DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:" + " %d of request: %s\n", + expected_fragment_index, *inst); + + setState(Translated); + port.tryToSendToTransfers(this); + } else if (numTranslatedFragments == numFragments) { + makeFragmentPackets(); + + setState(Translated); + port.tryToSendToTransfers(this); + } else { + /* Avoid calling translateTiming from within ::finish */ + assert(!translationEvent.scheduled()); + port.cpu.schedule(translationEvent, curTick()); + } +} + +LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_, + bool isLoad_, PacketDataPtr data_, uint64_t *res_) : + LSQRequest(port_, inst_, isLoad_, data_, res_), + translationEvent(*this), + numFragments(0), + numInTranslationFragments(0), + numTranslatedFragments(0), + numIssuedFragments(0), + numRetiredFragments(0), + fragmentRequests(), + fragmentPackets() +{ + /* Don't know how many elements are needed until the request is + * populated by the caller. 
*/ +} + +LSQ::SplitDataRequest::~SplitDataRequest() +{ + for (auto i = fragmentRequests.begin(); + i != fragmentRequests.end(); i++) + { + delete *i; + } + + for (auto i = fragmentPackets.begin(); + i != fragmentPackets.end(); i++) + { + delete *i; + } +} + +void +LSQ::SplitDataRequest::makeFragmentRequests() +{ + Addr base_addr = request.getVaddr(); + unsigned int whole_size = request.getSize(); + unsigned int line_width = port.lineWidth; + + unsigned int fragment_size; + Addr fragment_addr; + + /* Assume that this transfer is across potentially many block snap + * boundaries: + * + * | _|________|________|________|___ | + * | |0| 1 | 2 | 3 | 4 | | + * | |_|________|________|________|___| | + * | | | | | | + * + * The first transfer (0) can be up to lineWidth in size. + * All the middle transfers (1-3) are lineWidth in size + * The last transfer (4) can be from zero to lineWidth - 1 in size + */ + unsigned int first_fragment_offset = + addrBlockOffset(base_addr, line_width); + unsigned int last_fragment_size = + addrBlockOffset(base_addr + whole_size, line_width); + unsigned int first_fragment_size = + line_width - first_fragment_offset; + + unsigned int middle_fragments_total_size = + whole_size - (first_fragment_size + last_fragment_size); + + assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0); + + unsigned int middle_fragment_count = + middle_fragments_total_size / line_width; + + numFragments = 1 /* first */ + middle_fragment_count + + (last_fragment_size == 0 ? 0 : 1); + + DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests." + " First fragment size: %d Last fragment size: %d\n", + numFragments, first_fragment_size, + (last_fragment_size == 0 ? 
line_width : last_fragment_size)); + + assert(((middle_fragment_count * line_width) + + first_fragment_size + last_fragment_size) == whole_size); + + fragment_addr = base_addr; + fragment_size = first_fragment_size; + + /* Just past the last address in the request */ + Addr end_addr = base_addr + whole_size; + + for (unsigned int fragment_index = 0; fragment_index < numFragments; + fragment_index++) + { + bool M5_VAR_USED is_last_fragment = false; + + if (fragment_addr == base_addr) { + /* First fragment */ + fragment_size = first_fragment_size; + } else { + if ((fragment_addr + line_width) > end_addr) { + /* Adjust size of last fragment */ + fragment_size = end_addr - fragment_addr; + is_last_fragment = true; + } else { + /* Middle fragments */ + fragment_size = line_width; + } + } + + Request *fragment = new Request(); + + fragment->setThreadContext(request.contextId(), /* thread id */ 0); + fragment->setVirt(0 /* asid */, + fragment_addr, fragment_size, request.getFlags(), + request.masterId(), + request.getPC()); + + DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x size: %d" + " (whole request addr: 0x%x size: %d) %s\n", + fragment_addr, fragment_size, base_addr, whole_size, + (is_last_fragment ? "last fragment" : "")); + + fragment_addr += fragment_size; + + fragmentRequests.push_back(fragment); + } +} + +void +LSQ::SplitDataRequest::makeFragmentPackets() +{ + Addr base_addr = request.getVaddr(); + + DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst); + + for (unsigned int fragment_index = 0; fragment_index < numFragments; + fragment_index++) + { + Request *fragment = fragmentRequests[fragment_index]; + + DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s" + " (%d, 0x%x)\n", + fragment_index, *inst, + (fragment->hasPaddr() ? "has paddr" : "no paddr"), + (fragment->hasPaddr() ? 
fragment->getPaddr() : 0)); + + Addr fragment_addr = fragment->getVaddr(); + unsigned int fragment_size = fragment->getSize(); + + uint8_t *request_data = NULL; + + if (!isLoad) { + /* Split data for Packets. Will become the property of the + * outgoing Packets */ + request_data = new uint8_t[fragment_size]; + std::memcpy(request_data, data + (fragment_addr - base_addr), + fragment_size); + } + + assert(fragment->hasPaddr()); + + PacketPtr fragment_packet = + makePacketForRequest(*fragment, isLoad, this, request_data); + + fragmentPackets.push_back(fragment_packet); + } + + /* Might as well make the overall/response packet here */ + /* Get the physical address for the whole request/packet from the first + * fragment */ + request.setPaddr(fragmentRequests[0]->getPaddr()); + makePacket(); +} + +void +LSQ::SplitDataRequest::startAddrTranslation() +{ + setState(LSQ::LSQRequest::InTranslation); + + makeFragmentRequests(); + + numInTranslationFragments = 0; + numTranslatedFragments = 0; + + /* @todo, just do these in sequence for now with + * a loop of: + * do { + * sendNextFragmentToTranslation ; translateTiming ; finish + * } while (numTranslatedFragments != numFragments); + */ + + /* Do first translation */ + sendNextFragmentToTranslation(); +} + +PacketPtr +LSQ::SplitDataRequest::getHeadPacket() +{ + assert(numIssuedFragments < numFragments); + + return fragmentPackets[numIssuedFragments]; +} + +void +LSQ::SplitDataRequest::stepToNextPacket() +{ + assert(numIssuedFragments < numFragments); + + numIssuedFragments++; +} + +void +LSQ::SplitDataRequest::retireResponse(PacketPtr response) +{ + assert(numRetiredFragments < numFragments); + + DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d" + " offset: 0x%x (retired fragment num: %d) %s\n", + response->req->getVaddr(), response->req->getSize(), + request.getVaddr() - response->req->getVaddr(), + numRetiredFragments, + (fault == NoFault ? 
"" : fault->name())); + + numRetiredFragments++; + + if (skipped) { + /* Skip because we already knew the request had faulted or been + * skipped */ + DPRINTFS(MinorMem, (&port), "Skipping this fragment\n"); + } else if (response->isError()) { + /* Mark up the error and leave to execute to handle it */ + DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n"); + setSkipped(); + packet->copyError(response); + } else { + if (isLoad) { + if (!data) { + /* For a split transfer, a Packet must be constructed + * to contain all returning data. This is that packet's + * data */ + data = new uint8_t[request.getSize()]; + } + + /* Populate the portion of the overall response data represented + * by the response fragment */ + std::memcpy( + data + (response->req->getVaddr() - request.getVaddr()), + response->getPtr<uint8_t>(), + response->req->getSize()); + } + } + + /* Complete early if we're skipping are no more in-flight accesses */ + if (skipped && !hasPacketsInMemSystem()) { + DPRINTFS(MinorMem, (&port), "Completed skipped burst\n"); + setState(Complete); + if (packet->needsResponse()) + packet->makeResponse(); + } + + if (numRetiredFragments == numFragments) + setState(Complete); + + if (!skipped && isComplete()) { + DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL); + + DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d" + " needsResponse: %d packetSize: %s requestSize: %s responseSize:" + " %s\n", packet->isRead(), packet->isWrite(), + packet->needsResponse(), packet->getSize(), request.getSize(), + response->getSize()); + + /* A request can become complete by several paths, this is a sanity + * check to make sure the packet's data is created */ + if (!data) { + data = new uint8_t[request.getSize()]; + } + + if (isLoad) { + DPRINTFS(MinorMem, (&port), "Copying read data\n"); + std::memcpy(packet->getPtr<uint8_t>(), data, request.getSize()); + } + packet->makeResponse(); + } + + /* Packets are all deallocated together in 
~SplitLSQRequest */ +} + +void +LSQ::SplitDataRequest::sendNextFragmentToTranslation() +{ + unsigned int fragment_index = numTranslatedFragments; + + ThreadContext *thread = port.cpu.getContext( + inst->id.threadId); + + DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n", + fragment_index); + + port.numAccessesInDTLB++; + numInTranslationFragments++; + + thread->getDTBPtr()->translateTiming( + fragmentRequests[fragment_index], thread, this, (isLoad ? + BaseTLB::Read : BaseTLB::Write)); +} + +bool +LSQ::StoreBuffer::canInsert() const +{ + /* @todo, support store amalgamation */ + return slots.size() < numSlots; +} + +void +LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request) +{ + auto found = std::find(slots.begin(), slots.end(), request); + + if (found != slots.end()) { + DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n", + request, *found, *(request->inst)); + slots.erase(found); + + delete request; + } +} + +void +LSQ::StoreBuffer::insert(LSQRequestPtr request) +{ + if (!canInsert()) { + warn("%s: store buffer insertion without space to insert from" + " inst: %s\n", name(), *(request->inst)); + } + + DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request); + + numUnissuedAccesses++; + + if (request->state != LSQRequest::Complete) + request->setState(LSQRequest::StoreInStoreBuffer); + + slots.push_back(request); + + /* Let's try and wake up the processor for the next cycle to step + * the store buffer */ + lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); +} + +LSQ::AddrRangeCoverage +LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request, + unsigned int &found_slot) +{ + unsigned int slot_index = slots.size() - 1; + auto i = slots.rbegin(); + AddrRangeCoverage ret = NoAddrRangeCoverage; + + /* Traverse the store buffer in reverse order (most to least recent) + * and try to find a slot whose address range overlaps this request */ + while (ret == NoAddrRangeCoverage && i != slots.rend()) { + LSQRequestPtr 
slot = *i; + + if (slot->packet) { + AddrRangeCoverage coverage = slot->containsAddrRangeOf(request); + + if (coverage != NoAddrRangeCoverage) { + DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:" + " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n", + slot_index, coverage, + request->request.getPaddr(), request->request.getSize(), + slot->request.getPaddr(), slot->request.getSize()); + + found_slot = slot_index; + ret = coverage; + } + } + + i++; + slot_index--; + } + + return ret; +} + +/** Fill the given packet with appropriate date from slot slot_number */ +void +LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load, + unsigned int slot_number) +{ + assert(slot_number < slots.size()); + assert(load->packet); + assert(load->isLoad); + + LSQRequestPtr store = slots[slot_number]; + + assert(store->packet); + assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage); + + Addr load_addr = load->request.getPaddr(); + Addr store_addr = store->request.getPaddr(); + Addr addr_offset = load_addr - store_addr; + + unsigned int load_size = load->request.getSize(); + + DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer" + " slot: %d addr: 0x%x addressOffset: 0x%x\n", + load_size, load_addr, slot_number, + store_addr, addr_offset); + + void *load_packet_data = load->packet->getPtr<void>(); + void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset; + + std::memcpy(load_packet_data, store_packet_data, load_size); +} + +void +LSQ::StoreBuffer::step() +{ + DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n", + numUnissuedAccesses); + + if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) { + /* Clear all the leading barriers */ + while (!slots.empty() && + slots.front()->isComplete() && slots.front()->isBarrier()) + { + LSQRequestPtr barrier = slots.front(); + + DPRINTF(MinorMem, "Clearing barrier for inst: %s\n", + *(barrier->inst)); + + numUnissuedAccesses--; + lsq.clearMemBarrier(barrier->inst); 
+ slots.pop_front(); + + delete barrier; + } + + auto i = slots.begin(); + bool issued = true; + unsigned int issue_count = 0; + + /* Skip trying if the memory system is busy */ + if (lsq.state == LSQ::MemoryNeedsRetry) + issued = false; + + /* Try to issue all stores in order starting from the head + * of the queue. Responses are allowed to be retired + * out of order */ + while (issued && + issue_count < storeLimitPerCycle && + lsq.canSendToMemorySystem() && + i != slots.end()) + { + LSQRequestPtr request = *i; + + DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d" + " state: %s\n", + *(request->inst), request->sentAllPackets(), + request->state); + + if (request->isBarrier() && request->isComplete()) { + /* Give up at barriers */ + issued = false; + } else if (!(request->state == LSQRequest::StoreBufferIssuing && + request->sentAllPackets())) + { + DPRINTF(MinorMem, "Trying to send request: %s to memory" + " system\n", *(request->inst)); + + if (lsq.tryToSend(request)) { + /* Barrier are accounted for as they are cleared from + * the queue, not after their transfers are complete */ + if (!request->isBarrier()) + numUnissuedAccesses--; + issue_count++; + } else { + /* Don't step on to the next store buffer entry if this + * one hasn't issued all its packets as the store + * buffer must still enforce ordering */ + issued = false; + } + } + i++; + } + } +} + +void +LSQ::completeMemBarrierInst(MinorDynInstPtr inst, + bool committed) +{ + if (committed) { + /* Not already sent to the store buffer as a store request? 
*/ + if (!inst->inStoreBuffer) { + /* Insert an entry into the store buffer to tick off barriers + * until there are none in flight */ + storeBuffer.insert(new BarrierDataRequest(*this, inst)); + } + } else { + /* Clear the barrier anyway if it wasn't actually committed */ + clearMemBarrier(inst); + } +} + +void +LSQ::StoreBuffer::minorTrace() const +{ + unsigned int size = slots.size(); + unsigned int i = 0; + std::ostringstream os; + + while (i < size) { + LSQRequestPtr request = slots[i]; + + request->reportData(os); + + i++; + if (i < numSlots) + os << ','; + } + + while (i < numSlots) { + os << '-'; + + i++; + if (i < numSlots) + os << ','; + } + + MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(), + numUnissuedAccesses); +} + +void +LSQ::tryToSendToTransfers(LSQRequestPtr request) +{ + if (state == MemoryNeedsRetry) { + DPRINTF(MinorMem, "Request needs retry, not issuing to" + " memory until retry arrives\n"); + return; + } + + if (request->state == LSQRequest::InTranslation) { + DPRINTF(MinorMem, "Request still in translation, not issuing to" + " memory\n"); + return; + } + + assert(request->state == LSQRequest::Translated || + request->state == LSQRequest::RequestIssuing || + request->state == LSQRequest::Failed || + request->state == LSQRequest::Complete); + + if (requests.empty() || requests.front() != request) { + DPRINTF(MinorMem, "Request not at front of requests queue, can't" + " issue to memory\n"); + return; + } + + if (transfers.unreservedRemainingSpace() == 0) { + DPRINTF(MinorMem, "No space to insert request into transfers" + " queue\n"); + return; + } + + if (request->isComplete() || request->state == LSQRequest::Failed) { + DPRINTF(MinorMem, "Passing a %s transfer on to transfers" + " queue\n", (request->isComplete() ? 
"completed" : "failed")); + request->setState(LSQRequest::Complete); + request->setSkipped(); + moveFromRequestsToTransfers(request); + return; + } + + if (!execute.instIsRightStream(request->inst)) { + /* Wrong stream, try to abort the transfer but only do so if + * there are no packets in flight */ + if (request->hasPacketsInMemSystem()) { + DPRINTF(MinorMem, "Request's inst. is from the wrong stream," + " waiting for responses before aborting request\n"); + } else { + DPRINTF(MinorMem, "Request's inst. is from the wrong stream," + " aborting request\n"); + request->setState(LSQRequest::Complete); + request->setSkipped(); + moveFromRequestsToTransfers(request); + } + return; + } + + if (request->fault != NoFault) { + if (request->inst->staticInst->isPrefetch()) { + DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n"); + } + DPRINTF(MinorMem, "Moving faulting request into the transfers" + " queue\n"); + request->setState(LSQRequest::Complete); + request->setSkipped(); + moveFromRequestsToTransfers(request); + return; + } + + bool is_load = request->isLoad; + bool is_llsc = request->request.isLLSC(); + bool is_swap = request->request.isSwap(); + bool bufferable = !(request->request.isUncacheable() || + is_llsc || is_swap); + + if (is_load) { + if (numStoresInTransfers != 0) { + DPRINTF(MinorMem, "Load request with stores still in transfers" + " queue, stalling\n"); + return; + } + } else { + /* Store. Can it be sent to the store buffer? 
*/ + if (bufferable && !request->request.isMmappedIpr()) { + request->setState(LSQRequest::StoreToStoreBuffer); + moveFromRequestsToTransfers(request); + DPRINTF(MinorMem, "Moving store into transfers queue\n"); + return; + } + } + + /* Check if this is the head instruction (and so must be executable as + * its stream sequence number was checked above) for loads which must + * not be speculatively issued and stores which must be issued here */ + if (!bufferable) { + if (!execute.instIsHeadInst(request->inst)) { + DPRINTF(MinorMem, "Memory access not the head inst., can't be" + " sure it can be performed, not issuing\n"); + return; + } + + unsigned int forwarding_slot = 0; + + if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) != + NoAddrRangeCoverage) + { + DPRINTF(MinorMem, "Memory access can receive forwarded data" + " from the store buffer, need to wait for store buffer to" + " drain\n"); + return; + } + } + + /* True: submit this packet to the transfers queue to be sent to the + * memory system. + * False: skip the memory and push a packet for this request onto + * requests */ + bool do_access = true; + + if (!is_llsc) { + /* Check for match in the store buffer */ + if (is_load) { + unsigned int forwarding_slot = 0; + AddrRangeCoverage forwarding_result = + storeBuffer.canForwardDataToLoad(request, + forwarding_slot); + + switch (forwarding_result) { + case FullAddrRangeCoverage: + /* Forward data from the store buffer into this request and + * repurpose this request's packet into a response packet */ + storeBuffer.forwardStoreData(request, forwarding_slot); + request->packet->makeResponse(); + + /* Just move between queues, no access */ + do_access = false; + break; + case PartialAddrRangeCoverage: + DPRINTF(MinorMem, "Load partly satisfied by store buffer" + " data. 
Must wait for the store to complete\n"); + return; + break; + case NoAddrRangeCoverage: + DPRINTF(MinorMem, "No forwardable data from store buffer\n"); + /* Fall through to try access */ + break; + } + } + } else { + if (!canSendToMemorySystem()) { + DPRINTF(MinorMem, "Can't send request to memory system yet\n"); + return; + } + + SimpleThread &thread = *cpu.threads[request->inst->id.threadId]; + + TheISA::PCState old_pc = thread.pcState(); + ExecContext context(cpu, thread, execute, request->inst); + + /* Handle LLSC requests and tests */ + if (is_load) { + TheISA::handleLockedRead(&context, &request->request); + } else { + do_access = TheISA::handleLockedWrite(&context, + &request->request, cacheBlockMask); + + if (!do_access) { + DPRINTF(MinorMem, "Not perfoming a memory " + "access for store conditional\n"); + } + } + thread.pcState(old_pc); + } + + /* See the do_access comment above */ + if (do_access) { + if (!canSendToMemorySystem()) { + DPRINTF(MinorMem, "Can't send request to memory system yet\n"); + return; + } + + /* Remember if this is an access which can't be idly + * discarded by an interrupt */ + if (!bufferable) { + numAccessesIssuedToMemory++; + request->issuedToMemory = true; + } + + if (tryToSend(request)) + moveFromRequestsToTransfers(request); + } else { + request->setState(LSQRequest::Complete); + moveFromRequestsToTransfers(request); + } +} + +bool +LSQ::tryToSend(LSQRequestPtr request) +{ + bool ret = false; + + if (!canSendToMemorySystem()) { + DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n", + *(request->inst)); + } else { + PacketPtr packet = request->getHeadPacket(); + + DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n", + *(request->inst), packet->req->getVaddr()); + + /* The sender state of the packet *must* be an LSQRequest + * so the response can be correctly handled */ + assert(packet->findNextSenderState<LSQRequest>()); + + if (request->request.isMmappedIpr()) { + ThreadContext *thread = + 
cpu.getContext(request->request.threadId()); + + if (request->isLoad) { + DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst)); + TheISA::handleIprRead(thread, packet); + } else { + DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst)); + TheISA::handleIprWrite(thread, packet); + } + + request->stepToNextPacket(); + ret = request->sentAllPackets(); + + if (!ret) { + DPRINTF(MinorMem, "IPR access has another packet: %s\n", + *(request->inst)); + } + + if (ret) + request->setState(LSQRequest::Complete); + else + request->setState(LSQRequest::RequestIssuing); + } else if (dcachePort.sendTimingReq(packet)) { + DPRINTF(MinorMem, "Sent data memory request\n"); + + numAccessesInMemorySystem++; + + request->stepToNextPacket(); + + ret = request->sentAllPackets(); + + switch (request->state) { + case LSQRequest::Translated: + case LSQRequest::RequestIssuing: + /* Fully or partially issued a request in the transfers + * queue */ + request->setState(LSQRequest::RequestIssuing); + break; + case LSQRequest::StoreInStoreBuffer: + case LSQRequest::StoreBufferIssuing: + /* Fully or partially issued a request in the store + * buffer */ + request->setState(LSQRequest::StoreBufferIssuing); + break; + default: + assert(false); + break; + } + + state = MemoryRunning; + } else { + DPRINTF(MinorMem, + "Sending data memory request - needs retry\n"); + + /* Needs to be resent, wait for that */ + state = MemoryNeedsRetry; + retryRequest = request; + + switch (request->state) { + case LSQRequest::Translated: + case LSQRequest::RequestIssuing: + request->setState(LSQRequest::RequestNeedsRetry); + break; + case LSQRequest::StoreInStoreBuffer: + case LSQRequest::StoreBufferIssuing: + request->setState(LSQRequest::StoreBufferNeedsRetry); + break; + default: + assert(false); + break; + } + } + } + + return ret; +} + +void +LSQ::moveFromRequestsToTransfers(LSQRequestPtr request) +{ + assert(!requests.empty() && requests.front() == request); + assert(transfers.unreservedRemainingSpace() 
!= 0); + + /* Need to count the number of stores in the transfers + * queue so that loads know when their store buffer forwarding + * results will be correct (only when all those stores + * have reached the store buffer) */ + if (!request->isLoad) + numStoresInTransfers++; + + requests.pop(); + transfers.push(request); +} + +bool +LSQ::canSendToMemorySystem() +{ + return state == MemoryRunning && + numAccessesInMemorySystem < inMemorySystemLimit; +} + +bool +LSQ::recvTimingResp(PacketPtr response) +{ + LSQRequestPtr request = + safe_cast<LSQRequestPtr>(response->popSenderState()); + + DPRINTF(MinorMem, "Received response packet inst: %s" + " addr: 0x%x cmd: %s\n", + *(request->inst), response->getAddr(), + response->cmd.toString()); + + numAccessesInMemorySystem--; + + if (response->isError()) { + DPRINTF(MinorMem, "Received error response packet: %s\n", + *request->inst); + } + + switch (request->state) { + case LSQRequest::RequestIssuing: + case LSQRequest::RequestNeedsRetry: + /* Response to a request from the transfers queue */ + request->retireResponse(response); + + DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n", + request->hasPacketsInMemSystem(), request->isComplete()); + + break; + case LSQRequest::StoreBufferIssuing: + case LSQRequest::StoreBufferNeedsRetry: + /* Response to a request from the store buffer */ + request->retireResponse(response); + + /* Remove completed requests unless they are barrier (which will + * need to be removed in order */ + if (request->isComplete()) { + if (!request->isBarrier()) { + storeBuffer.deleteRequest(request); + } else { + DPRINTF(MinorMem, "Completed transfer for barrier: %s" + " leaving the request as it is also a barrier\n", + *(request->inst)); + } + } + break; + default: + /* Shouldn't be allowed to receive a response from another + * state */ + assert(false); + break; + } + + /* We go to idle even if there are more things in the requests queue + * as it's the job of step to actually step us on to the next + 
* transaction */ + + /* Let's try and wake up the processor for the next cycle */ + cpu.wakeupOnEvent(Pipeline::ExecuteStageId); + + /* Never busy */ + return true; +} + +void +LSQ::recvRetry() +{ + DPRINTF(MinorMem, "Received retry request\n"); + + assert(state == MemoryNeedsRetry); + + switch (retryRequest->state) { + case LSQRequest::RequestNeedsRetry: + /* Retry in the requests queue */ + retryRequest->setState(LSQRequest::Translated); + break; + case LSQRequest::StoreBufferNeedsRetry: + /* Retry in the store buffer */ + retryRequest->setState(LSQRequest::StoreInStoreBuffer); + break; + default: + assert(false); + } + + /* Set state back to MemoryRunning so that the following + * tryToSend can actually send. Note that this won't + * allow another transfer in as tryToSend should + * issue a memory request and either succeed for this + * request or return the LSQ back to MemoryNeedsRetry */ + state = MemoryRunning; + + /* Try to resend the request */ + if (tryToSend(retryRequest)) { + /* Successfully sent, need to move the request */ + switch (retryRequest->state) { + case LSQRequest::RequestIssuing: + /* In the requests queue */ + moveFromRequestsToTransfers(retryRequest); + break; + case LSQRequest::StoreBufferIssuing: + /* In the store buffer */ + storeBuffer.numUnissuedAccesses--; + break; + default: + assert(false); + break; + } + } + + retryRequest = NULL; +} + +LSQ::LSQ(std::string name_, std::string dcache_port_name_, + MinorCPU &cpu_, Execute &execute_, + unsigned int in_memory_system_limit, unsigned int line_width, + unsigned int requests_queue_size, unsigned int transfers_queue_size, + unsigned int store_buffer_size, + unsigned int store_buffer_cycle_store_limit) : + Named(name_), + cpu(cpu_), + execute(execute_), + dcachePort(dcache_port_name_, *this, cpu_), + lastMemBarrier(0), + state(MemoryRunning), + inMemorySystemLimit(in_memory_system_limit), + lineWidth((line_width == 0 ? 
cpu.cacheLineSize() : line_width)), + requests(name_ + ".requests", "addr", requests_queue_size), + transfers(name_ + ".transfers", "addr", transfers_queue_size), + storeBuffer(name_ + ".storeBuffer", + *this, store_buffer_size, store_buffer_cycle_store_limit), + numAccessesInMemorySystem(0), + numAccessesInDTLB(0), + numStoresInTransfers(0), + numAccessesIssuedToMemory(0), + retryRequest(NULL), + cacheBlockMask(~(cpu_.cacheLineSize() - 1)) +{ + if (in_memory_system_limit < 1) { + fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_, + in_memory_system_limit); + } + + if (store_buffer_cycle_store_limit < 1) { + fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be" + " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit); + } + + if (requests_queue_size < 1) { + fatal("%s: executeLSQRequestsQueueSize must be" + " >= 1 (%d)\n", name_, requests_queue_size); + } + + if (transfers_queue_size < 1) { + fatal("%s: executeLSQTransfersQueueSize must be" + " >= 1 (%d)\n", name_, transfers_queue_size); + } + + if (store_buffer_size < 1) { + fatal("%s: executeLSQStoreBufferSize must be" + " >= 1 (%d)\n", name_, store_buffer_size); + } + + if ((lineWidth & (lineWidth - 1)) != 0) { + fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth); + } +} + +LSQ::~LSQ() +{ } + +LSQ::LSQRequest::~LSQRequest() +{ + if (packet) + delete packet; + if (data) + delete [] data; +} + +/** + * Step the memory access mechanism on to its next state. In reality, most + * of the stepping is done by the callbacks on the LSQ but this + * function is responsible for issuing memory requests lodged in the + * requests queue. 
+ */ +void +LSQ::step() +{ + /* Try to move address-translated requests between queues and issue + * them */ + if (!requests.empty()) + tryToSendToTransfers(requests.front()); + + storeBuffer.step(); +} + +LSQ::LSQRequestPtr +LSQ::findResponse(MinorDynInstPtr inst) +{ + LSQ::LSQRequestPtr ret = NULL; + + if (!transfers.empty()) { + LSQRequestPtr request = transfers.front(); + + /* Same instruction and complete access or a store that's + * capable of being moved to the store buffer */ + if (request->inst->id == inst->id) { + if (request->isComplete() || + (request->state == LSQRequest::StoreToStoreBuffer && + storeBuffer.canInsert())) + { + ret = request; + } + } + } + + if (ret) { + DPRINTF(MinorMem, "Found matching memory response for inst: %s\n", + *inst); + } else { + DPRINTF(MinorMem, "No matching memory response for inst: %s\n", + *inst); + } + + return ret; +} + +void +LSQ::popResponse(LSQ::LSQRequestPtr response) +{ + assert(!transfers.empty() && transfers.front() == response); + + transfers.pop(); + + if (!response->isLoad) + numStoresInTransfers--; + + if (response->issuedToMemory) + numAccessesIssuedToMemory--; + + if (response->state != LSQRequest::StoreInStoreBuffer) { + DPRINTF(MinorMem, "Deleting %s request: %s\n", + (response->isLoad ? 
"load" : "store"), + *(response->inst)); + + delete response; + } +} + +void +LSQ::sendStoreToStoreBuffer(LSQRequestPtr request) +{ + assert(request->state == LSQRequest::StoreToStoreBuffer); + + DPRINTF(MinorMem, "Sending store: %s to store buffer\n", + *(request->inst)); + + request->inst->inStoreBuffer = true; + + storeBuffer.insert(request); +} + +bool +LSQ::isDrained() +{ + return requests.empty() && transfers.empty() && + storeBuffer.isDrained(); +} + +bool +LSQ::needsToTick() +{ + bool ret = false; + + if (canSendToMemorySystem()) { + bool have_translated_requests = !requests.empty() && + requests.front()->state != LSQRequest::InTranslation && + transfers.unreservedRemainingSpace() != 0; + + ret = have_translated_requests || + storeBuffer.numUnissuedStores() != 0; + } + + if (ret) + DPRINTF(Activity, "Need to tick\n"); + + return ret; +} + +void +LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, + unsigned int size, Addr addr, unsigned int flags, uint64_t *res) +{ + bool needs_burst = transferNeedsBurst(addr, size, lineWidth); + LSQRequestPtr request; + + /* Copy given data into the request. The request will pass this to the + * packet and then it will own the data */ + uint8_t *request_data = NULL; + + DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:" + " 0x%x%s lineWidth : 0x%x\n", + (isLoad ? "load" : "store"), addr, size, flags, + (needs_burst ? 
" (needs burst)" : ""), lineWidth); + + if (!isLoad) { + /* request_data becomes the property of a ...DataRequest (see below) + * and destroyed by its destructor */ + request_data = new uint8_t[size]; + if (flags & Request::CACHE_BLOCK_ZERO) { + /* For cache zeroing, just use zeroed data */ + std::memset(request_data, 0, size); + } else { + std::memcpy(request_data, data, size); + } + } + + if (needs_burst) { + request = new SplitDataRequest( + *this, inst, isLoad, request_data, res); + } else { + request = new SingleDataRequest( + *this, inst, isLoad, request_data, res); + } + + if (inst->traceData) + inst->traceData->setAddr(addr); + + request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0); + request->request.setVirt(0 /* asid */, + addr, size, flags, cpu.instMasterId(), + /* I've no idea why we need the PC, but give it */ + inst->pc.instAddr()); + + requests.push(request); + request->startAddrTranslation(); +} + +void +LSQ::pushFailedRequest(MinorDynInstPtr inst) +{ + LSQRequestPtr request = new FailedDataRequest(*this, inst); + requests.push(request); +} + +void +LSQ::minorTrace() const +{ + MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d" + " lastMemBarrier=%d\n", + state, numAccessesInDTLB, numAccessesInMemorySystem, + numStoresInTransfers, lastMemBarrier); + requests.minorTrace(); + transfers.minorTrace(); + storeBuffer.minorTrace(); +} + +LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_, + unsigned int store_buffer_size, + unsigned int store_limit_per_cycle) : + Named(name_), lsq(lsq_), + numSlots(store_buffer_size), + storeLimitPerCycle(store_limit_per_cycle), + slots(), + numUnissuedAccesses(0) +{ +} + +PacketPtr +makePacketForRequest(Request &request, bool isLoad, + Packet::SenderState *sender_state, PacketDataPtr data) +{ + MemCmd command; + + /* Make a ret with the right command type to match the request */ + if (request.isLLSC()) { + command = (isLoad ? 
MemCmd::LoadLockedReq : MemCmd::StoreCondReq); + } else if (request.isSwap()) { + command = MemCmd::SwapReq; + } else { + command = (isLoad ? MemCmd::ReadReq : MemCmd::WriteReq); + } + + PacketPtr ret = new Packet(&request, command); + + if (sender_state) + ret->pushSenderState(sender_state); + + if (isLoad) + ret->allocate(); + else + ret->dataDynamicArray(data); + + return ret; +} + +void +LSQ::issuedMemBarrierInst(MinorDynInstPtr inst) +{ + assert(inst->isInst() && inst->staticInst->isMemBarrier()); + assert(inst->id.execSeqNum > lastMemBarrier); + + /* Remember the barrier. We only have a notion of one + * barrier so this may result in some mem refs being + * delayed if they are between barriers */ + lastMemBarrier = inst->id.execSeqNum; +} + +void +LSQ::LSQRequest::makePacket() +{ + /* Make the function idempotent */ + if (packet) + return; + + packet = makePacketForRequest(request, isLoad, this, data); + /* Null the ret data so we know not to deallocate it when the + * ret is destroyed. The data now belongs to the ret and + * the ret is responsible for its destruction */ + data = NULL; +} + +std::ostream & +operator <<(std::ostream &os, LSQ::MemoryState state) +{ + switch (state) { + case LSQ::MemoryRunning: + os << "MemoryRunning"; + break; + case LSQ::MemoryNeedsRetry: + os << "MemoryNeedsRetry"; + break; + default: + os << "MemoryState-" << static_cast<int>(state); + break; + } + return os; +} + +void +LSQ::recvTimingSnoopReq(PacketPtr pkt) +{ + /* LLSC operations in Minor can't be speculative and are executed from + * the head of the requests queue. We shouldn't need to do more than + * this action on snoops. 
*/ + + /* THREAD */ + TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask); +} + +} diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh new file mode 100644 index 000000000..0998395e0 --- /dev/null +++ b/src/cpu/minor/lsq.hh @@ -0,0 +1,722 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * A load/store queue that allows outstanding reads and writes. + * + */ + +#ifndef __CPU_MINOR_NEW_LSQ_HH__ +#define __CPU_MINOR_NEW_LSQ_HH__ + +#include "cpu/minor/buffers.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/pipe_data.hh" +#include "cpu/minor/trace.hh" + +namespace Minor +{ + +/* Forward declaration */ +class Execute; + +/** Load/store queue: holds the requests and transfers queues and the + * store buffer declared in this class and exposes the data cache port */ +class LSQ : public Named +{ + protected: + /** My owner(s) */ + MinorCPU &cpu; + Execute &execute; + + protected: + /** State of memory access for head access. */ + enum MemoryState + { + MemoryRunning, /* Default. Step dcache queues when possible. 
*/ + MemoryNeedsRetry /* Request rejected, will be asked to retry */ + }; + + /** Print MemoryState values as shown in the enum definition */ + friend std::ostream &operator <<(std::ostream &os, + MemoryState state); + + /** Coverage of one address range with another */ + enum AddrRangeCoverage + { + PartialAddrRangeCoverage, /* Two ranges partly overlap */ + FullAddrRangeCoverage, /* One range fully covers another */ + NoAddrRangeCoverage /* Two ranges are disjoint */ + }; + + /** Exposable data port */ + class DcachePort : public MinorCPU::MinorCPUPort + { + protected: + /** My owner */ + LSQ &lsq; + + public: + DcachePort(std::string name, LSQ &lsq_, MinorCPU &cpu) : + MinorCPU::MinorCPUPort(name, cpu), lsq(lsq_) + { } + + protected: + /* Port callbacks, each forwarded to the owning LSQ */ + bool recvTimingResp(PacketPtr pkt) + { return lsq.recvTimingResp(pkt); } + + void recvRetry() { lsq.recvRetry(); } + + void recvTimingSnoopReq(PacketPtr pkt) + { return lsq.recvTimingSnoopReq(pkt); } + }; + + DcachePort dcachePort; + + public: + /** Derived SenderState to carry data access info. through address + * translation, the queues in this port and back from the memory + * system. */ + class LSQRequest : + public BaseTLB::Translation, /* For TLB lookups */ + public Packet::SenderState /* For packing into a Packet */ + { + public: + /** Owning port */ + LSQ &port; + + /** Instruction which made this request */ + MinorDynInstPtr inst; + + /** Load/store indication used for building packet. This isn't + * carried by Request so we need to keep it here */ + bool isLoad; + + /** Dynamically allocated and populated data carried for + * building write packets */ + PacketDataPtr data; + + /* Requests carry packets on their way to the memory system. 
+ * When a Packet returns from the memory system, its + * request needs to have its packet updated as this + * may have changed in flight */ + PacketPtr packet; + + /** The underlying request of this LSQRequest */ + Request request; + + /** Fault generated performing this request */ + Fault fault; + + /** Res from pushRequest */ + uint64_t *res; + + /** Was skipped. Set to indicate any reason (faulted, bad + * stream sequence number, in a fault shadow) that this + * request did not perform a memory transfer */ + bool skipped; + + /** This is an access other than a normal cacheable load + * that's visited the memory system */ + bool issuedToMemory; + + enum LSQRequestState + { + NotIssued, /* Newly created */ + InTranslation, /* TLB accessed, no reply yet */ + Translated, /* Finished address translation */ + Failed, /* The starting state of FailedDataRequests */ + RequestIssuing, /* Load/store issued to memory in the requests + queue */ + StoreToStoreBuffer, /* Store in transfers on its way to the + store buffer */ + RequestNeedsRetry, /* Retry needed for load */ + StoreInStoreBuffer, /* Store in the store buffer, before issuing + a memory transfer */ + StoreBufferIssuing, /* Store in store buffer and has been + issued */ + StoreBufferNeedsRetry, /* Retry needed for store */ + /* All completed states. Includes + completed loads, TLB faults and skipped requests whose + seqNum's no longer match */ + Complete + }; + + LSQRequestState state; + + protected: + /** BaseTLB::Translation interface */ + void markDelayed() { } + + public: + LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_, + PacketDataPtr data_ = NULL, uint64_t *res_ = NULL); + + virtual ~LSQRequest(); + + public: + /** Make a packet to use with the memory transaction */ + void makePacket(); + + /** Was no memory access attempted for this request? 
*/ + bool skippedMemAccess() { return skipped; } + + /** Set this request as having been skipped before a memory + * transfer was attempted */ + void setSkipped() { skipped = true; } + + /** Does address range req1 (req1_addr to req1_addr + req1_size - 1) + * fully cover, partially cover or not cover at all the range req2 */ + static AddrRangeCoverage containsAddrRangeOf( + Addr req1_addr, unsigned int req1_size, + Addr req2_addr, unsigned int req2_size); + + /** Does this request's address range fully cover, partially cover + * or not cover at all the range of other_request? */ + AddrRangeCoverage containsAddrRangeOf(LSQRequest *other_request); + + /** Start the address translation process for this request. This + * will issue a translation request to the TLB. */ + virtual void startAddrTranslation() = 0; + + /** Get the next packet to issue for this request. For split + * transfers, it will be necessary to step through the available + * packets by calling do { getHeadPacket ; stepToNextPacket } while + * (!sentAllPackets) and by retiring response using retireResponse */ + virtual PacketPtr getHeadPacket() = 0; + + /** Step to the next packet for the next call to getHeadPacket */ + virtual void stepToNextPacket() = 0; + + /** Have all packets been sent? */ + virtual bool sentAllPackets() = 0; + + /** True if this request has any issued packets in the memory + * system and so can't be interrupted until it gets responses */ + virtual bool hasPacketsInMemSystem() = 0; + + /** Retire a response packet into the LSQRequest packet possibly + * completing this transfer */ + virtual void retireResponse(PacketPtr packet_) = 0; + + /** Is this request a barrier? */ + virtual bool isBarrier(); + + /** This request, once processed by the requests/transfers + * queues, will need to go to the store buffer */ + bool needsToBeSentToStoreBuffer(); + + /** Set state and output trace output */ + void setState(LSQRequestState new_state); + + /** Has this request been completed. 
This includes *all* reasons + * for completion: successful transfers, faults, skipped because + * of preceding faults */ + bool isComplete() const; + + /** MinorTrace report interface */ + void reportData(std::ostream &os) const; + }; + + typedef LSQRequest *LSQRequestPtr; + + friend std::ostream & operator <<(std::ostream &os, + AddrRangeCoverage state); + + friend std::ostream & operator <<(std::ostream &os, + LSQRequest::LSQRequestState state); + + protected: + /** Special request types that don't actually issue memory requests */ + class SpecialDataRequest : public LSQRequest + { + protected: + /** TLB interface. Does nothing for special requests */ + void finish(Fault fault_, RequestPtr request_, ThreadContext *tc, + BaseTLB::Mode mode) + { } + + public: + /** No address translation is started for special requests */ + void startAddrTranslation() { } + + /** There is no packet to get, calling this is a fatal error */ + PacketPtr getHeadPacket() + { fatal("No packets in a SpecialDataRequest"); } + + /** Nothing to step over */ + void stepToNextPacket() { } + + /** Has no packets to send */ + bool sentAllPackets() { return true; } + + /** Never sends any requests */ + bool hasPacketsInMemSystem() { return false; } + + /** Does nothing, the given packet is ignored */ + void retireResponse(PacketPtr packet_) { } + + public: + SpecialDataRequest(LSQ &port_, MinorDynInstPtr inst_) : + /* Say this is a load, not actually relevant */ + LSQRequest(port_, inst_, true, NULL, 0) + { } + }; + + /** FailedDataRequest represents requests from instructions that + * failed their predicates but need to ride the requests/transfers + * queues to maintain trace ordering */ + class FailedDataRequest : public SpecialDataRequest + { + public: + FailedDataRequest(LSQ &port_, MinorDynInstPtr inst_) : + SpecialDataRequest(port_, inst_) + { state = Failed; } + }; + + /** Request for doing barrier accounting in the store buffer. 
Not + * for use outside that unit */ + class BarrierDataRequest : public SpecialDataRequest + { + public: + bool isBarrier() { return true; } + + public: + BarrierDataRequest(LSQ &port_, MinorDynInstPtr inst_) : + SpecialDataRequest(port_, inst_) + { state = Complete; } + }; + + /** SingleDataRequest is used for requests that don't fragment */ + class SingleDataRequest : public LSQRequest + { + protected: + /** TLB interface */ + void finish(Fault fault_, RequestPtr request_, ThreadContext *tc, + BaseTLB::Mode mode); + + /** Has my only packet been sent to the memory system but has not + * yet been responded to */ + bool packetInFlight; + + /** Has the packet been at least sent to the memory system? */ + bool packetSent; + + public: + /** Send single translation request */ + void startAddrTranslation(); + + /** Get this request's only packet (LSQRequest::packet) */ + PacketPtr getHeadPacket() { return packet; } + + /** Remember that the packet has been sent */ + void stepToNextPacket() { packetInFlight = true; packetSent = true; } + + /** Is the packet currently marked as in flight (see + * packetInFlight) */ + bool hasPacketsInMemSystem() { return packetInFlight; } + + /** packetInFlight can become false again, so need to check + * packetSent */ + bool sentAllPackets() { return packetSent; } + + /** Keep the given packet as the response packet + * LSQRequest::packet */ + void retireResponse(PacketPtr packet_); + + public: + SingleDataRequest(LSQ &port_, MinorDynInstPtr inst_, + bool isLoad_, PacketDataPtr data_ = NULL, uint64_t *res_ = NULL) : + LSQRequest(port_, inst_, isLoad_, data_, res_), + packetInFlight(false), + packetSent(false) + { } + }; + + /** SplitDataRequest is used for requests that are split into a + * number of fragments (see numFragments) */ + class SplitDataRequest : public LSQRequest + { + protected: + /** Event to step between translations */ + class TranslationEvent : public Event + { + protected: + SplitDataRequest &owner; + + public: + 
TranslationEvent(SplitDataRequest &owner_) + : owner(owner_) { } + + void process() + { owner.sendNextFragmentToTranslation(); } + }; + + TranslationEvent 
translationEvent; + protected: + /** Number of fragments this request is split into */ + unsigned int numFragments; + + /** Number of fragments in the address translation mechanism */ + unsigned int numInTranslationFragments; + + /** Number of fragments that have completed address translation, + * (numTranslatedFragments + numInTranslationFragments) <= + * numFragments. When numTranslatedFragments == numFragments, + * translation is complete */ + unsigned int numTranslatedFragments; + + /** Number of fragments already issued (<= numFragments) */ + unsigned int numIssuedFragments; + + /** Number of fragments retired back to this request */ + unsigned int numRetiredFragments; + + /** Fragment Requests corresponding to the address ranges of + * each fragment */ + std::vector<Request *> fragmentRequests; + + /** Packets matching fragmentRequests to issue fragments to memory */ + std::vector<Packet *> fragmentPackets; + + protected: + /** TLB response interface */ + void finish(Fault fault_, RequestPtr request_, ThreadContext *tc, + BaseTLB::Mode mode); + + public: + SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_, + bool isLoad_, PacketDataPtr data_ = NULL, + uint64_t *res_ = NULL); + + ~SplitDataRequest(); + + public: + /** Make all the Requests for this transfer's fragments so that those + * requests can be sent for address translation */ + void makeFragmentRequests(); + + /** Make the packets to go with the requests so they can be sent to + * the memory system */ + void makeFragmentPackets(); + + /** Start a loop of do { sendNextFragmentToTranslation ; + * translateTiming ; finish } while (numTranslatedFragments != + * numFragments) to complete all this request's fragments' address + * translations */ + void startAddrTranslation(); + + /** Get the head packet as counted by numIssuedFragments */ + PacketPtr getHeadPacket(); + + /** Step on numIssuedFragments */ + void stepToNextPacket(); + + /** True while the issued and retired fragment counts differ */ + bool 
hasPacketsInMemSystem() + { return numIssuedFragments != 
numRetiredFragments; } + + /** Have we stepped past the end of fragmentPackets? */ + bool sentAllPackets() { return numIssuedFragments == numFragments; } + + /** For loads, paste the response data into the main + * response packet */ + void retireResponse(PacketPtr packet_); + + /** Part of the address translation loop, see startAddrTranslation */ + void sendNextFragmentToTranslation(); + }; + + /** Store buffer. This contains stores which have been committed + * but whose memory transfers have not yet been issued. Load data + * can be forwarded out of the store buffer */ + class StoreBuffer : public Named + { + public: + /** My owner */ + LSQ &lsq; + + /** Number of slots, this is a bound on the size of slots */ + const unsigned int numSlots; + + /** Maximum number of stores that can be issued per cycle */ + const unsigned int storeLimitPerCycle; + + public: + /** Queue of store requests on their way to memory */ + std::deque<LSQRequestPtr> slots; + + /** Number of occupied slots which have not yet issued a + * memory access */ + unsigned int numUnissuedAccesses; + + public: + StoreBuffer(std::string name_, LSQ &lsq_, + unsigned int store_buffer_size, + unsigned int store_limit_per_cycle); + + public: + /** Can a new request be inserted into the queue? */ + bool canInsert() const; + + /** Delete the given request and free the slot it occupied */ + void deleteRequest(LSQRequestPtr request); + + /** Insert a request at the back of the queue */ + void insert(LSQRequestPtr request); + + /** Look for a store which satisfies the given load. Returns an + * indication whether the forwarding request can be wholly, + * partly or not at all satisfied. 
If the request can be + * wholly satisfied, the store buffer slot number which can be used + * is returned in found_slot */ + AddrRangeCoverage canForwardDataToLoad(LSQRequestPtr request, + unsigned int &found_slot); + + /** Fill the given packet with appropriate data from slot + * slot_number */ + void forwardStoreData(LSQRequestPtr load, unsigned int slot_number); + + /** Number of stores in the store buffer which have not been + * completely issued to the memory system */ + unsigned int numUnissuedStores() { return numUnissuedAccesses; } + + /** Drained if there is absolutely nothing left in the buffer */ + bool isDrained() const { return slots.empty(); } + + /** Try to issue more stores to memory */ + void step(); + + /** Report queue contents for MinorTrace */ + void minorTrace() const; + }; + + protected: + /** Most recent execSeqNum of a memory barrier instruction or + * 0 if there are no in-flight barriers. Useful as a + * dependency for early-issued memory operations */ + InstSeqNum lastMemBarrier; + + public: + /** Retry state of last issued memory transfer */ + MemoryState state; + + /** Maximum number of in-flight accesses issued to the memory system */ + const unsigned int inMemorySystemLimit; + + /** Memory system access width (and snap) in bytes */ + const unsigned int lineWidth; + + public: + /** The LSQ consists of three queues: requests, transfers and the + * store buffer storeBuffer. */ + + typedef Queue<LSQRequestPtr, + ReportTraitsPtrAdaptor<LSQRequestPtr>, + NoBubbleTraits<LSQRequestPtr> > + LSQQueue; + + /** requests contains LSQRequests which have been issued to the TLB by + * calling ExecContext::readMem/writeMem (which in turn calls + * LSQ::pushRequest and LSQRequest::startAddrTranslation). Once they + * have a physical address, requests at the head of requests can be + * issued to the memory system. 
At this stage, it cannot be clear that + * memory accesses *must* happen (that there are no preceding faults or + * changes of flow of control) and so only cacheable reads are issued + * to memory. + * Cacheable stores are not issued at all (and just pass through + * 'transfers' in order) and all other transfers are stalled in requests + * until their corresponding instructions are at the head of the + * inMemInsts instruction queue and have the right streamSeqNum. */ + LSQQueue requests; + + /** Once issued to memory (or, for stores, just had their + * state changed to StoreToStoreBuffer) LSQRequests pass through + * transfers waiting for memory responses. At the head of transfers, + * Execute::commitInst can pick up the memory response for a request + * using LSQ::findResponse. Responses to be committed can then + * have ExecContext::completeAcc called on them. Stores can then be + * pushed into the store buffer. All other transfers will then be + * complete. */ + LSQQueue transfers; + + /* The store buffer contains committed cacheable stores on + * their way to memory decoupled from subsequent instruction execution. + * Before trying to issue a cacheable read from 'requests' to memory, + * the store buffer is checked to see if a previous store contains the + * needed data (StoreBuffer::canForwardDataToLoad) which can be + * forwarded in lieu of a memory access. If there are outstanding + * stores in the transfers queue, they must be promoted to the store + * buffer (and so be committed) before they can be correctly checked + * for forwarding. */ + StoreBuffer storeBuffer; + + protected: + /** Count of the number of mem. accesses which have left the + * requests queue and are in the 'wild' in the memory system. */ + unsigned int numAccessesInMemorySystem; + + /** Number of requests in the DTLB in the requests queue */ + unsigned int numAccessesInDTLB; + + /** The number of stores in the transfers queue. 
Useful when + * testing if the store buffer contains all the forwardable stores */ + unsigned int numStoresInTransfers; + + /** The number of accesses which have been issued to the memory + * system but have not been committed/discarded *excluding* + * cacheable normal loads which don't need to be tracked */ + unsigned int numAccessesIssuedToMemory; + + /** The request (from either requests or the store buffer) which is + * currently waiting to have its memory access retried */ + LSQRequestPtr retryRequest; + + /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */ + Addr cacheBlockMask; + + protected: + /** Try and issue a memory access for a translated request at the + * head of the requests queue. Also tries to move the request + * between queues */ + void tryToSendToTransfers(LSQRequestPtr request); + + /** Try to send (or resend) a memory request's next/only packet to + * the memory system. Returns true if the request was successfully + * sent to memory (and was also the last packet in a transfer) */ + bool tryToSend(LSQRequestPtr request); + + /** Clear a barrier (if it's the last one marked up in lastMemBarrier) */ + void clearMemBarrier(MinorDynInstPtr inst); + + /** Move a request between queues */ + void moveFromRequestsToTransfers(LSQRequestPtr request); + + /** Can a request be sent to the memory system */ + bool canSendToMemorySystem(); + + public: + LSQ(std::string name_, std::string dcache_port_name_, + MinorCPU &cpu_, Execute &execute_, + unsigned int max_accesses_in_memory_system, unsigned int line_width, + unsigned int requests_queue_size, unsigned int transfers_queue_size, + unsigned int store_buffer_size, + unsigned int store_buffer_cycle_store_limit); + + virtual ~LSQ(); + + public: + /** Step checks the queues to see if there are issuable transfers + * which were not otherwise picked up by tests at the end of other + * events. 
+ * + * Steppable actions include deferred actions which couldn't be + * cascaded on the end of a memory response/TLB response event + * because of resource congestion. */ + void step(); + + /** Is there space in the request queue to be able to push a request by + * issuing an isMemRef instruction */ + bool canRequest() { return requests.unreservedRemainingSpace() != 0; } + + /** Returns a response if it's at the head of the transfers queue and + * it's either complete or can be sent on to the store buffer. After + * calling, the request still remains on the transfer queue until + * popResponse is called */ + LSQRequestPtr findResponse(MinorDynInstPtr inst); + + /** Sanity check and pop the head response */ + void popResponse(LSQRequestPtr response); + + /** Must check this before trying to insert into the store buffer */ + bool canPushIntoStoreBuffer() const { return storeBuffer.canInsert(); } + + /** A store has been committed, please move it to the store buffer */ + void sendStoreToStoreBuffer(LSQRequestPtr request); + + /** Are there any accesses other than normal cached loads in the + * memory system or having received responses which need to be + * handled for their instructions to be completed */ + bool accessesInFlight() const + { return numAccessesIssuedToMemory != 0; } + + /** A memory barrier instruction has been issued, remember its + * execSeqNum so that we can avoid issuing memory ops until it is + * committed */ + void issuedMemBarrierInst(MinorDynInstPtr inst); + + /** Get the execSeqNum of the last issued memory barrier */ + InstSeqNum getLastMemBarrier() const { return lastMemBarrier; } + + /** Is there nothing left in the LSQ */ + bool isDrained(); + + /** May need to be ticked next cycle as one of the queues contains + * an actionable transfer or address translation */ + bool needsToTick(); + + /** Complete a barrier instruction. 
Where committed, makes a + * BarrierDataRequest and pushes it into the store buffer */ + void completeMemBarrierInst(MinorDynInstPtr inst, + bool committed); + + /** Single interface for readMem/writeMem to issue requests into + * the LSQ */ + void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, + unsigned int size, Addr addr, unsigned int flags, uint64_t *res); + + /** Push a predicate failed-representing request into the queues just + * to maintain commit order */ + void pushFailedRequest(MinorDynInstPtr inst); + + /** Memory interface */ + bool recvTimingResp(PacketPtr pkt); + void recvRetry(); + void recvTimingSnoopReq(PacketPtr pkt); + + /** Return the raw-bindable port */ + MinorCPU::MinorCPUPort &getDcachePort() { return dcachePort; } + + void minorTrace() const; +}; + +/** Make a suitable packet for the given request. If the request is a store, + * data will be the payload data. If sender_state is NULL, it won't be + * pushed into the packet as senderState */ +PacketPtr makePacketForRequest(Request &request, bool isLoad, + Packet::SenderState *sender_state = NULL, PacketDataPtr data = NULL); +} + +#endif /* __CPU_MINOR_NEW_LSQ_HH__ */ diff --git a/src/cpu/minor/pipe_data.cc b/src/cpu/minor/pipe_data.cc new file mode 100644 index 000000000..447f9c0e7 --- /dev/null +++ b/src/cpu/minor/pipe_data.cc @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +#include "cpu/minor/pipe_data.hh" + +namespace Minor +{ + +std::ostream & +operator <<(std::ostream &os, BranchData::Reason reason) +{ + switch (reason) + { + case BranchData::NoBranch: + os << "NoBranch"; + break; + case BranchData::UnpredictedBranch: + os << "UnpredictedBranch"; + break; + case BranchData::BranchPrediction: + os << "BranchPrediction"; + break; + case BranchData::CorrectlyPredictedBranch: + os << "CorrectlyPredictedBranch"; + break; + case BranchData::BadlyPredictedBranch: + os << "BadlyPredictedBranch"; + break; + case BranchData::BadlyPredictedBranchTarget: + os << "BadlyPredictedBranchTarget"; + break; + case BranchData::Interrupt: + os << "Interrupt"; + break; + case BranchData::SuspendThread: + os << "SuspendThread"; + break; + case BranchData::WakeupFetch: + os << "WakeupFetch"; + break; + case BranchData::HaltFetch: + os << "HaltFetch"; + break; + } + + return os; +} + +bool +BranchData::isStreamChange(const BranchData::Reason reason) +{ + bool ret = false; + + switch (reason) + { + /* No change of stream (see the enum comment in pipe_data.hh) */ + case NoBranch: + case CorrectlyPredictedBranch: + ret = false; + break; + + /* Change of stream (Fetch1 should act on) */ + case UnpredictedBranch: + case BranchPrediction: + case BadlyPredictedBranchTarget: + case BadlyPredictedBranch: + case SuspendThread: + case Interrupt: + case WakeupFetch: + case HaltFetch: + ret = true; + break; + } + + return ret; +} + +bool +BranchData::isBranch(const BranchData::Reason reason) +{ + bool ret = false; + + switch (reason) + { + /* No change of stream (see the enum comment in pipe_data.hh) */ + case NoBranch: + case CorrectlyPredictedBranch: + case SuspendThread: + case Interrupt: + case WakeupFetch: + case HaltFetch: + ret = false; + break; + + /* Change of stream (Fetch1 should act on) */ + case UnpredictedBranch: + case BranchPrediction: + case BadlyPredictedBranchTarget: + case BadlyPredictedBranch: + ret = true; 
+ break; + } + + return ret; +} + +void +BranchData::reportData(std::ostream &os) const +{ + if (isBubble()) { + os << '-'; + } else { + os << reason + << ';' << newStreamSeqNum << '.' << newPredictionSeqNum + << ";0x" << std::hex << target.instAddr() << std::dec + << ';'; + inst->reportData(os); + } +} + +std::ostream & +operator <<(std::ostream &os, const BranchData &branch) +{ + os << branch.reason << " target: 0x" + << std::hex << branch.target.instAddr() << std::dec + << ' ' << *branch.inst + << ' ' << branch.newStreamSeqNum << "(stream)." + << branch.newPredictionSeqNum << "(pred)"; + + return os; +} + +void +ForwardLineData::setFault(Fault fault_) +{ + fault = fault_; + if (isFault()) + bubbleFlag = false; +} + +void +ForwardLineData::allocateLine(unsigned int width_) +{ + lineWidth = width_; + bubbleFlag = false; + + assert(!isFault()); + assert(!line); + + line = new uint8_t[width_]; +} + +void +ForwardLineData::adoptPacketData(Packet *packet) +{ + this->packet = packet; + lineWidth = packet->req->getSize(); + bubbleFlag = false; + + assert(!isFault()); + assert(!line); + + line = packet->getPtr<uint8_t>(); +} + +void +ForwardLineData::freeLine() +{ + /* Only free lines in non-faulting, non-bubble lines */ + if (!isFault() && !isBubble()) { + assert(line); + /* If packet is not NULL then the line must belong to the packet so + * we don't need to separately deallocate the line */ + if (packet) { + delete packet; + } else { + delete [] line; + } + line = NULL; + bubbleFlag = true; + } +} + +void +ForwardLineData::reportData(std::ostream &os) const +{ + if (isBubble()) + os << '-'; + else if (fault != NoFault) + os << "F;" << id; + else + os << id; +} + +ForwardInstData::ForwardInstData(unsigned int width) : + numInsts(width) +{ + bubbleFill(); +} + +ForwardInstData::ForwardInstData(const ForwardInstData &src) +{ + *this = src; +} + +ForwardInstData & +ForwardInstData::operator =(const ForwardInstData &src) +{ + numInsts = src.numInsts; + + for (unsigned int 
i = 0; i < src.numInsts; i++) + insts[i] = src.insts[i]; + + return *this; +} + +bool +ForwardInstData::isBubble() const +{ + return numInsts == 0 || insts[0]->isBubble(); +} + +void +ForwardInstData::bubbleFill() +{ + for (unsigned int i = 0; i < numInsts; i++) + insts[i] = MinorDynInst::bubble(); +} + +void +ForwardInstData::resize(unsigned int width) +{ + assert(width < MAX_FORWARD_INSTS); + numInsts = width; + + bubbleFill(); +} + +void +ForwardInstData::reportData(std::ostream &os) const +{ + if (isBubble()) { + os << '-'; + } else { + unsigned int i = 0; + + os << '('; + while (i != numInsts) { + insts[i]->reportData(os); + i++; + if (i != numInsts) + os << ','; + } + os << ')'; + } +} + +} diff --git a/src/cpu/minor/pipe_data.hh b/src/cpu/minor/pipe_data.hh new file mode 100644 index 000000000..4468cb89e --- /dev/null +++ b/src/cpu/minor/pipe_data.hh @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * Contains class definitions for data flowing between pipeline stages in + * the top-level structure portion of this model. Latch types are also + * defined which pair forward/backward flowing data specific to each stage + * pair. + * + * No post-configuration inter-stage communication should *ever* take place + * outside these classes (except for reservation!) 
+ */ + +#ifndef __CPU_MINOR_PIPE_DATA_HH__ +#define __CPU_MINOR_PIPE_DATA_HH__ + +#include "cpu/minor/buffers.hh" +#include "cpu/minor/dyn_inst.hh" +#include "cpu/base.hh" + +namespace Minor +{ + +/** Forward data between Execute and Fetch1 carrying change-of-address/stream + * information. */ +class BranchData /* : public ReportIF, public BubbleIF */ +{ + public: + enum Reason + { + /* *** No change of stream (information to branch prediction) */ + + /* Don't branch at all (bubble) */ + NoBranch, + /* Don't branch, but here's the details of a correct prediction + * that was executed */ + CorrectlyPredictedBranch, + + /* *** Change of stream */ + + /* Take an unpredicted branch */ + UnpredictedBranch, + /* Take a branch on branch prediction data (from Fetch2) */ + BranchPrediction, + /* Prediction of wrong target PC */ + BadlyPredictedBranchTarget, + /* Bad branch prediction (didn't actually branch). Need to branch + * back to correct stream. If the target is wrong, use + * BadlyPredictedBranchTarget */ + BadlyPredictedBranch, + /* Suspend fetching for this thread (inst->id.threadId). 
+ * This will be woken up by another stream changing branch so + * count it as stream changing itself and expect pc to be the PC + * of the next instruction */ + SuspendThread, + /* Wakeup fetching from Halted */ + WakeupFetch, + /* Branch from an interrupt (no instruction) */ + Interrupt, + /* Stop fetching in anticipation of draining */ + HaltFetch + }; + + /** Is a request with this reason actually a request to change the + * PC rather than a bubble or branch prediction information */ + static bool isStreamChange(const BranchData::Reason reason); + + /** Is a request with this reason actually a 'real' branch, that is, + * a stream change that's not just an instruction to Fetch1 to halt + * or wake up */ + static bool isBranch(const BranchData::Reason reason); + + public: + /** Explanation for this branch */ + Reason reason; + + /** Sequence number of new stream/prediction to be adopted */ + InstSeqNum newStreamSeqNum; + InstSeqNum newPredictionSeqNum; + + /** Starting PC of that stream */ + TheISA::PCState target; + + /** Instruction which caused this branch */ + MinorDynInstPtr inst; + + public: + BranchData() : + reason(NoBranch), newStreamSeqNum(0), + newPredictionSeqNum(0), target(TheISA::PCState(0)), + inst(MinorDynInst::bubble()) + { } + + BranchData( + Reason reason_, + InstSeqNum new_stream_seq_num, + InstSeqNum new_prediction_seq_num, + TheISA::PCState target, + MinorDynInstPtr inst_) : + reason(reason_), + newStreamSeqNum(new_stream_seq_num), + newPredictionSeqNum(new_prediction_seq_num), + target(target), + inst(inst_) + { } + + /** BubbleIF interface */ + static BranchData bubble() { return BranchData(); } + bool isBubble() const { return reason == NoBranch; } + + /** As static isStreamChange but on this branch data */ + bool isStreamChange() const { return isStreamChange(reason); } + + /** As static isBranch but on this branch data */ + bool isBranch() const { return isBranch(reason); } + + /** ReportIF interface */ + void reportData(std::ostream 
&os) const; +}; + +/** Print a branch reason enum */ +std::ostream &operator <<(std::ostream &os, BranchData::Reason reason); + +/** Print BranchData contents in a format suitable for DPRINTF comments, not + * for MinorTrace */ +std::ostream &operator <<(std::ostream &os, const BranchData &branch); + +/** Line fetch data in the forward direction. Contains a single cache line + * (or fragment of a line), its address, a sequence number assigned when + * that line was fetched and a bubbleFlag that can allow ForwardLineData to + * be used to represent the absence of line data in a pipeline. */ +class ForwardLineData /* : public ReportIF, public BubbleIF */ +{ + private: + /** This line is a bubble. No other data member is required to be valid + * if this is true */ + bool bubbleFlag; + + public: + /** First byte address in the line. This is allowed to be + * <= pc.instAddr() */ + Addr lineBaseAddr; + + /** PC of the first requested inst within this line */ + TheISA::PCState pc; + + /** Explicit line width, don't rely on data.size */ + unsigned int lineWidth; + + public: + /** This line has a fault. The bubble flag will be false and seqNums + * will be valid but no data will */ + Fault fault; + + /** Thread, stream, prediction ... id of this line */ + InstId id; + + /** Line data. line[0] is the byte at address pc.instAddr(). Data is + * only valid up to lineWidth - 1. 
+ */ + uint8_t *line; + + /** Packet from which the line is taken */ + Packet *packet; + + public: + ForwardLineData() : + bubbleFlag(true), + lineBaseAddr(0), + lineWidth(0), + fault(NoFault), + line(NULL), + packet(NULL) + { + /* Make lines bubbles by default */ + } + + ~ForwardLineData() { line = NULL; } + + public: + /** This is a fault, not a line */ + bool isFault() const { return fault != NoFault; } + + /** Set fault and possibly clear the bubble flag */ + void setFault(Fault fault_); + + /** In-place initialise a ForwardLineData, freeing and overriding the + * line */ + void allocateLine(unsigned int width_); + + /** Use the data from a packet as line instead of allocating new + * space. On destruction of this object, the packet will be destroyed */ + void adoptPacketData(Packet *packet); + + /** Free this ForwardLineData line. Note that these are shared between + * line objects and so you must be careful when deallocating them. + * Copying of ForwardLineData can, therefore, be done by default copy + * constructors/assignment */ + void freeLine(); + + /** BubbleIF interface */ + static ForwardLineData bubble() { return ForwardLineData(); } + bool isBubble() const { return bubbleFlag; } + + /** ReportIF interface */ + void reportData(std::ostream &os) const; +}; + +/** Maximum number of instructions that can be carried by the pipeline. */ +const unsigned int MAX_FORWARD_INSTS = 16; + +/** Forward flowing data between Fetch2,Decode,Execute carrying a packet of + * instructions of a width appropriate to the configured stage widths. 
+ * Also carries exception information where instructions are not valid */ +class ForwardInstData /* : public ReportIF, public BubbleIF */ +{ + public: + /** Array of carried insts, ref counted */ + MinorDynInstPtr insts[MAX_FORWARD_INSTS]; + + /** The number of insts slots that can be expected to be valid insts */ + unsigned int numInsts; + + public: + explicit ForwardInstData(unsigned int width = 0); + + ForwardInstData(const ForwardInstData &src); + + public: + /** Number of instructions carried by this object */ + unsigned int width() const { return numInsts; } + + /** Copy the inst array only as far as numInsts */ + ForwardInstData &operator =(const ForwardInstData &src); + + /** Resize a bubble/empty ForwardInstData and fill with bubbles */ + void resize(unsigned int width); + + /** Fill with bubbles from 0 to width() - 1 */ + void bubbleFill(); + + /** BubbleIF interface */ + bool isBubble() const; + + /** ReportIF interface */ + void reportData(std::ostream &os) const; +}; + +} + +#endif /* __CPU_MINOR_PIPE_DATA_HH__ */ diff --git a/src/cpu/minor/pipeline.cc b/src/cpu/minor/pipeline.cc new file mode 100644 index 000000000..9d802234b --- /dev/null +++ b/src/cpu/minor/pipeline.cc @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +#include <algorithm> + +#include "cpu/minor/decode.hh" +#include "cpu/minor/execute.hh" +#include "cpu/minor/fetch1.hh" +#include "cpu/minor/fetch2.hh" +#include "cpu/minor/pipeline.hh" +#include "debug/Drain.hh" +#include "debug/MinorCPU.hh" +#include "debug/MinorTrace.hh" +#include "debug/Quiesce.hh" + +namespace Minor +{ + +Pipeline::Pipeline(MinorCPU &cpu_, MinorCPUParams &params) : + Ticked(cpu_, &(cpu_.BaseCPU::numCycles)), + cpu(cpu_), + allow_idling(params.enableIdling), + f1ToF2(cpu.name() + ".f1ToF2", "lines", + params.fetch1ToFetch2ForwardDelay), + f2ToF1(cpu.name() + ".f2ToF1", "prediction", + params.fetch1ToFetch2BackwardDelay, true), + f2ToD(cpu.name() + ".f2ToD", "insts", + params.fetch2ToDecodeForwardDelay), + dToE(cpu.name() + ".dToE", "insts", + params.decodeToExecuteForwardDelay), + eToF1(cpu.name() + ".eToF1", "branch", + params.executeBranchDelay), + execute(cpu.name() + ".execute", cpu, params, + dToE.output(), eToF1.input()), + decode(cpu.name() + ".decode", cpu, params, + f2ToD.output(), dToE.input(), execute.inputBuffer), + fetch2(cpu.name() + ".fetch2", cpu, params, + f1ToF2.output(), eToF1.output(), f2ToF1.input(), f2ToD.input(), + decode.inputBuffer), + fetch1(cpu.name() + ".fetch1", cpu, params, + eToF1.output(), f1ToF2.input(), f2ToF1.output(), fetch2.inputBuffer), + activityRecorder(cpu.name() + ".activity", Num_StageId, + /* The max depth of inter-stage FIFOs */ + std::max(params.fetch1ToFetch2ForwardDelay, + std::max(params.fetch2ToDecodeForwardDelay, + std::max(params.decodeToExecuteForwardDelay, + params.executeBranchDelay)))), + needToSignalDrained(false) +{ + if (params.fetch1ToFetch2ForwardDelay < 1) { + fatal("%s: fetch1ToFetch2ForwardDelay must be >= 1 (%d)\n", + cpu.name(), params.fetch1ToFetch2ForwardDelay); + } + + if (params.fetch2ToDecodeForwardDelay < 1) { + fatal("%s: fetch2ToDecodeForwardDelay must be >= 1 (%d)\n", + cpu.name(), params.fetch2ToDecodeForwardDelay); + } + + if 
(params.decodeToExecuteForwardDelay < 1) { + fatal("%s: decodeToExecuteForwardDelay must be >= 1 (%d)\n", + cpu.name(), params.decodeToExecuteForwardDelay); + } + + if (params.executeBranchDelay < 1) { + fatal("%s: executeBranchDelay must be >= 1 (%d)\n", + cpu.name(), params.executeBranchDelay); + } +} + +void +Pipeline::minorTrace() const +{ + fetch1.minorTrace(); + f1ToF2.minorTrace(); + f2ToF1.minorTrace(); + fetch2.minorTrace(); + f2ToD.minorTrace(); + decode.minorTrace(); + dToE.minorTrace(); + execute.minorTrace(); + eToF1.minorTrace(); + activityRecorder.minorTrace(); +} + +void +Pipeline::evaluate() +{ + /* Note that it's important to evaluate the stages in order to allow + * 'immediate', 0-time-offset TimeBuffer activity to be visible from + * later stages to earlier ones in the same cycle */ + execute.evaluate(); + decode.evaluate(); + fetch2.evaluate(); + fetch1.evaluate(); + + if (DTRACE(MinorTrace)) + minorTrace(); + + /* Update the time buffers after the stages */ + f1ToF2.evaluate(); + f2ToF1.evaluate(); + f2ToD.evaluate(); + dToE.evaluate(); + eToF1.evaluate(); + + /* The activity recorder must be called after all the stages and + * before the idler (which acts on the advice of the activity recorder) */ + activityRecorder.evaluate(); + + if (allow_idling) { + /* Become idle if we can but are not draining */ + if (!activityRecorder.active() && !needToSignalDrained) { + DPRINTF(Quiesce, "Suspending as the processor is idle\n"); + stop(); + } + + /* Deactivate all stages. Note that the stages *could* + * activate and deactivate themselves but that's fraught + * with additional difficulty. 
+ * As organised here, all stages are simply deactivated at the end + * of every evaluate() while idling is allowed */ + activityRecorder.deactivateStage(Pipeline::CPUStageId); + activityRecorder.deactivateStage(Pipeline::Fetch1StageId); + activityRecorder.deactivateStage(Pipeline::Fetch2StageId); + activityRecorder.deactivateStage(Pipeline::DecodeStageId); + activityRecorder.deactivateStage(Pipeline::ExecuteStageId); + } + + if (needToSignalDrained) /* Must be draining */ + { + DPRINTF(Drain, "Still draining\n"); + if (isDrained()) { + DPRINTF(Drain, "Signalling end of draining\n"); + cpu.signalDrainDone(); + needToSignalDrained = false; + stop(); + } + } +} + +MinorCPU::MinorCPUPort & +Pipeline::getInstPort() +{ + return fetch1.getIcachePort(); +} + +MinorCPU::MinorCPUPort & +Pipeline::getDataPort() +{ + return execute.getDcachePort(); +} + +void +Pipeline::wakeupFetch() +{ + execute.wakeupFetch(); +} + +unsigned int +Pipeline::drain(DrainManager *manager) +{ + DPRINTF(MinorCPU, "Draining pipeline by halting inst fetches. " + " Execution should drain naturally\n"); + + execute.drain(); + + /* Make sure that needToSignalDrained isn't accidentally set if we + * are 'pre-drained' */ + bool drained = isDrained(); + needToSignalDrained = !drained; + + return (drained ? 0 : 1); +} + +void +Pipeline::drainResume() +{ + DPRINTF(Drain, "Drain resume\n"); + execute.drainResume(); +} + +bool +Pipeline::isDrained() +{ + bool fetch1_drained = fetch1.isDrained(); + bool fetch2_drained = fetch2.isDrained(); + bool decode_drained = decode.isDrained(); + bool execute_drained = execute.isDrained(); + + bool f1_to_f2_drained = f1ToF2.empty(); + bool f2_to_f1_drained = f2ToF1.empty(); + bool f2_to_d_drained = f2ToD.empty(); + bool d_to_e_drained = dToE.empty(); + + bool ret = fetch1_drained && fetch2_drained && + decode_drained && execute_drained && + f1_to_f2_drained && f2_to_f1_drained && + f2_to_d_drained && d_to_e_drained; + + DPRINTF(MinorCPU, "Pipeline undrained stages state:%s%s%s%s%s%s%s%s\n", + (fetch1_drained ? "" : " Fetch1"), + (fetch2_drained ? 
"" : " Fetch2"), + (decode_drained ? "" : " Decode"), + (execute_drained ? "" : " Execute"), + (f1_to_f2_drained ? "" : " F1->F2"), + (f2_to_f1_drained ? "" : " F2->F1"), + (f2_to_d_drained ? "" : " F2->D"), + (d_to_e_drained ? "" : " D->E") + ); + + return ret; +} + +} diff --git a/src/cpu/minor/pipeline.hh b/src/cpu/minor/pipeline.hh new file mode 100644 index 000000000..893efbf50 --- /dev/null +++ b/src/cpu/minor/pipeline.hh @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * The constructed pipeline. Kept out of MinorCPU to keep the interface + * between the CPU and its grubby implementation details clean. + */ + +#ifndef __CPU_MINOR_PIPELINE_HH__ +#define __CPU_MINOR_PIPELINE_HH__ + +#include "cpu/minor/activity.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/decode.hh" +#include "cpu/minor/execute.hh" +#include "cpu/minor/fetch1.hh" +#include "cpu/minor/fetch2.hh" +#include "params/MinorCPU.hh" +#include "sim/ticked_object.hh" + +namespace Minor +{ + +/** + * @namespace Minor + * + * Minor contains all the definitions within the MinorCPU apart from the CPU + * class itself + */ + +/** The constructed pipeline. Kept out of MinorCPU to keep the interface + * between the CPU and its grubby implementation details clean. 
*/ +class Pipeline : public Ticked +{ + protected: + MinorCPU &cpu; + + /** Allow cycles to be skipped when the pipeline is idle */ + bool allow_idling; + + Latch<ForwardLineData> f1ToF2; + Latch<BranchData> f2ToF1; + Latch<ForwardInstData> f2ToD; + Latch<ForwardInstData> dToE; + Latch<BranchData> eToF1; + + Execute execute; + Decode decode; + Fetch2 fetch2; + Fetch1 fetch1; + + /** Activity recording for the pipeline. This is accessed through the CPU + * by the pipeline stages but belongs to the Pipeline as it is the + * cleanest place to initialise it */ + MinorActivityRecorder activityRecorder; + + public: + /** Enumerated ids of the 'stages' for the activity recorder */ + enum StageId + { + /* A stage representing wakeup of the whole processor */ + CPUStageId = 0, + /* Real pipeline stages */ + Fetch1StageId, Fetch2StageId, DecodeStageId, ExecuteStageId, + Num_StageId /* Stage count */ + }; + + /** True after drain is called but draining isn't complete */ + bool needToSignalDrained; + + public: + Pipeline(MinorCPU &cpu_, MinorCPUParams &params); + + public: + /** Wake up the Fetch unit. This is needed on thread activation esp. 
+ * after quiesce wakeup */ + void wakeupFetch(); + + /** Try to drain the CPU */ + unsigned int drain(DrainManager *manager); + + void drainResume(); + + /** Test to see if the CPU is drained */ + bool isDrained(); + + /** A custom evaluate allows report in the right place (between + * stages and pipeline advance) */ + void evaluate(); + + void minorTrace() const; + + /** Functions below here are BaseCPU operations passed on to pipeline + * stages */ + + /** Return the IcachePort belonging to Fetch1 for the CPU */ + MinorCPU::MinorCPUPort &getInstPort(); + /** Return the DcachePort belonging to Execute for the CPU */ + MinorCPU::MinorCPUPort &getDataPort(); + + /** To give the activity recorder to the CPU */ + MinorActivityRecorder *getActivityRecorder() { return &activityRecorder; } +}; + +} + +#endif /* __CPU_MINOR_PIPELINE_HH__ */ diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc new file mode 100644 index 000000000..f6b1f7944 --- /dev/null +++ b/src/cpu/minor/scoreboard.cc @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +#include "arch/registers.hh" +#include "cpu/minor/scoreboard.hh" +#include "cpu/reg_class.hh" +#include "debug/MinorScoreboard.hh" +#include "debug/MinorTiming.hh" + +namespace Minor +{ + +bool +Scoreboard::findIndex(RegIndex reg, Index &scoreboard_index) +{ + RegClass reg_class = regIdxToClass(reg); + bool ret = false; + + if (reg == TheISA::ZeroReg) { + /* Don't bother with the zero register */ + ret = false; + } else { + switch (reg_class) + { + case IntRegClass: + scoreboard_index = reg; + ret = true; + break; + case FloatRegClass: + scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs + + reg - TheISA::FP_Reg_Base; + ret = true; + break; + case CCRegClass: + /* CC regs are packed directly after the int regs, so offset + * from the CC base (not the FP base, which would alias the + * float slots) */ + scoreboard_index = TheISA::NumIntRegs + reg - TheISA::CC_Reg_Base; + ret = true; + break; + case MiscRegClass: + /* Don't bother with Misc registers */ + ret = false; + break; + } + } + + return ret; +} + +/** Flatten a RegIndex, irrespective of what reg type it's pointing to */ +static TheISA::RegIndex +flattenRegIndex(TheISA::RegIndex reg, ThreadContext *thread_context) +{ + RegClass reg_class = regIdxToClass(reg); + TheISA::RegIndex ret = reg; + + switch (reg_class) + { + case IntRegClass: + ret = thread_context->flattenIntIndex(reg); + break; + case FloatRegClass: + ret = thread_context->flattenFloatIndex(reg); + break; + case CCRegClass: + ret = thread_context->flattenCCIndex(reg); + break; + case MiscRegClass: + /* Don't bother to flatten misc regs as we don't need them here */ + /* return thread_context->flattenMiscIndex(reg); */ + ret = reg; + break; + } + + return ret; +} + +void +Scoreboard::markupInstDests(MinorDynInstPtr inst, Cycles retire_time, + ThreadContext *thread_context, bool mark_unpredictable) +{ + if (inst->isFault()) + return; + + StaticInstPtr staticInst = inst->staticInst; + unsigned int num_dests = staticInst->numDestRegs(); + + /** Mark each destination register */ + for (unsigned int dest_index = 0; dest_index < num_dests; + 
dest_index++) + { + RegIndex reg = flattenRegIndex( + staticInst->destRegIdx(dest_index), thread_context); + Index index; + + if (findIndex(reg, index)) { + if (mark_unpredictable) + numUnpredictableResults[index]++; + + inst->flatDestRegIdx[dest_index] = reg; + + numResults[index]++; + returnCycle[index] = retire_time; + /* We should be able to rely on only being given ascending + * execSeqNums, but sanity check */ + if (inst->id.execSeqNum > writingInst[index]) { + writingInst[index] = inst->id.execSeqNum; + fuIndices[index] = inst->fuIndex; + } + + DPRINTF(MinorScoreboard, "Marking up inst: %s" + " regIndex: %d final numResults: %d returnCycle: %d\n", + *inst, index, numResults[index], returnCycle[index]); + } else { + /* Use ZeroReg to mark invalid/untracked dests */ + inst->flatDestRegIdx[dest_index] = TheISA::ZeroReg; + } + } +} + +InstSeqNum +Scoreboard::execSeqNumToWaitFor(MinorDynInstPtr inst, + ThreadContext *thread_context) +{ + InstSeqNum ret = 0; + + if (inst->isFault()) + return ret; + + StaticInstPtr staticInst = inst->staticInst; + unsigned int num_srcs = staticInst->numSrcRegs(); + + for (unsigned int src_index = 0; src_index < num_srcs; src_index++) { + RegIndex reg = flattenRegIndex(staticInst->srcRegIdx(src_index), + thread_context); + unsigned short int index; + + if (findIndex(reg, index)) { + if (writingInst[index] > ret) + ret = writingInst[index]; + } + } + + DPRINTF(MinorScoreboard, "Inst: %s depends on execSeqNum: %d\n", + *inst, ret); + + return ret; +} + +void +Scoreboard::clearInstDests(MinorDynInstPtr inst, bool clear_unpredictable) +{ + if (inst->isFault()) + return; + + StaticInstPtr staticInst = inst->staticInst; + unsigned int num_dests = staticInst->numDestRegs(); + + /** Clear each destination register */ + for (unsigned int dest_index = 0; dest_index < num_dests; + dest_index++) + { + RegIndex reg = inst->flatDestRegIdx[dest_index]; + Index index; + + if (findIndex(reg, index)) { + if (clear_unpredictable && 
numUnpredictableResults[index] != 0) + numUnpredictableResults[index] --; + + numResults[index] --; + + if (numResults[index] == 0) { + returnCycle[index] = Cycles(0); + writingInst[index] = 0; + fuIndices[index] = -1; + } + + DPRINTF(MinorScoreboard, "Clearing inst: %s" + " regIndex: %d final numResults: %d\n", + *inst, index, numResults[index]); + } + } +} + +bool +Scoreboard::canInstIssue(MinorDynInstPtr inst, + const std::vector<Cycles> *src_reg_relative_latencies, + const std::vector<bool> *cant_forward_from_fu_indices, + Cycles now, ThreadContext *thread_context) +{ + /* Always allow fault to be issued */ + if (inst->isFault()) + return true; + + StaticInstPtr staticInst = inst->staticInst; + unsigned int num_srcs = staticInst->numSrcRegs(); + + /* Default to saying you can issue */ + bool ret = true; + + unsigned int num_relative_latencies = 0; + Cycles default_relative_latency = Cycles(0); + + /* Where relative latencies are given, the default is the last + * one as that allows the rel. lat. list to be shorter than the + * number of src. regs */ + if (src_reg_relative_latencies && + src_reg_relative_latencies->size() != 0) + { + num_relative_latencies = src_reg_relative_latencies->size(); + default_relative_latency = (*src_reg_relative_latencies) + [num_relative_latencies-1]; + } + + /* For each source register, find the latest result */ + unsigned int src_index = 0; + while (src_index < num_srcs && /* More registers */ + ret /* Still possible */) + { + RegIndex reg = flattenRegIndex(staticInst->srcRegIdx(src_index), + thread_context); + unsigned short int index; + + if (findIndex(reg, index)) { + /* FU that produces this register; clearInstDests resets it + * to -1 once no result is in flight. The can't-forward + * vector is looked up by that FU index, not by the + * scoreboard register index */ + const int src_fu = fuIndices[index]; + bool cant_forward = src_fu >= 0 && + cant_forward_from_fu_indices && + static_cast<unsigned int>(src_fu) < + cant_forward_from_fu_indices->size() && + (*cant_forward_from_fu_indices)[src_fu]; + + Cycles relative_latency = (cant_forward ? Cycles(0) : + (src_index >= num_relative_latencies ? 
+ default_relative_latency : + (*src_reg_relative_latencies)[src_index])); + + if (returnCycle[index] > (now + relative_latency) || + numUnpredictableResults[index] != 0) + { + ret = false; + } + } + src_index++; + } + + if (DTRACE(MinorTiming)) { + if (ret && num_srcs > num_relative_latencies && + num_relative_latencies != 0) + { + DPRINTF(MinorTiming, "Warning, inst: %s timing extra decode has" + " more src. regs: %d than relative latencies: %d\n", + staticInst->disassemble(0), num_srcs, num_relative_latencies); + } + } + + return ret; +} + +void +Scoreboard::minorTrace() const +{ + std::ostringstream result_stream; + + bool printed_element = false; + + unsigned int i = 0; + while (i < numRegs) { + unsigned short int num_results = numResults[i]; + unsigned short int num_unpredictable_results = + numUnpredictableResults[i]; + + /* Only report registers with at least one result in flight */ + if (!(num_results == 0 && num_unpredictable_results == 0)) { + if (printed_element) + result_stream << ','; + + result_stream << '(' << i << ',' + << num_results << '/' + << num_unpredictable_results << '/' + << returnCycle[i] << '/' + << writingInst[i] << ')'; + + printed_element = true; + } + + i++; + } + + MINORTRACE("busy=%s\n", result_stream.str()); +} + +} diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh new file mode 100644 index 000000000..711bcafb2 --- /dev/null +++ b/src/cpu/minor/scoreboard.hh @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * A simple instruction scoreboard for tracking dependencies in Execute. 
+ */ + +#ifndef __CPU_MINOR_SCOREBOARD_HH__ +#define __CPU_MINOR_SCOREBOARD_HH__ + +#include "cpu/minor/cpu.hh" +#include "cpu/minor/dyn_inst.hh" +#include "cpu/minor/trace.hh" + +namespace Minor +{ + +/** A scoreboard of register dependencies including, for each register: + * The number of in-flight instructions which will generate a result for + * this register */ +class Scoreboard : public Named +{ + public: + /** The number of registers in the Scoreboard. These + * are just the integer, CC and float registers packed + * together with integer regs in the range [0,NumIntRegs-1], + * CC regs in the range [NumIntRegs, NumIntRegs+NumCCRegs-1] + * and float regs in the range + * [NumIntRegs+NumCCRegs, NumFloatRegs+NumIntRegs+NumCCRegs-1] */ + const unsigned numRegs; + + /** Type to use for thread context registers */ + typedef TheISA::RegIndex RegIndex; + + /** Type to use when indexing numResults */ + typedef unsigned short int Index; + + /** Count of the number of in-flight instructions that + * have results for each register */ + std::vector<Index> numResults; + + /** Count of the number of results which can't be predicted */ + std::vector<Index> numUnpredictableResults; + + /** Index of the FU generating this result */ + std::vector<int> fuIndices; + + /** The estimated cycle number that the result will be presented. 
+ * This can be offset to allow forwarding to be simulated as + * long as instruction completion is *strictly* in order with + * respect to instructions with unpredictable result timing */ + std::vector<Cycles> returnCycle; + + /** The execute sequence number of the most recent inst to generate this + * register value */ + std::vector<InstSeqNum> writingInst; + + public: + Scoreboard(const std::string &name) : + Named(name), + numRegs(TheISA::NumIntRegs + TheISA::NumCCRegs + + TheISA::NumFloatRegs), + numResults(numRegs, 0), + numUnpredictableResults(numRegs, 0), + fuIndices(numRegs, 0), + returnCycle(numRegs, Cycles(0)), + writingInst(numRegs, 0) + { } + + public: + /** Sets scoreboard_index to the index into numResults of the + * given register index. Returns true if the given register + * is in the scoreboard and false if it isn't */ + bool findIndex(RegIndex reg, Index &scoreboard_index); + + /** Mark up an instruction's effects by incrementing + * numResults counts. If mark_unpredictable is true, the inst's + * destination registers are marked as being unpredictable without + * an estimated retire time */ + void markupInstDests(MinorDynInstPtr inst, Cycles retire_time, + ThreadContext *thread_context, bool mark_unpredictable); + + /** Clear down the dependencies for this instruction. clear_unpredictable + * must match mark_unpredictable for the same inst. */ + void clearInstDests(MinorDynInstPtr inst, bool clear_unpredictable); + + /** Returns the exec sequence number of the most recent inst on + * which the given inst depends. Useful for determining which + * inst must actually be committed before a dependent inst + * can call initiateAcc */ + InstSeqNum execSeqNumToWaitFor(MinorDynInstPtr inst, + ThreadContext *thread_context); + + /** Can this instruction be issued. 
Are any of its source registers + * due to be written by other marked-up instructions in flight */ + bool canInstIssue(MinorDynInstPtr inst, + const std::vector<Cycles> *src_reg_relative_latencies, + const std::vector<bool> *cant_forward_from_fu_indices, + Cycles now, ThreadContext *thread_context); + + /** MinorTraceIF interface */ + void minorTrace() const; +}; + +} + +#endif /* __CPU_MINOR_SCOREBOARD_HH__ */ diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc new file mode 100644 index 000000000..baa0aa7f3 --- /dev/null +++ b/src/cpu/minor/stats.cc @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +#include "cpu/minor/stats.hh" + +namespace Minor +{ + +MinorStats::MinorStats() +{ } + +void +MinorStats::regStats(const std::string &name, BaseCPU &baseCpu) +{ + numInsts + .name(name + ".committedInsts") + .desc("Number of instructions committed"); + + numOps + .name(name + ".committedOps") + .desc("Number of ops (including micro ops) committed"); + + numDiscardedOps + .name(name + ".discardedOps") + .desc("Number of ops (including micro ops) which were discarded " + "before commit"); + + numFetchSuspends + .name(name + ".numFetchSuspends") + .desc("Number of times Execute suspended instruction fetching"); + + quiesceCycles + .name(name + ".quiesceCycles") + .desc("Total number of cycles that CPU has spent quiesced or waiting " + "for an interrupt") + .prereq(quiesceCycles); + + cpi + .name(name + ".cpi") + .desc("CPI: cycles per instruction") + .precision(6); + cpi = baseCpu.numCycles / numInsts; + + ipc + .name(name + ".ipc") + .desc("IPC: instructions per cycle") + .precision(6); + ipc = numInsts / baseCpu.numCycles; +} + +}; diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh new file mode 100644 index 000000000..dc246304d 
--- /dev/null +++ b/src/cpu/minor/stats.hh @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2011-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * The stats for MinorCPU separated from the CPU definition. + */ + +#ifndef __CPU_MINOR_STATS_HH__ +#define __CPU_MINOR_STATS_HH__ + +#include "base/statistics.hh" +#include "cpu/base.hh" +#include "sim/ticked_object.hh" + +namespace Minor +{ + +/** Currently unused stats class. */ +class MinorStats +{ + public: + /** Number of simulated instructions */ + Stats::Scalar numInsts; + + /** Number of simulated insts and microops */ + Stats::Scalar numOps; + + /** Number of ops discarded before committing */ + Stats::Scalar numDiscardedOps; + + /** Number of times fetch was asked to suspend by Execute */ + Stats::Scalar numFetchSuspends; + + /** Number of cycles in quiescent state */ + Stats::Scalar quiesceCycles; + + /** CPI/IPC for total cycle counts and macro insts */ + Stats::Formula cpi; + Stats::Formula ipc; + + public: + MinorStats(); + + public: + void regStats(const std::string &name, BaseCPU &baseCpu); +}; + +} + +#endif /* __CPU_MINOR_STATS_HH__ */ diff --git a/src/cpu/minor/trace.hh b/src/cpu/minor/trace.hh new file mode 100644 index 000000000..9bbe09750 --- /dev/null +++ b/src/cpu/minor/trace.hh @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to 
intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * This file contains miscellaneous macros for formatting + * general trace information and also MinorTrace information. + * + * MinorTrace is this model's cycle-by-cycle trace information for use by + * minorview. + */ + +#ifndef __CPU_MINOR_TRACE_HH__ +#define __CPU_MINOR_TRACE_HH__ + +#include <string> + +#include "base/trace.hh" +#include "debug/MinorTrace.hh" + +namespace Minor +{ + +/** DPRINTF wrapper for MinorTrace reporting */ +#define MINORTRACE(...) \ + DPRINTF(MinorTrace, "MinorTrace: " __VA_ARGS__) + +/** DPRINTFS wrapper for MinorTrace MinorInst line reporting */ +#define MINORINST(sim_object, ...) \ + DPRINTFS(MinorTrace, (sim_object), "MinorInst: " __VA_ARGS__) + +/** DPRINTFS wrapper for MinorTrace MinorLine line reporting */ +#define MINORLINE(sim_object, ...) \ + DPRINTFS(MinorTrace, (sim_object), "MinorLine: " __VA_ARGS__) + +} + +#endif /* __CPU_MINOR_TRACE_HH__ */ diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript index 5b2ecceef..bb9342f06 100644 --- a/src/cpu/pred/SConscript +++ b/src/cpu/pred/SConscript @@ -30,7 +30,8 @@ Import('*') -if 'InOrderCPU' in env['CPU_MODELS'] or 'O3CPU' in env['CPU_MODELS']: +if 'InOrderCPU' in env['CPU_MODELS'] or 'O3CPU' in env['CPU_MODELS'] \ + or 'Minor' in env['CPU_MODELS']: SimObject('BranchPredictor.py') Source('bpred_unit.cc') diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index f598c920d..375b7d0ba 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -59,6 +59,11 @@ class CheckerCPU; class AtomicSimpleCPU; class TimingSimpleCPU; class InorderCPU; +namespace Minor +{ + class ExecContext; +}; + class SymbolTable; namespace Trace { diff --git a/src/cpu/timing_expr.cc b/src/cpu/timing_expr.cc new file mode 100644 index 000000000..d6d904956 --- /dev/null +++ b/src/cpu/timing_expr.cc @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved. 
+ * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +#include "base/intmath.hh" +#include "cpu/timing_expr.hh" + +TimingExprEvalContext::TimingExprEvalContext (StaticInstPtr inst_, + ThreadContext *thread_, + TimingExprLet *let_) : + inst(inst_), thread(thread_), let(let_) +{ + /* Reserve space to hold the results of evaluating the + * let expressions */ + if (let) { + unsigned int num_defns = let->defns.size(); + + results.resize(num_defns, 0); + resultAvailable.resize(num_defns, false); + } +} + +uint64_t TimingExprSrcReg::eval(TimingExprEvalContext &context) +{ + return context.inst->srcRegIdx(index); +} + +uint64_t TimingExprReadIntReg::eval(TimingExprEvalContext &context) +{ + return context.thread->readIntReg(reg->eval(context)); +} + +uint64_t TimingExprLet::eval(TimingExprEvalContext &context) +{ + TimingExprEvalContext new_context(context.inst, + context.thread, this); + + return expr->eval(new_context); +} + +uint64_t TimingExprRef::eval(TimingExprEvalContext &context) +{ + /* Lookup the result, evaluating if necessary. 
@todo, this + * should have more error checking */ + if (!context.resultAvailable[index]) { + context.results[index] = context.let->defns[index]->eval(context); + context.resultAvailable[index] = true; + } + + return context.results[index]; +} + +uint64_t TimingExprUn::eval(TimingExprEvalContext &context) +{ + uint64_t arg_value = arg->eval(context); + uint64_t ret = 0; /* also the result for any op not handled below */ + + switch (op) { + case Enums::timingExprSizeInBits: + if (arg_value == 0) + ret = 0; + else + ret = ceilLog2(arg_value); + break; + case Enums::timingExprNot: + ret = arg_value == 0; /* logical NOT: 1 iff the operand is zero */ + break; + case Enums::timingExprInvert: + ret = ~arg_value; /* bitwise complement */ + break; + case Enums::timingExprSignExtend32To64: + ret = static_cast<int64_t>( + static_cast<int32_t>(arg_value)); + break; + case Enums::timingExprAbs: + if (static_cast<int64_t>(arg_value) < 0) + ret = -arg_value; + else + ret = arg_value; + break; + default: + break; + } + + return ret; +} + +uint64_t TimingExprBin::eval(TimingExprEvalContext &context) +{ + uint64_t left_value = left->eval(context); + uint64_t right_value = right->eval(context); + uint64_t ret = 0; + + switch (op) { + case Enums::timingExprAdd: + ret = left_value + right_value; + break; + case Enums::timingExprSub: + ret = left_value - right_value; + break; + case Enums::timingExprUMul: + ret = left_value * right_value; + break; + case Enums::timingExprUDiv: + if (right_value != 0) { + ret = left_value / right_value; + } + break; + case Enums::timingExprUCeilDiv: + if (right_value != 0) { + ret = (left_value + (right_value - 1)) / right_value; + } + break; + case Enums::timingExprSMul: + ret = static_cast<int64_t>(left_value) * + static_cast<int64_t>(right_value); + break; + case Enums::timingExprSDiv: + if (right_value != 0) { + ret = static_cast<int64_t>(left_value) / + static_cast<int64_t>(right_value); + } + break; + case Enums::timingExprEqual: + ret = left_value == right_value; + break; + case Enums::timingExprNotEqual: + ret = left_value != right_value; + break; + case 
Enums::timingExprULessThan: + ret = left_value < right_value; + break; + case Enums::timingExprUGreaterThan: + ret = left_value > right_value; + break; + case Enums::timingExprSLessThan: + ret = static_cast<int64_t>(left_value) < + static_cast<int64_t>(right_value); + break; + case Enums::timingExprSGreaterThan: + ret = static_cast<int64_t>(left_value) > + static_cast<int64_t>(right_value); + break; + case Enums::timingExprAnd: + ret = (left_value != 0) && (right_value != 0); + break; + case Enums::timingExprOr: + ret = (left_value != 0) || (right_value != 0); + break; + default: + break; + } + + return ret; +} + +uint64_t TimingExprIf::eval(TimingExprEvalContext &context) +{ + uint64_t cond_value = cond->eval(context); + + if (cond_value != 0) + return trueExpr->eval(context); + else + return falseExpr->eval(context); +} + +TimingExprLiteral * +TimingExprLiteralParams::create() +{ + return new TimingExprLiteral(this); +} + +TimingExprSrcReg * +TimingExprSrcRegParams::create() +{ + return new TimingExprSrcReg(this); +} + +TimingExprReadIntReg * +TimingExprReadIntRegParams::create() +{ + return new TimingExprReadIntReg(this); +} + +TimingExprLet * +TimingExprLetParams::create() +{ + return new TimingExprLet(this); +} + +TimingExprRef * +TimingExprRefParams::create() +{ + return new TimingExprRef(this); +} + +TimingExprUn * +TimingExprUnParams::create() +{ + return new TimingExprUn(this); +} + +TimingExprBin * +TimingExprBinParams::create() +{ + return new TimingExprBin(this); +} + +TimingExprIf * +TimingExprIfParams::create() +{ + return new TimingExprIf(this); +} diff --git a/src/cpu/timing_expr.hh b/src/cpu/timing_expr.hh new file mode 100644 index 000000000..d2c38ea90 --- /dev/null +++ b/src/cpu/timing_expr.hh @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved. 
+ * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/* + * These classes define an expression language over uint64_t with only + * a few operators. This can be used to form expressions for the extra + * delay required in variable execution time instructions. + * + * During evaluation, expressions have access to the ThreadContext and + * a StaticInst. + */ + +#ifndef __CPU_TIMING_EXPR_HH__ +#define __CPU_TIMING_EXPR_HH__ + +#include "cpu/static_inst.hh" +#include "cpu/thread_context.hh" +#include "enums/TimingExprOp.hh" +#include "params/TimingExpr.hh" +#include "params/TimingExprBin.hh" +#include "params/TimingExprIf.hh" +#include "params/TimingExprLet.hh" +#include "params/TimingExprLiteral.hh" +#include "params/TimingExprReadIntReg.hh" +#include "params/TimingExprRef.hh" +#include "params/TimingExprSrcReg.hh" +#include "params/TimingExprUn.hh" +#include "sim/sim_object.hh" + +/** These classes are just the C++ counterparts for those in TimingExpr.py and + * are, therefore, documented there */ + +class TimingExprLet; + +/** Object to gather the visible context for evaluation */ +class TimingExprEvalContext +{ + public: + /** Special visible context */ + StaticInstPtr inst; + ThreadContext *thread; + + /** Context visible as sub expressions. results will hold the results + * of (lazily) evaluating let's expressions. 
resultAvailable elements + * are true when a result has actually been evaluated */ + TimingExprLet *let; + std::vector<uint64_t> results; + std::vector<bool > resultAvailable; + + TimingExprEvalContext(StaticInstPtr inst_, + ThreadContext *thread_, TimingExprLet *let_); +}; + +class TimingExpr : public SimObject +{ + public: + TimingExpr(const TimingExprParams *params) : + SimObject(params) + { } + + virtual uint64_t eval(TimingExprEvalContext &context) = 0; +}; + +class TimingExprLiteral : public TimingExpr +{ + public: + uint64_t value; + + TimingExprLiteral(const TimingExprLiteralParams *params) : + TimingExpr(params), + value(params->value) + { } + + uint64_t eval(TimingExprEvalContext &context) { return value; } +}; + +class TimingExprSrcReg : public TimingExpr +{ + public: + unsigned int index; + + TimingExprSrcReg(const TimingExprSrcRegParams *params) : + TimingExpr(params), + index(params->index) + { } + + uint64_t eval(TimingExprEvalContext &context); +}; + +class TimingExprReadIntReg : public TimingExpr +{ + public: + TimingExpr *reg; + + TimingExprReadIntReg(const TimingExprReadIntRegParams *params) : + TimingExpr(params), + reg(params->reg) + { } + + uint64_t eval(TimingExprEvalContext &context); +}; + +class TimingExprLet : public TimingExpr +{ + public: + std::vector<TimingExpr *> defns; + TimingExpr *expr; + + TimingExprLet(const TimingExprLetParams *params) : + TimingExpr(params), + defns(params->defns), + expr(params->expr) + { } + + uint64_t eval(TimingExprEvalContext &context); +}; + +class TimingExprRef : public TimingExpr +{ + public: + unsigned int index; + + TimingExprRef(const TimingExprRefParams *params) : + TimingExpr(params), + index(params->index) + { } + + uint64_t eval(TimingExprEvalContext &context); +}; + +class TimingExprUn : public TimingExpr +{ + public: + Enums::TimingExprOp op; + TimingExpr *arg; + + TimingExprUn(const TimingExprUnParams *params) : + TimingExpr(params), + op(params->op), + arg(params->arg) + { } + + uint64_t 
eval(TimingExprEvalContext &context); +}; + +class TimingExprBin : public TimingExpr +{ + public: + Enums::TimingExprOp op; + TimingExpr *left; + TimingExpr *right; + + TimingExprBin(const TimingExprBinParams *params) : + TimingExpr(params), + op(params->op), + left(params->left), + right(params->right) + { } + + uint64_t eval(TimingExprEvalContext &context); +}; + +class TimingExprIf : public TimingExpr +{ + public: + TimingExpr *cond; + TimingExpr *trueExpr; + TimingExpr *falseExpr; + + TimingExprIf(const TimingExprIfParams *params) : + TimingExpr(params), + cond(params->cond), + trueExpr(params->trueExpr), + falseExpr(params->falseExpr) + { } + + uint64_t eval(TimingExprEvalContext &context); +}; + +#endif diff --git a/src/doc/inside-minor.doxygen b/src/doc/inside-minor.doxygen new file mode 100644 index 000000000..e55f61c01 --- /dev/null +++ b/src/doc/inside-minor.doxygen @@ -0,0 +1,1091 @@ +# Copyright (c) 2014 ARM Limited +# All rights reserved +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Andrew Bardsley + +namespace Minor +{ + +/*! + +\page minor Inside the Minor CPU model + +\tableofcontents + +This document contains a description of the structure and function of the +Minor gem5 in-order processor model. It is recommended reading for anyone who +wants to understand Minor's internal organisation, design decisions, C++ +implementation and Python configuration. A familiarity with gem5 and some of +its internal structures is assumed. 
This document is meant to be read +alongside the Minor source code and to explain its general structure without +being too slavish about naming every function and data type. + +\section whatis What is Minor? + +Minor is an in-order processor model with a fixed pipeline but configurable +data structures and execute behaviour. It is intended to be used to model +processors with strict in-order execution behaviour and allows visualisation +of an instruction's position in the pipeline through the +MinorTrace/minorview.py format/tool. The intention is to provide a framework +for micro-architecturally correlating the model with a particular, chosen +processor with similar capabilities. + +\section philo Design philosophy + +\subsection mt Multithreading + +The model isn't currently capable of multithreading but there are THREAD +comments in key places where stage data needs to be arrayed to support +multithreading. + +\subsection structs Data structures + +Decorating data structures with large amounts of life-cycle information is +avoided. Only instructions (MinorDynInst) have a significant proportion of +data members whose values are not set at construction. + +All internal structures have fixed sizes on construction. Data held in queues +and FIFOs (MinorBuffer, FUPipeline) should have a BubbleIF interface to +allow a distinct 'bubble'/no data value option for each type. + +Inter-stage 'struct' data is packaged in structures which are passed by value. +Only MinorDynInst, the line data in ForwardLineData and the memory-interfacing +objects Fetch1::FetchRequest and LSQ::LSQRequest are '::new' allocated while +running the model. + +\section model Model structure + +Objects of class MinorCPU are provided by the model to gem5. MinorCPU +(cpu.hh) implements the interfaces of BaseCPU and can provide data and +instruction interfaces for connection to a cache system. The model is +configured in a similar way to other gem5 models through Python. 
That +configuration is passed on to MinorCPU::pipeline (of class Pipeline) which +actually implements the processor pipeline. + +The hierarchy of major unit ownership from MinorCPU down looks like this: + +<ul> +<li>MinorCPU</li> +<ul> + <li>Pipeline - container for the pipeline, owns the cyclic 'tick' + event mechanism and the idling (cycle skipping) mechanism.</li> + <ul> + <li>Fetch1 - instruction fetch unit responsible for fetching cache + lines (or parts of lines from the I-cache interface)</li> + <ul> + <li>Fetch1::IcachePort - interface to the I-cache from + Fetch1</li> + </ul> + <li>Fetch2 - line to instruction decomposition</li> + <li>Decode - instruction to micro-op decomposition</li> + <li>Execute - instruction execution and data memory + interface</li> + <ul> + <li>LSQ - load store queue for memory ref. instructions</li> + <li>LSQ::DcachePort - interface to the D-cache from + Execute</li> + </ul> + </ul> + </ul> +</ul> + +\section keystruct Key data structures + +\subsection ids Instruction and line identity: InstId (dyn_inst.hh) + +An InstId contains the sequence numbers and thread numbers that describe the +life cycle and instruction stream affiliations of individual fetched cache +lines and instructions. 
+ +An InstId is printed in one of the following forms: + + - T/S.P/L - for fetched cache lines + - T/S.P/L/F - for instructions before Decode + - T/S.P/L/F.E - for instructions from Decode onwards + +for example: + + - 0/10.12/5/6.7 + +InstId's fields are: + +<table> +<tr> + <td><b>Field</b></td> + <td><b>Symbol</b></td> + <td><b>Generated by</b></td> + <td><b>Checked by</b></td> + <td><b>Function</b></td> +</tr> + +<tr> + <td>InstId::threadId</td> + <td>T</td> + <td>Fetch1</td> + <td>Everywhere the thread number is needed</td> + <td>Thread number (currently always 0).</td> +</tr> + +<tr> + <td>InstId::streamSeqNum</td> + <td>S</td> + <td>Execute</td> + <td>Fetch1, Fetch2, Execute (to discard lines/insts)</td> + <td>Stream sequence number as chosen by Execute. Stream + sequence numbers change after changes of PC (branches, exceptions) in + Execute and are used to separate pre and post branch instruction + streams.</td> +</tr> + +<tr> + <td>InstId::predictionSeqNum</td> + <td>P</td> + <td>Fetch2</td> + <td>Fetch2 (while discarding lines after prediction)</td> + <td>Prediction sequence numbers represent branch prediction decisions. + This is used by Fetch2 to mark lines/instructions according to the last + followed branch prediction made by Fetch2. Fetch2 can signal to Fetch1 + that it should change its fetch address and mark lines with a new + prediction sequence number (which it will only do if the stream sequence + number Fetch1 expects matches that of the request). </td> </tr> + +<tr> +<td>InstId::lineSeqNum</td> +<td>L</td> +<td>Fetch1</td> +<td>(Just for debugging)</td> +<td>Line fetch sequence number of this cache line or the line + this instruction was extracted from. + </td> +</tr> + +<tr> +<td>InstId::fetchSeqNum</td> +<td>F</td> +<td>Fetch2</td> +<td>Fetch2 (as the inst. sequence number for branches)</td> +<td>Instruction fetch order assigned by Fetch2 when lines + are decomposed into instructions. 
+ </td> +</tr> + +<tr> +<td>InstId::execSeqNum</td> +<td>E</td> +<td>Decode</td> +<td>Execute (to check instruction identity in queues/FUs/LSQ)</td> +<td>Instruction order after micro-op decomposition.</td> +</tr> + +</table> + +The sequence number fields are all independent of each other and although, for +instance, InstId::execSeqNum for an instruction will always be >= +InstId::fetchSeqNum, the comparison is not useful. + +The originating stage of each sequence number field keeps a counter for that +field which can be incremented in order to generate new, unique numbers. + +\subsection insts Instructions: MinorDynInst (dyn_inst.hh) + +MinorDynInst represents an instruction's progression through the pipeline. An +instruction can be three things: + +<table> +<tr> + <td><b>Thing</b></td> + <td><b>Predicate</b></td> + <td><b>Explanation</b></td> +</tr> +<tr> + <td>A bubble</td> + <td>MinorDynInst::isBubble()</td> + <td>no instruction at all, just a space-filler</td> +</tr> +<tr> + <td>A fault</td> + <td>MinorDynInst::isFault()</td> + <td>a fault to pass down the pipeline in an instruction's clothing</td> +</tr> +<tr> + <td>A decoded instruction</td> + <td>MinorDynInst::isInst()</td> + <td>instructions are actually passed to the gem5 decoder in Fetch2 and so + are created fully decoded. MinorDynInst::staticInst is the decoded + instruction form.</td> +</tr> +</table> + +Instructions are reference counted using the gem5 RefCountingPtr +(base/refcnt.hh) wrapper. They therefore usually appear as MinorDynInstPtr in +code. Note that as RefCountingPtr initialises as nullptr rather than an +object that supports BubbleIF::isBubble, passing raw MinorDynInstPtrs to +Queue%s and other similar structures from stage.hh without boxing is +dangerous. + +\subsection fld ForwardLineData (pipe_data.hh) + +ForwardLineData is used to pass cache lines from Fetch1 to Fetch2. 
Like +MinorDynInst%s, they can be bubbles (ForwardLineData::isBubble()), +fault-carrying or can contain a line (partial line) fetched by Fetch1. The +data carried by ForwardLineData is owned by a Packet object returned from +memory and is explicitly memory managed and so must be deleted once processed +(by Fetch2 deleting the Packet). + +\subsection fid ForwardInstData (pipe_data.hh) + +ForwardInstData can contain up to ForwardInstData::width() instructions in its +ForwardInstData::insts vector. This structure is used to carry instructions +between Fetch2, Decode and Execute and to store input buffer vectors in Decode +and Execute. + +\subsection fr Fetch1::FetchRequest (fetch1.hh) + +FetchRequests represent I-cache line fetch requests. They are used in the +memory queues of Fetch1 and are pushed into/popped from Packet::senderState +while traversing the memory system. + +FetchRequests contain a memory system Request (mem/request.hh) for that fetch +access, a packet (Packet, mem/packet.hh), if the request gets to memory, and a +fault field that can be populated with a TLB-sourced prefetch fault (if any). + +\subsection lsqr LSQ::LSQRequest (execute.hh) + +LSQRequests are similar to FetchRequests but for D-cache accesses. They carry +the instruction associated with a memory access. + +\section pipeline The pipeline + +\verbatim +------------------------------------------------------------------------------ + Key: + + [] : inter-stage BufferBuffer + ,--. + | | : pipeline stage + `--' + ---> : forward communication + <--- : backward communication + + rv : reservation information for input buffers + + ,------. ,------. ,------. ,-------. 
+ (from --[]-v->|Fetch1|-[]->|Fetch2|-[]->|Decode|-[]->|Execute|--> (to Fetch1 + Execute) | | |<-[]-| |<-rv-| |<-rv-| | & Fetch2) + | `------'<-rv-| | | | | | + `-------------->| | | | | | + `------' `------' `-------' +------------------------------------------------------------------------------ +\endverbatim + +The four pipeline stages are connected together by MinorBuffer FIFO +(stage.hh, derived ultimately from TimeBuffer) structures which allow +inter-stage delays to be modelled. There is a MinorBuffer between adjacent +stages in the forward direction (for example: passing lines from Fetch1 to +Fetch2) and, between Fetch2 and Fetch1, a buffer in the backwards direction +carrying branch predictions. + +Stages Fetch2, Decode and Execute have input buffers which, each cycle, can +accept input data from the previous stage and can hold that data if the stage +is not ready to process it. Input buffers store data in the same form as it +is received and so Decode and Execute's input buffers contain the output +instruction vector (ForwardInstData (pipe_data.hh)) from their previous stages +with the instructions and bubbles in the same positions as a single buffer +entry. + +Stage input buffers provide a Reservable (stage.hh) interface to their +previous stages, to allow slots to be reserved in their input buffers, and +communicate their input buffer occupancy backwards to allow the previous stage +to plan whether it should make an output in a given cycle. + +\subsection events Event handling: MinorActivityRecorder (activity.hh, +pipeline.hh) + +Minor is essentially a cycle-callable model with some ability to skip cycles +based on pipeline activity. External events are mostly received by callbacks +(e.g. Fetch1::IcachePort::recvTimingResp) and cause the pipeline to be woken +up to service advancing request queues. + +Ticked (sim/ticked.hh) is a base class bringing together an evaluate +member function and a provided SimObject. 
It provides a Ticked::start/stop +interface to start and pause clock events from being periodically issued. +Pipeline is a derived class of Ticked. + +During evaluate calls, stages can signal that they still have work to do in +the next cycle by calling either MinorCPU::activityRecorder->activity() (for +non-callback related activity) or MinorCPU::wakeupOnEvent(<stageId>) (for +stage callback-related 'wakeup' activity). + +Pipeline::evaluate contains calls to evaluate for each unit and a test for +pipeline idling which can turn off the clock tick if no unit has signalled +that it may become active next cycle. + +Within Pipeline (pipeline.hh), the stages are evaluated in reverse order (and +so will ::evaluate in reverse order) and their backwards data can be +read immediately after being written in each cycle allowing output decisions +to be 'perfect' (allowing synchronous stalling of the whole pipeline). Branch +predictions from Fetch2 to Fetch1 can also be transported in 0 cycles making +fetch1ToFetch2BackwardDelay the only configurable delay which can be set as +low as 0 cycles. + +The MinorCPU::activateContext and MinorCPU::suspendContext interface can be +called to start and pause threads (threads in the MT sense) and to start and +pause the pipeline. Executing instructions can call this interface +(indirectly through the ThreadContext) to idle the CPU/their threads. 
+ +\subsection stages Each pipeline stage + +In general, the behaviour of a stage (each cycle) is: + +\verbatim + evaluate: + push input to inputBuffer + setup references to input/output data slots + + do 'every cycle' 'step' tasks + + if there is input and there is space in the next stage: + process and generate a new output + maybe re-activate the stage + + send backwards data + + if the stage generated output to the following FIFO: + signal pipe activity + + if the stage has more processable input and space in the next stage: + re-activate the stage for the next cycle + + commit the push to the inputBuffer if that data hasn't all been used +\endverbatim + +The Execute stage differs from this model as its forward output (branch) data +is unconditionally sent to Fetch1 and Fetch2. To allow this behaviour, Fetch1 +and Fetch2 must be unconditionally receptive to that data. + +\subsection fetch1 Fetch1 stage + +Fetch1 is responsible for fetching cache lines or partial cache lines from the +I-cache and passing them on to Fetch2 to be decomposed into instructions. It +can receive 'change of stream' indications from both Execute and Fetch2 to +signal that it should change its internal fetch address and tag newly fetched +lines with new stream or prediction sequence numbers. When both Execute and +Fetch2 signal changes of stream at the same time, Fetch1 takes Execute's +change. + +Every line issued by Fetch1 will bear a unique line sequence number which can +be used for debugging stream changes. + +When fetching from the I-cache, Fetch1 will ask for data from the current +fetch address (Fetch1::pc) up to the end of the 'data snap' size set in the +parameter fetch1LineSnapWidth. Subsequent autonomous line fetches will fetch +whole lines at a snap boundary and of size fetch1LineWidth. + +Fetch1 will only initiate a memory fetch if it can reserve space in Fetch2 +input buffer. That input buffer serves as the fetch queue/LFL for the system. 
+ +Fetch1 contains two queues: requests and transfers to handle the stages of +translating the address of a line fetch (via the TLB) and accommodating the +request/response of fetches to/from memory. + +Fetch requests from Fetch1 are pushed into the requests queue as newly +allocated FetchRequest objects once they have been sent to the ITLB with a +call to itb->translateTiming. + +A response from the TLB moves the request from the requests queue to the +transfers queue. If there is more than one entry in each queue, it is +possible to get a TLB response for a request which is not at the head of the +requests queue. In that case, the TLB response is marked up as a state change +to Translated in the request object, and advancing the request to transfers +(and the memory system) is left to calls to Fetch1::stepQueues which is called +in the cycle following the receipt of any event. + +Fetch1::tryToSendToTransfers is responsible for moving requests between the +two queues and issuing requests to memory. Failed TLB lookups (prefetch +aborts) continue to occupy space in the queues until they are recovered at the +head of transfers. + +Responses from memory change the request object state to Complete and +Fetch1::evaluate can pick up response data, package it in the ForwardLineData +object, and forward it to Fetch2%'s input buffer. + +As space is always reserved in Fetch2::inputBuffer, setting the input buffer's +size to 1 results in non-prefetching behaviour. + +When a change of stream occurs, translated requests queue members and +completed transfers queue members can be unconditionally discarded to make way +for new transfers. + +\subsection fetch2 Fetch2 stage + +Fetch2 receives a line from Fetch1 into its input buffer. The data in the +head line in that buffer is iterated over and separated into individual +instructions which are packed into a vector of instructions which can be +passed to Decode. 
Packing instructions can be aborted early if a fault is +found in either the input line as a whole or a decomposed instruction. + +\subsubsection bp Branch prediction + +Fetch2 contains the branch prediction mechanism. This is a wrapper around the +branch predictor interface provided by gem5 (cpu/pred/...). + +Branches are predicted for any control instructions found. If prediction is +attempted for an instruction, the MinorDynInst::triedToPredict flag is set on +that instruction. + +When a branch is predicted to be taken, the MinorDynInst::predictedTaken flag is +set and MinorDynInst::predictedTarget is set to the predicted target PC value. +The predicted branch instruction is then packed into Fetch2%'s output vector, +the prediction sequence number is incremented, and the branch is communicated +to Fetch1. + +After signalling a prediction, Fetch2 will discard its input buffer contents +and will reject any new lines which have the same stream sequence number as +that branch but have a different prediction sequence number. This allows +following sequentially fetched lines to be rejected without ignoring new lines +generated by a change of stream indicated from a 'real' branch from Execute +(which will have a new stream sequence number). + +The program counter value provided to Fetch2 by Fetch1 packets is only updated +when there is a change of stream. Fetch2::havePC indicates whether the PC +will be picked up from the next processed input line. Fetch2::havePC is +necessary to allow line-wrapping instructions to be tracked through decode. + +Branches (and instructions predicted to branch) which are processed by Execute +will generate BranchData (pipe_data.hh) data explaining the outcome of the +branch which is sent forwards to Fetch1 and Fetch2. Fetch1 uses this data to +change stream (and update its stream sequence number and address for new +lines). Fetch2 uses it to update the branch predictor. 
Minor does not +communicate branch data to the branch predictor for instructions which are +discarded on the way to commit. + +BranchData::BranchReason (pipe_data.hh) encodes the possible branch scenarios: + +<table> +<tr> + <td>Branch enum val.</td> + <td>In Execute</td> + <td>Fetch1 reaction</td> + <td>Fetch2 reaction</td> +</tr> +<tr> + <td>NoBranch</td> + <td>(output bubble data)</td> + <td>-</td> + <td>-</td> +</tr> +<tr> + <td>CorrectlyPredictedBranch</td> + <td>Predicted, taken</td> + <td>-</td> + <td>Update BP as taken branch</td> +</tr> +<tr> + <td>UnpredictedBranch</td> + <td>Not predicted, taken and was taken</td> + <td>New stream</td> + <td>Update BP as taken branch</td> +</tr> +<tr> + <td>BadlyPredictedBranch</td> + <td>Predicted, not taken</td> + <td>New stream to restore to old inst. source</td> + <td>Update BP as not taken branch</td> +</tr> +<tr> + <td>BadlyPredictedBranchTarget</td> + <td>Predicted, taken, but to a different target than predicted one</td> + <td>New stream</td> + <td>Update BTB to new target</td> +</tr> +<tr> + <td>SuspendThread</td> + <td>Hint to suspend fetching</td> + <td>Suspend fetch for this thread (branch to next inst. as wakeup + fetch addr)</td> + <td>-</td> +</tr> +<tr> + <td>Interrupt</td> + <td>Interrupt detected</td> + <td>New stream</td> + <td>-</td> +</tr> +</table> + +The parameter decodeInputWidth sets the number of instructions which can be +packed into the output per cycle. If the parameter fetch2CycleInput is true, +Fetch2 can try to take lines from more than one entry in its input +buffer per cycle. + +\subsection decode Decode stage + +Decode takes a vector of instructions from Fetch2 (via its input buffer) and +decomposes those instructions into micro-ops (if necessary) and packs them +into its output instruction vector. + +The parameter executeInputWidth sets the number of instructions which can be +packed into the output per cycle. 
If the parameter decodeCycleInput is true, +Decode can try to take instructions from more than one entry in its input +buffer per cycle. + +\subsection execute Execute stage + +Execute provides all the instruction execution and memory access mechanisms. +An instruction's passage through Execute can take multiple cycles with its +precise timing modelled by a functional unit pipeline FIFO. + +A vector of instructions (possibly including fault 'instructions') is provided +to Execute by Decode and can be queued in the Execute input buffer before +being issued. Setting the parameter executeCycleInput allows execute to +examine more than one input buffer entry (more than one instruction vector). +The number of instructions in the input vector can be set with +executeInputWidth and the depth of the input buffer can be set with parameter +executeInputBufferSize. + +\subsubsection fus Functional units + +The Execute stage contains pipelines for each functional unit comprising the +computational core of the CPU. Functional units are configured via the +executeFuncUnits parameter. Each functional unit has a number of instruction +classes it supports, a stated delay between instruction issues, and a delay +from instruction issue to (possible) commit and an optional timing annotation +capable of more complicated timing. 
+ +Each active cycle, Execute::evaluate performs this action: + +\verbatim + Execute::evaluate: + push input to inputBuffer + setup references to input/output data slots and branch output slot + + step D-cache interface queues (similar to Fetch1) + + if interrupt posted: + take interrupt (signalling branch to Fetch1/Fetch2) + else + commit instructions + issue new instructions + + advance functional unit pipelines + + reactivate Execute if the unit is still active + + commit the push to the inputBuffer if that data hasn't all been used +\endverbatim + +\subsubsection fifos Functional unit FIFOs + +Functional units are implemented as SelfStallingPipelines (stage.hh). These +are TimeBuffer FIFOs with two distinct 'push' and 'pop' wires. They respond +to SelfStallingPipeline::advance in the same way as TimeBuffers <b>unless</b> +there is data at the far, 'pop', end of the FIFO. A 'stalled' flag is +provided for signalling stalling and to allow a stall to be cleared. The +intention is to provide a pipeline for each functional unit which will never +advance an instruction out of that pipeline until it has been processed and +the pipeline is explicitly unstalled. + +The actions 'issue', 'commit', and 'advance' act on the functional units. + +\subsubsection issue Issue + +Issuing instructions involves iterating over both the input buffer +instructions and the heads of the functional units to try and issue +instructions in order. The number of instructions which can be issued each +cycle is limited by the parameter executeIssueLimit, how executeCycleInput is +set, the availability of pipeline space and the policy used to choose a +pipeline in which the instruction can be issued. + +At present, the only issue policy is strict round-robin visiting of each +pipeline with the given instructions in sequence. For greater flexibility, +better (and more specific policies) will need to be possible. 
+ +Memory operation instructions traverse their functional units to perform their +EA calculations. On 'commit', the ExecContext::initiateAcc execution phase is +performed and any memory access is issued (via ExecContext::{read,write}Mem +calling LSQ::pushRequest) to the LSQ. + +Note that faults are issued as if they are instructions and can (currently) be +issued to *any* functional unit. + +Every issued instruction is also pushed into the Execute::inFlightInsts queue. +Memory ref. instructions are pushed into the Execute::inFUMemInsts queue. + +\subsubsection commit Commit + +Instructions are committed by examining the head of the Execute::inFlightInsts +queue (which is decorated with the functional unit number to which the +instruction was issued). Instructions which can then be found in their +functional units are executed and popped from Execute::inFlightInsts. + +Memory operation instructions are committed into the memory queues (as +described above) and exit their functional unit pipeline but are not popped +from the Execute::inFlightInsts queue. The Execute::inFUMemInsts queue +provides ordering to memory operations as they pass through the functional +units (maintaining issue order). On entering the LSQ, instructions are popped +from Execute::inFUMemInsts. + +If the parameter executeAllowEarlyMemoryIssue is set, memory operations can be +sent from their FU to the LSQ before reaching the head of +Execute::inFlightInsts but after their dependencies are met. +MinorDynInst::instToWaitFor is marked up with the latest dependent instruction +execSeqNum required to be committed for a memory operation to progress to the +LSQ. + +Once a memory response is available (by testing the head of +Execute::inFlightInsts against LSQ::findResponse), commit will process that +response (ExecContext::completeAcc) and pop the instruction from +Execute::inFlightInsts. + +Any branch, fault or interrupt will cause a stream sequence number change and +signal a branch to Fetch1/Fetch2. 
Only instructions with the current stream +sequence number will be issued and/or committed. + +\subsubsection advance Advance + +All non-stalled pipelines are advanced and may, thereafter, become stalled. +Potential activity in the next cycle is signalled if there are any +instructions remaining in any pipeline. + +\subsubsection sb Scoreboard + +The scoreboard (Scoreboard) is used to control instruction issue. It contains +a count of the number of in flight instructions which will write each general +purpose CPU integer or float register. Instructions will only be issued where +the scoreboard contains a count of 0 instructions which will write to one of +the instruction's source registers. + +Once an instruction is issued, the scoreboard counts for each destination +register for an instruction will be incremented. + +The estimated delivery time of the instruction's result is marked up in the +scoreboard by adding the length of the issued-to FU to the current time. The +timings parameter on each FU provides a list of additional rules for +calculating the delivery time. These are documented in the parameter comments +in MinorCPU.py. + +On commit, (for memory operations, memory response commit) the scoreboard +counters for an instruction's destination registers are decremented. + +\subsubsection ifi Execute::inFlightInsts + +The Execute::inFlightInsts queue will always contain all instructions in +flight in Execute in the correct issue order. Execute::issue is the only +process which will push an instruction into the queue. Execute::commit is the +only process that can pop an instruction. + +\subsubsection lsq LSQ + +The LSQ can support multiple outstanding transactions to memory in a number of +conservative cases. + +There are three queues to contain requests: requests, transfers and the store +buffer. The requests and transfers queue operate in a similar manner to the +queues in Fetch1. 
The store buffer is used to decouple the delay of +completing store operations from following loads. + +Requests are issued to the DTLB as their instructions leave their functional +unit. At the head of requests, cacheable load requests can be sent to memory +and on to the transfers queue. Cacheable stores will be passed to transfers +unprocessed and progress that queue maintaining order with other transactions. + +The conditions in LSQ::tryToSendToTransfers dictate when requests can +be sent to memory. + +All uncacheable transactions, split transactions and locked transactions are +processed in order at the head of requests. Additionally, store results +residing in the store buffer can have their data forwarded to cacheable loads +(removing the need to perform a read from memory) but no cacheable load can be +issued to the transfers queue until that queue's stores have drained into the +store buffer. + +At the end of transfers, requests which are LSQ::LSQRequest::Complete (are +faulting, are cacheable stores, or have been sent to memory and received a +response) can be picked off by Execute and committed +(ExecContext::completeAcc) and, for stores, sent to the store buffer. + +Barrier instructions do not prevent cacheable loads from progressing to memory +but do cause a stream change which will discard that load. Stores will not be +committed to the store buffer if they are in the shadow of the barrier but +before the new instruction stream has arrived at Execute. As all other memory +transactions are delayed at the end of the requests queue until they are at +the head of Execute::inFlightInsts, they will be discarded by any barrier +stream change. + +After commit, LSQ::BarrierDataRequest requests are inserted into the +store buffer to track each barrier until all preceding memory transactions +have drained from the store buffer. No further memory transactions will be +issued from the ends of FUs until after the barrier has drained. 
+ +\subsubsection drain Draining + +Draining is mostly handled by the Execute stage. When initiated by calling +MinorCPU::drain, Pipeline::evaluate checks the draining status of each unit +each cycle and keeps the pipeline active until draining is complete. It is +Pipeline that signals the completion of draining. Execute is triggered by +MinorCPU::drain and starts stepping through its Execute::DrainState state +machine, starting from state Execute::NotDraining, in this order: + +<table> +<tr> + <td><b>State</b></td> + <td><b>Meaning</b></td> +</tr> +<tr> + <td>Execute::NotDraining</td> + <td>Not trying to drain, normal execution</td> +</tr> +<tr> + <td>Execute::DrainCurrentInst</td> + <td>Draining micro-ops to complete inst.</td> +</tr> +<tr> + <td>Execute::DrainHaltFetch</td> + <td>Halt fetching instructions</td> +</tr> +<tr> + <td>Execute::DrainAllInsts</td> + <td>Discarding all instructions presented</td> +</tr> +</table> + +When complete, a drained Execute unit will be in the Execute::DrainAllInsts +state where it will continue to discard instructions but has no knowledge of +the drained state of the rest of the model. + +\section debug Debug options + +The model provides a number of debug flags which can be passed to gem5 with +the --debug-flags option. 
+ +The available flags are: + +<table> +<tr> + <td><b>Debug flag</b></td> + <td><b>Unit which will generate debugging output</b></td> +</tr> +<tr> + <td>Activity</td> + <td>Debug ActivityMonitor actions</td> +</tr> +<tr> + <td>Branch</td> + <td>Fetch2 and Execute branch prediction decisions</td> +</tr> +<tr> + <td>MinorCPU</td> + <td>CPU global actions such as wakeup/thread suspension</td> +</tr> +<tr> + <td>Decode</td> + <td>Decode</td> +</tr> +<tr> + <td>MinorExec</td> + <td>Execute behaviour</td> +</tr> +<tr> + <td>Fetch</td> + <td>Fetch1 and Fetch2</td> +</tr> +<tr> + <td>MinorInterrupt</td> + <td>Execute interrupt handling</td> +</tr> +<tr> + <td>MinorMem</td> + <td>Execute memory interactions</td> +</tr> +<tr> + <td>MinorScoreboard</td> + <td>Execute scoreboard activity</td> +</tr> +<tr> + <td>MinorTrace</td> + <td>Generate MinorTrace cyclic state trace output (see below)</td> +</tr> +<tr> + <td>MinorTiming</td> + <td>MinorTiming instruction timing modification operations</td> +</tr> +</table> + +The group flag Minor enables all of the flags beginning with Minor. + +\section trace MinorTrace and minorview.py + +The debug flag MinorTrace causes cycle-by-cycle state data to be printed which +can then be processed and viewed by the minorview.py tool. This output is +very verbose and so it is recommended it only be used for small examples. + +\subsection traceformat MinorTrace format + +There are three types of line outputted by MinorTrace: + +\subsubsection state MinorTrace - Ticked unit cycle state + +For example: + +\verbatim + 110000: system.cpu.dcachePort: MinorTrace: state=MemoryRunning in_tlb_mem=0/0 +\endverbatim + +For each time step, the MinorTrace flag will cause one MinorTrace line to be +printed for every named element in the model. 
+ +\subsubsection traceunit MinorInst - summaries of instructions issued by \ + Decode + +For example: + +\verbatim + 140000: system.cpu.execute: MinorInst: id=0/1.1/1/1.1 addr=0x5c \ + inst=" mov r0, #0" class=IntAlu +\endverbatim + +MinorInst lines are currently only generated for instructions which are +committed. + +\subsubsection tracefetch1 MinorLine - summaries of line fetches issued by \ + Fetch1 + +For example: + +\verbatim + 92000: system.cpu.icachePort: MinorLine: id=0/1.1/1 size=36 \ + vaddr=0x5c paddr=0x5c +\endverbatim + +\subsection minorview minorview.py + +Minorview (util/minorview.py) can be used to visualise the data created by +MinorTrace. + +\verbatim +usage: minorview.py [-h] [--picture picture-file] [--prefix name] + [--start-time time] [--end-time time] [--mini-views] + event-file + +Minor visualiser + +positional arguments: + event-file + +optional arguments: + -h, --help show this help message and exit + --picture picture-file + markup file containing blob information (default: + <minorview-path>/minor.pic) + --prefix name name prefix in trace for CPU to be visualised + (default: system.cpu) + --start-time time time of first event to load from file + --end-time time time of last event to load from file + --mini-views show tiny views of the next 10 time steps +\endverbatim + +Raw debugging output can be passed to minorview.py as the event-file. It will +pick out the MinorTrace lines; other lines where units in the +simulation are named (such as system.cpu.dcachePort in the above example) will +appear as 'comments' when units are clicked on in the visualiser. + +Clicking on a unit which contains instructions or lines will bring up a speech +bubble giving extra information derived from the MinorInst/MinorLine lines. + +--start-time and --end-time allow only sections of debug files to be loaded. + +--prefix allows the name prefix of the CPU to be inspected to be supplied. +This defaults to 'system.cpu'. 
+ +In the visualiser, the buttons Start, End, Back, Forward, Play and Stop can be +used to control the displayed simulation time. + +The diagonally striped coloured blocks are showing the InstId of the +instruction or line they represent. Note that lines in Fetch1 and f1ToF2.F +only show the id fields of a line and that instructions in Fetch2, f2ToD, and +decode.inputBuffer do not yet have execute sequence numbers. The T/S.P/L/F.E +buttons can be used to toggle parts of InstId on and off to make it easier to +understand the display. Useful combinations are: + +<table> +<tr> + <td><b>Combination</b></td> + <td><b>Reason</b></td> +</tr> +<tr> + <td>E</td> + <td>just show the final execute sequence number</td> +</tr> +<tr> + <td>F/E</td> + <td>show the instruction-related numbers</td> +</tr> +<tr> + <td>S/P</td> + <td>show just the stream-related numbers (watch the stream sequence + change with branches and not change with predicted branches)</td> +</tr> +<tr> + <td>S/E</td> + <td>show instructions and their stream</td> +</tr> +</table> + +The key to the right shows all the displayable colours (some of the colour +choices are quite bad!): + +<table> +<tr> + <td><b>Symbol</b></td> + <td><b>Meaning</b></td> +</tr> +<tr> + <td>U</td> + <td>Unknown data</td> +</tr> +<tr> + <td>B</td> + <td>Blocked stage</td> +</tr> +<tr> + <td>-</td> + <td>Bubble</td> +</tr> +<tr> + <td>E</td> + <td>Empty queue slot</td> +</tr> +<tr> + <td>R</td> + <td>Reserved queue slot</td> +</tr> +<tr> + <td>F</td> + <td>Fault</td> +</tr> +<tr> + <td>r</td> + <td>Read (used as the leftmost stripe on data in the dcachePort)</td> +</tr> +<tr> + <td>w</td> + <td>Write (used as the leftmost stripe on data in the dcachePort)</td> +</tr> +<tr> + <td>0 to 9</td> + <td>last decimal digit of the corresponding data</td> +</tr> +</table> + +\verbatim + + ,---------------. .--------------. *U + | |=|->|=|->|=| | ||=|||->||->|| | *- <- Fetch queues/LSQ + `---------------' `--------------' *R + === ====== *w <- Activity/Stage activity + ,--------------. *1 + ,--. ,.
,. | ============ | *3 <- Scoreboard + | |-\[]-\||-\[]-\||-\[]-\| ============ | *5 <- Execute::inFlightInsts + | | :[] :||-/[]-/||-/[]-/| -. -------- | *7 + | |-/[]-/|| ^ || | | --------- | *9 + | | || | || | | ------ | +[]->| | ->|| | || | | ---- | + | |<-[]<-||<-+-<-||<-[]<-| | ------ |->[] <- Execute to Fetch1, + '--` `' ^ `' | -' ------ | Fetch2 branch data + ---. | ---. `--------------' + ---' | ---' ^ ^ + | ^ | `------------ Execute + MinorBuffer ----' input `-------------------- Execute input buffer + buffer +\endverbatim + +Stages show the colours of the instructions currently being +generated/processed. + +Forward FIFOs between stages show the data being pushed into them at the +current tick (to the left), the data in transit, and the data available at +their outputs (to the right). + +The backwards FIFO between Fetch2 and Fetch1 shows branch prediction data. + +In general, all displayed data is correct at the end of a cycle's activity at +the time indicated but before the inter-stage FIFOs are ticked. Each FIFO +has, therefore an extra slot to show the asserted new input data, and all the +data currently within the FIFO. + +Input buffers for each stage are shown below the corresponding stage and show +the contents of those buffers as horizontal strips. Strips marked as reserved +(cyan by default) are reserved to be filled by the previous stage. An input +buffer with all reserved or occupied slots will, therefore, block the previous +stage from generating output. + +Fetch queues and LSQ show the lines/instructions in the queues of each +interface and show the number of lines/instructions in TLB and memory in the +two striped colours of the top of their frames. + +Inside Execute, the horizontal bars represent the individual FU pipelines. +The vertical bar to the left is the input buffer and the bar to the right, the +instructions committed this cycle. 
The background of Execute shows +instructions which are being committed this cycle in their original FU +pipeline positions. + +The strip at the top of the Execute block shows the current streamSeqNum that +Execute is committing. A similar stripe at the top of Fetch1 shows that +stage's expected streamSeqNum and the stripe at the top of Fetch2 shows its +issuing predictionSeqNum. + +The scoreboard shows the number of instructions in flight which will commit a +result to the register in the position shown. The scoreboard contains slots +for each integer and floating point register. + +The Execute::inFlightInsts queue shows all the instructions in flight in +Execute with the oldest instruction (the next instruction to be committed) to +the right. + +'Stage activity' shows the signalled activity (as E/1) for each stage (with +CPU miscellaneous activity to the left). + +'Activity' shows a count of stage and pipe activity. + +\subsection picformat minor.pic format + +The minor.pic file (src/minor/minor.pic) describes the layout of the +model's blocks on the visualiser. Its format is described in the supplied +minor.pic file. + +*/ + +} diff --git a/src/sim/SConscript b/src/sim/SConscript index 5a5c1ab8a..9f9022f30 100644 --- a/src/sim/SConscript +++ b/src/sim/SConscript @@ -32,6 +32,7 @@ Import('*') SimObject('BaseTLB.py') SimObject('ClockedObject.py') +SimObject('TickedObject.py') SimObject('Root.py') SimObject('ClockDomain.py') SimObject('VoltageDomain.py') @@ -51,6 +52,7 @@ Source('serialize.cc') Source('drain.cc') Source('sim_events.cc') Source('sim_object.cc') +Source('ticked_object.cc') Source('simulate.cc') Source('stat_control.cc') Source('clock_domain.cc') diff --git a/src/sim/TickedObject.py b/src/sim/TickedObject.py new file mode 100644 index 000000000..a566aac92 --- /dev/null +++ b/src/sim/TickedObject.py @@ -0,0 +1,43 @@ +# Copyright (c) 2014 ARM Limited +# All rights reserved.
+# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Andrew Bardsley + +from ClockedObject import ClockedObject + +class TickedObject(ClockedObject): + type = 'TickedObject' + abstract = True + cxx_header = "sim/ticked_object.hh" diff --git a/src/sim/ticked_object.cc b/src/sim/ticked_object.cc new file mode 100644 index 000000000..22a149388 --- /dev/null +++ b/src/sim/ticked_object.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +#include "sim/ticked_object.hh" + +Ticked::Ticked(ClockedObject &object_, + Stats::Scalar *imported_num_cycles, + Event::Priority priority) : + object(object_), + event(*this, priority), + running(false), + lastStopped(0), + /* Allocate numCycles if an external stat wasn't passed in */ + numCyclesLocal((imported_num_cycles ? NULL : new Stats::Scalar)), + numCycles((imported_num_cycles ? 
*imported_num_cycles : + *numCyclesLocal)) +{ } + +void +Ticked::regStats() +{ + if (numCyclesLocal) { + numCycles + .name(object.name() + ".totalTickCycles") + .desc("Number of cycles that the object ticked or was stopped"); + } + + tickCycles + .name(object.name() + ".tickCycles") + .desc("Number of cycles that the object actually ticked"); + + idleCycles + .name(object.name() + ".idleCycles") + .desc("Total number of cycles that the object has spent stopped"); + idleCycles = numCycles - tickCycles; +} + +void +Ticked::serialize(std::ostream &os) +{ + uint64_t lastStoppedUint = lastStopped; + + paramOut(os, "lastStopped", lastStoppedUint); +} + +void +Ticked::unserialize(Checkpoint *cp, const std::string &section) +{ + uint64_t lastStoppedUint; + + paramIn(cp, section, "lastStopped", lastStoppedUint); + + lastStopped = Cycles(lastStoppedUint); +} + +TickedObject::TickedObject(TickedObjectParams *params, + Event::Priority priority) : + ClockedObject(params), + /* Pass NULL so that Ticked allocates its own numCycles stat */ + Ticked(*this, NULL, priority) +{ } + +void +TickedObject::regStats() +{ + Ticked::regStats(); +} + +void +TickedObject::serialize(std::ostream &os) +{ + Ticked::serialize(os); + ClockedObject::serialize(os); +} +void +TickedObject::unserialize(Checkpoint *cp, const std::string &section) +{ + Ticked::unserialize(cp, section); + ClockedObject::unserialize(cp, section); +} diff --git a/src/sim/ticked_object.hh b/src/sim/ticked_object.hh new file mode 100644 index 000000000..5bca92443 --- /dev/null +++ b/src/sim/ticked_object.hh @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder.
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * Base classes for ClockedObjects which have evaluate functions to + * look like clock ticking operations. 
TickedObject attaches gem5's event + * queue to Ticked to apply actual scheduling. + */ + +#ifndef __SIM_TICKED_OBJECT_HH__ +#define __SIM_TICKED_OBJECT_HH__ + +#include "params/TickedObject.hh" +#include "sim/clocked_object.hh" + +/** Ticked attaches gem5's event queue/scheduler to evaluate + * calls and provides a start/stop interface to ticking. + * + * Ticked is not a ClockedObject but can be attached to one by + * inheritance and by calling regStats, serialize/unserialize */ +class Ticked +{ + protected: + /** An event to call process periodically */ + class ClockEvent : public Event + { + public: + Ticked &owner; + + ClockEvent(Ticked &owner_, Priority priority) : + Event(priority), + owner(owner_) + { } + + /** Evaluate and reschedule */ + void + process() + { + ++owner.tickCycles; + ++owner.numCycles; + owner.evaluate(); + if (owner.running) { + owner.object.schedule(this, + owner.object.clockEdge(Cycles(1))); + } + } + }; + + friend class ClockEvent; + + /** ClockedObject who is responsible for this Ticked's actions/stats */ + ClockedObject &object; + + /** The single instance of ClockEvent used */ + ClockEvent event; + + /** Have I been started and not subsequently stopped? */ + bool running; + + /** Time of last stop event to calculate run time */ + Cycles lastStopped; + + private: + /** Locally allocated stats */ + Stats::Scalar *numCyclesLocal; + + protected: + /** Total number of cycles either ticked or spent stopped */ + Stats::Scalar &numCycles; + + /** Number of cycles ticked */ + Stats::Scalar tickCycles; + + /** Number of cycles stopped */ + Stats::Formula idleCycles; + + public: + Ticked(ClockedObject &object_, + Stats::Scalar *imported_num_cycles = NULL, + Event::Priority priority = Event::CPU_Tick_Pri); + + virtual ~Ticked() { } + + /** Register {num,tick}Cycles if necessary.
If numCycles is + * imported, be sure to register it *before* calling this regStats */ + void regStats(); + + /** Start ticking */ + void + start() + { + if (!running) { + if (!event.scheduled()) + object.schedule(event, object.clockEdge(Cycles(1))); + running = true; + numCycles += cyclesSinceLastStopped(); + } + } + + /** How long have we been stopped for? */ + Cycles + cyclesSinceLastStopped() const + { + return object.curCycle() - lastStopped; + } + + /** Reset stopped time to current time */ + void + resetLastStopped() + { + lastStopped = object.curCycle(); + } + + /** Cancel the next tick event and issue no more */ + void + stop() + { + if (running) { + if (event.scheduled()) + object.deschedule(event); + running = false; + resetLastStopped(); + } + } + + /** Checkpoint lastStopped */ + void serialize(std::ostream &os); + void unserialize(Checkpoint *cp, const std::string &section); + + /** Action to call on the clock tick */ + virtual void evaluate() = 0; +}; + +/** TickedObject attaches Ticked to ClockedObject and can be used as + * a base class where ticked operation is needed */ +class TickedObject : public ClockedObject, public Ticked +{ + public: + TickedObject(TickedObjectParams *params, + Event::Priority priority = Event::CPU_Tick_Pri); + + /** Disambiguate to make these functions overload correctly */ + using ClockedObject::regStats; + using ClockedObject::serialize; + using ClockedObject::unserialize; + + /** Pass on regStats, serialize etc. onto Ticked */ + void regStats(); + void serialize(std::ostream &os); + void unserialize(Checkpoint *cp, const std::string &section); +}; + +#endif /* __SIM_TICKED_OBJECT_HH__ */ |