Diffstat (limited to 'src')
44 files changed, 13521 insertions, 1 deletion
diff --git a/src/base/trace.hh b/src/base/trace.hh index dbeffdc8b..eb0ab9dae 100644 --- a/src/base/trace.hh +++ b/src/base/trace.hh @@ -72,6 +72,20 @@ struct StringWrap inline const std::string &name() { return Trace::DefaultName; } +// Interface for things with names. (cf. SimObject but without other +// functionality). This is useful when using DPRINTF +class Named +{ + protected: + const std::string _name; + + public: + Named(const std::string &name_) : _name(name_) { } + + public: + const std::string &name() const { return _name; } +}; + // // DPRINTF is a debugging trace facility that allows one to // selectively enable tracing statements. To use DPRINTF, there must diff --git a/src/cpu/SConscript b/src/cpu/SConscript index ca9c6a791..1ea92114a 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -106,6 +106,7 @@ SimObject('ExeTracer.py') SimObject('IntelTrace.py') SimObject('IntrControl.py') SimObject('NativeTrace.py') +SimObject('TimingExpr.py') Source('activity.cc') Source('base.cc') @@ -123,6 +124,7 @@ Source('static_inst.cc') Source('simple_thread.cc') Source('thread_context.cc') Source('thread_state.cc') +Source('timing_expr.cc') if env['TARGET_ISA'] == 'sparc': SimObject('LegionTrace.py') diff --git a/src/cpu/TimingExpr.py b/src/cpu/TimingExpr.py new file mode 100644 index 000000000..6a9d6f95c --- /dev/null +++ b/src/cpu/TimingExpr.py @@ -0,0 +1,176 @@ +# Copyright (c) 2013-2014 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Andrew Bardsley + +from m5.params import * +from m5.SimObject import SimObject + +# These classes define an expression language over uint64_t with only +# a few operators. This can be used to form expressions for the extra +# delay required in variable execution time instructions. +# +# Expressions, in evaluation, will have access to the ThreadContext and +# a StaticInst + +class TimingExpr(SimObject): + type = 'TimingExpr' + cxx_header = 'cpu/timing_expr.hh' + abstract = True + +class TimingExprLiteral(TimingExpr): + """Literal 64 bit unsigned value""" + type = 'TimingExprLiteral' + cxx_header = 'cpu/timing_expr.hh' + + value = Param.UInt64("literal value") + + def set_params(self, value): + self.value = value + return self + +class TimingExpr0(TimingExprLiteral): + """Convenient 0""" + value = 0 + +class TimingExprSrcReg(TimingExpr): + """Find the source register number from the current inst""" + type = 'TimingExprSrcReg' + cxx_header = 'cpu/timing_expr.hh' + + index = Param.Unsigned("index into inst src regs") + + def set_params(self, index): + self.index = index + return self + +class TimingExprReadIntReg(TimingExpr): + """Read an architectural register""" + type = 'TimingExprReadIntReg' + cxx_header = 'cpu/timing_expr.hh' + + reg = Param.TimingExpr("register raw index to read") + + def set_params(self, reg): + self.reg = reg + return self + +class TimingExprLet(TimingExpr): + """Block of declarations""" + type = 'TimingExprLet' + cxx_header = 'cpu/timing_expr.hh' + + defns = VectorParam.TimingExpr("expressions for bindings") + expr = Param.TimingExpr("body expression") + + def set_params(self, defns, expr): + self.defns = defns + self.expr = expr + return self + +class TimingExprRef(TimingExpr): + """Value of a bound sub-expression""" + type = 'TimingExprRef' + cxx_header = 'cpu/timing_expr.hh' + + index = Param.Unsigned("expression index") + + def set_params(self, index): + self.index = index + return self + +class TimingExprOp(Enum): + vals = [ + 'timingExprAdd', 'timingExprSub', + 'timingExprUMul', 'timingExprUDiv', + 'timingExprSMul', 'timingExprSDiv', + 'timingExprUCeilDiv', # Unsigned divide rounding up + 'timingExprEqual', 'timingExprNotEqual', + 'timingExprULessThan', + 'timingExprUGreaterThan', + 'timingExprSLessThan', + 'timingExprSGreaterThan', + 'timingExprInvert', + 'timingExprNot', + 'timingExprAnd', + 'timingExprOr', + 'timingExprSizeInBits', + 'timingExprSignExtend32To64', + 'timingExprAbs' + ] + +class TimingExprUn(TimingExpr): + """Unary operator""" + type = 'TimingExprUn' + cxx_header = 'cpu/timing_expr.hh' + + op = Param.TimingExprOp("operator") + arg = Param.TimingExpr("expression") + + def set_params(self, op, arg): + self.op = op + self.arg = arg + return self + +class TimingExprBin(TimingExpr): + """Binary operator""" + type = 'TimingExprBin' + cxx_header = 'cpu/timing_expr.hh' + + op = Param.TimingExprOp("operator") + left = Param.TimingExpr("LHS expression") + right = Param.TimingExpr("RHS expression") + + def set_params(self, op, left, right): + self.op = op + self.left = left + self.right = right + return self + +class TimingExprIf(TimingExpr): + """If-then-else operator""" + type = 'TimingExprIf' + cxx_header = 'cpu/timing_expr.hh' + + cond = Param.TimingExpr("condition expression") + trueExpr = Param.TimingExpr("true expression") + falseExpr = Param.TimingExpr("false expression") + + def set_params(self, cond, trueExpr, falseExpr): + self.cond = cond + self.trueExpr = trueExpr + 
self.falseExpr = falseExpr + return self diff --git a/src/cpu/minor/MinorCPU.py b/src/cpu/minor/MinorCPU.py new file mode 100644 index 000000000..07953cf5a --- /dev/null +++ b/src/cpu/minor/MinorCPU.py @@ -0,0 +1,274 @@ +# Copyright (c) 2012-2014 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2007 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Gabe Black +# Nathan Binkert +# Andrew Bardsley + +from m5.defines import buildEnv +from m5.params import * +from m5.proxy import * +from m5.SimObject import SimObject +from BaseCPU import BaseCPU +from DummyChecker import DummyChecker +from BranchPredictor import BranchPredictor +from TimingExpr import TimingExpr + +from FuncUnit import OpClass + +class MinorOpClass(SimObject): + """Boxing of OpClass to get around build problems and provide a hook for + future additions to OpClass checks""" + + type = 'MinorOpClass' + cxx_header = "cpu/minor/func_unit.hh" + + opClass = Param.OpClass("op class to match") + +class MinorOpClassSet(SimObject): + """A set of matchable op classes""" + + type = 'MinorOpClassSet' + cxx_header = "cpu/minor/func_unit.hh" + + opClasses = VectorParam.MinorOpClass([], "op classes to be matched." 
+ " An empty list means any class") + +class MinorFUTiming(SimObject): + type = 'MinorFUTiming' + cxx_header = "cpu/minor/func_unit.hh" + + mask = Param.UInt64(0, "mask for testing ExtMachInst") + match = Param.UInt64(0, "match value for testing ExtMachInst:" + " (ext_mach_inst & mask) == match") + suppress = Param.Bool(False, "if true, this inst. is not executed by" + " this FU") + extraCommitLat = Param.Cycles(0, "extra cycles to stall commit for" + " this inst.") + extraCommitLatExpr = Param.TimingExpr(NULL, "extra cycles as a" + " run-time evaluated expression") + extraAssumedLat = Param.Cycles(0, "extra cycles to add to scoreboard" + " retire time for this insts dest registers once it leaves the" + " functional unit. For mem refs, if this is 0, the result's time" + " is marked as unpredictable and no forwarding can take place.") + srcRegsRelativeLats = VectorParam.Cycles("the maximum number of cycles" + " after inst. issue that each src reg can be available for this" + " inst. to issue") + opClasses = Param.MinorOpClassSet(MinorOpClassSet(), + "op classes to be considered for this decode. An empty set means any" + " class") + description = Param.String('', "description string of the decoding/inst." + " class") + +def minorMakeOpClassSet(op_classes): + """Make a MinorOpClassSet from a list of OpClass enum value strings""" + def boxOpClass(op_class): + return MinorOpClass(opClass=op_class) + + return MinorOpClassSet(opClasses=map(boxOpClass, op_classes)) + +class MinorFU(SimObject): + type = 'MinorFU' + cxx_header = "cpu/minor/func_unit.hh" + + opClasses = Param.MinorOpClassSet(MinorOpClassSet(), "type of operations" + " allowed on this functional unit") + opLat = Param.Cycles(1, "latency in cycles") + issueLat = Param.Cycles(1, "cycles until another instruction can be" + " issued") + timings = VectorParam.MinorFUTiming([], "extra decoding rules") + + cantForwardFromFUIndices = VectorParam.Unsigned([], + "list of FU indices from which this FU can't receive and early" + " (forwarded) result") + +class MinorFUPool(SimObject): + type = 'MinorFUPool' + cxx_header = "cpu/minor/func_unit.hh" + + funcUnits = VectorParam.MinorFU("functional units") + +class MinorDefaultIntFU(MinorFU): + opClasses = minorMakeOpClassSet(['IntAlu']) + timings = [MinorFUTiming(description="Int", + srcRegsRelativeLats=[2])] + opLat = 3 + +class MinorDefaultIntMulFU(MinorFU): + opClasses = minorMakeOpClassSet(['IntMult']) + timings = [MinorFUTiming(description='Mul', + srcRegsRelativeLats=[0])] + opLat = 3 + +class MinorDefaultIntDivFU(MinorFU): + opClasses = minorMakeOpClassSet(['IntDiv']) + issueLat = 9 + opLat = 9 + +class MinorDefaultFloatSimdFU(MinorFU): + opClasses = minorMakeOpClassSet([ + 'FloatAdd', 'FloatCmp', 'FloatCvt', 'FloatMult', 'FloatDiv', + 'FloatSqrt', + 'SimdAdd', 'SimdAddAcc', 'SimdAlu', 'SimdCmp', 'SimdCvt', + 'SimdMisc', 'SimdMult', 'SimdMultAcc', 'SimdShift', 'SimdShiftAcc', + 'SimdSqrt', 'SimdFloatAdd', 'SimdFloatAlu', 'SimdFloatCmp', + 'SimdFloatCvt', 'SimdFloatDiv', 'SimdFloatMisc', 'SimdFloatMult', + 'SimdFloatMultAcc', 'SimdFloatSqrt']) + timings = [MinorFUTiming(description='FloatSimd', + srcRegsRelativeLats=[2])] + opLat = 6 + +class MinorDefaultMemFU(MinorFU): + opClasses = minorMakeOpClassSet(['MemRead', 'MemWrite']) + timings = [MinorFUTiming(description='Mem', + srcRegsRelativeLats=[1], extraAssumedLat=2)] + opLat = 1 + +class MinorDefaultMiscFU(MinorFU): + opClasses = minorMakeOpClassSet(['IprAccess', 'InstPrefetch']) + opLat = 1 + +class MinorDefaultFUPool(MinorFUPool): + 
funcUnits = [MinorDefaultIntFU(), MinorDefaultIntFU(), + MinorDefaultIntMulFU(), MinorDefaultIntDivFU(), + MinorDefaultFloatSimdFU(), MinorDefaultMemFU(), + MinorDefaultMiscFU()] + +class MinorCPU(BaseCPU): + type = 'MinorCPU' + cxx_header = "cpu/minor/cpu.hh" + + @classmethod + def memory_mode(cls): + return 'timing' + + @classmethod + def require_caches(cls): + return True + + @classmethod + def support_take_over(cls): + return True + + fetch1FetchLimit = Param.Unsigned(1, + "Number of line fetches allowable in flight at once") + fetch1LineSnapWidth = Param.Unsigned(0, + "Fetch1 'line' fetch snap size in bytes" + " (0 means use system cache line size)") + fetch1LineWidth = Param.Unsigned(0, + "Fetch1 maximum fetch size in bytes (0 means use system cache" + " line size)") + fetch1ToFetch2ForwardDelay = Param.Cycles(1, + "Forward cycle delay from Fetch1 to Fetch2 (1 means next cycle)") + fetch1ToFetch2BackwardDelay = Param.Cycles(1, + "Backward cycle delay from Fetch2 to Fetch1 for branch prediction" + " signalling (0 means in the same cycle, 1 means the next cycle)") + + fetch2InputBufferSize = Param.Unsigned(2, + "Size of input buffer to Fetch2 in cycles-worth of insts.") + fetch2ToDecodeForwardDelay = Param.Cycles(1, + "Forward cycle delay from Fetch2 to Decode (1 means next cycle)") + fetch2CycleInput = Param.Bool(True, + "Allow Fetch2 to cross input lines to generate full output each" + " cycle") + + decodeInputBufferSize = Param.Unsigned(3, + "Size of input buffer to Decode in cycles-worth of insts.") + decodeToExecuteForwardDelay = Param.Cycles(1, + "Forward cycle delay from Decode to Execute (1 means next cycle)") + decodeInputWidth = Param.Unsigned(2, + "Width (in instructions) of input to Decode (and implicitly" + " Decode's own width)") + decodeCycleInput = Param.Bool(True, + "Allow Decode to pack instructions from more than one input cycle" + " to fill its output each cycle") + + executeInputWidth = Param.Unsigned(2, + "Width (in instructions) of input to Execute") + executeCycleInput = Param.Bool(True, + "Allow Execute to use instructions from more than one input cycle" + " each cycle") + executeIssueLimit = Param.Unsigned(2, + "Number of issuable instructions in Execute each cycle") + executeMemoryIssueLimit = Param.Unsigned(1, + "Number of issuable memory instructions in Execute each cycle") + executeCommitLimit = Param.Unsigned(2, + "Number of committable instructions in Execute each cycle") + executeMemoryCommitLimit = Param.Unsigned(1, + "Number of committable memory references in Execute each cycle") + executeInputBufferSize = Param.Unsigned(7, + "Size of input buffer to Execute in cycles-worth of insts.") + executeMemoryWidth = Param.Unsigned(0, + "Width (and snap) in bytes of the data memory interface. 
(0 means use" + " the system cacheLineSize)") + executeMaxAccessesInMemory = Param.Unsigned(2, + "Maximum number of concurrent accesses allowed to the memory system" + " from the dcache port") + executeLSQMaxStoreBufferStoresPerCycle = Param.Unsigned(2, + "Maximum number of stores that the store buffer can issue per cycle") + executeLSQRequestsQueueSize = Param.Unsigned(1, + "Size of LSQ requests queue (address translation queue)") + executeLSQTransfersQueueSize = Param.Unsigned(2, + "Size of LSQ transfers queue (memory transaction queue)") + executeLSQStoreBufferSize = Param.Unsigned(5, + "Size of LSQ store buffer") + executeBranchDelay = Param.Cycles(1, + "Delay from Execute deciding to branch and Fetch1 reacting" + " (1 means next cycle)") + + executeFuncUnits = Param.MinorFUPool(MinorDefaultFUPool(), + "Functional units for this processor") + + executeSetTraceTimeOnCommit = Param.Bool(True, + "Set inst. trace times to be commit times") + executeSetTraceTimeOnIssue = Param.Bool(False, + "Set inst. trace times to be issue times") + + executeAllowEarlyMemoryIssue = Param.Bool(True, + "Allow mem refs to be issued to the LSQ before reaching the head of" + " the in flight insts queue") + + enableIdling = Param.Bool(True, + "Enable cycle skipping when the processor is idle\n") + + branchPred = Param.BranchPredictor(BranchPredictor( + numThreads = Parent.numThreads), "Branch Predictor") + + def addCheckerCpu(self): + print "Checker not yet supported by MinorCPU" + exit(1) diff --git a/src/cpu/minor/SConscript b/src/cpu/minor/SConscript new file mode 100644 index 000000000..2234f9a8d --- /dev/null +++ b/src/cpu/minor/SConscript @@ -0,0 +1,73 @@ +# -*- mode:python -*- + +# Copyright (c) 2013-2014 ARM Limited +# All rights reserved +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2006 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Nathan Binkert +# Andrew Bardsley + +Import('*') + +if 'MinorCPU' in env['CPU_MODELS']: + SimObject('MinorCPU.py') + + Source('activity.cc') + Source('cpu.cc') + Source('decode.cc') + Source('dyn_inst.cc') + Source('execute.cc') + Source('fetch1.cc') + Source('fetch2.cc') + Source('func_unit.cc') + Source('lsq.cc') + Source('pipe_data.cc') + Source('pipeline.cc') + Source('scoreboard.cc') + Source('stats.cc') + + DebugFlag('MinorCPU', 'Minor CPU-level events') + DebugFlag('MinorExecute', 'Minor Execute stage') + DebugFlag('MinorInterrupt', 'Minor interrupt handling') + DebugFlag('MinorMem', 'Minor memory accesses') + DebugFlag('MinorScoreboard', 'Minor Execute register scoreboard') + DebugFlag('MinorTrace', 'MinorTrace cycle-by-cycle state trace') + DebugFlag('MinorTiming', 'Extra timing for instructions') + + CompoundFlag('Minor', [ + 'MinorCPU', 'MinorExecute', 'MinorInterrupt', 'MinorMem', + 'MinorScoreboard']) diff --git a/src/cpu/minor/SConsopts b/src/cpu/minor/SConsopts new file mode 100644 index 000000000..68c420779 --- /dev/null +++ b/src/cpu/minor/SConsopts @@ -0,0 +1,45 @@ +# -*- mode:python -*- + +# Copyright (c) 2012-2014 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Andrew Bardsley + +Import('*') + +CpuModel('MinorCPU', 'minor_cpu_exec.cc', + '#include "cpu/minor/exec_context.hh"', + { 'CPU_exec_context': 'Minor::ExecContext' }, + default=True) diff --git a/src/cpu/minor/activity.cc b/src/cpu/minor/activity.cc new file mode 100644 index 000000000..8e322d3e7 --- /dev/null +++ b/src/cpu/minor/activity.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +#include <sstream> + +#include "cpu/minor/activity.hh" +#include "cpu/minor/trace.hh" + +namespace Minor +{ + +void +MinorActivityRecorder::minorTrace() const +{ + std::ostringstream stages; + unsigned int num_stages = getNumStages(); + + unsigned int stage_index = 0; + while (stage_index < num_stages) { + stages << (getStageActive(stage_index) ? 
'1' : 'E'); + + stage_index++; + if (stage_index != num_stages) + stages << ','; + } + + MINORTRACE("activity=%d stages=%s\n", getActivityCount(), stages.str()); +} + +} diff --git a/src/cpu/minor/activity.hh b/src/cpu/minor/activity.hh new file mode 100644 index 000000000..e38c476c0 --- /dev/null +++ b/src/cpu/minor/activity.hh @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * ActivityRecorder from cpu/activity.hh wrapped to provide evaluate and + * minorTrace. 
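+ *
+ * A minimal usage sketch (the constructor arguments here are
+ * illustrative, not values from the model):
+ *
+ *   MinorActivityRecorder rec("activity", 4, 16);
+ *   rec.activateStage(0);  // note that stage 0 did work this cycle
+ *   rec.evaluate();        // Ticked interface: advance one cycle
+ *   rec.minorTrace();      // emit an "activity=... stages=..." line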
+ */ + +#ifndef __CPU_MINOR_ACTIVITY_HH__ +#define __CPU_MINOR_ACTIVITY_HH__ + +#include "cpu/activity.hh" + +namespace Minor +{ + +/** ActivityRecorder with a Ticked interface */ +class MinorActivityRecorder : public ActivityRecorder +{ + public: + /** Ticked interface */ + void evaluate() { advance(); } + void minorTrace() const; + + public: + MinorActivityRecorder(const std::string &name, int num_stages, + int longest_latency) : + ActivityRecorder(name, num_stages, longest_latency, 0) + { } +}; + +} + +#endif /* __CPU_MINOR_ACTIVITY_HH__ */ diff --git a/src/cpu/minor/buffers.hh b/src/cpu/minor/buffers.hh new file mode 100644 index 000000000..f4ae91a70 --- /dev/null +++ b/src/cpu/minor/buffers.hh @@ -0,0 +1,653 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * Classes for buffer, queue and FIFO behaviour. + */ + +#ifndef __CPU_MINOR_BUFFERS_HH__ +#define __CPU_MINOR_BUFFERS_HH__ + +#include <iostream> +#include <queue> +#include <sstream> + +#include "cpu/minor/trace.hh" +#include "cpu/activity.hh" +#include "cpu/timebuf.hh" + +namespace Minor +{ + +/** Interface class for data with reporting/tracing facilities. This + * interface doesn't actually have to be used as other classes which need + * this interface use templating rather than inheritance, but it's provided + * here to document the interface needed by those classes. 
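+ *
+ * A hypothetical element type implementing the interface directly:
+ *
+ *   struct MyDatum : public ReportIF
+ *   {
+ *       int value;
+ *       void reportData(std::ostream &os) const { os << value; }
+ *   };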
*/ +class ReportIF +{ + public: + /** Print the data in a format suitable to be the value in "name=value" + * trace lines */ + virtual void reportData(std::ostream &os) const = 0; + + virtual ~ReportIF() { } +}; + +/** Interface class for data with 'bubble' values. This interface doesn't + * actually have to be used as other classes which need this interface use + * templating rather than inheritance, but it's provided here to document + * the interface needed by those classes. */ +class BubbleIF +{ + public: + virtual bool isBubble() const = 0; +}; + +/** ...ReportTraits are trait classes with the same functionality as + * ReportIF, but with elements explicitly passed into the report... + * functions. */ + +/** Allow a template using ReportTraits to call report... functions of + * ReportIF-bearing elements themselves */ +template <typename ElemType> /* ElemType should implement ReportIF */ +class ReportTraitsAdaptor +{ + public: + static void + reportData(std::ostream &os, const ElemType &elem) + { elem.reportData(os); } +}; + +/** A similar adaptor but for elements held by pointer + * ElemType should implement ReportIF */ +template <typename PtrType> +class ReportTraitsPtrAdaptor +{ + public: + static void + reportData(std::ostream &os, const PtrType &elem) + { elem->reportData(os); } +}; + +/** ... BubbleTraits are trait classes to add BubbleIF interface + * functionality to templates which process elements which don't necessarily + * implement BubbleIF themselves */ + +/** Default behaviour, no bubbles */ +template <typename ElemType> +class NoBubbleTraits +{ + public: + static bool isBubble(const ElemType &) { return false; } + static ElemType bubble() { assert(false); } +}; + +/** Pass on call to the element */ +template <typename ElemType> +class BubbleTraitsAdaptor +{ + public: + static bool isBubble(const ElemType &elem) + { return elem.isBubble(); } + + static ElemType bubble() { return ElemType::bubble(); } +}; + +/** Pass on call to the element where the element is a pointer */ +template <typename PtrType, typename ElemType> +class BubbleTraitsPtrAdaptor +{ + public: + static bool isBubble(const PtrType &elem) + { return elem->isBubble(); } + + static PtrType bubble() { return ElemType::bubble(); } +}; + +/** TimeBuffer with MinorTrace and Named interfaces */ +template <typename ElemType, + typename ReportTraits = ReportTraitsAdaptor<ElemType>, + typename BubbleTraits = BubbleTraitsAdaptor<ElemType> > +class MinorBuffer : public Named, public TimeBuffer<ElemType> +{ + protected: + /** The range of elements that should appear in trace lines */ + int reportLeft, reportRight; + + /** Name to use for the data in a MinorTrace line */ + std::string dataName; + + public: + MinorBuffer(const std::string &name, + const std::string &data_name, + int num_past, int num_future, + int report_left = -1, int report_right = -1) : + Named(name), TimeBuffer<ElemType>(num_past, num_future), + reportLeft(report_left), reportRight(report_right), + dataName(data_name) + { } + + public: + /* Is this buffer full of only bubbles */ + bool + empty() const + { + bool ret = true; + + for (int i = -this->past; i <= this->future; i++) { + if (!BubbleTraits::isBubble((*this)[i])) + ret = false; + } + + return ret; + } + + /** Report buffer states from 'slot' 'from' to 'to'. For example 0,-1 + * will produce two slices with current (just assigned) and last (one + * advance() old) slices with the current (0) one on the left. 
+ * Reverse the numbers to change the order of slices */ + void + minorTrace() const + { + std::ostringstream data; + + int step = (reportLeft > reportRight ? -1 : 1); + int end = reportRight + step; + int i = reportLeft; + + while (i != end) { + const ElemType &datum = (*this)[i]; + + ReportTraits::reportData(data, datum); + i += step; + if (i != end) + data << ','; + } + + MINORTRACE("%s=%s\n", dataName, data.str()); + } +}; + +/** Wraps a MinorBuffer with Input/Output interfaces to ensure that units + * within the model can only see the right end of buffers between them. */ +template <typename Data> +class Latch +{ + public: + typedef MinorBuffer<Data> Buffer; + + protected: + /** Delays, in cycles, writing data into the latch and seeing it on the + * latched wires */ + Cycles delay; + + Buffer buffer; + + public: + /** forward/backwardDelay specify the delay from input to output in each + * direction. These arguments *must* be >= 1 */ + Latch(const std::string &name, + const std::string &data_name, + Cycles delay_ = Cycles(1), + bool report_backwards = false) : + delay(delay_), + buffer(name, data_name, delay_, 0, (report_backwards ? -delay_ : 0), + (report_backwards ? 0 : -delay_)) + { } + + public: + /** Encapsulate wires on either input or output of the latch. + * forward/backward correspond to data direction relative to the + * pipeline. Latched and Immediate specify delay for backward data. + * Immediate data is available to earlier stages *during* the cycle it + * is written */ + class Input + { + public: + typename Buffer::wire inputWire; + + public: + Input(typename Buffer::wire input_wire) : + inputWire(input_wire) + { } + }; + + class Output + { + public: + typename Buffer::wire outputWire; + + public: + Output(typename Buffer::wire output_wire) : + outputWire(output_wire) + { } + }; + + bool empty() const { return buffer.empty(); } + + /** An interface to just the input of the buffer */ + Input input() { return Input(buffer.getWire(0)); } + + /** An interface to just the output of the buffer */ + Output output() { return Output(buffer.getWire(-delay)); } + + void minorTrace() const { buffer.minorTrace(); } + + void evaluate() { buffer.advance(); } +}; + +/** A pipeline simulating class that will stall (not advance when advance() + * is called) if a non-bubble value lies at the far end of the pipeline. + * The user can clear the stall before calling advance to unstall the + * pipeline. 
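+ *
+ * A sketch of the intended use (element type and depth are
+ * illustrative):
+ *
+ *   SelfStallingPipeline<Elem, ReportTraitsAdaptor<Elem> > pipe(
+ *       "pipe", "elems", 3);
+ *   pipe.push(elem);            // fill the input slot
+ *   pipe.advance();             // move data on unless stalled
+ *   if (pipe.isPopable()) {
+ *       consume(pipe.front());  // take the element at the output
+ *       pipe.stalled = false;   // clear the stall so advance can drain it
+ *   }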
*/ +template <typename ElemType, + typename ReportTraits, + typename BubbleTraits = BubbleTraitsAdaptor<ElemType> > +class SelfStallingPipeline : public MinorBuffer<ElemType, ReportTraits> +{ + protected: + /** Wire at the input end of the pipeline (for convenience) */ + typename TimeBuffer<ElemType>::wire pushWire; + /** Wire at the output end of the pipeline (for convenience) */ + typename TimeBuffer<ElemType>::wire popWire; + + public: + /** If true, advance will not advance the pipeline */ + bool stalled; + + /** The number of slots with non-bubbles in them */ + unsigned int occupancy; + + public: + SelfStallingPipeline(const std::string &name, + const std::string &data_name, + unsigned depth) : + MinorBuffer<ElemType, ReportTraits> + (name, data_name, depth, 0, -1, -depth), + pushWire(this->getWire(0)), + popWire(this->getWire(-depth)), + stalled(false), + occupancy(0) + { + assert(depth > 0); + + /* Write explicit bubbles to get around the case where the default + * constructor for the element type isn't good enough */ + for (unsigned i = 0; i <= depth; i++) + (*this)[-i] = BubbleTraits::bubble(); + } + + public: + /** Write an element to the back of the pipeline. This doesn't cause + * the pipeline to advance until advance is called. Pushing twice + * without advance-ing will just cause an overwrite of the last push's + * data. */ + void push(ElemType &elem) + { + assert(!alreadyPushed()); + *pushWire = elem; + if (!BubbleTraits::isBubble(elem)) + occupancy++; + } + + /** Peek at the end element of the pipe */ + ElemType &front() { return *popWire; } + + const ElemType &front() const { return *popWire; } + + /** Have we already pushed onto this pipe without advancing */ + bool alreadyPushed() { return !BubbleTraits::isBubble(*pushWire); } + + /** There's data (not a bubble) at the end of the pipe */ + bool isPopable() { return !BubbleTraits::isBubble(front()); } + + /** Try to advance the pipeline. If we're stalled, don't advance. If + * we're not stalled, advance then check to see if we become stalled + * (a non-bubble at the end of the pipe) */ + void + advance() + { + bool data_at_end = isPopable(); + + if (!stalled) { + TimeBuffer<ElemType>::advance(); + /* If there was data at the end of the pipe that has now been + * advanced out of the pipe, we've lost data */ + if (data_at_end) + occupancy--; + /* Is there data at the end of the pipe now? */ + stalled = isPopable(); + /* Insert a bubble into the empty input slot to make sure that + * element is correct in the case where the default constructor + * for ElemType doesn't produce a bubble */ + ElemType bubble = BubbleTraits::bubble(); + *pushWire = bubble; + } + } +}; + +/** Base class for space reservation requestable objects */ +class Reservable +{ + public: + /** Can a slot be reserved? */ + virtual bool canReserve() const = 0; + + /** Reserve a slot in whatever structure this is attached to */ + virtual void reserve() = 0; + + /** Free a reserved slot */ + virtual void freeReservation() = 0; +}; + +/** Wrapper for a queue type to act as a pipeline stage input queue. + * Handles capacity management, bubble value suppression and provides + * reporting. + * + * In an ideal world, ElemType would be derived from ReportIF and BubbleIF, + * but here we use traits and allow the Adaptors ReportTraitsAdaptor and + * BubbleTraitsAdaptor to work on data which *does* directly implement + * those interfaces. 
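+ *
+ * A sketch of the reservation protocol (the element type is
+ * illustrative):
+ *
+ *   Queue<Elem> q("queue", "elems", 4);
+ *   if (q.canReserve())
+ *       q.reserve();  // claim a slot ahead of a later push
+ *   ...
+ *   q.push(elem);     // a non-bubble push consumes the reservation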
*/ +template <typename ElemType, + typename ReportTraits = ReportTraitsAdaptor<ElemType>, + typename BubbleTraits = BubbleTraitsAdaptor<ElemType> > +class Queue : public Named, public Reservable +{ + private: + std::deque<ElemType> queue; + + /** Number of slots currently reserved for future (reservation + * respecting) pushes */ + unsigned int numReservedSlots; + + /** Need this here as queues usually don't have a limited capacity */ + unsigned int capacity; + + /** Name to use for the data in MinorTrace */ + std::string dataName; + + public: + Queue(const std::string &name, const std::string &data_name, + unsigned int capacity_) : + Named(name), + numReservedSlots(0), + capacity(capacity_), + dataName(data_name) + { } + + virtual ~Queue() { } + + public: + /** Push an element into the buffer if it isn't a bubble. Bubbles are + * just discarded. It is assumed that any push into a queue with + * reserved space intends to take that space */ + void + push(ElemType &data) + { + if (!BubbleTraits::isBubble(data)) { + freeReservation(); + queue.push_back(data); + + if (queue.size() > capacity) { + warn("%s: No space to push data into queue of capacity" + " %u, pushing anyway\n", name(), capacity); + } + + } + } + + /** Clear all allocated space. Be careful how this is used */ + void clearReservedSpace() { numReservedSlots = 0; } + + /** Clear a single reserved slot */ + void freeReservation() + { + if (numReservedSlots != 0) + numReservedSlots--; + } + + /** Reserve space in the queue for future pushes. Enquiries about space + * in the queue using unreservedRemainingSpace will only tell about + * space which is not full and not reserved. */ + void + reserve() + { + /* Check reservable space */ + if (unreservedRemainingSpace() == 0) + warn("%s: No space is reservable in queue", name()); + + numReservedSlots++; + } + + bool canReserve() const { return unreservedRemainingSpace() != 0; } + + /** Number of slots available in an empty buffer */ + unsigned int totalSpace() const { return capacity; } + + /** Number of slots already occupied in this buffer */ + unsigned int occupiedSpace() const { return queue.size(); } + + /** Number of slots which are reserved. */ + unsigned int reservedSpace() const { return numReservedSlots; } + + /** Number of slots yet to fill in this buffer. This doesn't include + * reservation. */ + unsigned int + remainingSpace() const + { + int ret = capacity - queue.size(); + + return (ret < 0 ? 0 : ret); + } + + /** Like remainingSpace but does not count reserved spaces */ + unsigned int + unreservedRemainingSpace() const + { + int ret = capacity - (queue.size() + numReservedSlots); + + return (ret < 0 ? 0 : ret); + } + + /** Head value. Like std::queue::front */ + ElemType &front() { return queue.front(); } + + const ElemType &front() const { return queue.front(); } + + /** Pop the head item. Like std::queue::pop */ + void pop() { queue.pop_front(); } + + /** Is the queue empty? */ + bool empty() const { return queue.empty(); } + + void + minorTrace() const + { + std::ostringstream data; + /* If we become over-full, totalSpace() can actually be smaller than + * occupiedSpace(). Handle this */ + unsigned int num_total = (occupiedSpace() > totalSpace() ? 
+ occupiedSpace() : totalSpace()); + + unsigned int num_reserved = reservedSpace(); + unsigned int num_occupied = occupiedSpace(); + + int num_printed = 1; + /* Bodge to rotate queue to report elements */ + while (num_printed <= num_occupied) { + ReportTraits::reportData(data, queue[num_printed - 1]); + num_printed++; + + if (num_printed <= num_total) + data << ','; + } + + int num_printed_reserved = 1; + /* Show reserved slots */ + while (num_printed_reserved <= num_reserved && + num_printed <= num_total) + { + data << 'R'; + num_printed_reserved++; + num_printed++; + + if (num_printed <= num_total) + data << ','; + } + + /* And finally pad with empty slots (if there are any) */ + while (num_printed <= num_total) { + num_printed++; + + if (num_printed <= num_total) + data << ','; + } + + MINORTRACE("%s=%s\n", dataName, data.str()); + } +}; + +/** Like a Queue but with a restricted interface and a setTail function + * which, when the queue is empty, just takes a reference to the pushed + * item as the single element. Calling pushTail will push that element + * onto the queue. + * + * The purpose of this class is to allow the faster operation of queues of + * items which usually don't get deeper than one item and for which the copy + * associated with a push is expensive enough to want to avoid + * + * The intended use case is the input buffer for pipeline stages, hence the + * class name */ +template <typename ElemType, + typename ReportTraits = ReportTraitsAdaptor<ElemType>, + typename BubbleTraits = BubbleTraitsAdaptor<ElemType> > +class InputBuffer : public Reservable +{ + protected: + /** Underlying queue */ + mutable Queue<ElemType, ReportTraits, BubbleTraits> queue; + + /** Pointer to the single element (if not NULL) */ + mutable ElemType *elementPtr; + + public: + InputBuffer(const std::string &name, const std::string &data_name, + unsigned int capacity_) : + queue(name, data_name, capacity_), + elementPtr(NULL) + { } + + public: + /** Set the tail of the queue; this is like push but needs + * to be followed by pushTail for the new tail to make its + * way into the queue proper */ + void + setTail(ElemType &new_element) + { + assert(!elementPtr); + if (!BubbleTraits::isBubble(new_element)) { + if (queue.empty()) + elementPtr = &new_element; + else + queue.push(new_element); + } + } + + /** No single element or queue entries */ + bool empty() const { return !elementPtr && queue.empty(); } + + /** Return the element, or the front of the queue */ + const ElemType &front() const + { return (elementPtr ? *elementPtr : queue.front()); } + + ElemType &front() + { return (elementPtr ? *elementPtr : queue.front()); } + + /** Pop either the single element or, if there is none, the head of the + * queue */ + void + pop() + { + if (elementPtr) { + /* A popped element was expected to be pushed into the queue + * and so took a reserved space which must now be freed */ + elementPtr = NULL; + queue.freeReservation(); + } else { + queue.pop(); + } + } + + /** Push the single element (if any) into the queue proper. 
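+ * A typical sequence (inst is an illustrative element):
+ *
+ *   buffer.setTail(inst);  // remember a reference, no copy yet
+ *   ...
+ *   buffer.pushTail();     // now copy the element into the queue
+ *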
If the + * element's reference points to a transient object, remember to + * always do this before the end of that object's life */ + void + pushTail() const + { + if (elementPtr) + queue.push(*elementPtr); + elementPtr = NULL; + } + + /** Report elements */ + void + minorTrace() const + { + pushTail(); + queue.minorTrace(); + } + + /** Reservable interface, passed on to queue */ + bool canReserve() const { return queue.canReserve(); } + void reserve() { queue.reserve(); } + void freeReservation() { queue.freeReservation(); } + + /** Like remainingSpace but does not count reserved spaces */ + unsigned int + unreservedRemainingSpace() + { + pushTail(); + return queue.unreservedRemainingSpace(); + } +}; + +} + +#endif /* __CPU_MINOR_BUFFERS_HH__ */ diff --git a/src/cpu/minor/cpu.cc b/src/cpu/minor/cpu.cc new file mode 100644 index 000000000..f7007f6ff --- /dev/null +++ b/src/cpu/minor/cpu.cc @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2012-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +#include "arch/utility.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/dyn_inst.hh" +#include "cpu/minor/fetch1.hh" +#include "cpu/minor/pipeline.hh" +#include "debug/Drain.hh" +#include "debug/MinorCPU.hh" +#include "debug/Quiesce.hh" + +MinorCPU::MinorCPU(MinorCPUParams *params) : + BaseCPU(params), + drainManager(NULL) +{ + /* This is only written for one thread at the moment */ + Minor::MinorThread *thread; + + if (FullSystem) { + thread = new Minor::MinorThread(this, 0, params->system, params->itb, + params->dtb, params->isa[0]); + } else { + /* thread_id 0 */ + thread = new Minor::MinorThread(this, 0, params->system, + params->workload[0], params->itb, params->dtb, params->isa[0]); + } + + threads.push_back(thread); + threadActivateEvents.push_back(new ThreadActivateEvent(*this, 0)); + + thread->setStatus(ThreadContext::Halted); + + ThreadContext *tc = thread->getTC(); + + if (params->checker) { + fatal("The Minor model doesn't support checking (yet)\n"); + } + + threadContexts.push_back(tc); + + Minor::MinorDynInst::init(); + + pipeline = new Minor::Pipeline(*this, *params); + activityRecorder = pipeline->getActivityRecorder(); +} + +MinorCPU::~MinorCPU() +{ + delete pipeline; + + for (ThreadID thread_id = 0; thread_id < threads.size(); thread_id++) { + delete threads[thread_id]; + delete threadActivateEvents[thread_id]; + } +} + +void +MinorCPU::init() +{ + BaseCPU::init(); + + if (!params()->switched_out && + system->getMemoryMode() != Enums::timing) + { + fatal("The Minor CPU requires the memory system to be in " + "'timing' mode.\n"); + } + + /* Initialise the ThreadContext's memory proxies */ + for (ThreadID thread_id = 0; thread_id < threads.size(); thread_id++) { + ThreadContext *tc = getContext(thread_id); + + tc->initMemProxies(tc); + } + + /* Initialise CPUs (== threads in the ISA) */ + if (FullSystem && !params()->switched_out) { + for (ThreadID thread_id = 0; thread_id < threads.size(); thread_id++) + { + ThreadContext *tc = getContext(thread_id); + + /* Initialize CPU, including PC */ + TheISA::initCPU(tc, cpuId()); + } + } +} + +/** Stats interface from SimObject (by way of BaseCPU) */ +void +MinorCPU::regStats() +{ + BaseCPU::regStats(); + stats.regStats(name(), *this); + pipeline->regStats(); +} + +void +MinorCPU::serializeThread(std::ostream &os, ThreadID thread_id) +{ + threads[thread_id]->serialize(os); +} + +void +MinorCPU::unserializeThread(Checkpoint *cp, const std::string &section, + ThreadID thread_id) +{ + if (thread_id != 0) + fatal("Trying to load more than one thread into a MinorCPU\n"); + + threads[thread_id]->unserialize(cp, section); +} + +void +MinorCPU::serialize(std::ostream &os) +{ + pipeline->serialize(os); + BaseCPU::serialize(os); +} + +void +MinorCPU::unserialize(Checkpoint *cp, const std::string &section) +{ + pipeline->unserialize(cp, section); + BaseCPU::unserialize(cp, section); +} + +Addr +MinorCPU::dbg_vtophys(Addr addr) +{ + /* Note that this gives you the translation for thread 0 */ + panic("No implementation for vtophys\n"); + + return 0; +} + +void +MinorCPU::wakeup() +{ + DPRINTF(Drain, "MinorCPU wakeup\n"); + + for (auto i = threads.begin(); i != threads.end(); i ++) { + if ((*i)->status() == ThreadContext::Suspended) + (*i)->activate(); + } + + DPRINTF(Drain, "Suspended Processor awoke\n"); +} + +void +MinorCPU::startup() +{ + DPRINTF(MinorCPU, "MinorCPU startup\n"); + + BaseCPU::startup(); + + for (auto i = threads.begin(); i != threads.end(); i ++) + (*i)->startup(); +} + +unsigned int 
+MinorCPU::drain(DrainManager *drain_manager) +{ + DPRINTF(Drain, "MinorCPU drain\n"); + + drainManager = drain_manager; + + /* Need to suspend all threads and wait for Execute to idle. + * Tell Fetch1 not to fetch */ + unsigned int ret = pipeline->drain(drain_manager); + + if (ret == 0) + DPRINTF(Drain, "MinorCPU drained\n"); + else + DPRINTF(Drain, "MinorCPU not finished draining\n"); + + return ret; +} + +void +MinorCPU::signalDrainDone() +{ + DPRINTF(Drain, "MinorCPU drain done\n"); + setDrainState(Drainable::Drained); + drainManager->signalDrainDone(); + drainManager = NULL; +} + +void +MinorCPU::drainResume() +{ + assert(getDrainState() == Drainable::Drained || + getDrainState() == Drainable::Running); + + if (switchedOut()) { + DPRINTF(Drain, "drainResume while switched out. Ignoring\n"); + return; + } + + DPRINTF(Drain, "MinorCPU drainResume\n"); + + if (!system->isTimingMode()) { + fatal("The Minor CPU requires the memory system to be in " + "'timing' mode.\n"); + } + + wakeup(); + pipeline->drainResume(); + + setDrainState(Drainable::Running); +} + +void +MinorCPU::memWriteback() +{ + DPRINTF(Drain, "MinorCPU memWriteback\n"); +} + +void +MinorCPU::switchOut() +{ + DPRINTF(MinorCPU, "MinorCPU switchOut\n"); + + assert(!switchedOut()); + BaseCPU::switchOut(); + + /* Check that the CPU is drained? */ + activityRecorder->reset(); +} + +void +MinorCPU::takeOverFrom(BaseCPU *old_cpu) +{ + DPRINTF(MinorCPU, "MinorCPU takeOverFrom\n"); + + BaseCPU::takeOverFrom(old_cpu); + + /* Don't think I need to do anything here */ +} + +void +MinorCPU::activateContext(ThreadID thread_id, Cycles delay) +{ + DPRINTF(MinorCPU, "ActivateContext thread: %d delay: %d\n", + thread_id, delay); + + if (!threadActivateEvents[thread_id]->scheduled()) { + schedule(threadActivateEvents[thread_id], clockEdge(delay)); + } +} + +void +MinorCPU::ThreadActivateEvent::process() +{ + DPRINTFS(MinorCPU, (&cpu), "Activating thread: %d\n", thread_id); + + /* Do some cycle accounting. 
lastStopped is reset to stop the + * wakeup call on the pipeline from adding the quiesce period + * to BaseCPU::numCycles */ + cpu.stats.quiesceCycles += cpu.pipeline->cyclesSinceLastStopped(); + cpu.pipeline->resetLastStopped(); + + /* Wake up the thread, wakeup the pipeline tick */ + cpu.threads[thread_id]->activate(); + cpu.wakeupOnEvent(Minor::Pipeline::CPUStageId); + cpu.pipeline->wakeupFetch(); +} + +void +MinorCPU::suspendContext(ThreadID thread_id) +{ + DPRINTF(MinorCPU, "SuspendContext %d\n", thread_id); + + threads[thread_id]->suspend(); +} + +void +MinorCPU::wakeupOnEvent(unsigned int stage_id) +{ + DPRINTF(Quiesce, "Event wakeup from stage %d\n", stage_id); + + /* Mark that some activity has taken place and start the pipeline */ + activityRecorder->activateStage(stage_id); + pipeline->start(); +} + +MinorCPU * +MinorCPUParams::create() +{ + numThreads = 1; + if (!FullSystem && workload.size() != 1) + panic("only one workload allowed"); + return new MinorCPU(this); +} + +MasterPort &MinorCPU::getInstPort() +{ + return pipeline->getInstPort(); +} + +MasterPort &MinorCPU::getDataPort() +{ + return pipeline->getDataPort(); +} + +Counter +MinorCPU::totalInsts() const +{ + Counter ret = 0; + + for (auto i = threads.begin(); i != threads.end(); i ++) + ret += (*i)->numInst; + + return ret; +} + +Counter +MinorCPU::totalOps() const +{ + Counter ret = 0; + + for (auto i = threads.begin(); i != threads.end(); i ++) + ret += (*i)->numOp; + + return ret; +} diff --git a/src/cpu/minor/cpu.hh b/src/cpu/minor/cpu.hh new file mode 100644 index 000000000..80f41b5d2 --- /dev/null +++ b/src/cpu/minor/cpu.hh @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2012-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Bardsley
+ */
+
+/**
+ * @file
+ *
+ * Top level definition of the Minor in-order CPU model
+ */
+
+#ifndef __CPU_MINOR_CPU_HH__
+#define __CPU_MINOR_CPU_HH__
+
+#include "cpu/minor/activity.hh"
+#include "cpu/minor/stats.hh"
+#include "cpu/base.hh"
+#include "cpu/simple_thread.hh"
+#include "params/MinorCPU.hh"
+
+namespace Minor
+{
+/** Forward declared to break the cyclic inclusion dependencies between
+ *  pipeline and cpu */
+class Pipeline;
+
+/** Minor will use the SimpleThread state for now */
+typedef SimpleThread MinorThread;
+};
+
+/**
+ * MinorCPU is an in-order CPU model with four fixed pipeline stages:
+ *
+ *  Fetch1 - fetches lines from memory
+ *  Fetch2 - decomposes lines into macro-op instructions
+ *  Decode - decomposes macro-ops into micro-ops
+ *  Execute - executes those micro-ops
+ *
+ * This pipeline is carried in the MinorCPU::pipeline object.
+ * The exec_context interface is not carried by MinorCPU but by
+ *  Minor::ExecContext objects
+ * created by Minor::Execute.
+ */
+class MinorCPU : public BaseCPU
+{
+  protected:
+    /** Event for delayed wakeup of a thread */
+    class ThreadActivateEvent : public Event
+    {
+      public:
+        MinorCPU &cpu;
+        ThreadID thread_id;
+
+        ThreadActivateEvent(MinorCPU &cpu_, ThreadID thread_id_) :
+            cpu(cpu_), thread_id(thread_id_)
+        { }
+
+        void process();
+    };
+
+    /** Events to wakeup each thread */
+    std::vector<ThreadActivateEvent *> threadActivateEvents;
+
+    /** pipeline is a container for the clockable pipeline stage objects.
+     *  Elements of pipeline call TheISA to implement the model. */
+    Minor::Pipeline *pipeline;
+
+  public:
+    /** Activity recording for pipeline.  This belongs to Pipeline but
+     *  stages will access it through the CPU as the MinorCPU object
+     *  actually mediates idling behaviour */
+    Minor::MinorActivityRecorder *activityRecorder;
+
+    /** These are thread state-representing objects for this CPU.  If
+     *  you need a ThreadContext for *any* reason, use
+     *  threads[threadId]->getTC() */
+    std::vector<Minor::MinorThread *> threads;
+
+  public:
+    /** Provide a non-protected base class for Minor's Ports as derived
+     *  classes are created by Fetch1 and Execute */
+    class MinorCPUPort : public MasterPort
+    {
+      public:
+        /** The enclosing cpu */
+        MinorCPU &cpu;
+
+      public:
+        MinorCPUPort(const std::string& name_, MinorCPU &cpu_)
+            : MasterPort(name_, &cpu_), cpu(cpu_)
+        { }
+
+      protected:
+        /** Snooping a coherence request, do nothing. */
+        virtual void recvTimingSnoopReq(PacketPtr pkt) { }
+    };
+
+    /** The DrainManager passed into drain that needs to be signalled when
+     *  draining is complete */
+    DrainManager *drainManager;
+
+  protected:
+    /** Return a reference to the data port. */
+    MasterPort &getDataPort();
+
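+    /* Both accessors below forward to the pipeline (see cpu.cc); the
+     * concrete port objects are owned by the Fetch1 (instruction side)
+     * and Execute/LSQ (data side) stages. */
+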
+    /** Return a reference to the instruction port. */
+    MasterPort &getInstPort();
+
+  public:
+    MinorCPU(MinorCPUParams *params);
+
+    ~MinorCPU();
+
+  public:
+    /** Starting, waking and initialisation */
+    void init();
+    void startup();
+    void wakeup();
+
+    Addr dbg_vtophys(Addr addr);
+
+    /** Processor-specific statistics */
+    Minor::MinorStats stats;
+
+    /** Stats interface from SimObject (by way of BaseCPU) */
+    void regStats();
+
+    /** Simple inst count interface from BaseCPU */
+    Counter totalInsts() const;
+    Counter totalOps() const;
+
+    void serializeThread(std::ostream &os, ThreadID thread_id);
+    void unserializeThread(Checkpoint *cp, const std::string &section,
+        ThreadID thread_id);
+
+    /** Serialize pipeline data */
+    void serialize(std::ostream &os);
+    void unserialize(Checkpoint *cp, const std::string &section);
+
+    /** Drain interface */
+    unsigned int drain(DrainManager *drain_manager);
+    void drainResume();
+    /** Signal from Pipeline that MinorCPU should signal the DrainManager
+     *  that a drain is complete and set its drainState */
+    void signalDrainDone();
+    void memWriteback();
+
+    /** Switching interface from BaseCPU */
+    void switchOut();
+    void takeOverFrom(BaseCPU *old_cpu);
+
+    /** Thread activation interface from BaseCPU. */
+    void activateContext(ThreadID thread_id, Cycles delay);
+    void suspendContext(ThreadID thread_id);
+
+    /** Interface for stages to signal that they have become active after
+     *  a callback or eventq event where the pipeline itself may have
+     *  already been idled.  The stage argument should be from the
+     *  enumeration Pipeline::StageId */
+    void wakeupOnEvent(unsigned int stage_id);
+};
+
+#endif /* __CPU_MINOR_CPU_HH__ */
diff --git a/src/cpu/minor/decode.cc b/src/cpu/minor/decode.cc
new file mode 100644
index 000000000..e380f0d2d
--- /dev/null
+++ b/src/cpu/minor/decode.cc
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2013-2014 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Bardsley
+ */
+
+#include "cpu/minor/decode.hh"
+#include "cpu/minor/pipeline.hh"
+#include "debug/Decode.hh"
+
+namespace Minor
+{
+
+Decode::Decode(const std::string &name,
+    MinorCPU &cpu_,
+    MinorCPUParams &params,
+    Latch<ForwardInstData>::Output inp_,
+    Latch<ForwardInstData>::Input out_,
+    Reservable &next_stage_input_buffer) :
+    Named(name),
+    cpu(cpu_),
+    inp(inp_),
+    out(out_),
+    nextStageReserve(next_stage_input_buffer),
+    outputWidth(params.executeInputWidth),
+    processMoreThanOneInput(params.decodeCycleInput),
+    inputBuffer(name + ".inputBuffer", "insts", params.decodeInputBufferSize),
+    inputIndex(0),
+    inMacroop(false),
+    execSeqNum(InstId::firstExecSeqNum)
+{
+    if (outputWidth < 1)
+        fatal("%s: executeInputWidth must be >= 1 (%d)\n", name, outputWidth);
+
+    if (params.decodeInputBufferSize < 1) {
+        fatal("%s: decodeInputBufferSize must be >= 1 (%d)\n", name,
+            params.decodeInputBufferSize);
+    }
+}
+
+const ForwardInstData *
+Decode::getInput()
+{
+    /* Get insts from the inputBuffer to work with */
+    if (!inputBuffer.empty()) {
+        const ForwardInstData &head = inputBuffer.front();
+
+        return (head.isBubble() ? NULL : &(inputBuffer.front()));
+    } else {
+        return NULL;
+    }
+}
+
+void
+Decode::popInput()
+{
+    if (!inputBuffer.empty())
+        inputBuffer.pop();
+
+    inputIndex = 0;
+    inMacroop = false;
+}
+
+#if TRACING_ON
+/** Add the tracing data to an instruction.  This originates in
+ *  decode because this is the first place that execSeqNums are known
+ *  (these are used as the 'FetchSeq' in tracing data) */
+static void
+dynInstAddTracing(MinorDynInstPtr inst, StaticInstPtr static_inst,
+    MinorCPU &cpu)
+{
+    inst->traceData = cpu.getTracer()->getInstRecord(curTick(),
+        cpu.getContext(inst->id.threadId),
+        inst->staticInst, inst->pc, static_inst);
+
+    /* Use the execSeqNum as the fetch sequence number as this most closely
+     *  matches the other processor models' idea of fetch sequence */
+    if (inst->traceData)
+        inst->traceData->setFetchSeq(inst->id.execSeqNum);
+}
+#endif
+
+void
+Decode::evaluate()
+{
+    inputBuffer.setTail(*inp.outputWire);
+    ForwardInstData &insts_out = *out.inputWire;
+
+    assert(insts_out.isBubble());
+
+    blocked = false;
+
+    if (!nextStageReserve.canReserve()) {
+        blocked = true;
+    } else {
+        const ForwardInstData *insts_in = getInput();
+
+        unsigned int output_index = 0;
+
+        /* Pack instructions into the output while we can.  This may involve
+         * using more than one input line */
+        while (insts_in &&
+            inputIndex < insts_in->width() && /* Still more input */
+            output_index < outputWidth /* Still more output to fill */)
+        {
+            MinorDynInstPtr inst = insts_in->insts[inputIndex];
+
+            if (inst->isBubble()) {
+                /* Skip */
+                inputIndex++;
+                inMacroop = false;
+            } else {
+                StaticInstPtr static_inst = inst->staticInst;
+                /* Static inst of a macro-op above the output_inst */
+                StaticInstPtr parent_static_inst = NULL;
+                MinorDynInstPtr output_inst = inst;
+
+                if (inst->isFault()) {
+                    DPRINTF(Decode, "Fault being passed: %s\n",
+                        inst->fault->name());
+
+                    inputIndex++;
+                    inMacroop = false;
+                } else if (static_inst->isMacroop()) {
+                    /* Generate a new micro-op */
+                    StaticInstPtr static_micro_inst;
+
+                    /* Set up PC for the next micro-op emitted */
+                    if (!inMacroop) {
+                        microopPC = inst->pc;
+                        inMacroop = true;
+                    }
+
+                    /* Get the micro-op static instruction from the
+                     * static_inst. */
+                    static_micro_inst =
+                        static_inst->fetchMicroop(microopPC.microPC());
+
+                    output_inst = new MinorDynInst(inst->id);
+                    output_inst->pc = microopPC;
+                    output_inst->staticInst = static_micro_inst;
+                    output_inst->fault = NoFault;
+
+                    /* Allow a predicted next address only on the last
+                     *  microop */
+                    if (static_micro_inst->isLastMicroop()) {
+                        output_inst->predictedTaken = inst->predictedTaken;
+                        output_inst->predictedTarget = inst->predictedTarget;
+                    }
+
+                    DPRINTF(Decode, "Microop decomposition inputIndex:"
+                        " %d output_index: %d lastMicroop: %s microopPC:"
+                        " %d.%d inst: %s\n",
+                        inputIndex, output_index,
+                        (static_micro_inst->isLastMicroop() ?
+                            "true" : "false"),
+                        microopPC.instAddr(), microopPC.microPC(),
+                        *output_inst);
+
+                    /* Acknowledge that the static_inst isn't mine, it's my
+                     *  parent macro-op's */
+                    parent_static_inst = static_inst;
+
+                    static_micro_inst->advancePC(microopPC);
+
+                    /* Step input if this is the last micro-op */
+                    if (static_micro_inst->isLastMicroop()) {
+                        inputIndex++;
+                        inMacroop = false;
+                    }
+                } else {
+                    /* Doesn't need decomposing, pass on instruction */
+                    DPRINTF(Decode, "Passing on inst: %s inputIndex:"
+                        " %d output_index: %d\n",
+                        *output_inst, inputIndex, output_index);
+
+                    parent_static_inst = static_inst;
+
+                    /* Step input */
+                    inputIndex++;
+                    inMacroop = false;
+                }
+
+                /* Set execSeqNum of output_inst */
+                output_inst->id.execSeqNum = execSeqNum;
+                /* Add tracing */
+#if TRACING_ON
+                dynInstAddTracing(output_inst, parent_static_inst, cpu);
+#endif
+
+                /* Step to next sequence number */
+                execSeqNum++;
+
+                /* Correctly size the output before writing */
+                if (output_index == 0)
+                    insts_out.resize(outputWidth);
+                /* Push into output */
+                insts_out.insts[output_index] = output_inst;
+                output_index++;
+            }
+
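+            /* A worked example (illustrative values, not taken from a
+             * real trace): with a 2-wide output receiving { bubble,
+             * macro-op M } where M decomposes into micro-ops m0, m1, m2:
+             * cycle 1 skips the bubble, emits m0 and m1 with execSeqNums
+             * n and n+1, and leaves inMacroop true with microopPC at m2;
+             * cycle 2 emits m2 (isLastMicroop) with execSeqNum n+2, steps
+             * inputIndex past M and clears inMacroop. */
+
+            /* Have we finished with the input? 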
*/ + if (inputIndex == insts_in->width()) { + /* If we have just been producing micro-ops, we *must* have + * got to the end of that for inputIndex to be pushed past + * insts_in->width() */ + assert(!inMacroop); + popInput(); + insts_in = NULL; + + if (processMoreThanOneInput) { + DPRINTF(Decode, "Wrapping\n"); + insts_in = getInput(); + } + } + } + + /* The rest of the output (if any) should already have been packed + * with bubble instructions by insts_out's initialisation + * + * for (; output_index < outputWidth; output_index++) + * assert(insts_out.insts[output_index]->isBubble()); + */ + } + + /* If we generated output, reserve space for the result in the next stage + * and mark the stage as being active this cycle */ + if (!insts_out.isBubble()) { + /* Note activity of following buffer */ + cpu.activityRecorder->activity(); + nextStageReserve.reserve(); + } + + /* If we still have input to process and somewhere to put it, + * mark stage as active */ + if (getInput() && nextStageReserve.canReserve()) + cpu.activityRecorder->activateStage(Pipeline::DecodeStageId); + + /* Make sure the input (if any left) is pushed */ + inputBuffer.pushTail(); +} + +bool +Decode::isDrained() +{ + return inputBuffer.empty() && (*inp.outputWire).isBubble(); +} + +void +Decode::minorTrace() const +{ + std::ostringstream data; + + if (blocked) + data << 'B'; + else + (*out.inputWire).reportData(data); + + MINORTRACE("insts=%s\n", data.str()); + inputBuffer.minorTrace(); +} + +} diff --git a/src/cpu/minor/decode.hh b/src/cpu/minor/decode.hh new file mode 100644 index 000000000..fcc18fd44 --- /dev/null +++ b/src/cpu/minor/decode.hh @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Bardsley
+ */
+
+/**
+ * @file
+ *
+ * Decode collects macro-ops from Fetch2 and splits them into micro-ops
+ * passed to Execute.
+ */
+
+#ifndef __CPU_MINOR_DECODE_HH__
+#define __CPU_MINOR_DECODE_HH__
+
+#include "cpu/minor/buffers.hh"
+#include "cpu/minor/cpu.hh"
+#include "cpu/minor/dyn_inst.hh"
+#include "cpu/minor/pipe_data.hh"
+
+namespace Minor
+{
+
+/* Decode takes instructions from Fetch2 and decomposes them into micro-ops
+ * to feed to Execute.  It generates a new sequence number for each
+ * instruction: execSeqNum.
+ */
+class Decode : public Named
+{
+  protected:
+    /** Pointer back to the containing CPU */
+    MinorCPU &cpu;
+
+    /** Input port carrying macro instructions from Fetch2 */
+    Latch<ForwardInstData>::Output inp;
+    /** Output port carrying micro-op decomposed instructions to Execute */
+    Latch<ForwardInstData>::Input out;
+
+    /** Interface to reserve space in the next stage */
+    Reservable &nextStageReserve;
+
+    /** Width of output of this stage/input of next in instructions */
+    unsigned int outputWidth;
+
+    /** If true, more than one input word can be processed each cycle if
+     *  there is room in the output to contain its processed data */
+    bool processMoreThanOneInput;
+
+  public:
+    /* Public for Pipeline to be able to pass it to Fetch2 */
+    InputBuffer<ForwardInstData> inputBuffer;
+
+  protected:
+    /** Data members after this line are cycle-to-cycle state */
+
+    /** Index into the inputBuffer's head marking the start of unhandled
+     *  instructions */
+    unsigned int inputIndex;
+
+    /** True when we're in the process of decomposing a macro-op and
+     *  microopPC will be valid.  This is only the case when there isn't
+     *  sufficient space in Execute's input buffer to take the whole of a
+     *  decomposed instruction and some of that instruction's micro-ops must
+     *  be generated in a later cycle */
+    bool inMacroop;
+    TheISA::PCState microopPC;
+
+    /** Source of execSeqNums to number instructions. */
+    InstSeqNum execSeqNum;
+
+    /** Blocked indication for report */
+    bool blocked;
+
+  protected:
+    /** Get a piece of data to work on, or 0 if there is no data. */
+    const ForwardInstData *getInput();
+
+    /** Pop an element off the input buffer, if there are any */
+    void popInput();
+
+  public:
+    Decode(const std::string &name,
+        MinorCPU &cpu_,
+        MinorCPUParams &params,
+        Latch<ForwardInstData>::Output inp_,
+        Latch<ForwardInstData>::Input out_,
+        Reservable &next_stage_input_buffer);
+
+  public:
+    /** Pass on input/buffer data to the output if you can */
+    void evaluate();
+
+    void minorTrace() const;
+
+    /** Is this stage drained?  For Decode, draining is initiated by
+     *  Execute halting Fetch1 causing Fetch2 to naturally drain
+     *  into Decode and on to Execute which is responsible for
+     *  actually killing instructions */
+    bool isDrained();
+};
+
+}
+
+#endif /* __CPU_MINOR_DECODE_HH__ */
diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc
new file mode 100644
index 000000000..ab08e6b4a
--- /dev/null
+++ b/src/cpu/minor/dyn_inst.cc
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2013-2014 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Bardsley
+ */
+
+#include <iomanip>
+#include <sstream>
+
+#include "arch/isa.hh"
+#include "arch/registers.hh"
+#include "cpu/minor/dyn_inst.hh"
+#include "cpu/minor/trace.hh"
+#include "cpu/base.hh"
+#include "cpu/reg_class.hh"
+#include "debug/MinorExecute.hh"
+#include "enums/OpClass.hh"
+
+namespace Minor
+{
+
+std::ostream &
+operator <<(std::ostream &os, const InstId &id)
+{
+    os << id.threadId << '/' << id.streamSeqNum << '.'
+        << id.predictionSeqNum << '/' << id.lineSeqNum;
+
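+    /* For example, an id with thread 0, stream 2, prediction 5, line 9,
+     * fetch 12 and exec 40 prints as "0/2.5/9/12.40" (illustrative
+     * values); the fetch and exec parts are omitted below while they are
+     * still 0, as for bubbles and pre-decode ids. */
+
+    /* Not all structures have fetch and exec sequence numbers */
+    if (id.fetchSeqNum != 0) {
+        os << '/' << id.fetchSeqNum;
+        if (id.execSeqNum != 0)
+            os << '.' 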
<< id.execSeqNum; + } + + return os; +} + +MinorDynInstPtr MinorDynInst::bubbleInst = NULL; + +void +MinorDynInst::init() +{ + if (!bubbleInst) { + bubbleInst = new MinorDynInst(); + assert(bubbleInst->isBubble()); + /* Make bubbleInst immortal */ + bubbleInst->incref(); + } +} + +bool +MinorDynInst::isLastOpInInst() const +{ + assert(staticInst); + return !(staticInst->isMicroop() && !staticInst->isLastMicroop()); +} + +bool +MinorDynInst::isNoCostInst() const +{ + return isInst() && staticInst->opClass() == No_OpClass; +} + +void +MinorDynInst::reportData(std::ostream &os) const +{ + if (isBubble()) + os << "-"; + else if (isFault()) + os << "F;" << id; + else + os << id; +} + +std::ostream & +operator <<(std::ostream &os, const MinorDynInst &inst) +{ + os << inst.id << " pc: 0x" + << std::hex << inst.pc.instAddr() << std::dec << " ("; + + if (inst.isFault()) + os << "fault: \"" << inst.fault->name() << '"'; + else if (inst.staticInst) + os << inst.staticInst->getName(); + else + os << "bubble"; + + os << ')'; + + return os; +} + +/** Print a register in the form r<n>, f<n>, m<n>(<name>), z for integer, + * float, misc and zero registers given an 'architectural register number' */ +static void +printRegName(std::ostream &os, TheISA::RegIndex reg) +{ + RegClass reg_class = regIdxToClass(reg); + + switch (reg_class) + { + case MiscRegClass: + { + TheISA::RegIndex misc_reg = reg - TheISA::Misc_Reg_Base; + + /* This is an ugly test because not all archs. have miscRegName */ +#if THE_ISA == ARM_ISA + os << 'm' << misc_reg << '(' << TheISA::miscRegName[misc_reg] << + ')'; +#else + os << 'n' << misc_reg; +#endif + } + break; + case FloatRegClass: + os << 'f' << static_cast<unsigned int>(reg - TheISA::FP_Reg_Base); + break; + case IntRegClass: + if (reg == TheISA::ZeroReg) { + os << 'z'; + } else { + os << 'r' << static_cast<unsigned int>(reg); + } + break; + case CCRegClass: + os << 'c' << static_cast<unsigned int>(reg - TheISA::CC_Reg_Base); + } +} + +void +MinorDynInst::minorTraceInst(const Named &named_object) const +{ + if (isFault()) { + MINORINST(&named_object, "id=F;%s addr=0x%x fault=\"%s\"\n", + id, pc.instAddr(), fault->name()); + } else { + unsigned int num_src_regs = staticInst->numSrcRegs(); + unsigned int num_dest_regs = staticInst->numDestRegs(); + + std::ostringstream regs_str; + + /* Format lists of src and dest registers for microops and + * 'full' instructions */ + if (!staticInst->isMacroop()) { + regs_str << " srcRegs="; + + unsigned int src_reg = 0; + while (src_reg < num_src_regs) { + printRegName(regs_str, staticInst->srcRegIdx(src_reg)); + + src_reg++; + if (src_reg != num_src_regs) + regs_str << ','; + } + + regs_str << " destRegs="; + + unsigned int dest_reg = 0; + while (dest_reg < num_dest_regs) { + printRegName(regs_str, staticInst->destRegIdx(dest_reg)); + + dest_reg++; + if (dest_reg != num_dest_regs) + regs_str << ','; + } + +#if THE_ISA == ARM_ISA + regs_str << " extMachInst=" << std::hex << std::setw(16) + << std::setfill('0') << staticInst->machInst << std::dec; +#endif + } + + std::ostringstream flags; + staticInst->printFlags(flags, " "); + + MINORINST(&named_object, "id=%s addr=0x%x inst=\"%s\" class=%s" + " flags=\"%s\"%s%s\n", + id, pc.instAddr(), + (staticInst->opClass() == No_OpClass ? + "(invalid)" : staticInst->disassemble(0,NULL)), + Enums::OpClassStrings[staticInst->opClass()], + flags.str(), + regs_str.str(), + (predictedTaken ? 
" predictedTaken" : "")); + } +} + +MinorDynInst::~MinorDynInst() +{ + if (traceData) + delete traceData; +} + +} diff --git a/src/cpu/minor/dyn_inst.hh b/src/cpu/minor/dyn_inst.hh new file mode 100644 index 000000000..a30d68819 --- /dev/null +++ b/src/cpu/minor/dyn_inst.hh @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * The dynamic instruction and instruction/line id (sequence numbers) + * definition for Minor. A spirited attempt is made here to not carry too + * much on this structure. + */ + +#ifndef __CPU_MINOR_DYN_INST_HH__ +#define __CPU_MINOR_DYN_INST_HH__ + +#include <iostream> + +#include "base/refcnt.hh" +#include "cpu/minor/buffers.hh" +#include "cpu/inst_seq.hh" +#include "cpu/static_inst.hh" +#include "cpu/timing_expr.hh" +#include "sim/faults.hh" + +namespace Minor +{ + +class MinorDynInst; + +/** MinorDynInsts are currently reference counted. */ +typedef RefCountingPtr<MinorDynInst> MinorDynInstPtr; + +/** Id for lines and instructions. This includes all the relevant sequence + * numbers and thread ids for all stages of execution. 
*/ +class InstId +{ + public: + /** First sequence numbers to use in initialisation of the pipeline and + * to be expected on the first line/instruction issued */ + static const InstSeqNum firstStreamSeqNum = 1; + static const InstSeqNum firstPredictionSeqNum = 1; + static const InstSeqNum firstLineSeqNum = 1; + static const InstSeqNum firstFetchSeqNum = 1; + static const InstSeqNum firstExecSeqNum = 1; + + public: + /** The thread to which this line/instruction belongs */ + ThreadID threadId; + + /** The 'stream' this instruction belongs to. Streams are interrupted + * (and sequence numbers increased) when Execute finds it wants to + * change the stream of instructions due to a branch. */ + InstSeqNum streamSeqNum; + + /** The predicted qualifier to stream, attached by Fetch2 as a + * consequence of branch prediction */ + InstSeqNum predictionSeqNum; + + /** Line sequence number. This is the sequence number of the fetched + * line from which this instruction was fetched */ + InstSeqNum lineSeqNum; + + /** Fetch sequence number. This is 0 for bubbles and an ascending + * sequence for the stream of all fetched instructions */ + InstSeqNum fetchSeqNum; + + /** 'Execute' sequence number. These are assigned after micro-op + * decomposition and form an ascending sequence (starting with 1) for + * post-micro-op decomposed instructions. */ + InstSeqNum execSeqNum; + + public: + /** Very boring default constructor */ + InstId( + ThreadID thread_id = 0, InstSeqNum stream_seq_num = 0, + InstSeqNum prediction_seq_num = 0, InstSeqNum line_seq_num = 0, + InstSeqNum fetch_seq_num = 0, InstSeqNum exec_seq_num = 0) : + threadId(thread_id), streamSeqNum(stream_seq_num), + predictionSeqNum(prediction_seq_num), lineSeqNum(line_seq_num), + fetchSeqNum(fetch_seq_num), execSeqNum(exec_seq_num) + { } + + public: + /* Equal if the thread and last set sequence number matches */ + bool + operator== (const InstId &rhs) + { + /* If any of fetch and exec sequence number are not set + * they need to be 0, so a straight comparison is still + * fine */ + bool ret = (threadId == rhs.threadId && + lineSeqNum == rhs.lineSeqNum && + fetchSeqNum == rhs.fetchSeqNum && + execSeqNum == rhs.execSeqNum); + + /* Stream and prediction *must* match if these are the same id */ + if (ret) { + assert(streamSeqNum == rhs.streamSeqNum && + predictionSeqNum == rhs.predictionSeqNum); + } + + return ret; + } +}; + +/** Print this id in the usual slash-separated format expected by + * MinorTrace */ +std::ostream &operator <<(std::ostream &os, const InstId &id); + +class MinorDynInst; + +/** Print a short reference to this instruction. '-' for a bubble and a + * series of '/' separated sequence numbers for other instructions. The + * sequence numbers will be in the order: stream, prediction, line, fetch, + * exec with exec absent if it is 0. This is used by MinorTrace. */ +std::ostream &operator <<(std::ostream &os, const MinorDynInst &inst); + +/** Dynamic instruction for Minor. + * MinorDynInst implements the BubbleIF interface + * Has two separate notions of sequence number for pre/post-micro-op + * decomposition: fetchSeqNum and execSeqNum */ +class MinorDynInst : public RefCounted +{ + private: + /** A prototypical bubble instruction. 
You must call MinorDynInst::init + * to initialise this */ + static MinorDynInstPtr bubbleInst; + + public: + StaticInstPtr staticInst; + + InstId id; + + /** Trace information for this instruction's execution */ + Trace::InstRecord *traceData; + + /** The fetch address of this instruction */ + TheISA::PCState pc; + + /** This is actually a fault masquerading as an instruction */ + Fault fault; + + /** Tried to predict the destination of this inst (if a control + * instruction or a sys call) */ + bool triedToPredict; + + /** This instruction was predicted to change control flow and + * the following instructions will have a newer predictionSeqNum */ + bool predictedTaken; + + /** Predicted branch target */ + TheISA::PCState predictedTarget; + + /** Fields only set during execution */ + + /** FU this instruction is issued to */ + unsigned int fuIndex; + + /** This instruction is in the LSQ, not a functional unit */ + bool inLSQ; + + /** The instruction has been sent to the store buffer */ + bool inStoreBuffer; + + /** Can this instruction be executed out of order. In this model, + * this only happens with mem refs that need to be issued early + * to allow other instructions to fill the fetch delay */ + bool canEarlyIssue; + + /** execSeqNum of the latest inst on which this inst depends. + * This can be used as a sanity check for dependency ordering + * where slightly out of order execution is required (notably + * initiateAcc for memory ops) */ + InstSeqNum instToWaitFor; + + /** Extra delay at the end of the pipeline */ + Cycles extraCommitDelay; + TimingExpr *extraCommitDelayExpr; + + /** Once issued, extraCommitDelay becomes minimumCommitCycle + * to account for delay in absolute time */ + Cycles minimumCommitCycle; + + /** Flat register indices so that, when clearing the scoreboard, we + * have the same register indices as when the instruction was marked + * up */ + TheISA::RegIndex flatDestRegIdx[TheISA::MaxInstDestRegs]; + + /** Effective address as set by ExecContext::setEA */ + Addr ea; + + public: + MinorDynInst(InstId id_ = InstId(), Fault fault_ = NoFault) : + staticInst(NULL), id(id_), traceData(NULL), + pc(TheISA::PCState(0)), fault(fault_), + triedToPredict(false), predictedTaken(false), + fuIndex(0), inLSQ(false), inStoreBuffer(false), + canEarlyIssue(false), + instToWaitFor(0), extraCommitDelay(Cycles(0)), + extraCommitDelayExpr(NULL), minimumCommitCycle(Cycles(0)), + ea(0) + { } + + public: + /** The BubbleIF interface. 
*/ + bool isBubble() const { return id.fetchSeqNum == 0; } + + /** There is a single bubble inst */ + static MinorDynInstPtr bubble() { return bubbleInst; } + + /** Is this a fault rather than instruction */ + bool isFault() const { return fault != NoFault; } + + /** Is this a real instruction */ + bool isInst() const { return !isBubble() && !isFault(); } + + /** Is this a real mem ref instruction */ + bool isMemRef() const { return isInst() && staticInst->isMemRef(); } + + /** Is this an instruction that can be executed `for free' and + * needn't spend time in an FU */ + bool isNoCostInst() const; + + /** Assuming this is not a fault, is this instruction either + * a whole instruction or the last microop from a macroop */ + bool isLastOpInInst() const; + + /** Initialise the class */ + static void init(); + + /** Print (possibly verbose) instruction information for + * MinorTrace using the given Named object's name */ + void minorTraceInst(const Named &named_object) const; + + /** ReportIF interface */ + void reportData(std::ostream &os) const; + + ~MinorDynInst(); +}; + +/** Print a summary of the instruction */ +std::ostream &operator <<(std::ostream &os, const MinorDynInst &inst); + +} + +#endif /* __CPU_MINOR_DYN_INST_HH__ */ diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh new file mode 100644 index 000000000..df909a95c --- /dev/null +++ b/src/cpu/minor/exec_context.hh @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2011-2014 ARM Limited + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2002-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Steve Reinhardt + * Dave Greene + * Nathan Binkert + * Andrew Bardsley + */ + +/** + * @file + * + * ExecContext bears the exec_context interface for Minor. + */ + +#ifndef __CPU_MINOR_EXEC_CONTEXT_HH__ +#define __CPU_MINOR_EXEC_CONTEXT_HH__ + +#include "cpu/minor/execute.hh" +#include "cpu/minor/pipeline.hh" +#include "cpu/base.hh" +#include "cpu/simple_thread.hh" +#include "debug/MinorExecute.hh" + +namespace Minor +{ + +/* Forward declaration of Execute */ +class Execute; + +/** ExecContext bears the exec_context interface for Minor. This nicely + * separates that interface from other classes such as Pipeline, MinorCPU + * and DynMinorInst and makes it easier to see what state is accessed by it. + */ +class ExecContext +{ + public: + MinorCPU &cpu; + + /** ThreadState object, provides all the architectural state. */ + SimpleThread &thread; + + /** The execute stage so we can peek at its contents. */ + Execute &execute; + + /** Instruction for the benefit of memory operations and for PC */ + MinorDynInstPtr inst; + + ExecContext ( + MinorCPU &cpu_, + SimpleThread &thread_, Execute &execute_, + MinorDynInstPtr inst_) : + cpu(cpu_), + thread(thread_), + execute(execute_), + inst(inst_) + { + DPRINTF(MinorExecute, "ExecContext setting PC: %s\n", inst->pc); + pcState(inst->pc); + setPredicate(true); + thread.setIntReg(TheISA::ZeroReg, 0); +#if THE_ISA == ALPHA_ISA + thread.setFloatReg(TheISA::ZeroReg, 0.0); +#endif + } + + Fault + readMem(Addr addr, uint8_t *data, unsigned int size, + unsigned int flags) + { + execute.getLSQ().pushRequest(inst, true /* load */, data, + size, addr, flags, NULL); + return NoFault; + } + + Fault + writeMem(uint8_t *data, unsigned int size, Addr addr, + unsigned int flags, uint64_t *res) + { + execute.getLSQ().pushRequest(inst, false /* store */, data, + size, addr, flags, res); + return NoFault; + } + + uint64_t + readIntRegOperand(const StaticInst *si, int idx) + { + return thread.readIntReg(si->srcRegIdx(idx)); + } + + TheISA::FloatReg + readFloatRegOperand(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Reg_Base; + return thread.readFloatReg(reg_idx); + } + + TheISA::FloatRegBits + readFloatRegOperandBits(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Reg_Base; + return thread.readFloatRegBits(reg_idx); + } + + void + setIntRegOperand(const StaticInst *si, int idx, uint64_t val) + { + thread.setIntReg(si->destRegIdx(idx), val); + } + + void + setFloatRegOperand(const StaticInst *si, int idx, + TheISA::FloatReg val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Reg_Base; + thread.setFloatReg(reg_idx, val); + } + + void + setFloatRegOperandBits(const StaticInst *si, int idx, + TheISA::FloatRegBits val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::FP_Reg_Base; + thread.setFloatRegBits(reg_idx, val); + } + + bool + readPredicate() + { + return thread.readPredicate(); + } + + void + setPredicate(bool val) + { + 
thread.setPredicate(val); + } + + TheISA::PCState + pcState() + { + return thread.pcState(); + } + + void + pcState(const TheISA::PCState &val) + { + thread.pcState(val); + } + + TheISA::MiscReg + readMiscRegNoEffect(int misc_reg) + { + return thread.readMiscRegNoEffect(misc_reg); + } + + TheISA::MiscReg + readMiscReg(int misc_reg) + { + return thread.readMiscReg(misc_reg); + } + + void + setMiscReg(int misc_reg, const TheISA::MiscReg &val) + { + thread.setMiscReg(misc_reg, val); + } + + TheISA::MiscReg + readMiscRegOperand(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::Misc_Reg_Base; + return thread.readMiscReg(reg_idx); + } + + void + setMiscRegOperand(const StaticInst *si, int idx, + const TheISA::MiscReg &val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::Misc_Reg_Base; + return thread.setMiscReg(reg_idx, val); + } + + Fault + hwrei() + { +#if THE_ISA == ALPHA_ISA + return thread.hwrei(); +#else + return NoFault; +#endif + } + + bool + simPalCheck(int palFunc) + { +#if THE_ISA == ALPHA_ISA + return thread.simPalCheck(palFunc); +#else + return false; +#endif + } + + void + syscall(int64_t callnum) + { + if (FullSystem) + panic("Syscall emulation isn't available in FS mode.\n"); + + thread.syscall(callnum); + } + + ThreadContext *tcBase() { return thread.getTC(); } + + /* @todo, should make stCondFailures persistent somewhere */ + unsigned int readStCondFailures() { return 0; } + unsigned int + setStCondFailures(unsigned int st_cond_failures) + { + return 0; + } + + int contextId() { return thread.contextId(); } + /* ISA-specific (or at least currently ISA singleton) functions */ + + /* X86: TLB twiddling */ + void + demapPage(Addr vaddr, uint64_t asn) + { + thread.getITBPtr()->demapPage(vaddr, asn); + thread.getDTBPtr()->demapPage(vaddr, asn); + } + + TheISA::CCReg + readCCRegOperand(const StaticInst *si, int idx) + { + int reg_idx = si->srcRegIdx(idx) - TheISA::CC_Reg_Base; + return thread.readCCReg(reg_idx); + } + + void + setCCRegOperand(const StaticInst *si, int idx, TheISA::CCReg val) + { + int reg_idx = si->destRegIdx(idx) - TheISA::CC_Reg_Base; + thread.setCCReg(reg_idx, val); + } + + void + demapInstPage(Addr vaddr, uint64_t asn) + { + thread.getITBPtr()->demapPage(vaddr, asn); + } + + void + demapDataPage(Addr vaddr, uint64_t asn) + { + thread.getDTBPtr()->demapPage(vaddr, asn); + } + + /* ALPHA/POWER: Effective address storage */ + void setEA(Addr &ea) + { + inst->ea = ea; + } + + BaseCPU *getCpuPtr() { return &cpu; } + + /* POWER: Effective address storage */ + Addr getEA() + { + return inst->ea; + } + + /* MIPS: other thread register reading/writing */ + uint64_t + readRegOtherThread(unsigned idx, ThreadID tid = InvalidThreadID) + { + SimpleThread *other_thread = (tid == InvalidThreadID + ? &thread : cpu.threads[tid]); + + if (idx < TheISA::FP_Reg_Base) { /* Integer */ + return other_thread->readIntReg(idx); + } else if (idx < TheISA::Misc_Reg_Base) { /* Float */ + return other_thread->readFloatRegBits(idx + - TheISA::FP_Reg_Base); + } else { /* Misc */ + return other_thread->readMiscReg(idx + - TheISA::Misc_Reg_Base); + } + } + + void + setRegOtherThread(unsigned idx, const TheISA::MiscReg &val, + ThreadID tid = InvalidThreadID) + { + SimpleThread *other_thread = (tid == InvalidThreadID + ? 
&thread : cpu.threads[tid]); + + if (idx < TheISA::FP_Reg_Base) { /* Integer */ + return other_thread->setIntReg(idx, val); + } else if (idx < TheISA::Misc_Reg_Base) { /* Float */ + return other_thread->setFloatRegBits(idx + - TheISA::FP_Reg_Base, val); + } else { /* Misc */ + return other_thread->setMiscReg(idx + - TheISA::Misc_Reg_Base, val); + } + } +}; + +} + +#endif /* __CPU_MINOR_EXEC_CONTEXT_HH__ */ diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc new file mode 100644 index 000000000..2a009a154 --- /dev/null +++ b/src/cpu/minor/execute.cc @@ -0,0 +1,1736 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Authors: Andrew Bardsley
+ */
+
+#include "arch/locked_mem.hh"
+#include "arch/registers.hh"
+#include "arch/utility.hh"
+#include "cpu/minor/cpu.hh"
+#include "cpu/minor/exec_context.hh"
+#include "cpu/minor/execute.hh"
+#include "cpu/minor/fetch1.hh"
+#include "cpu/minor/lsq.hh"
+#include "cpu/op_class.hh"
+#include "debug/Activity.hh"
+#include "debug/Branch.hh"
+#include "debug/Drain.hh"
+#include "debug/MinorExecute.hh"
+#include "debug/MinorInterrupt.hh"
+#include "debug/MinorMem.hh"
+#include "debug/MinorTrace.hh"
+#include "debug/PCEvent.hh"
+
+namespace Minor
+{
+
+Execute::Execute(const std::string &name_,
+    MinorCPU &cpu_,
+    MinorCPUParams &params,
+    Latch<ForwardInstData>::Output inp_,
+    Latch<BranchData>::Input out_) :
+    Named(name_),
+    inp(inp_),
+    out(out_),
+    cpu(cpu_),
+    issueLimit(params.executeIssueLimit),
+    memoryIssueLimit(params.executeMemoryIssueLimit),
+    commitLimit(params.executeCommitLimit),
+    memoryCommitLimit(params.executeMemoryCommitLimit),
+    processMoreThanOneInput(params.executeCycleInput),
+    fuDescriptions(*params.executeFuncUnits),
+    numFuncUnits(fuDescriptions.funcUnits.size()),
+    setTraceTimeOnCommit(params.executeSetTraceTimeOnCommit),
+    setTraceTimeOnIssue(params.executeSetTraceTimeOnIssue),
+    allowEarlyMemIssue(params.executeAllowEarlyMemoryIssue),
+    noCostFUIndex(fuDescriptions.funcUnits.size() + 1),
+    lsq(name_ + ".lsq", name_ + ".dcache_port",
+        cpu_, *this,
+        params.executeMaxAccessesInMemory,
+        params.executeMemoryWidth,
+        params.executeLSQRequestsQueueSize,
+        params.executeLSQTransfersQueueSize,
+        params.executeLSQStoreBufferSize,
+        params.executeLSQMaxStoreBufferStoresPerCycle),
+    scoreboard(name_ + ".scoreboard"),
+    inputBuffer(name_ + ".inputBuffer", "insts",
+        params.executeInputBufferSize),
+    inputIndex(0),
+    lastCommitWasEndOfMacroop(true),
+    instsBeingCommitted(params.executeCommitLimit),
+    streamSeqNum(InstId::firstStreamSeqNum),
+    lastPredictionSeqNum(InstId::firstPredictionSeqNum),
+    drainState(NotDraining)
+{
+    if (commitLimit < 1) {
+        fatal("%s: executeCommitLimit must be >= 1 (%d)\n", name_,
+            commitLimit);
+    }
+
+    if (issueLimit < 1) {
+        fatal("%s: executeIssueLimit must be >= 1 (%d)\n", name_,
+            issueLimit);
+    }
+
+    if (memoryIssueLimit < 1) {
+        fatal("%s: executeMemoryIssueLimit must be >= 1 (%d)\n", name_,
+            memoryIssueLimit);
+    }
+
+    if (memoryCommitLimit > commitLimit) {
+        fatal("%s: executeMemoryCommitLimit (%d) must be <="
+            " executeCommitLimit (%d)\n",
+            name_, memoryCommitLimit, commitLimit);
+    }
+
+    if (params.executeInputBufferSize < 1) {
+        fatal("%s: executeInputBufferSize must be >= 1 (%d)\n", name_,
+            params.executeInputBufferSize);
+    }
+
+    /* This should be large enough to count all the in-FU instructions
+     *  which need to be accounted for in the inFlightInsts
+     *  queue */
+    unsigned int total_slots = 0;
+
+    /* Make FUPipelines for each MinorFU */
+    for (unsigned int i = 0; i < numFuncUnits; i++) {
+        std::ostringstream fu_name;
+        MinorFU *fu_description = fuDescriptions.funcUnits[i];
+
+        /* Note the total number of instruction slots (for sizing
+         *  the inFlightInst queue) and the maximum latency of any FU
+         *  (for sizing the activity recorder) */
+        total_slots += fu_description->opLat;
+
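+        /* For example (illustrative numbers only): two FUs with opLat 3
+         * and 6 give total_slots = 9, enough inFlightInsts entries for
+         * every instruction that can simultaneously occupy an FU stage. */
+
+        fu_name << name_ << ".fu." 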
<< i; + + FUPipeline *fu = new FUPipeline(fu_name.str(), *fu_description, cpu); + + funcUnits.push_back(fu); + } + + /** Check that there is a functional unit for all operation classes */ + for (int op_class = No_OpClass + 1; op_class < Num_OpClass; op_class++) { + bool found_fu = false; + unsigned int fu_index = 0; + + while (fu_index < numFuncUnits && !found_fu) + { + if (funcUnits[fu_index]->provides( + static_cast<OpClass>(op_class))) + { + found_fu = true; + } + fu_index++; + } + + if (!found_fu) { + warn("No functional unit for OpClass %s\n", + Enums::OpClassStrings[op_class]); + } + } + + inFlightInsts = new Queue<QueuedInst, + ReportTraitsAdaptor<QueuedInst> >( + name_ + ".inFlightInsts", "insts", total_slots); + + inFUMemInsts = new Queue<QueuedInst, + ReportTraitsAdaptor<QueuedInst> >( + name_ + ".inFUMemInsts", "insts", total_slots); +} + +const ForwardInstData * +Execute::getInput() +{ + /* Get a line from the inputBuffer to work with */ + if (!inputBuffer.empty()) { + const ForwardInstData &head = inputBuffer.front(); + + return (head.isBubble() ? NULL : &(inputBuffer.front())); + } else { + return NULL; + } +} + +void +Execute::popInput() +{ + if (!inputBuffer.empty()) + inputBuffer.pop(); + + inputIndex = 0; +} + +void +Execute::tryToBranch(MinorDynInstPtr inst, Fault fault, BranchData &branch) +{ + ThreadContext *thread = cpu.getContext(inst->id.threadId); + const TheISA::PCState &pc_before = inst->pc; + TheISA::PCState target = thread->pcState(); + + /* Force a branch for SerializeAfter instructions at the end of micro-op + * sequence when we're not suspended */ + bool force_branch = thread->status() != ThreadContext::Suspended && + !inst->isFault() && + inst->isLastOpInInst() && + (inst->staticInst->isSerializeAfter() || + inst->staticInst->isIprAccess()); + + DPRINTF(Branch, "tryToBranch before: %s after: %s%s\n", + pc_before, target, (force_branch ? " (forcing)" : "")); + + /* Will we change the PC to something other than the next instruction? */ + bool must_branch = pc_before != target || + fault != NoFault || + force_branch; + + /* The reason for the branch data we're about to generate, set below */ + BranchData::Reason reason = BranchData::NoBranch; + + if (fault == NoFault) + { + TheISA::advancePC(target, inst->staticInst); + thread->pcState(target); + + DPRINTF(Branch, "Advancing current PC from: %s to: %s\n", + pc_before, target); + } + + if (inst->predictedTaken && !force_branch) { + /* Predicted to branch */ + if (!must_branch) { + /* No branch was taken, change stream to get us back to the + * intended PC value */ + DPRINTF(Branch, "Predicted a branch from 0x%x to 0x%x but" + " none happened inst: %s\n", + inst->pc.instAddr(), inst->predictedTarget.instAddr(), *inst); + + reason = BranchData::BadlyPredictedBranch; + } else if (inst->predictedTarget == target) { + /* Branch prediction got the right target, kill the branch and + * carry on. 
+             * Note that this information to the branch predictor might get
+             * overwritten by a "real" branch during this cycle */
+            DPRINTF(Branch, "Predicted a branch from 0x%x to 0x%x correctly"
+                " inst: %s\n",
+                inst->pc.instAddr(), inst->predictedTarget.instAddr(), *inst);
+
+            reason = BranchData::CorrectlyPredictedBranch;
+        } else {
+            /* Branch prediction got the wrong target */
+            DPRINTF(Branch, "Predicted a branch from 0x%x to 0x%x"
+                " but got the wrong target (actual: 0x%x) inst: %s\n",
+                inst->pc.instAddr(), inst->predictedTarget.instAddr(),
+                target.instAddr(), *inst);
+
+            reason = BranchData::BadlyPredictedBranchTarget;
+        }
+    } else if (must_branch) {
+        /* Unpredicted branch */
+        DPRINTF(Branch, "Unpredicted branch from 0x%x to 0x%x inst: %s\n",
+            inst->pc.instAddr(), target.instAddr(), *inst);
+
+        reason = BranchData::UnpredictedBranch;
+    } else {
+        /* No branch at all */
+        reason = BranchData::NoBranch;
+    }
+
+    updateBranchData(reason, inst, target, branch);
+}
+
+void
+Execute::updateBranchData(
+    BranchData::Reason reason,
+    MinorDynInstPtr inst, const TheISA::PCState &target,
+    BranchData &branch)
+{
+    if (reason != BranchData::NoBranch) {
+        /* Bump up the stream sequence number on a real branch */
+        if (BranchData::isStreamChange(reason))
+            streamSeqNum++;
+
+        /* Branches (even mis-predictions) don't change the predictionSeqNum,
+         * just the streamSeqNum */
+        branch = BranchData(reason, streamSeqNum,
+            /* Maintaining predictionSeqNum if there's no inst is just a
+             * courtesy and looks better on minorview */
+            (inst->isBubble() ? lastPredictionSeqNum
+                : inst->id.predictionSeqNum),
+            target, inst);
+
+        DPRINTF(Branch, "Branch data signalled: %s\n", branch);
+    }
+}
+
+void
+Execute::handleMemResponse(MinorDynInstPtr inst,
+    LSQ::LSQRequestPtr response, BranchData &branch, Fault &fault)
+{
+    ThreadID thread_id = inst->id.threadId;
+    ThreadContext *thread = cpu.getContext(thread_id);
+
+    ExecContext context(cpu, *cpu.threads[thread_id], *this, inst);
+
+    PacketPtr packet = response->packet;
+
+    bool is_load = inst->staticInst->isLoad();
+    bool is_store = inst->staticInst->isStore();
+    bool is_prefetch = inst->staticInst->isDataPrefetch();
+
+    /* If true, the trace's predicate value will be taken from the exec
+     * context predicate, otherwise, it will be set to false */
+    bool use_context_predicate = true;
+
+    if (response->fault != NoFault) {
+        /* Invoke memory faults.
*/ + DPRINTF(MinorMem, "Completing fault from DTLB access: %s\n", + response->fault->name()); + + if (inst->staticInst->isPrefetch()) { + DPRINTF(MinorMem, "Not taking fault on prefetch: %s\n", + response->fault->name()); + + /* Don't assign to fault */ + } else { + /* Take the fault raised during the TLB/memory access */ + fault = response->fault; + + fault->invoke(thread, inst->staticInst); + } + } else if (!packet) { + DPRINTF(MinorMem, "Completing failed request inst: %s\n", + *inst); + use_context_predicate = false; + } else if (packet->isError()) { + DPRINTF(MinorMem, "Trying to commit error response: %s\n", + *inst); + + fatal("Received error response packet for inst: %s\n", *inst); + } else if (is_store || is_load || is_prefetch) { + assert(packet); + + DPRINTF(MinorMem, "Memory response inst: %s addr: 0x%x size: %d\n", + *inst, packet->getAddr(), packet->getSize()); + + if (is_load && packet->getSize() > 0) { + DPRINTF(MinorMem, "Memory data[0]: 0x%x\n", + static_cast<unsigned int>(packet->getPtr<uint8_t>()[0])); + } + + /* Complete the memory access instruction */ + fault = inst->staticInst->completeAcc(packet, &context, + inst->traceData); + + if (fault != NoFault) { + /* Invoke fault created by instruction completion */ + DPRINTF(MinorMem, "Fault in memory completeAcc: %s\n", + fault->name()); + fault->invoke(thread, inst->staticInst); + } else { + /* Stores need to be pushed into the store buffer to finish + * them off */ + if (response->needsToBeSentToStoreBuffer()) + lsq.sendStoreToStoreBuffer(response); + } + } else { + fatal("There should only ever be reads, " + "writes or faults at this point\n"); + } + + lsq.popResponse(response); + + if (inst->traceData) { + inst->traceData->setPredicate((use_context_predicate ? + context.readPredicate() : false)); + } + + doInstCommitAccounting(inst); + + /* Generate output to account for branches */ + tryToBranch(inst, fault, branch); +} + +bool +Execute::isInterrupted(ThreadID thread_id) const +{ + return cpu.checkInterrupts(cpu.getContext(thread_id)); +} + +bool +Execute::takeInterrupt(ThreadID thread_id, BranchData &branch) +{ + DPRINTF(MinorInterrupt, "Considering interrupt status from PC: %s\n", + cpu.getContext(thread_id)->pcState()); + + Fault interrupt = cpu.getInterruptController()->getInterrupt + (cpu.getContext(thread_id)); + + if (interrupt != NoFault) { + /* The interrupt *must* set pcState */ + cpu.getInterruptController()->updateIntrInfo + (cpu.getContext(thread_id)); + interrupt->invoke(cpu.getContext(thread_id)); + + assert(!lsq.accessesInFlight()); + + DPRINTF(MinorInterrupt, "Invoking interrupt: %s to PC: %s\n", + interrupt->name(), cpu.getContext(thread_id)->pcState()); + + /* Assume that an interrupt *must* cause a branch. Assert this? */ + + updateBranchData(BranchData::Interrupt, MinorDynInst::bubble(), + cpu.getContext(thread_id)->pcState(), branch); + } + + return interrupt != NoFault; +} + +bool +Execute::executeMemRefInst(MinorDynInstPtr inst, BranchData &branch, + bool &passed_predicate, Fault &fault) +{ + bool issued = false; + + /* Set to true if the mem op. 
 is issued and sent to the mem system */
+    passed_predicate = false;
+
+    if (!lsq.canRequest()) {
+        /* Not acting on instruction yet as the memory
+         * queues are full */
+        issued = false;
+    } else {
+        ThreadContext *thread = cpu.getContext(inst->id.threadId);
+        TheISA::PCState old_pc = thread->pcState();
+
+        ExecContext context(cpu, *cpu.threads[inst->id.threadId],
+            *this, inst);
+
+        DPRINTF(MinorExecute, "Initiating memRef inst: %s\n", *inst);
+
+        Fault init_fault = inst->staticInst->initiateAcc(&context,
+            inst->traceData);
+
+        if (init_fault != NoFault) {
+            DPRINTF(MinorExecute, "Fault on memory inst: %s"
+                " initiateAcc: %s\n", *inst, init_fault->name());
+            fault = init_fault;
+        } else {
+            /* Only set this if the instruction passed its
+             * predicate */
+            passed_predicate = context.readPredicate();
+
+            /* Set predicate in tracing */
+            if (inst->traceData)
+                inst->traceData->setPredicate(passed_predicate);
+
+            /* If the instruction didn't pass its predicate (and so will
+             * not progress from here), try to branch to correct any
+             * branch mis-prediction. */
+            if (!passed_predicate) {
+                /* Leave it up to commit to handle the fault */
+                lsq.pushFailedRequest(inst);
+            }
+        }
+
+        /* Restore thread PC */
+        thread->pcState(old_pc);
+        issued = true;
+    }
+
+    return issued;
+}
+
+/** Increment a cyclic buffer index for indices [0, cycle_size-1] */
+inline unsigned int
+cyclicIndexInc(unsigned int index, unsigned int cycle_size)
+{
+    unsigned int ret = index + 1;
+
+    if (ret == cycle_size)
+        ret = 0;
+
+    return ret;
+}
+
+/** Decrement a cyclic buffer index for indices [0, cycle_size-1] */
+inline unsigned int
+cyclicIndexDec(unsigned int index, unsigned int cycle_size)
+{
+    int ret = index - 1;
+
+    if (ret < 0)
+        ret = cycle_size - 1;
+
+    return ret;
+}
+
+unsigned int
+Execute::issue(bool only_issue_microops)
+{
+    const ForwardInstData *insts_in = getInput();
+
+    /* Early termination if we have no instructions */
+    if (!insts_in)
+        return 0;
+
+    /* Start from the first FU */
+    unsigned int fu_index = 0;
+
+    /* Remains true while instructions are still being issued. If any
+     * instruction fails to issue, this is set to false and we exit issue.
+     * This strictly enforces in-order issue. For other issue behaviours,
+     * a more complicated test in the outer while loop below is needed. */
+    bool issued = true;
+
+    /* Number of insts issued this cycle to check for issueLimit */
+    unsigned num_insts_issued = 0;
+
+    /* Number of memory ops issued this cycle to check for
+     * memoryIssueLimit */
+    unsigned num_mem_insts_issued = 0;
+
+    /* Number of instructions discarded this cycle in order to enforce a
+     * discardLimit. @todo, add that parameter?
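+     * (As far as this file shows, no such parameter exists yet in
+     * MinorCPUParams; the counter below is only ever incremented, so it
+     * serves as accounting rather than an enforced limit.)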
*/ + unsigned num_insts_discarded = 0; + + do { + MinorDynInstPtr inst = insts_in->insts[inputIndex]; + ThreadID thread_id = inst->id.threadId; + Fault fault = inst->fault; + bool discarded = false; + bool issued_mem_ref = false; + + if (inst->isBubble()) { + /* Skip */ + issued = true; + } else if (cpu.getContext(thread_id)->status() == + ThreadContext::Suspended) + { + DPRINTF(MinorExecute, "Not issuing inst: %s from suspended" + " thread\n", *inst); + + issued = false; + } else if (inst->id.streamSeqNum != streamSeqNum) { + DPRINTF(MinorExecute, "Discarding inst: %s as its stream" + " state was unexpected, expected: %d\n", + *inst, streamSeqNum); + issued = true; + discarded = true; + } else if (fault == NoFault && only_issue_microops && + /* Is this anything other than a non-first microop */ + (!inst->staticInst->isMicroop() || + !inst->staticInst->isFirstMicroop())) + { + DPRINTF(MinorExecute, "Not issuing new non-microop inst: %s\n", + *inst); + + issued = false; + } else { + /* Try and issue an instruction into an FU, assume we didn't and + * fix that in the loop */ + issued = false; + + /* Try FU from 0 each instruction */ + fu_index = 0; + + /* Try and issue a single instruction stepping through the + * available FUs */ + do { + FUPipeline *fu = funcUnits[fu_index]; + + DPRINTF(MinorExecute, "Trying to issue inst: %s to FU: %d\n", + *inst, fu_index); + + /* Does the examined fu have the OpClass-related capability + * needed to execute this instruction? Faults can always + * issue to any FU but probably should just 'live' in the + * inFlightInsts queue rather than having an FU. */ + bool fu_is_capable = (!inst->isFault() ? + fu->provides(inst->staticInst->opClass()) : true); + + if (inst->isNoCostInst()) { + /* Issue free insts. to a fake numbered FU */ + fu_index = noCostFUIndex; + + /* And start the countdown on activity to allow + * this instruction to get to the end of its FU */ + cpu.activityRecorder->activity(); + + /* Mark the destinations for this instruction as + * busy */ + scoreboard.markupInstDests(inst, cpu.curCycle() + + Cycles(0), cpu.getContext(thread_id), false); + + inst->fuIndex = noCostFUIndex; + inst->extraCommitDelay = Cycles(0); + inst->extraCommitDelayExpr = NULL; + + /* Push the instruction onto the inFlight queue so + * it can be committed in order */ + QueuedInst fu_inst(inst); + inFlightInsts->push(fu_inst); + + issued = true; + + } else if (!fu_is_capable || fu->alreadyPushed()) { + /* Skip */ + if (!fu_is_capable) { + DPRINTF(MinorExecute, "Can't issue as FU: %d isn't" + " capable\n", fu_index); + } else { + DPRINTF(MinorExecute, "Can't issue as FU: %d is" + " already busy\n", fu_index); + } + } else if (fu->stalled) { + DPRINTF(MinorExecute, "Can't issue inst: %s into FU: %d," + " it's stalled\n", + *inst, fu_index); + } else if (!fu->canInsert()) { + DPRINTF(MinorExecute, "Can't issue inst: %s to busy FU" + " for another: %d cycles\n", + *inst, fu->cyclesBeforeInsert()); + } else { + MinorFUTiming *timing = (!inst->isFault() ? + fu->findTiming(inst->staticInst) : NULL); + + const std::vector<Cycles> *src_latencies = + (timing ? 
 &(timing->srcRegsRelativeLats)
+                        : NULL);
+
+                    const std::vector<bool> *cant_forward_from_fu_indices =
+                        &(fu->cantForwardFromFUIndices);
+
+                    if (timing && timing->suppress) {
+                        DPRINTF(MinorExecute, "Can't issue inst: %s as extra"
+                            " decoding is suppressing it\n",
+                            *inst);
+                    } else if (!scoreboard.canInstIssue(inst, src_latencies,
+                        cant_forward_from_fu_indices,
+                        cpu.curCycle(), cpu.getContext(thread_id)))
+                    {
+                        DPRINTF(MinorExecute, "Can't issue inst: %s yet\n",
+                            *inst);
+                    } else {
+                        /* Can insert the instruction into this FU */
+                        DPRINTF(MinorExecute, "Issuing inst: %s"
+                            " into FU %d\n", *inst,
+                            fu_index);
+
+                        Cycles extra_dest_retire_lat = Cycles(0);
+                        TimingExpr *extra_dest_retire_lat_expr = NULL;
+                        Cycles extra_assumed_lat = Cycles(0);
+
+                        /* Add the extraCommitDelay and extraAssumeLat to
+                         * the FU pipeline timings */
+                        if (timing) {
+                            extra_dest_retire_lat =
+                                timing->extraCommitLat;
+                            extra_dest_retire_lat_expr =
+                                timing->extraCommitLatExpr;
+                            extra_assumed_lat =
+                                timing->extraAssumedLat;
+                        }
+
+                        issued_mem_ref = inst->isMemRef();
+
+                        QueuedInst fu_inst(inst);
+
+                        /* Decorate the inst with FU details */
+                        inst->fuIndex = fu_index;
+                        inst->extraCommitDelay = extra_dest_retire_lat;
+                        inst->extraCommitDelayExpr =
+                            extra_dest_retire_lat_expr;
+
+                        if (issued_mem_ref) {
+                            /* Remember which instruction this memory op
+                             * depends on so that initiateAcc can be called
+                             * early */
+                            if (allowEarlyMemIssue) {
+                                inst->instToWaitFor =
+                                    scoreboard.execSeqNumToWaitFor(inst,
+                                        cpu.getContext(thread_id));
+
+                                if (lsq.getLastMemBarrier() >
+                                    inst->instToWaitFor)
+                                {
+                                    DPRINTF(MinorExecute, "A barrier will"
+                                        " cause a delay in mem ref issue of"
+                                        " inst: %s until after inst"
+                                        " %d(exec)\n", *inst,
+                                        lsq.getLastMemBarrier());
+
+                                    inst->instToWaitFor =
+                                        lsq.getLastMemBarrier();
+                                } else {
+                                    DPRINTF(MinorExecute, "Memory ref inst:"
+                                        " %s must wait for inst %d(exec)"
+                                        " before issuing\n",
+                                        *inst, inst->instToWaitFor);
+                                }
+
+                                inst->canEarlyIssue = true;
+                            }
+                            /* Also queue this instruction in the memory ref
+                             * queue to ensure in-order issue to the LSQ */
+                            DPRINTF(MinorExecute, "Pushing mem inst: %s\n",
+                                *inst);
+                            inFUMemInsts->push(fu_inst);
+                        }
+
+                        /* Issue to FU */
+                        fu->push(fu_inst);
+                        /* And start the countdown on activity to allow
+                         * this instruction to get to the end of its FU */
+                        cpu.activityRecorder->activity();
+
+                        /* Mark the destinations for this instruction as
+                         * busy */
+                        scoreboard.markupInstDests(inst, cpu.curCycle() +
+                            fu->description.opLat +
+                            extra_dest_retire_lat +
+                            extra_assumed_lat,
+                            cpu.getContext(thread_id),
+                            issued_mem_ref && extra_assumed_lat == Cycles(0));
+
+                        /* Push the instruction onto the inFlight queue so
+                         * it can be committed in order */
+                        inFlightInsts->push(fu_inst);
+
+                        issued = true;
+                    }
+                }
+
+                fu_index++;
+            } while (fu_index != numFuncUnits && !issued);
+
+            if (!issued)
+                DPRINTF(MinorExecute, "Didn't issue inst: %s\n", *inst);
+        }
+
+        if (issued) {
+            /* Generate MinorTrace's MinorInst lines. Do this at commit
+             * to allow better instruction annotation?
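+             * (These MinorInst lines feed the MinorTrace-based pipeline
+             * viewer; annotating at issue, as here, dates an instruction
+             * by when it entered its FU rather than by retirement.)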
*/ + if (DTRACE(MinorTrace) && !inst->isBubble()) + inst->minorTraceInst(*this); + + /* Mark up barriers in the LSQ */ + if (!discarded && inst->isInst() && + inst->staticInst->isMemBarrier()) + { + DPRINTF(MinorMem, "Issuing memory barrier inst: %s\n", *inst); + lsq.issuedMemBarrierInst(inst); + } + + if (inst->traceData && setTraceTimeOnIssue) { + inst->traceData->setWhen(curTick()); + } + + if (issued_mem_ref) + num_mem_insts_issued++; + + if (discarded) { + num_insts_discarded++; + } else { + num_insts_issued++; + + if (num_insts_issued == issueLimit) + DPRINTF(MinorExecute, "Reached inst issue limit\n"); + } + + inputIndex++; + DPRINTF(MinorExecute, "Stepping to next inst inputIndex: %d\n", + inputIndex); + } + + /* Got to the end of a line */ + if (inputIndex == insts_in->width()) { + popInput(); + /* Set insts_in to null to force us to leave the surrounding + * loop */ + insts_in = NULL; + + if (processMoreThanOneInput) { + DPRINTF(MinorExecute, "Wrapping\n"); + insts_in = getInput(); + } + } + } while (insts_in && inputIndex < insts_in->width() && + /* We still have instructions */ + fu_index != numFuncUnits && /* Not visited all FUs */ + issued && /* We've not yet failed to issue an instruction */ + num_insts_issued != issueLimit && /* Still allowed to issue */ + num_mem_insts_issued != memoryIssueLimit); + + return num_insts_issued; +} + +bool +Execute::tryPCEvents() +{ + ThreadContext *thread = cpu.getContext(0); + unsigned int num_pc_event_checks = 0; + + /* Handle PC events on instructions */ + Addr oldPC; + do { + oldPC = thread->instAddr(); + cpu.system->pcEventQueue.service(thread); + num_pc_event_checks++; + } while (oldPC != thread->instAddr()); + + if (num_pc_event_checks > 1) { + DPRINTF(PCEvent, "Acting on PC Event to PC: %s\n", + thread->pcState()); + } + + return num_pc_event_checks > 1; +} + +void +Execute::doInstCommitAccounting(MinorDynInstPtr inst) +{ + assert(!inst->isFault()); + + MinorThread *thread = cpu.threads[inst->id.threadId]; + + /* Increment the many and various inst and op counts in the + * thread and system */ + if (!inst->staticInst->isMicroop() || inst->staticInst->isLastMicroop()) + { + thread->numInst++; + thread->numInsts++; + cpu.stats.numInsts++; + } + thread->numOp++; + thread->numOps++; + cpu.stats.numOps++; + cpu.system->totalNumInsts++; + + /* Act on events related to instruction counts */ + cpu.comInstEventQueue[inst->id.threadId]->serviceEvents(thread->numInst); + cpu.system->instEventQueue.serviceEvents(cpu.system->totalNumInsts); + + /* Set the CP SeqNum to the numOps commit number */ + if (inst->traceData) + inst->traceData->setCPSeq(thread->numOp); +} + +bool +Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue, + BranchData &branch, Fault &fault, bool &committed, + bool &completed_mem_issue) +{ + ThreadID thread_id = inst->id.threadId; + ThreadContext *thread = cpu.getContext(thread_id); + + bool completed_inst = true; + fault = NoFault; + + /* Is the thread for this instruction suspended? 
In that case, just + * stall as long as there are no pending interrupts */ + if (thread->status() == ThreadContext::Suspended && + !isInterrupted(thread_id)) + { + DPRINTF(MinorExecute, "Not committing inst from suspended thread" + " inst: %s\n", *inst); + completed_inst = false; + } else if (inst->isFault()) { + ExecContext context(cpu, *cpu.threads[thread_id], *this, inst); + + DPRINTF(MinorExecute, "Fault inst reached Execute: %s\n", + inst->fault->name()); + + fault = inst->fault; + inst->fault->invoke(thread, NULL); + + tryToBranch(inst, fault, branch); + } else if (inst->staticInst->isMemRef()) { + /* Memory accesses are executed in two parts: + * executeMemRefInst -- calculates the EA and issues the access + * to memory. This is done here. + * handleMemResponse -- handles the response packet, done by + * Execute::commit + * + * While the memory access is in its FU, the EA is being + * calculated. At the end of the FU, when it is ready to + * 'commit' (in this function), the access is presented to the + * memory queues. When a response comes back from memory, + * Execute::commit will commit it. + */ + bool predicate_passed = false; + bool completed_mem_inst = executeMemRefInst(inst, branch, + predicate_passed, fault); + + if (completed_mem_inst && fault != NoFault) { + if (early_memory_issue) { + DPRINTF(MinorExecute, "Fault in early executing inst: %s\n", + fault->name()); + /* Don't execute the fault, just stall the instruction + * until it gets to the head of inFlightInsts */ + inst->canEarlyIssue = false; + /* Not completed as we'll come here again to pick up + * the fault when we get to the end of the FU */ + completed_inst = false; + } else { + DPRINTF(MinorExecute, "Fault in execute: %s\n", + fault->name()); + fault->invoke(thread, NULL); + + tryToBranch(inst, fault, branch); + completed_inst = true; + } + } else { + completed_inst = completed_mem_inst; + } + completed_mem_issue = completed_inst; + } else if (inst->isInst() && inst->staticInst->isMemBarrier() && + !lsq.canPushIntoStoreBuffer()) + { + DPRINTF(MinorExecute, "Can't commit data barrier inst: %s yet as" + " there isn't space in the store buffer\n", *inst); + + completed_inst = false; + } else { + ExecContext context(cpu, *cpu.threads[thread_id], *this, inst); + + DPRINTF(MinorExecute, "Committing inst: %s\n", *inst); + + fault = inst->staticInst->execute(&context, + inst->traceData); + + /* Set the predicate for tracing and dump */ + if (inst->traceData) + inst->traceData->setPredicate(context.readPredicate()); + + committed = true; + + if (fault != NoFault) { + DPRINTF(MinorExecute, "Fault in execute of inst: %s fault: %s\n", + *inst, fault->name()); + fault->invoke(thread, inst->staticInst); + } + + doInstCommitAccounting(inst); + tryToBranch(inst, fault, branch); + } + + if (completed_inst) { + /* Keep a copy of this instruction's predictionSeqNum just in case + * we need to issue a branch without an instruction (such as an + * interrupt) */ + lastPredictionSeqNum = inst->id.predictionSeqNum; + + /* Check to see if this instruction suspended the current thread. 
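+         * (A quiesce-like instruction, for example, can leave the thread
+         * Suspended; the SuspendThread branch issued below tells Fetch1
+         * to stop fetching this stream until the thread is woken again.)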
*/ + if (!inst->isFault() && + thread->status() == ThreadContext::Suspended && + branch.isBubble() && /* It didn't branch too */ + !isInterrupted(thread_id)) /* Don't suspend if we have + interrupts */ + { + TheISA::PCState resume_pc = cpu.getContext(0)->pcState(); + + assert(resume_pc.microPC() == 0); + + DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute" + " inst: %s\n", inst->id.threadId, *inst); + + cpu.stats.numFetchSuspends++; + + updateBranchData(BranchData::SuspendThread, inst, resume_pc, + branch); + } + } + + return completed_inst; +} + +void +Execute::commit(bool only_commit_microops, bool discard, BranchData &branch) +{ + Fault fault = NoFault; + Cycles now = cpu.curCycle(); + + /** + * Try and execute as many instructions from the end of FU pipelines as + * possible. This *doesn't* include actually advancing the pipelines. + * + * We do this by looping on the front of the inFlightInsts queue for as + * long as we can find the desired instruction at the end of the + * functional unit it was issued to without seeing a branch or a fault. + * In this function, these terms are used: + * complete -- The instruction has finished its passage through + * its functional unit and its fate has been decided + * (committed, discarded, issued to the memory system) + * commit -- The instruction is complete(d), not discarded and has + * its effects applied to the CPU state + * discard(ed) -- The instruction is complete but not committed + * as its streamSeqNum disagrees with the current + * Execute::streamSeqNum + * + * Commits are also possible from two other places: + * + * 1) Responses returning from the LSQ + * 2) Mem ops issued to the LSQ ('committed' from the FUs) earlier + * than their position in the inFlightInsts queue, but after all + * their dependencies are resolved. + */ + + /* Has an instruction been completed? Once this becomes false, we stop + * trying to complete instructions. */ + bool completed_inst = true; + + /* Number of insts committed this cycle to check against commitLimit */ + unsigned int num_insts_committed = 0; + + /* Number of memory access instructions committed to check against + * memCommitLimit */ + unsigned int num_mem_refs_committed = 0; + + if (only_commit_microops && !inFlightInsts->empty()) { + DPRINTF(MinorInterrupt, "Only commit microops %s %d\n", + *(inFlightInsts->front().inst), + lastCommitWasEndOfMacroop); + } + + while (!inFlightInsts->empty() && /* Some more instructions to process */ + !branch.isStreamChange() && /* No real branch */ + fault == NoFault && /* No faults */ + completed_inst && /* Still finding instructions to execute */ + num_insts_committed != commitLimit /* Not reached commit limit */ + ) + { + if (only_commit_microops) { + DPRINTF(MinorInterrupt, "Committing tail of insts before" + " interrupt: %s\n", + *(inFlightInsts->front().inst)); + } + + QueuedInst *head_inflight_inst = &(inFlightInsts->front()); + + InstSeqNum head_exec_seq_num = + head_inflight_inst->inst->id.execSeqNum; + + /* The instruction we actually process if completed_inst + * remains true to the end of the loop body. 
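+         * (Note that under early memory issue, inst may be swapped below
+         * for an instruction taken from the head of inFUMemInsts rather
+         * than this queue-head instruction.)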
+         * Start by considering the head of the in flight insts queue */
+        MinorDynInstPtr inst = head_inflight_inst->inst;
+
+        bool committed_inst = false;
+        bool discard_inst = false;
+        bool completed_mem_ref = false;
+        bool issued_mem_ref = false;
+        bool early_memory_issue = false;
+
+        /* Must set this again to go around the loop */
+        completed_inst = false;
+
+        /* If we're just completing a macroop before an interrupt or drain,
+         * can we still commit another microop (rather than a memory
+         * response) without crossing into the next full instruction? */
+        bool can_commit_insts = !inFlightInsts->empty() &&
+            !(only_commit_microops && lastCommitWasEndOfMacroop);
+
+        /* Can we find a mem response for this inst */
+        LSQ::LSQRequestPtr mem_response =
+            (inst->inLSQ ? lsq.findResponse(inst) : NULL);
+
+        DPRINTF(MinorExecute, "Trying to commit canCommitInsts: %d\n",
+            can_commit_insts);
+
+        /* Test for PC events after every instruction */
+        if (isInbetweenInsts() && tryPCEvents()) {
+            ThreadContext *thread = cpu.getContext(0);
+
+            /* Branch as there was a change in PC */
+            updateBranchData(BranchData::UnpredictedBranch,
+                MinorDynInst::bubble(), thread->pcState(), branch);
+        } else if (mem_response &&
+            num_mem_refs_committed < memoryCommitLimit)
+        {
+            /* Try to commit from the memory responses next */
+            discard_inst = inst->id.streamSeqNum != streamSeqNum ||
+                discard;
+
+            DPRINTF(MinorExecute, "Trying to commit mem response: %s\n",
+                *inst);
+
+            /* Complete or discard the response */
+            if (discard_inst) {
+                DPRINTF(MinorExecute, "Discarding mem inst: %s as its"
+                    " stream state was unexpected, expected: %d\n",
+                    *inst, streamSeqNum);
+
+                lsq.popResponse(mem_response);
+            } else {
+                handleMemResponse(inst, mem_response, branch, fault);
+                committed_inst = true;
+            }
+
+            completed_mem_ref = true;
+            completed_inst = true;
+        } else if (can_commit_insts) {
+            /* If true, this instruction will, subject to timing tweaks,
+             * be considered for completion. try_to_commit flattens
+             * the `if' tree a bit and allows other tests for inst
+             * commit to be inserted here. */
+            bool try_to_commit = false;
+
+            /* Try and issue memory ops early if they:
+             *  - Can push a request into the LSQ
+             *  - Have reached the end of their FUs
+             *  - Have had all their dependencies satisfied
+             *  - Are from the right stream
+             *
+             *  For any other case, leave it to the normal instruction
+             *  issue below to handle them.
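+             *
+             *  As an illustration: with executeAllowEarlyMemoryIssue set,
+             *  a load whose instToWaitFor producer has already committed
+             *  can be sent to the LSQ from the middle of inFlightInsts,
+             *  while its inFlightInsts slot still retires in order below.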
+ */ + if (!inFUMemInsts->empty() && lsq.canRequest()) { + DPRINTF(MinorExecute, "Trying to commit from mem FUs\n"); + + const MinorDynInstPtr head_mem_ref_inst = + inFUMemInsts->front().inst; + FUPipeline *fu = funcUnits[head_mem_ref_inst->fuIndex]; + const MinorDynInstPtr &fu_inst = fu->front().inst; + + /* Use this, possibly out of order, inst as the one + * to 'commit'/send to the LSQ */ + if (!fu_inst->isBubble() && + !fu_inst->inLSQ && + fu_inst->canEarlyIssue && + streamSeqNum == fu_inst->id.streamSeqNum && + head_exec_seq_num > fu_inst->instToWaitFor) + { + DPRINTF(MinorExecute, "Issuing mem ref early" + " inst: %s instToWaitFor: %d\n", + *(fu_inst), fu_inst->instToWaitFor); + + inst = fu_inst; + try_to_commit = true; + early_memory_issue = true; + completed_inst = true; + } + } + + /* Try and commit FU-less insts */ + if (!completed_inst && inst->isNoCostInst()) { + DPRINTF(MinorExecute, "Committing no cost inst: %s", *inst); + + try_to_commit = true; + completed_inst = true; + } + + /* Try to issue from the ends of FUs and the inFlightInsts + * queue */ + if (!completed_inst && !inst->inLSQ) { + DPRINTF(MinorExecute, "Trying to commit from FUs\n"); + + /* Try to commit from a functional unit */ + /* Is the head inst of the expected inst's FU actually the + * expected inst? */ + QueuedInst &fu_inst = + funcUnits[inst->fuIndex]->front(); + InstSeqNum fu_inst_seq_num = fu_inst.inst->id.execSeqNum; + + if (fu_inst.inst->isBubble()) { + /* No instruction ready */ + completed_inst = false; + } else if (fu_inst_seq_num != head_exec_seq_num) { + /* Past instruction: we must have already executed it + * in the same cycle and so the head inst isn't + * actually at the end of its pipeline + * Future instruction: handled above and only for + * mem refs on their way to the LSQ */ + } else /* if (fu_inst_seq_num == head_exec_seq_num) */ { + /* All instructions can be committed if they have the + * right execSeqNum and there are no in-flight + * mem insts before us */ + try_to_commit = true; + completed_inst = true; + } + } + + if (try_to_commit) { + discard_inst = inst->id.streamSeqNum != streamSeqNum || + discard; + + /* Is this instruction discardable as its streamSeqNum + * doesn't match? */ + if (!discard_inst) { + /* Try to commit or discard a non-memory instruction. 
+                     * Memory ops are actually 'committed' from their FUs
+                     * and 'issued' into the memory system so we need to
+                     * account for them later (commit_was_mem_issue gets
+                     * set) */
+                    if (inst->extraCommitDelayExpr) {
+                        DPRINTF(MinorExecute, "Evaluating expression for"
+                            " extra commit delay inst: %s\n", *inst);
+
+                        ThreadContext *thread =
+                            cpu.getContext(inst->id.threadId);
+
+                        TimingExprEvalContext context(inst->staticInst,
+                            thread, NULL);
+
+                        uint64_t extra_delay = inst->extraCommitDelayExpr->
+                            eval(context);
+
+                        DPRINTF(MinorExecute, "Extra commit delay expr"
+                            " result: %d\n", extra_delay);
+
+                        if (extra_delay < 128) {
+                            inst->extraCommitDelay += Cycles(extra_delay);
+                        } else {
+                            DPRINTF(MinorExecute, "Extra commit delay was"
+                                " very long: %d\n", extra_delay);
+                        }
+                        inst->extraCommitDelayExpr = NULL;
+                    }
+
+                    /* Move the extraCommitDelay from the instruction
+                     * into the minimumCommitCycle */
+                    if (inst->extraCommitDelay != Cycles(0)) {
+                        inst->minimumCommitCycle = cpu.curCycle() +
+                            inst->extraCommitDelay;
+                        inst->extraCommitDelay = Cycles(0);
+                    }
+
+                    /* @todo Think about making lastMemBarrier be
+                     * MAX_UINT_64 to avoid using 0 as a marker value */
+                    if (!inst->isFault() && inst->isMemRef() &&
+                        lsq.getLastMemBarrier() <
+                            inst->id.execSeqNum &&
+                        lsq.getLastMemBarrier() != 0)
+                    {
+                        DPRINTF(MinorExecute, "Not committing inst: %s yet"
+                            " as there are incomplete barriers in flight\n",
+                            *inst);
+                        completed_inst = false;
+                    } else if (inst->minimumCommitCycle > now) {
+                        DPRINTF(MinorExecute, "Not committing inst: %s yet"
+                            " as it wants to be stalled for %d more cycles\n",
+                            *inst, inst->minimumCommitCycle - now);
+                        completed_inst = false;
+                    } else {
+                        completed_inst = commitInst(inst,
+                            early_memory_issue, branch, fault,
+                            committed_inst, issued_mem_ref);
+                    }
+                } else {
+                    /* Discard instruction */
+                    completed_inst = true;
+                }
+
+                if (completed_inst) {
+                    /* Allow the pipeline to advance. If the FU head
+                     * instruction wasn't the inFlightInsts head
+                     * but had already been committed, it would have
+                     * unstalled the pipeline before here */
+                    if (inst->fuIndex != noCostFUIndex)
+                        funcUnits[inst->fuIndex]->stalled = false;
+                }
+            }
+        } else {
+            DPRINTF(MinorExecute, "No instructions to commit\n");
+            completed_inst = false;
+        }
+
+        /* All discardable instructions must also be 'completed' by now */
+        assert(!(discard_inst && !completed_inst));
+
+        /* Instruction committed but was discarded due to streamSeqNum
+         * mismatch */
+        if (discard_inst) {
+            DPRINTF(MinorExecute, "Discarding inst: %s as its stream"
+                " state was unexpected, expected: %d\n",
+                *inst, streamSeqNum);
+
+            if (fault == NoFault)
+                cpu.stats.numDiscardedOps++;
+        }
+
+        /* Mark the mem inst as being in the LSQ */
+        if (issued_mem_ref) {
+            inst->fuIndex = 0;
+            inst->inLSQ = true;
+        }
+
+        /* Pop issued (to LSQ) and discarded mem refs from the inFUMemInsts
+         * as they've *definitely* exited the FUs */
+        if (completed_inst && inst->isMemRef()) {
+            /* The MemRef could have been discarded from the FU or the memory
+             * queue, so just check an FU instruction */
+            if (!inFUMemInsts->empty() &&
+                inFUMemInsts->front().inst == inst)
+            {
+                inFUMemInsts->pop();
+            }
+        }
+
+        if (completed_inst && !(issued_mem_ref && fault == NoFault)) {
+            /* Note that this includes discarded insts */
+            DPRINTF(MinorExecute, "Completed inst: %s\n", *inst);
+
+            /* Got to the end of a full instruction?
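+             * (lastCommitWasEndOfMacroop, set just below, is what
+             * isInbetweenInsts() tests; interrupts and drains only take
+             * effect at these macroop boundaries.)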
 */
+            lastCommitWasEndOfMacroop = inst->isFault() ||
+                inst->isLastOpInInst();
+
+            /* lastPredictionSeqNum is kept as a convenience to prevent its
+             * value from changing too much on the minorview display */
+            lastPredictionSeqNum = inst->id.predictionSeqNum;
+
+            /* Finished with the inst, remove it from the inst queue and
+             * clear its dependencies */
+            inFlightInsts->pop();
+
+            /* Complete barriers in the LSQ/move to store buffer */
+            if (inst->isInst() && inst->staticInst->isMemBarrier()) {
+                DPRINTF(MinorMem, "Completing memory barrier"
+                    " inst: %s committed: %d\n", *inst, committed_inst);
+                lsq.completeMemBarrierInst(inst, committed_inst);
+            }
+
+            scoreboard.clearInstDests(inst, inst->isMemRef());
+        }
+
+        /* Handle per-cycle instruction counting */
+        if (committed_inst) {
+            bool is_no_cost_inst = inst->isNoCostInst();
+
+            /* Don't show no cost instructions as having taken a commit
+             * slot */
+            if (DTRACE(MinorTrace) && !is_no_cost_inst)
+                instsBeingCommitted.insts[num_insts_committed] = inst;
+
+            if (!is_no_cost_inst)
+                num_insts_committed++;
+
+            if (num_insts_committed == commitLimit)
+                DPRINTF(MinorExecute, "Reached inst commit limit\n");
+
+            /* Re-set the time of the instruction if that's required for
+             * tracing */
+            if (inst->traceData) {
+                if (setTraceTimeOnCommit)
+                    inst->traceData->setWhen(curTick());
+                inst->traceData->dump();
+            }
+
+            if (completed_mem_ref)
+                num_mem_refs_committed++;
+
+            if (num_mem_refs_committed == memoryCommitLimit)
+                DPRINTF(MinorExecute, "Reached mem ref commit limit\n");
+        }
+    }
+}
+
+bool
+Execute::isInbetweenInsts() const
+{
+    return lastCommitWasEndOfMacroop &&
+        !lsq.accessesInFlight();
+}
+
+void
+Execute::evaluate()
+{
+    inputBuffer.setTail(*inp.outputWire);
+    BranchData &branch = *out.inputWire;
+
+    const ForwardInstData *insts_in = getInput();
+
+    /* Do all the cycle-wise activities for dcachePort here to potentially
+     * free up input spaces in the LSQ's requests queue */
+    lsq.step();
+
+    /* Has an interrupt been signalled? This may not be acted on
+     * straightaway so this is different from took_interrupt below */
+    bool interrupted = false;
+    /* If there was an interrupt signalled, was it acted on now?
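+     * took_interrupt only becomes true when takeInterrupt below actually
+     * invokes the interrupt and signals the resulting branch to Fetch1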
 */
+    bool took_interrupt = false;
+
+    if (cpu.getInterruptController()) {
+        /* This is here because it seems that after drainResume the
+         * interrupt controller isn't always set */
+        interrupted = drainState == NotDraining && isInterrupted(0);
+    } else {
+        DPRINTF(MinorInterrupt, "No interrupt controller\n");
+    }
+
+    unsigned int num_issued = 0;
+
+    if (DTRACE(MinorTrace)) {
+        /* Empty the instsBeingCommitted for MinorTrace */
+        instsBeingCommitted.bubbleFill();
+    }
+
+    /* THREAD threadId on isInterrupted */
+    /* Act on interrupts */
+    if (interrupted && isInbetweenInsts()) {
+        took_interrupt = takeInterrupt(0, branch);
+        /* Clear interrupted if no interrupt was actually waiting */
+        interrupted = took_interrupt;
+    }
+
+    if (took_interrupt) {
+        /* Do not commit/issue this cycle */
+    } else if (!branch.isBubble()) {
+        /* It's important that this is here to carry Fetch1 wakeups to Fetch1
+         * without overwriting them */
+        DPRINTF(MinorInterrupt, "Execute skipping a cycle to allow old"
+            " branch to complete\n");
+    } else {
+        if (interrupted) {
+            if (inFlightInsts->empty()) {
+                DPRINTF(MinorInterrupt, "Waiting but no insts\n");
+            } else {
+                DPRINTF(MinorInterrupt, "Waiting for end of inst before"
+                    " signalling interrupt\n");
+            }
+        }
+
+        /* commit can set stalled flags observable to issue and so *must* be
+         * called first */
+        if (drainState != NotDraining) {
+            if (drainState == DrainCurrentInst) {
+                /* Commit only micro-ops, don't kill anything else */
+                commit(true, false, branch);
+
+                if (isInbetweenInsts())
+                    setDrainState(DrainHaltFetch);
+
+                /* Discard any generated branch */
+                branch = BranchData::bubble();
+            } else if (drainState == DrainAllInsts) {
+                /* Kill all instructions */
+                while (getInput())
+                    popInput();
+                commit(false, true, branch);
+            }
+        } else {
+            /* Commit micro-ops only if interrupted. Otherwise, commit
+             * anything you like */
+            commit(interrupted, false, branch);
+        }
+
+        /* This will issue merrily even when interrupted in the sure and
+         * certain knowledge that the interrupt will change the stream */
+        if (insts_in)
+            num_issued = issue(false);
+    }
+
+    /* Halt fetch, but don't do it until we have the current instruction in
+     * the bag */
+    if (drainState == DrainHaltFetch) {
+        updateBranchData(BranchData::HaltFetch, MinorDynInst::bubble(),
+            TheISA::PCState(0), branch);
+
+        cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
+        setDrainState(DrainAllInsts);
+    }
+
+    MinorDynInstPtr next_issuable_inst = NULL;
+    bool can_issue_next = false;
+
+    /* Find the next issuable instruction and see if it can be issued */
+    if (getInput()) {
+        MinorDynInstPtr inst = getInput()->insts[inputIndex];
+
+        if (inst->isFault()) {
+            can_issue_next = true;
+        } else if (!inst->isBubble()) {
+            if (cpu.getContext(inst->id.threadId)->status() !=
+                ThreadContext::Suspended)
+            {
+                next_issuable_inst = inst;
+            }
+        }
+    }
+
+    bool becoming_stalled = true;
+
+    /* Advance the pipelines and note whether they still need to be
+     * advanced */
+    for (unsigned int i = 0; i < numFuncUnits; i++) {
+        FUPipeline *fu = funcUnits[i];
+
+        fu->advance();
+
+        /* If we need to go again, the pipeline will have been left or set
+         * to be unstalled */
+        if (fu->occupancy != 0 && !fu->stalled)
+            becoming_stalled = false;
+
+        /* Could we possibly issue the next instruction?
 This is quite
+         * an expensive test */
+        if (next_issuable_inst && !fu->stalled &&
+            scoreboard.canInstIssue(next_issuable_inst,
+                NULL, NULL, cpu.curCycle() + Cycles(1),
+                cpu.getContext(next_issuable_inst->id.threadId)) &&
+            fu->provides(next_issuable_inst->staticInst->opClass()))
+        {
+            can_issue_next = true;
+        }
+    }
+
+    bool head_inst_might_commit = false;
+
+    /* Could the head in flight insts be committed */
+    if (!inFlightInsts->empty()) {
+        const QueuedInst &head_inst = inFlightInsts->front();
+
+        if (head_inst.inst->isNoCostInst()) {
+            head_inst_might_commit = true;
+        } else {
+            FUPipeline *fu = funcUnits[head_inst.inst->fuIndex];
+
+            /* Head inst is committable */
+            if ((fu->stalled &&
+                fu->front().inst->id == head_inst.inst->id) ||
+                lsq.findResponse(head_inst.inst))
+            {
+                head_inst_might_commit = true;
+            }
+        }
+    }
+
+    DPRINTF(Activity, "Need to tick num issued insts: %s%s%s%s%s%s\n",
+        (num_issued != 0 ? " (issued some insts)" : ""),
+        (becoming_stalled ? " (becoming stalled)" : " (not becoming stalled)"),
+        (can_issue_next ? " (can issue next inst)" : ""),
+        (head_inst_might_commit ? " (head inst might commit)" : ""),
+        (lsq.needsToTick() ? " (LSQ needs to tick)" : ""),
+        (interrupted ? " (interrupted)" : ""));
+
+    bool need_to_tick =
+        num_issued != 0 || /* Issued some insts this cycle */
+        !becoming_stalled || /* Some FU pipelines can still move */
+        can_issue_next || /* Can still issue a new inst */
+        head_inst_might_commit || /* Could possibly commit the next inst */
+        lsq.needsToTick() || /* Must step the dcache port */
+        interrupted; /* There are pending interrupts */
+
+    if (!need_to_tick) {
+        DPRINTF(Activity, "The next cycle might be skippable as there are no"
+            " advanceable FUs\n");
+    }
+
+    /* Wake up if we need to tick again */
+    if (need_to_tick)
+        cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
+
+    /* Note activity of following buffer */
+    if (!branch.isBubble())
+        cpu.activityRecorder->activity();
+
+    /* Make sure the input (if any left) is pushed */
+    inputBuffer.pushTail();
+}
+
+void
+Execute::wakeupFetch(BranchData::Reason reason)
+{
+    BranchData branch;
+    assert(branch.isBubble());
+
+    /* THREAD thread id */
+    ThreadContext *thread = cpu.getContext(0);
+
+    /* Force a branch to the current PC (which should be the next inst.) to
+     * wake up Fetch1 */
+    if (!branch.isStreamChange() /* No real branch already happened */) {
+        DPRINTF(MinorInterrupt, "Waking up Fetch (via Execute) by issuing"
+            " a branch: %s\n", thread->pcState());
+
+        assert(thread->pcState().microPC() == 0);
+
+        updateBranchData(reason,
+            MinorDynInst::bubble(), thread->pcState(), branch);
+    } else {
+        DPRINTF(MinorInterrupt, "Already branching, no need for wakeup\n");
+    }
+
+    *out.inputWire = branch;
+
+    /* Make sure we get ticked */
+    cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
+}
+
+void
+Execute::minorTrace() const
+{
+    std::ostringstream insts;
+    std::ostringstream stalled;
+
+    instsBeingCommitted.reportData(insts);
+    lsq.minorTrace();
+    inputBuffer.minorTrace();
+    scoreboard.minorTrace();
+
+    /* Report functional unit stalling in one string */
+    unsigned int i = 0;
+    while (i < numFuncUnits)
+    {
+        stalled << (funcUnits[i]->stalled ?
'1' : 'E'); + i++; + if (i != numFuncUnits) + stalled << ','; + } + + MINORTRACE("insts=%s inputIndex=%d streamSeqNum=%d" + " stalled=%s drainState=%d isInbetweenInsts=%d\n", + insts.str(), inputIndex, streamSeqNum, stalled.str(), drainState, + isInbetweenInsts()); + + std::for_each(funcUnits.begin(), funcUnits.end(), + std::mem_fun(&FUPipeline::minorTrace)); + + inFlightInsts->minorTrace(); + inFUMemInsts->minorTrace(); +} + +void +Execute::drainResume() +{ + DPRINTF(Drain, "MinorExecute drainResume\n"); + + setDrainState(NotDraining); + + /* Wakeup fetch and keep the pipeline running until that branch takes + * effect */ + wakeupFetch(BranchData::WakeupFetch); + cpu.wakeupOnEvent(Pipeline::ExecuteStageId); +} + +std::ostream &operator <<(std::ostream &os, Execute::DrainState state) +{ + switch (state) + { + case Execute::NotDraining: + os << "NotDraining"; + break; + case Execute::DrainCurrentInst: + os << "DrainCurrentInst"; + break; + case Execute::DrainHaltFetch: + os << "DrainHaltFetch"; + break; + case Execute::DrainAllInsts: + os << "DrainAllInsts"; + break; + default: + os << "Drain-" << static_cast<int>(state); + break; + } + + return os; +} + +void +Execute::setDrainState(DrainState state) +{ + DPRINTF(Drain, "setDrainState: %s\n", state); + drainState = state; +} + +unsigned int +Execute::drain() +{ + DPRINTF(Drain, "MinorExecute drain\n"); + + if (drainState == NotDraining) { + cpu.wakeupOnEvent(Pipeline::ExecuteStageId); + + /* Go to DrainCurrentInst if we're not between operations + * this should probably test the LSQ as well. Or maybe + * just always go to DrainCurrentInst anyway */ + if (lastCommitWasEndOfMacroop) + setDrainState(DrainHaltFetch); + else + setDrainState(DrainCurrentInst); + } + + return (isDrained() ? 0 : 1); +} + +bool +Execute::isDrained() +{ + return drainState == DrainAllInsts && + inputBuffer.empty() && + inFlightInsts->empty() && + lsq.isDrained(); +} + +Execute::~Execute() +{ + for (unsigned int i = 0; i < numFuncUnits; i++) + delete funcUnits[i]; + + delete inFlightInsts; +} + +bool +Execute::instIsRightStream(MinorDynInstPtr inst) +{ + return inst->id.streamSeqNum == streamSeqNum; +} + +bool +Execute::instIsHeadInst(MinorDynInstPtr inst) +{ + bool ret = false; + + if (!inFlightInsts->empty()) + ret = inFlightInsts->front().inst->id == inst->id; + + return ret; +} + +MinorCPU::MinorCPUPort & +Execute::getDcachePort() +{ + return lsq.getDcachePort(); +} + +} diff --git a/src/cpu/minor/execute.hh b/src/cpu/minor/execute.hh new file mode 100644 index 000000000..8cd026534 --- /dev/null +++ b/src/cpu/minor/execute.hh @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * All the fun of executing instructions from Decode and sending branch/new + * instruction stream info. to Fetch1. + */ + +#ifndef __CPU_MINOR_EXECUTE_HH__ +#define __CPU_MINOR_EXECUTE_HH__ + +#include "cpu/minor/buffers.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/func_unit.hh" +#include "cpu/minor/lsq.hh" +#include "cpu/minor/pipe_data.hh" +#include "cpu/minor/scoreboard.hh" + +namespace Minor +{ + +/** Execute stage. Everything apart from fetching and decoding instructions. + * The LSQ lives here too. 
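+ *  (Instructions arrive from Decode through the inp latch; the only
+ *  output is the BranchData stream carried back to Fetch1 through out.)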
 */
+class Execute : public Named
+{
+  protected:
+    /** Input port carrying instructions from Decode */
+    Latch<ForwardInstData>::Output inp;
+
+    /** Input port carrying stream changes to Fetch1 */
+    Latch<BranchData>::Input out;
+
+    /** Pointer back to the containing CPU */
+    MinorCPU &cpu;
+
+    /** Number of instructions that can be issued per cycle */
+    unsigned int issueLimit;
+
+    /** Number of memory ops that can be issued per cycle */
+    unsigned int memoryIssueLimit;
+
+    /** Number of instructions that can be committed per cycle */
+    unsigned int commitLimit;
+
+    /** Number of memory instructions that can be committed per cycle */
+    unsigned int memoryCommitLimit;
+
+    /** If true, more than one input line can be processed each cycle if
+     *  there is room to execute more instructions than taken from the first
+     *  line */
+    bool processMoreThanOneInput;
+
+    /** Descriptions of the functional units we want to generate */
+    MinorFUPool &fuDescriptions;
+
+    /** Number of functional units to produce */
+    unsigned int numFuncUnits;
+
+    /** Longest latency of any FU, useful for setting up the activity
+     *  recorder */
+    Cycles longestFuLatency;
+
+    /** Modify instruction trace times on commit */
+    bool setTraceTimeOnCommit;
+
+    /** Modify instruction trace times on issue */
+    bool setTraceTimeOnIssue;
+
+    /** Allow mem refs to leave their FUs before reaching the head
+     *  of the in flight insts queue if their dependencies are met */
+    bool allowEarlyMemIssue;
+
+    /** The FU index of the non-existent costless FU for instructions
+     *  which pass the MinorDynInst::isNoCostInst test */
+    unsigned int noCostFUIndex;
+
+    /** Dcache port to pass on to the CPU. Execute owns this */
+    LSQ lsq;
+
+    /** Scoreboard of instruction dependencies */
+    Scoreboard scoreboard;
+
+    /** The execution functional units */
+    std::vector<FUPipeline *> funcUnits;
+
+  public: /* Public for Pipeline to be able to pass it to Decode */
+    InputBuffer<ForwardInstData> inputBuffer;
+
+  protected:
+    /** Stage cycle-by-cycle state */
+
+    /** State that drain passes through (in order). On a drain request,
+     *  Execute transitions into either DrainCurrentInst (if between
+     *  microops) or DrainHaltFetch.
+     *
+     *  Note that Execute doesn't actually have a 'Drained' state, only
+     *  an indication that it's currently draining and an isDrained test
+     *  that can't tell if there are insts still in the pipeline leading
+     *  up to Execute */
+    enum DrainState
+    {
+        NotDraining, /* Not draining, possibly running */
+        DrainCurrentInst, /* Draining to end of inst/macroop */
+        DrainHaltFetch, /* Halting Fetch after completing current inst */
+        DrainAllInsts /* Discarding all remaining insts */
+    };
+
+    /** In-order instructions either in FUs or the LSQ */
+    Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFlightInsts;
+
+    /** Memory ref instructions still in the FUs */
+    Queue<QueuedInst, ReportTraitsAdaptor<QueuedInst> > *inFUMemInsts;
+
+    /** Index that we've completed up to in getInput data. The input line
+     *  can be popped (popInput) when this equals getInput()->width() */
+    unsigned int inputIndex;
+
+    /** The last commit was the end of a full instruction so an interrupt
+     *  can safely happen */
+    bool lastCommitWasEndOfMacroop;
+
+    /** Structure for reporting insts currently being processed/retired
+     *  for MinorTrace */
+    ForwardInstData instsBeingCommitted;
+
+    /** Source of sequence number for instruction streams. Increment this and
+     *  pass to fetch whenever an instruction stream needs to be changed.
+     *  For any more complicated behaviour (e.g.
 speculation) there'll need
+     *  to be another plan. THREAD, need one for each thread */
+    InstSeqNum streamSeqNum;
+
+    /** A prediction number for use where one isn't available from an
+     *  instruction. This is harvested from committed instructions.
+     *  This isn't really needed as the streamSeqNum will change on
+     *  a branch, but it minimises disruption in stream identification */
+    InstSeqNum lastPredictionSeqNum;
+
+    /** State progression for draining NotDraining -> ... -> DrainAllInsts */
+    DrainState drainState;
+
+  protected:
+    friend std::ostream &operator <<(std::ostream &os, DrainState state);
+
+    /** Get a piece of data to work on from the inputBuffer, or 0 if there
+     *  is no data. */
+    const ForwardInstData *getInput();
+
+    /** Pop an element off the input buffer, if there are any */
+    void popInput();
+
+    /** Generate Branch data based (into branch) on an observed (or not)
+     *  change in PC while executing an instruction.
+     *  Also handles branch prediction information within the inst. */
+    void tryToBranch(MinorDynInstPtr inst, Fault fault, BranchData &branch);
+
+    /** Actually create a branch to communicate to Fetch1/Fetch2 and,
+     *  if that is a stream-changing branch, update the streamSeqNum */
+    void updateBranchData(BranchData::Reason reason,
+        MinorDynInstPtr inst, const TheISA::PCState &target,
+        BranchData &branch);
+
+    /** Handle extracting mem ref responses from the memory queues and
+     *  completing the associated instructions.
+     *  Fault is an output and will contain any fault caused (and already
+     *  invoked by the function)
+     *  Sets branch to any branch generated by the instruction. */
+    void handleMemResponse(MinorDynInstPtr inst,
+        LSQ::LSQRequestPtr response, BranchData &branch,
+        Fault &fault);
+
+    /** Execute a memory reference instruction. This calls initiateAcc on
+     *  the instruction which will then call writeMem or readMem to issue a
+     *  memory access to the LSQ.
+     *  Returns true if the instruction was executed rather than stalled
+     *  because of a lack of LSQ resources and false otherwise.
+     *  branch is set to any branch raised by the instruction.
+     *  passed_predicate is set to true if the instruction passed its
+     *  predicate and so will access memory, or false if the instruction
+     *  *failed* its predicate and is now complete.
+     *  fault is set if any non-NoFault fault is raised.
+     *  Any faults raised are actually invoke-d by this function. */
+    bool executeMemRefInst(MinorDynInstPtr inst, BranchData &branch,
+        bool &passed_predicate, Fault &fault);
+
+    /** Has an interrupt been raised */
+    bool isInterrupted(ThreadID thread_id) const;
+
+    /** Are we between instructions? Can we be interrupted? */
+    bool isInbetweenInsts() const;
+
+    /** Act on an interrupt. Returns true if an interrupt was actually
+     *  signalled and invoked */
+    bool takeInterrupt(ThreadID thread_id, BranchData &branch);
+
+    /** Try and issue instructions from the inputBuffer */
+    unsigned int issue(bool only_issue_microops);
+
+    /** Try to act on PC-related events. Returns true if any were
+     *  executed */
+    bool tryPCEvents();
+
+    /** Do the stats handling, instruction counting and PC event handling
+     *  related to the new instruction/op counts */
+    void doInstCommitAccounting(MinorDynInstPtr inst);
+
+    /** Commit a single instruction. Returns true if the instruction being
+     *  examined was completed (fully executed, discarded, or initiated a
+     *  memory access), false if there is still some processing to do.
+     *  The inst's fuIndex gives the index into funcUnits of the
+     *  functional unit this instruction is being executed in.
+     *  If early_memory_issue is true then this is an early execution
+     *  of a mem ref and so faults will not be processed.
+     *  If the return value is true:
+     *      fault is set if a fault happened,
+     *      branch is set to indicate any branch that occurs
+     *      committed is set to true if this instruction is committed
+     *          (and so needs to be traced and accounted for)
+     *      completed_mem_issue is set if the instruction was a
+     *          memory access that was issued */
+    bool commitInst(MinorDynInstPtr inst, bool early_memory_issue,
+        BranchData &branch, Fault &fault, bool &committed,
+        bool &completed_mem_issue);
+
+    /** Try and commit instructions from the ends of the functional unit
+     *  pipelines.
+     *  If only_commit_microops is true then only commit up to the
+     *  end of the current full instruction.
+     *  If discard is true then discard all instructions rather than
+     *  committing.
+     *  branch is set to any branch raised during commit. */
+    void commit(bool only_commit_microops, bool discard, BranchData &branch);
+
+    /** Set the drain state (with useful debugging messages) */
+    void setDrainState(DrainState state);
+
+  public:
+    Execute(const std::string &name_,
+        MinorCPU &cpu_,
+        MinorCPUParams &params,
+        Latch<ForwardInstData>::Output inp_,
+        Latch<BranchData>::Input out_);
+
+    ~Execute();
+
+  public:
+
+    /** Cause Execute to issue an UnpredictedBranch (or WakeupFetch if
+     *  that was passed as the reason) to Fetch1 to wake the
+     *  system up (using the PC from the thread context). */
+    void wakeupFetch(BranchData::Reason reason =
+        BranchData::UnpredictedBranch);
+
+    /** Returns the DcachePort owned by this Execute to pass upwards */
+    MinorCPU::MinorCPUPort &getDcachePort();
+
+    /** To allow ExecContext to find the LSQ */
+    LSQ &getLSQ() { return lsq; }
+
+    /** Does the given instruction have the right stream sequence number
+     *  to be committed? */
+    bool instIsRightStream(MinorDynInstPtr inst);
+
+    /** Returns true if the given instruction is at the head of the
+     *  inFlightInsts instruction queue */
+    bool instIsHeadInst(MinorDynInstPtr inst);
+
+    /** Pass on input/buffer data to the output if you can */
+    void evaluate();
+
+    void minorTrace() const;
+
+    /** After thread suspension, has Execute been drained of in-flight
+     *  instructions and memory accesses. */
+    bool isDrained();
+
+    /** Like the drain interface on SimObject */
+    unsigned int drain();
+    void drainResume();
+};
+
+}
+
+#endif /* __CPU_MINOR_EXECUTE_HH__ */
diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc
new file mode 100644
index 000000000..45dc5eddc
--- /dev/null
+++ b/src/cpu/minor/fetch1.cc
@@ -0,0 +1,676 @@
+/*
+ * Copyright (c) 2013-2014 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Bardsley
+ */
+
+#include <cstring>
+#include <iomanip>
+#include <sstream>
+
+#include "base/cast.hh"
+#include "cpu/minor/fetch1.hh"
+#include "cpu/minor/pipeline.hh"
+#include "debug/Drain.hh"
+#include "debug/Fetch.hh"
+#include "debug/MinorTrace.hh"
+
+namespace Minor
+{
+
+Fetch1::Fetch1(const std::string &name_,
+    MinorCPU &cpu_,
+    MinorCPUParams &params,
+    Latch<BranchData>::Output inp_,
+    Latch<ForwardLineData>::Input out_,
+    Latch<BranchData>::Output prediction_,
+    Reservable &next_stage_input_buffer) :
+    Named(name_),
+    cpu(cpu_),
+    inp(inp_),
+    out(out_),
+    prediction(prediction_),
+    nextStageReserve(next_stage_input_buffer),
+    icachePort(name_ + ".icache_port", *this, cpu_),
+    lineSnap(params.fetch1LineSnapWidth),
+    maxLineWidth(params.fetch1LineWidth),
+    fetchLimit(params.fetch1FetchLimit),
+    state(FetchWaitingForPC),
+    pc(0),
+    streamSeqNum(InstId::firstStreamSeqNum),
+    predictionSeqNum(InstId::firstPredictionSeqNum),
+    blocked(false),
+    requests(name_ + ".requests", "lines", params.fetch1FetchLimit),
+    transfers(name_ + ".transfers", "lines", params.fetch1FetchLimit),
+    icacheState(IcacheRunning),
+    lineSeqNum(InstId::firstLineSeqNum),
+    numFetchesInMemorySystem(0),
+    numFetchesInITLB(0)
+{
+    if (lineSnap == 0) {
+        lineSnap = cpu.cacheLineSize();
+        DPRINTF(Fetch, "lineSnap set to cache line size of: %d\n",
+            lineSnap);
+    }
+
+    if (maxLineWidth == 0) {
+        maxLineWidth = cpu.cacheLineSize();
+        DPRINTF(Fetch, "maxLineWidth set to cache line size of: %d\n",
+            maxLineWidth);
+    }
+
+    /* These assertions should be copied to the Python config.
as well */ + if ((lineSnap % sizeof(TheISA::MachInst)) != 0) { + fatal("%s: fetch1LineSnapWidth must be a multiple " + "of sizeof(TheISA::MachInst) (%d)\n", name_, + sizeof(TheISA::MachInst)); + } + + if (!(maxLineWidth >= lineSnap && + (maxLineWidth % sizeof(TheISA::MachInst)) == 0)) + { + fatal("%s: fetch1LineWidth must be a multiple of" + " sizeof(TheISA::MachInst)" + " (%d), and >= fetch1LineSnapWidth (%d)\n", + name_, sizeof(TheISA::MachInst), lineSnap); + } + + if (fetchLimit < 1) { + fatal("%s: fetch1FetchLimit must be >= 1 (%d)\n", name_, + fetchLimit); + } +} + +void +Fetch1::fetchLine() +{ + /* If line_offset != 0, a request is pushed for the remainder of the + * line. */ + /* Use a lower, sizeof(MachInst) aligned address for the fetch */ + Addr aligned_pc = pc.instAddr() & ~((Addr) lineSnap - 1); + unsigned int line_offset = aligned_pc % lineSnap; + unsigned int request_size = maxLineWidth - line_offset; + + /* Fill in the line's id */ + InstId request_id(0 /* thread */, + streamSeqNum, predictionSeqNum, + lineSeqNum); + + FetchRequestPtr request = new FetchRequest(*this, request_id, pc); + + DPRINTF(Fetch, "Inserting fetch into the fetch queue " + "%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n", + request_id, aligned_pc, pc, line_offset, request_size); + + request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0); + request->request.setVirt(0 /* asid */, + aligned_pc, request_size, Request::INST_FETCH, cpu.instMasterId(), + /* I've no idea why we need the PC, but give it */ + pc.instAddr()); + + DPRINTF(Fetch, "Submitting ITLB request\n"); + numFetchesInITLB++; + + request->state = FetchRequest::InTranslation; + + /* Reserve space in the queues upstream of requests for results */ + transfers.reserve(); + requests.push(request); + + /* Submit the translation request. The response will come + * through finish/markDelayed on this request as it bears + * the Translation interface */ + cpu.threads[request->id.threadId]->itb->translateTiming( + &request->request, + cpu.getContext(request->id.threadId), + request, BaseTLB::Execute); + + lineSeqNum++; + + /* Step the PC for the next line onto the line aligned next address. + * Note that as instructions can span lines, this PC is only a + * reliable 'new' PC if the next line has a new stream sequence number. 
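+ * (Illustrative numeric example, not from the original source: with + * lineSnap == maxLineWidth == 64, a fetch at PC 0x1008 gives aligned_pc + * 0x1000 and request_size 64, so the non-ALPHA path below steps the + * next line PC to 0x1040.)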
*/ +#if THE_ISA == ALPHA_ISA + /* Restore the low bits of the PC used as address space flags */ + Addr pc_low_bits = pc.instAddr() & + ((Addr) (1 << sizeof(TheISA::MachInst)) - 1); + + pc.set(aligned_pc + request_size + pc_low_bits); +#else + pc.set(aligned_pc + request_size); +#endif +} + +std::ostream & +operator <<(std::ostream &os, Fetch1::IcacheState state) +{ + switch (state) { + case Fetch1::IcacheRunning: + os << "IcacheRunning"; + break; + case Fetch1::IcacheNeedsRetry: + os << "IcacheNeedsRetry"; + break; + default: + os << "IcacheState-" << static_cast<int>(state); + break; + } + return os; +} + +void +Fetch1::FetchRequest::makePacket() +{ + /* Make the necessary packet for a memory transaction */ + packet = new Packet(&request, MemCmd::ReadReq); + packet->allocate(); + + /* This FetchRequest becomes SenderState to allow the response to be + * identified */ + packet->pushSenderState(this); +} + +void +Fetch1::FetchRequest::finish( + Fault fault_, RequestPtr request_, ThreadContext *tc, BaseTLB::Mode mode) +{ + fault = fault_; + + state = Translated; + fetch.handleTLBResponse(this); + + /* Let's try and wake up the processor for the next cycle */ + fetch.cpu.wakeupOnEvent(Pipeline::Fetch1StageId); +} + +void +Fetch1::handleTLBResponse(FetchRequestPtr response) +{ + numFetchesInITLB--; + + if (response->fault != NoFault) { + DPRINTF(Fetch, "Fault in address ITLB translation: %s, " + "paddr: 0x%x, vaddr: 0x%x\n", + response->fault->name(), + (response->request.hasPaddr() ? response->request.getPaddr() : 0), + response->request.getVaddr()); + + if (DTRACE(MinorTrace)) + minorTraceResponseLine(name(), response); + } else { + DPRINTF(Fetch, "Got ITLB response\n"); + } + + response->state = FetchRequest::Translated; + + tryToSendToTransfers(response); +} + +Fetch1::FetchRequest::~FetchRequest() +{ + if (packet) + delete packet; +} + +void +Fetch1::tryToSendToTransfers(FetchRequestPtr request) +{ + if (!requests.empty() && requests.front() != request) { + DPRINTF(Fetch, "Fetch not at front of requests queue, can't" + " issue to memory\n"); + return; + } + + if (request->state == FetchRequest::InTranslation) { + DPRINTF(Fetch, "Fetch still in translation, not issuing to" + " memory\n"); + return; + } + + if (request->isDiscardable() || request->fault != NoFault) { + /* Discarded and faulting requests carry on through transfers + * as Complete/packet == NULL */ + + request->state = FetchRequest::Complete; + moveFromRequestsToTransfers(request); + + /* Wake up the pipeline next cycle as there will be no event + * for this queue->queue transfer */ + cpu.wakeupOnEvent(Pipeline::Fetch1StageId); + } else if (request->state == FetchRequest::Translated) { + if (!request->packet) + request->makePacket(); + + /* Ensure that the packet won't delete the request */ + assert(request->packet->needsResponse()); + + if (tryToSend(request)) + moveFromRequestsToTransfers(request); + } else { + DPRINTF(Fetch, "Not advancing line fetch\n"); + } +} + +void +Fetch1::moveFromRequestsToTransfers(FetchRequestPtr request) +{ + assert(!requests.empty() && requests.front() == request); + + requests.pop(); + transfers.push(request); +} + +bool +Fetch1::tryToSend(FetchRequestPtr request) +{ + bool ret = false; + + if (icachePort.sendTimingReq(request->packet)) { + /* Invalidate the fetch_requests packet so we don't + * accidentally fail to deallocate it (or use it!) 
+ * later by overwriting it */ + request->packet = NULL; + request->state = FetchRequest::RequestIssuing; + numFetchesInMemorySystem++; + + ret = true; + + DPRINTF(Fetch, "Issued fetch request to memory: %s\n", + request->id); + } else { + /* Needs to be resent, wait for that */ + icacheState = IcacheNeedsRetry; + + DPRINTF(Fetch, "Line fetch needs to retry: %s\n", + request->id); + } + + return ret; +} + +void +Fetch1::stepQueues() +{ + IcacheState old_icache_state = icacheState; + + switch (icacheState) { + case IcacheRunning: + /* Move ITLB results on to the memory system */ + if (!requests.empty()) { + tryToSendToTransfers(requests.front()); + } + break; + case IcacheNeedsRetry: + break; + } + + if (icacheState != old_icache_state) { + DPRINTF(Fetch, "Step in state %s moving to state %s\n", + old_icache_state, icacheState); + } +} + +void +Fetch1::popAndDiscard(FetchQueue &queue) +{ + if (!queue.empty()) { + delete queue.front(); + queue.pop(); + } +} + +unsigned int +Fetch1::numInFlightFetches() +{ + return requests.occupiedSpace() + + transfers.occupiedSpace(); +} + +/** Print the appropriate MinorLine line for a fetch response */ +void +Fetch1::minorTraceResponseLine(const std::string &name, + Fetch1::FetchRequestPtr response) const +{ + Request &request M5_VAR_USED = response->request; + + if (response->packet && response->packet->isError()) { + MINORLINE(this, "id=F;%s vaddr=0x%x fault=\"error packet\"\n", + response->id, request.getVaddr()); + } else if (response->fault != NoFault) { + MINORLINE(this, "id=F;%s vaddr=0x%x fault=\"%s\"\n", + response->id, request.getVaddr(), response->fault->name()); + } else { + MINORLINE(this, "id=%s size=%d vaddr=0x%x paddr=0x%x\n", + response->id, request.getSize(), + request.getVaddr(), request.getPaddr()); + } +} + +bool +Fetch1::recvTimingResp(PacketPtr response) +{ + DPRINTF(Fetch, "recvTimingResp %d\n", numFetchesInMemorySystem); + + /* Only push the response if we didn't change stream? No, all responses + * should hit the responses queue. It's the job of 'step' to throw them + * away. 
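+ * (Stale responses are in fact dropped later: evaluate() tests + * isDiscardable() on the head of the transfers queue and frees the + * reserved Fetch2 slot for lines from superseded streams.)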
*/ + FetchRequestPtr fetch_request = safe_cast<FetchRequestPtr> + (response->popSenderState()); + + /* Fixup packet in fetch_request as this may have changed */ + assert(!fetch_request->packet); + fetch_request->packet = response; + + numFetchesInMemorySystem--; + fetch_request->state = FetchRequest::Complete; + + if (DTRACE(MinorTrace)) + minorTraceResponseLine(name(), fetch_request); + + if (response->isError()) { + DPRINTF(Fetch, "Received error response packet: %s\n", + fetch_request->id); + } + + /* We go to idle even if there are more things to do on the queues as + * it's the job of step to actually step us on to the next transaction */ + + /* Let's try and wake up the processor for the next cycle to move on + * queues */ + cpu.wakeupOnEvent(Pipeline::Fetch1StageId); + + /* Never busy */ + return true; +} + +void +Fetch1::recvRetry() +{ + DPRINTF(Fetch, "recvRetry\n"); + assert(icacheState == IcacheNeedsRetry); + assert(!requests.empty()); + + FetchRequestPtr retryRequest = requests.front(); + + icacheState = IcacheRunning; + + if (tryToSend(retryRequest)) + moveFromRequestsToTransfers(retryRequest); +} + +std::ostream & +operator <<(std::ostream &os, Fetch1::FetchState state) +{ + switch (state) { + case Fetch1::FetchHalted: + os << "FetchHalted"; + break; + case Fetch1::FetchWaitingForPC: + os << "FetchWaitingForPC"; + break; + case Fetch1::FetchRunning: + os << "FetchRunning"; + break; + default: + os << "FetchState-" << static_cast<int>(state); + break; + } + return os; +} + +void +Fetch1::changeStream(const BranchData &branch) +{ + updateExpectedSeqNums(branch); + + /* Start fetching again if we were stopped */ + switch (branch.reason) { + case BranchData::SuspendThread: + DPRINTF(Fetch, "Suspending fetch: %s\n", branch); + state = FetchWaitingForPC; + break; + case BranchData::HaltFetch: + DPRINTF(Fetch, "Halting fetch\n"); + state = FetchHalted; + break; + default: + DPRINTF(Fetch, "Changing stream on branch: %s\n", branch); + state = FetchRunning; + break; + } + pc = branch.target; +} + +void +Fetch1::updateExpectedSeqNums(const BranchData &branch) +{ + DPRINTF(Fetch, "Updating streamSeqNum from: %d to %d," + " predictionSeqNum from: %d to %d\n", + streamSeqNum, branch.newStreamSeqNum, + predictionSeqNum, branch.newPredictionSeqNum); + + /* Change the stream */ + streamSeqNum = branch.newStreamSeqNum; + /* Update the prediction. 
Note that it's possible for this to + * actually set the prediction to an *older* value if new + * predictions have been discarded by execute */ + predictionSeqNum = branch.newPredictionSeqNum; +} + +void +Fetch1::processResponse(Fetch1::FetchRequestPtr response, + ForwardLineData &line) +{ + PacketPtr packet = response->packet; + + /* Pass the prefetch abort (if any) on to Fetch2 in a ForwardLineData + * structure */ + line.setFault(response->fault); + /* Make sequence numbers valid in return */ + line.id = response->id; + /* Set PC to virtual address */ + line.pc = response->pc; + /* Set the lineBase, which is a sizeof(MachInst) aligned address <= + * pc.instAddr() */ + line.lineBaseAddr = response->request.getVaddr(); + + if (response->fault != NoFault) { + /* Stop fetching if there was a fault */ + /* Should probably try to flush the queues as well, but we + * can't be sure that this fault will actually reach Execute, and we + * can't (currently) selectively remove this stream from the queues */ + DPRINTF(Fetch, "Stopping line fetch because of fault: %s\n", + response->fault->name()); + state = Fetch1::FetchWaitingForPC; + } else { + line.adoptPacketData(packet); + /* Null the response's packet to prevent the response from trying to + * deallocate the packet */ + response->packet = NULL; + } +} + +void +Fetch1::evaluate() +{ + const BranchData &execute_branch = *inp.outputWire; + const BranchData &fetch2_branch = *prediction.outputWire; + ForwardLineData &line_out = *out.inputWire; + + assert(line_out.isBubble()); + + blocked = !nextStageReserve.canReserve(); + + /* Are we changing stream? Look to the Execute branches first, then + * to predicted changes of stream from Fetch2 */ + /* @todo, find better way to express ignoring branch predictions */ + if (execute_branch.isStreamChange() && + execute_branch.reason != BranchData::BranchPrediction) + { + if (state == FetchHalted) { + if (execute_branch.reason == BranchData::WakeupFetch) { + DPRINTF(Fetch, "Waking up fetch: %s\n", execute_branch); + changeStream(execute_branch); + } else { + DPRINTF(Fetch, "Halted, ignoring branch: %s\n", + execute_branch); + } + } else { + changeStream(execute_branch); + } + + if (!fetch2_branch.isBubble()) { + DPRINTF(Fetch, "Ignoring simultaneous prediction: %s\n", + fetch2_branch); + } + + /* The streamSeqNum tagging in request/response ->req should handle + * discarding those requests when we get to them. */ + } else if (state != FetchHalted && fetch2_branch.isStreamChange()) { + /* Handle branch predictions by changing the instruction source + * if we're still processing the same stream (as set by streamSeqNum) + * as the one of the prediction. + */ + if (fetch2_branch.newStreamSeqNum != streamSeqNum) { + DPRINTF(Fetch, "Not changing stream on prediction: %s," + " streamSeqNum mismatch\n", + fetch2_branch); + } else { + changeStream(fetch2_branch); + } + } + + /* Can we fetch? 
*/ + /* The bare minimum requirements for initiating a fetch */ + /* THREAD need to handle multiple threads */ + if (state == FetchRunning && /* We are actually fetching */ + !blocked && /* Space in the Fetch2 inputBuffer */ + /* The thread we're going to fetch for (thread 0), is active */ + cpu.getContext(0)->status() == ThreadContext::Active && + numInFlightFetches() < fetchLimit) + { + fetchLine(); + /* Take up a slot in the fetch queue */ + nextStageReserve.reserve(); + } + + /* Halting shouldn't prevent fetches in flight from being processed */ + /* Step fetches through the icachePort queues and memory system */ + stepQueues(); + + /* As we've thrown away early lines, if there is a line, it must + * be from the right stream */ + if (!transfers.empty() && + transfers.front()->isComplete()) + { + Fetch1::FetchRequestPtr response = transfers.front(); + + if (response->isDiscardable()) { + nextStageReserve.freeReservation(); + + DPRINTF(Fetch, "Discarding translated fetch as it's for" + " an old stream\n"); + + /* Wake up next cycle just in case there was some other + * action to do */ + cpu.wakeupOnEvent(Pipeline::Fetch1StageId); + } else { + DPRINTF(Fetch, "Processing fetched line: %s\n", + response->id); + + processResponse(response, line_out); + } + + popAndDiscard(transfers); + } + + /* If we generated output, mark the stage as being active + * to encourage that output on to the next stage */ + if (!line_out.isBubble()) + cpu.activityRecorder->activity(); + + /* Fetch1 has no inputBuffer so the only activity we can have is to + * generate a line output (tested just above) or to initiate a memory + * fetch which will signal activity when it returns/needs stepping + * between queues */ +} + +bool +Fetch1::isDrained() +{ + DPRINTF(Drain, "isDrained %s %s%s\n", + state == FetchHalted, + (numInFlightFetches() == 0 ? "" : "inFlightFetches "), + ((*out.inputWire).isBubble() ? "" : "outputtingLine")); + + return state == FetchHalted && + numInFlightFetches() == 0 && + (*out.inputWire).isBubble(); +} + +void +Fetch1::FetchRequest::reportData(std::ostream &os) const +{ + os << id; +} + +bool Fetch1::FetchRequest::isDiscardable() const +{ + /* Can't discard lines in TLB/memory */ + return state != InTranslation && state != RequestIssuing && + (id.streamSeqNum != fetch.streamSeqNum || + id.predictionSeqNum != fetch.predictionSeqNum); +} + +void +Fetch1::minorTrace() const +{ + std::ostringstream data; + + if (blocked) + data << 'B'; + else + (*out.inputWire).reportData(data); + + MINORTRACE("state=%s icacheState=%s in_tlb_mem=%s/%s" + " streamSeqNum=%d lines=%s\n", state, icacheState, + numFetchesInITLB, numFetchesInMemorySystem, + streamSeqNum, data.str()); + requests.minorTrace(); + transfers.minorTrace(); +} + +} diff --git a/src/cpu/minor/fetch1.hh b/src/cpu/minor/fetch1.hh new file mode 100644 index 000000000..29a63d1f1 --- /dev/null +++ b/src/cpu/minor/fetch1.hh @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * Fetch1 is responsible for fetching "lines" from memory and passing + * them to Fetch2 + */ + +#ifndef __CPU_MINOR_FETCH1_HH__ +#define __CPU_MINOR_FETCH1_HH__ + +#include "cpu/minor/buffers.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/pipe_data.hh" +#include "cpu/base.hh" +#include "mem/packet.hh" + +namespace Minor +{ + +/** A stage responsible for fetching "lines" from memory and passing + * them to Fetch2 */ +class Fetch1 : public Named +{ + protected: + /** Exposable fetch port */ + class IcachePort : public MinorCPU::MinorCPUPort + { + protected: + /** My owner */ + Fetch1 &fetch; + + public: + IcachePort(std::string name, Fetch1 &fetch_, MinorCPU &cpu) : + MinorCPU::MinorCPUPort(name, cpu), fetch(fetch_) + { } + + protected: + bool recvTimingResp(PacketPtr pkt) + { return fetch.recvTimingResp(pkt); } + + void recvRetry() { fetch.recvRetry(); } + }; + + /** Memory access queuing. + * + * A request can be submitted by pushing it onto the requests queue after + * issuing an ITLB lookup (state becomes InTranslation) with a + * FetchSenderState senderState containing the current lineSeqNum and + * stream/predictionSeqNum. + * + * Translated packets (state becomes Translated) are then passed to the + * memory system and the transfers queue (state becomes RequestIssuing). + * Retries are handled by leaving the packet on the requests queue and + * changing the state to IcacheNeedsRetry. + * + * Responses from the memory system alter the request object (state + * becomes Complete). Responses can be picked up from the head of the + * transfers queue to pass on to Fetch2. */ + + /** Structure to hold SenderState info through + * translation and memory accesses. 
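+ * (Lifecycle summary, drawn from the states defined below: NotIssued -> + * InTranslation -> Translated -> RequestIssuing -> Complete; a rejected + * send leaves the request at the head of the requests queue until + * recvRetry allows it to move on to transfers.)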
*/ + class FetchRequest : + public BaseTLB::Translation, /* For TLB lookups */ + public Packet::SenderState /* For packing into a Packet */ + { + protected: + /** Owning fetch unit */ + Fetch1 &fetch; + + public: + /** Progress of this request through address translation and + * memory */ + enum FetchRequestState + { + NotIssued, /* Just been made */ + InTranslation, /* Issued to ITLB, must wait for reply */ + Translated, /* Translation complete */ + RequestIssuing, /* Issued to memory, must wait for response */ + Complete /* Complete. Either a fault, or a fetched line */ + }; + + FetchRequestState state; + + /** Identity of the line that this request will generate */ + InstId id; + + /** FetchRequests carry packets while they're in the requests and + * transfers queues. When a Packet returns from the memory + * system, its request needs to have its packet updated as this may + * have changed in flight */ + PacketPtr packet; + + /** The underlying request that this fetch represents */ + Request request; + + /** PC to fixup with line address */ + TheISA::PCState pc; + + /** Fill in a fault if one happens during fetch, check this by + * picking apart the response packet */ + Fault fault; + + /** Make a packet to use with the memory transaction */ + void makePacket(); + + /** Report interface */ + void reportData(std::ostream &os) const; + + /** Is this line out of date with the current stream/prediction + * sequence and can it be discarded without orphaning in flight + * TLB lookups/memory accesses? */ + bool isDiscardable() const; + + /** Is this a complete read line or fault */ + bool isComplete() const { return state == Complete; } + + protected: + /** BaseTLB::Translation interface */ + + /** Interface for ITLB responses. We can handle delay, so don't + * do anything */ + void markDelayed() { } + + /** Interface for ITLB responses. Populates self and then passes + * the request on to the ports' handleTLBResponse member + * function */ + void finish(Fault fault_, RequestPtr request_, ThreadContext *tc, + BaseTLB::Mode mode); + + public: + FetchRequest(Fetch1 &fetch_, InstId id_, TheISA::PCState pc_) : + SenderState(), + fetch(fetch_), + state(NotIssued), + id(id_), + packet(NULL), + request(), + pc(pc_), + fault(NoFault) + { } + + ~FetchRequest(); + }; + + typedef FetchRequest *FetchRequestPtr; + + protected: + /** Construction-assigned data members */ + + /** Pointer back to the containing CPU */ + MinorCPU &cpu; + + /** Input port carrying branch requests from Execute */ + Latch<BranchData>::Output inp; + /** Output port carrying read lines to Fetch2 */ + Latch<ForwardLineData>::Input out; + /** Input port carrying branch predictions from Fetch2 */ + Latch<BranchData>::Output prediction; + + /** Interface to reserve space in the next stage */ + Reservable &nextStageReserve; + + /** IcachePort to pass to the CPU. Fetch1 is the only module that uses + * it. */ + IcachePort icachePort; + + /** Line snap size in bytes. All fetches clip to make their ends not + * extend beyond this limit. Setting this to the machine L1 cache line + * length will result in fetches never crossing line boundaries. */ + unsigned int lineSnap; + + /** Maximum fetch width in bytes. Setting this (and lineSnap) to the + * machine L1 cache line length will result in fetches of whole cache + * lines. 
Setting this to sizeof(MachInst) will result in fetches of + * single instructions (except near the end of lineSnap lines) */ + unsigned int maxLineWidth; + + /** Maximum number of fetches allowed in flight (in queues or memory) */ + unsigned int fetchLimit; + + protected: + /** Cycle-by-cycle state */ + + /** State of memory access for head instruction fetch */ + enum FetchState + { + FetchHalted, /* Not fetching, waiting to be woken by transition + to FetchWaitingForPC. The PC is not valid in this state */ + FetchWaitingForPC, /* Not fetching, waiting for stream change. + This doesn't stop issued fetches from being returned and + processed or for branches to change the state to Running. */ + FetchRunning /* Try to fetch, when possible */ + }; + + /** Stage cycle-by-cycle state */ + + FetchState state; + + /** Fetch PC value. This is updated by branches from Execute, branch + * prediction targets from Fetch2 and by incrementing it as we fetch + * lines subsequent to those two sources. */ + TheISA::PCState pc; + + /** Stream sequence number. This changes on request from Execute and is + * used to tag instructions by the fetch stream to which they belong. + * Execute originates new stream sequence numbers. */ + InstSeqNum streamSeqNum; + + /** Prediction sequence number. This changes when requests from Execute + * or Fetch2 ask for a change of fetch address and is used to tag lines + * by the prediction to which they belong. Fetch2 originates + * prediction sequence numbers. */ + InstSeqNum predictionSeqNum; + + /** The sequence number expected for the next returned cache line. The + * responses queue should be ordered and so, if the front of that queue + * has a lower lineSeqNum than this, lines need to be discarded. If it + * has a higher lineSeqNum, our line hasn't appeared yet */ + InstSeqNum expectedLineSeqNum; + + /** Blocked indication for report */ + bool blocked; + + /** State of memory access for head instruction fetch */ + enum IcacheState + { + IcacheRunning, /* Default. Step icache queues when possible */ + IcacheNeedsRetry /* Request rejected, will be asked to retry */ + }; + + typedef Queue<FetchRequestPtr, + ReportTraitsPtrAdaptor<FetchRequestPtr>, + NoBubbleTraits<FetchRequestPtr> > + FetchQueue; + + /** Queue of address translated requests from Fetch1 */ + FetchQueue requests; + + /** Queue of in-memory system requests and responses */ + FetchQueue transfers; + + /** Retry state of icache_port */ + IcacheState icacheState; + + /** Sequence number for line fetch used for ordering lines to flush */ + InstSeqNum lineSeqNum; + + /** Count of the number of fetches which have left the transfers queue + * and are in the 'wild' in the memory system. Try not to rely on + * this value, it's better to code without knowledge of the number + * of outstanding accesses */ + unsigned int numFetchesInMemorySystem; + /** Number of requests inside the ITLB rather than in the queues. + * All requests so located *must* have reserved space in the + * transfers queue */ + unsigned int numFetchesInITLB; + + protected: + friend std::ostream &operator <<(std::ostream &os, + Fetch1::FetchState state); + + /** Start fetching from a new address. 
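+ * (Used for both Execute branches and Fetch2 predictions: sets the + * fetch PC from the branch target and moves to FetchRunning, + * FetchWaitingForPC (suspend) or FetchHalted (halt) according to the + * branch reason.)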
*/ + void changeStream(const BranchData &branch); + + /** Update streamSeqNum and predictionSeqNum from the given branch (and + * assume these have changed and discard (on delivery) all lines in + * flight) */ + void updateExpectedSeqNums(const BranchData &branch); + + /** Convert a response to a ForwardLineData */ + void processResponse(FetchRequestPtr response, + ForwardLineData &line); + + friend std::ostream &operator <<(std::ostream &os, + IcacheState state); + + /** Insert a line fetch into the requests queue. This can be a partial + * line request where the given address has a non-0 offset into a + * line. */ + void fetchLine(); + + /** Try and issue a fetch for a translated request at the + * head of the requests queue. Also tries to move the request + * between queues */ + void tryToSendToTransfers(FetchRequestPtr request); + + /** Try to send (or resend) a memory request's next/only packet to + * the memory system. Returns true if the fetch was successfully + * sent to memory */ + bool tryToSend(FetchRequestPtr request); + + /** Move a request between queues */ + void moveFromRequestsToTransfers(FetchRequestPtr request); + + /** Step requests along between requests and transfers queues */ + void stepQueues(); + + /** Pop a request from the given queue and correctly deallocate and + * discard it. */ + void popAndDiscard(FetchQueue &queue); + + /** Handle pushing a TLB response onto the right queue */ + void handleTLBResponse(FetchRequestPtr response); + + /** Returns the total number of fetches in the queues, the ITLB and + * the memory system */ + unsigned int numInFlightFetches(); + + /** Print the appropriate MinorLine line for a fetch response */ + void minorTraceResponseLine(const std::string &name, + FetchRequestPtr response) const; + + /** Memory interface */ + virtual bool recvTimingResp(PacketPtr pkt); + virtual void recvRetry(); + + public: + Fetch1(const std::string &name_, + MinorCPU &cpu_, + MinorCPUParams &params, + Latch<BranchData>::Output inp_, + Latch<ForwardLineData>::Input out_, + Latch<BranchData>::Output prediction_, + Reservable &next_stage_input_buffer); + + public: + /** Returns the IcachePort owned by this Fetch1 */ + MinorCPU::MinorCPUPort &getIcachePort() { return icachePort; } + + /** Pass on input/buffer data to the output if you can */ + void evaluate(); + + void minorTrace() const; + + /** Is this stage drained? For Fetch1, draining is initiated by + * Execute signalling a branch with the reason HaltFetch */ + bool isDrained(); +}; + +} + +#endif /* __CPU_MINOR_FETCH1_HH__ */ diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc new file mode 100644 index 000000000..4827b75fc --- /dev/null +++ b/src/cpu/minor/fetch2.cc @@ -0,0 +1,543 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +#include <string> + +#include "arch/decoder.hh" +#include "arch/utility.hh" +#include "cpu/minor/fetch2.hh" +#include "cpu/minor/pipeline.hh" +#include "cpu/pred/bpred_unit.hh" +#include "debug/Branch.hh" +#include "debug/Fetch.hh" +#include "debug/MinorTrace.hh" + +namespace Minor +{ + +Fetch2::Fetch2(const std::string &name, + MinorCPU &cpu_, + MinorCPUParams &params, + Latch<ForwardLineData>::Output inp_, + Latch<BranchData>::Output branchInp_, + Latch<BranchData>::Input predictionOut_, + Latch<ForwardInstData>::Input out_, + Reservable &next_stage_input_buffer) : + Named(name), + cpu(cpu_), + inp(inp_), + branchInp(branchInp_), + predictionOut(predictionOut_), + out(out_), + nextStageReserve(next_stage_input_buffer), + outputWidth(params.decodeInputWidth), + processMoreThanOneInput(params.fetch2CycleInput), + branchPredictor(*params.branchPred), + inputBuffer(name + ".inputBuffer", "lines", params.fetch2InputBufferSize), + inputIndex(0), + pc(TheISA::PCState(0)), + havePC(false), + lastStreamSeqNum(InstId::firstStreamSeqNum), + fetchSeqNum(InstId::firstFetchSeqNum), + expectedStreamSeqNum(InstId::firstStreamSeqNum), + predictionSeqNum(InstId::firstPredictionSeqNum) +{ + if (outputWidth < 1) + fatal("%s: decodeInputWidth must be >= 1 (%d)\n", name, outputWidth); + + if (params.fetch2InputBufferSize < 1) { + fatal("%s: fetch2InputBufferSize must be >= 1 (%d)\n", name, + params.fetch2InputBufferSize); + } +} + +const ForwardLineData * +Fetch2::getInput() +{ + /* Get a line from the inputBuffer to work with */ + if (!inputBuffer.empty()) { + return &(inputBuffer.front()); + } else { + return NULL; + } +} + +void +Fetch2::popInput() +{ + if (!inputBuffer.empty()) { + inputBuffer.front().freeLine(); + inputBuffer.pop(); + } + + inputIndex = 0; +} + +void +Fetch2::dumpAllInput() +{ + DPRINTF(Fetch, "Dumping whole input buffer\n"); + while (!inputBuffer.empty()) + popInput(); + + inputIndex = 0; +} + +void +Fetch2::updateBranchPrediction(const BranchData &branch) +{ + 
MinorDynInstPtr inst = branch.inst; + + /* Don't even consider instructions we didn't try to predict or faults */ + if (inst->isFault() || !inst->triedToPredict) + return; + + switch (branch.reason) { + case BranchData::NoBranch: + /* No data to update */ + break; + case BranchData::Interrupt: + /* Never try to predict interrupts */ + break; + case BranchData::SuspendThread: + /* Don't need to act on suspends */ + break; + case BranchData::WakeupFetch: + /* Don't need to act on wakeups, no instruction tied to action. */ + break; + case BranchData::HaltFetch: + /* Don't need to act on fetch halts */ + break; + case BranchData::BranchPrediction: + /* Shouldn't happen. Fetch2 is the only source of + * BranchPredictions */ + break; + case BranchData::UnpredictedBranch: + /* Unpredicted branch or barrier */ + DPRINTF(Branch, "Unpredicted branch seen inst: %s\n", *inst); + branchPredictor.squash(inst->id.fetchSeqNum, + branch.target, true, inst->id.threadId); + break; + case BranchData::CorrectlyPredictedBranch: + /* Predicted taken, was taken */ + DPRINTF(Branch, "Branch predicted correctly inst: %s\n", *inst); + branchPredictor.update(inst->id.fetchSeqNum, + inst->id.threadId); + break; + case BranchData::BadlyPredictedBranch: + /* Predicted taken, not taken */ + DPRINTF(Branch, "Branch mis-predicted inst: %s\n", *inst); + branchPredictor.squash(inst->id.fetchSeqNum, + branch.target /* Not used */, false, inst->id.threadId); + break; + case BranchData::BadlyPredictedBranchTarget: + /* Predicted taken, was taken but to a different target */ + DPRINTF(Branch, "Branch mis-predicted target inst: %s target: %s\n", + *inst, branch.target); + branchPredictor.squash(inst->id.fetchSeqNum, + branch.target, true, inst->id.threadId); + break; + } +} + +void +Fetch2::predictBranch(MinorDynInstPtr inst, BranchData &branch) +{ + TheISA::PCState inst_pc = inst->pc; + + assert(!inst->predictedTaken); + + /* Skip non-control/sys call instructions */ + if (inst->staticInst->isControl() || + inst->staticInst->isSyscall()) + { + /* Tried to predict */ + inst->triedToPredict = true; + + DPRINTF(Branch, "Trying to predict for inst: %s\n", *inst); + + if (branchPredictor.predict(inst->staticInst, + inst->id.fetchSeqNum, inst_pc, + inst->id.threadId)) + { + inst->predictedTaken = true; + inst->predictedTarget = inst_pc; + branch.target = inst_pc; + } + } else { + DPRINTF(Branch, "Not attempting prediction for inst: %s\n", *inst); + } + + /* If we predict taken, set branch and update sequence numbers */ + if (inst->predictedTaken) { + /* Update the predictionSeqNum and remember the streamSeqNum that it + * was associated with */ + expectedStreamSeqNum = inst->id.streamSeqNum; + + BranchData new_branch = BranchData(BranchData::BranchPrediction, + inst->id.streamSeqNum, predictionSeqNum + 1, + inst->predictedTarget, inst); + + /* Mark with a new prediction number by the stream number of the + * instruction causing the prediction */ + predictionSeqNum++; + branch = new_branch; + + DPRINTF(Branch, "Branch predicted taken inst: %s target: %s" + " new predictionSeqNum: %d\n", + *inst, inst->predictedTarget, predictionSeqNum); + } +} + +void +Fetch2::evaluate() +{ + inputBuffer.setTail(*inp.outputWire); + ForwardInstData &insts_out = *out.inputWire; + BranchData prediction; + BranchData &branch_inp = *branchInp.outputWire; + + assert(insts_out.isBubble()); + + blocked = false; + + /* React to branches from Execute to update local branch prediction + * structures */ + updateBranchPrediction(branch_inp); + + /* If a branch 
arrives, don't try and do anything about it. Only + * react to your own predictions */ + if (branch_inp.isStreamChange()) { + DPRINTF(Fetch, "Dumping all input as a stream changing branch" + " has arrived\n"); + dumpAllInput(); + havePC = false; + } + + /* Even when blocked, clear out input lines with the wrong + * prediction sequence number */ + { + const ForwardLineData *line_in = getInput(); + + while (line_in && + expectedStreamSeqNum == line_in->id.streamSeqNum && + predictionSeqNum != line_in->id.predictionSeqNum) + { + DPRINTF(Fetch, "Discarding line %s" + " due to predictionSeqNum mismatch (expected: %d)\n", + line_in->id, predictionSeqNum); + + popInput(); + havePC = false; + + if (processMoreThanOneInput) { + DPRINTF(Fetch, "Wrapping\n"); + line_in = getInput(); + } else { + line_in = NULL; + } + } + } + + if (!nextStageReserve.canReserve()) { + blocked = true; + } else { + const ForwardLineData *line_in = getInput(); + + unsigned int output_index = 0; + + /* Pack instructions into the output while we can. This may involve + * using more than one input line. Note that lineWidth will be 0 + * for faulting lines */ + while (line_in && + (line_in->isFault() || + inputIndex < line_in->lineWidth) && /* More input */ + output_index < outputWidth && /* More output to fill */ + prediction.isBubble() /* No predicted branch */) + { + ThreadContext *thread = cpu.getContext(line_in->id.threadId); + TheISA::Decoder *decoder = thread->getDecoderPtr(); + + /* Discard line due to prediction sequence number being wrong but + * without the streamSeqNum number having changed */ + bool discard_line = + expectedStreamSeqNum == line_in->id.streamSeqNum && + predictionSeqNum != line_in->id.predictionSeqNum; + + /* Set the PC if the stream changes. Setting havePC to false in + * a previous cycle handles all other change of flow of control + * issues */ + bool set_pc = lastStreamSeqNum != line_in->id.streamSeqNum; + + if (!discard_line && (!havePC || set_pc)) { + /* Set the inputIndex to be the MachInst-aligned offset + * from lineBaseAddr of the new PC value */ + inputIndex = + (line_in->pc.instAddr() & BaseCPU::PCMask) - + line_in->lineBaseAddr; + DPRINTF(Fetch, "Setting new PC value: %s inputIndex: 0x%x" + " lineBaseAddr: 0x%x lineWidth: 0x%x\n", + line_in->pc, inputIndex, line_in->lineBaseAddr, + line_in->lineWidth); + pc = line_in->pc; + havePC = true; + decoder->reset(); + } + + /* The generated instruction. Leave as NULL if no instruction + * is to be packed into the output */ + MinorDynInstPtr dyn_inst = NULL; + + if (discard_line) { + /* Rest of line was from an older prediction in the same + * stream */ + DPRINTF(Fetch, "Discarding line %s (from inputIndex: %d)" + " due to predictionSeqNum mismatch (expected: %d)\n", + line_in->id, inputIndex, predictionSeqNum); + } else if (line_in->isFault()) { + /* Pack a fault as a MinorDynInst with ->fault set */ + + /* Make a new instruction and pick up the line, stream, + * prediction, thread ids from the incoming line */ + dyn_inst = new MinorDynInst(line_in->id); + + /* Fetch and prediction sequence numbers originate here */ + dyn_inst->id.fetchSeqNum = fetchSeqNum; + dyn_inst->id.predictionSeqNum = predictionSeqNum; + /* To complete the set, test that exec sequence number has + * not been set */ + assert(dyn_inst->id.execSeqNum == 0); + + dyn_inst->pc = pc; + + /* Pack a faulting instruction but allow other + * instructions to be generated. 
(Fetch2 makes no + * immediate judgement about streamSeqNum) */ + dyn_inst->fault = line_in->fault; + DPRINTF(Fetch, "Fault being passed output_index: " + "%d: %s\n", output_index, dyn_inst->fault->name()); + } else { + uint8_t *line = line_in->line; + + TheISA::MachInst inst_word; + /* The instruction is wholly in the line, can just + * assign */ + inst_word = TheISA::gtoh( + *(reinterpret_cast<TheISA::MachInst *> + (line + inputIndex))); + + if (!decoder->instReady()) { + decoder->moreBytes(pc, + line_in->lineBaseAddr + inputIndex, inst_word); + DPRINTF(Fetch, "Offering MachInst to decoder" + " addr: 0x%x\n", line_in->lineBaseAddr + inputIndex); + } + + /* Maybe make the above a loop to accommodate ISAs with + * instructions longer than sizeof(MachInst) */ + + if (decoder->instReady()) { + /* Make a new instruction and pick up the line, stream, + * prediction, thread ids from the incoming line */ + dyn_inst = new MinorDynInst(line_in->id); + + /* Fetch and prediction sequence numbers originate here */ + dyn_inst->id.fetchSeqNum = fetchSeqNum; + dyn_inst->id.predictionSeqNum = predictionSeqNum; + /* To complete the set, test that exec sequence number + * has not been set */ + assert(dyn_inst->id.execSeqNum == 0); + + /* Note that the decoder can update the given PC. + * Remember not to assign it until *after* calling + * decode */ + StaticInstPtr decoded_inst = decoder->decode(pc); + dyn_inst->staticInst = decoded_inst; + + dyn_inst->pc = pc; + + DPRINTF(Fetch, "Instruction extracted from line %s" + " lineWidth: %d output_index: %d inputIndex: %d" + " pc: %s inst: %s\n", + line_in->id, + line_in->lineWidth, output_index, inputIndex, + pc, *dyn_inst); + +#if THE_ISA == X86_ISA || THE_ISA == ARM_ISA + /* In SE mode, it's possible to branch to a microop when + * replaying faults such as page faults (or simply + * intra-microcode branches in X86). Unfortunately, + * as Minor has micro-op decomposition in a separate + * pipeline stage from instruction decomposition, the + * following advancePC (which may follow a branch with + * microPC() != 0) *must* see a fresh macroop. This + * kludge should be improved with an addition to PCState + * but I offer it in this form for the moment + * + * X86 can branch within microops so we need to deal with + * the case that, after a branch, the first un-advanced PC + * may be pointing to a microop other than 0. 
Once + * advanced, however, the microop number *must* be 0 */ + pc.upc(0); + pc.nupc(1); +#endif + + /* Advance PC for the next instruction */ + TheISA::advancePC(pc, decoded_inst); + + /* Predict any branches and issue a branch if + * necessary */ + predictBranch(dyn_inst, prediction); + } else { + DPRINTF(Fetch, "Inst not ready yet\n"); + } + + /* Step on the pointer into the line if there's no + * complete instruction waiting */ + if (decoder->needMoreBytes()) { + inputIndex += sizeof(TheISA::MachInst); + + DPRINTF(Fetch, "Updated inputIndex value PC: %s" + " inputIndex: 0x%x lineBaseAddr: 0x%x lineWidth: 0x%x\n", + line_in->pc, inputIndex, line_in->lineBaseAddr, + line_in->lineWidth); + } + } + + if (dyn_inst) { + /* Step to next sequence number */ + fetchSeqNum++; + + /* Correctly size the output before writing */ + if (output_index == 0) + insts_out.resize(outputWidth); + /* Pack the generated dynamic instruction into the output */ + insts_out.insts[output_index] = dyn_inst; + output_index++; + + /* Output MinorTrace instruction info for + * pre-microop decomposition macroops */ + if (DTRACE(MinorTrace) && !dyn_inst->isFault() && + dyn_inst->staticInst->isMacroop()) + { + dyn_inst->minorTraceInst(*this); + } + } + + /* Remember the streamSeqNum of this line so we can tell when + * we change stream */ + lastStreamSeqNum = line_in->id.streamSeqNum; + + /* Asked to discard line or there was a branch or fault */ + if (!prediction.isBubble() || /* The remains of a + line with a prediction in it */ + line_in->isFault() /* A line which is just a fault */) + { + DPRINTF(Fetch, "Discarding all input on branch/fault\n"); + dumpAllInput(); + havePC = false; + line_in = NULL; + } else if (discard_line) { + /* Just discard one line, ones behind it may have new + * stream sequence numbers. There's a DPRINTF above + * for this event */ + popInput(); + havePC = false; + line_in = NULL; + } else if (inputIndex == line_in->lineWidth) { + /* Got to end of a line, pop the line but keep PC + * in case this is a line-wrapping inst. 
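+ * (havePC stays true here so that the decoder can be offered the + * remainder of an instruction that wraps into the next line.)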
*/ + popInput(); + line_in = NULL; + } + + if (!line_in && processMoreThanOneInput) { + DPRINTF(Fetch, "Wrapping\n"); + line_in = getInput(); + } + } + + /* The rest of the output (if any) should already have been packed + * with bubble instructions by insts_out's initialisation */ + } + + /** Reserve a slot in the next stage and output data */ + *predictionOut.inputWire = prediction; + + /* If we generated output, reserve space for the result in the next stage + * and mark the stage as being active this cycle */ + if (!insts_out.isBubble()) { + /* Note activity of following buffer */ + cpu.activityRecorder->activity(); + nextStageReserve.reserve(); + } + + /* If we still have input to process and somewhere to put it, + * mark stage as active */ + if (getInput() && nextStageReserve.canReserve()) + cpu.activityRecorder->activateStage(Pipeline::Fetch2StageId); + + /* Make sure the input (if any left) is pushed */ + inputBuffer.pushTail(); +} + +bool +Fetch2::isDrained() +{ + return inputBuffer.empty() && + (*inp.outputWire).isBubble() && + (*predictionOut.inputWire).isBubble(); +} + +void +Fetch2::minorTrace() const +{ + std::ostringstream data; + + if (blocked) + data << 'B'; + else + (*out.inputWire).reportData(data); + + MINORTRACE("inputIndex=%d havePC=%d predictionSeqNum=%d insts=%s\n", + inputIndex, havePC, predictionSeqNum, data.str()); + inputBuffer.minorTrace(); +} + +} diff --git a/src/cpu/minor/fetch2.hh b/src/cpu/minor/fetch2.hh new file mode 100644 index 000000000..2fc38b377 --- /dev/null +++ b/src/cpu/minor/fetch2.hh @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * Fetch2 receives lines of data from Fetch1, separates them into + * instructions and passes them to Decode + */ + +#ifndef __CPU_MINOR_FETCH2_HH__ +#define __CPU_MINOR_FETCH2_HH__ + +#include "cpu/minor/buffers.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/pipe_data.hh" +#include "cpu/pred/bpred_unit.hh" +#include "params/MinorCPU.hh" + +namespace Minor +{ + +/** This stage receives lines of data from Fetch1, separates them into + * instructions and passes them to Decode */ +class Fetch2 : public Named +{ + protected: + /** Pointer back to the containing CPU */ + MinorCPU &cpu; + + /** Input port carrying lines from Fetch1 */ + Latch<ForwardLineData>::Output inp; + + /** Input port carrying branches from Execute. This is a snoop of the + * data provided to F1. */ + Latch<BranchData>::Output branchInp; + + /** Output port carrying predictions back to Fetch1 */ + Latch<BranchData>::Input predictionOut; + + /** Output port carrying instructions into Decode */ + Latch<ForwardInstData>::Input out; + + /** Interface to reserve space in the next stage */ + Reservable &nextStageReserve; + + /** Width of output of this stage/input of next in instructions */ + unsigned int outputWidth; + + /** If true, more than one input word can be processed each cycle if + * there is room in the output to contain its processed data */ + bool processMoreThanOneInput; + + /** Branch predictor passed from Python configuration */ + BPredUnit &branchPredictor; + + public: + /* Public so that Pipeline can pass it to Fetch1 */ + InputBuffer<ForwardLineData> inputBuffer; + + protected: + /** Data members after this line are cycle-to-cycle state */ + + /** Index into an incompletely processed input line that instructions + * are to be extracted from */ + unsigned int inputIndex; + + /** Remembered program counter value. Between contiguous lines, this + * is just updated with advancePC. For lines following changes of + * stream, a new PC must be loaded and havePC be set. + * havePC is needed to accommodate instructions which span across + * lines meaning that Fetch2 and the decoder need to remember a PC + * value and a partially-offered instruction from the previous line */ + TheISA::PCState pc; + + /** PC is currently valid. Initially false, gets set to true when a + * change-of-stream line is received and false again when lines are + * discarded for any reason */ + bool havePC; + + /** Stream sequence number of the last seen line used to identify changes + * of instruction stream */ + InstSeqNum lastStreamSeqNum; + + /** Fetch2 is the source of fetch sequence numbers. These represent the + * sequence that instructions were extracted from fetched lines. */ + InstSeqNum fetchSeqNum; + + /** Stream sequence number remembered from last time the predictionSeqNum + * changed. 
Lines should only be discarded when their predictionSeqNums + * disagree with Fetch2::predictionSeqNum *and* they are from the same + * stream that bore that prediction number */ + InstSeqNum expectedStreamSeqNum; + + /** Fetch2 is the source of prediction sequence numbers. These represent + * predicted changes of control flow sources from branch prediction in + * Fetch2. */ + InstSeqNum predictionSeqNum; + + /** Blocked indication for report */ + bool blocked; + + protected: + /** Get a piece of data to work on from the inputBuffer, or 0 if there + * is no data. */ + const ForwardLineData *getInput(); + + /** Pop an element off the input buffer, if there are any */ + void popInput(); + + /** Dump the whole contents of the input buffer. Useful after a + * prediction changes control flow */ + void dumpAllInput(); + + /** Update local branch prediction structures from feedback from + * Execute. */ + void updateBranchPrediction(const BranchData &branch); + + /** Predicts branches for the given instruction. Updates the + * instruction's predicted... fields and also the branch which + * carries the prediction to Fetch1 */ + void predictBranch(MinorDynInstPtr inst, BranchData &branch); + + public: + Fetch2(const std::string &name, + MinorCPU &cpu_, + MinorCPUParams &params, + Latch<ForwardLineData>::Output inp_, + Latch<BranchData>::Output branchInp_, + Latch<BranchData>::Input predictionOut_, + Latch<ForwardInstData>::Input out_, + Reservable &next_stage_input_buffer); + + public: + /** Pass on input/buffer data to the output if you can */ + void evaluate(); + + void minorTrace() const; + + /** Is this stage drained? For Fetch2, draining is initiated by + * Execute halting Fetch1 causing Fetch2 to naturally drain. + * Branch predictions are ignored by Fetch1 during halt */ + bool isDrained(); +}; + +} + +#endif /* __CPU_MINOR_FETCH2_HH__ */ diff --git a/src/cpu/minor/func_unit.cc b/src/cpu/minor/func_unit.cc new file mode 100644 index 000000000..1a75c4aa8 --- /dev/null +++ b/src/cpu/minor/func_unit.cc @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +#include <iomanip> +#include <sstream> +#include <typeinfo> + +#include "cpu/minor/func_unit.hh" +#include "debug/MinorTiming.hh" +#include "enums/OpClass.hh" + +MinorOpClass * +MinorOpClassParams::create() +{ + return new MinorOpClass(this); +} + +MinorOpClassSet * +MinorOpClassSetParams::create() +{ + return new MinorOpClassSet(this); +} + +MinorFUTiming * +MinorFUTimingParams::create() +{ + return new MinorFUTiming(this); +} + +MinorFU * +MinorFUParams::create() +{ + return new MinorFU(this); +} + +MinorFUPool * +MinorFUPoolParams::create() +{ + return new MinorFUPool(this); +} + +MinorOpClassSet::MinorOpClassSet(const MinorOpClassSetParams *params) : + SimObject(params), + opClasses(params->opClasses), + /* Initialise to true for an empty list so that 'fully capable' is + * the default */ + capabilityList(Num_OpClasses, (opClasses.empty() ? true : false)) +{ + for (unsigned int i = 0; i < opClasses.size(); i++) + capabilityList[opClasses[i]->opClass] = true; +} + +MinorFUTiming::MinorFUTiming( + const MinorFUTimingParams *params) : + SimObject(params), + mask(params->mask), + match(params->match), + description(params->description), + suppress(params->suppress), + extraCommitLat(params->extraCommitLat), + extraCommitLatExpr(params->extraCommitLatExpr), + extraAssumedLat(params->extraAssumedLat), + srcRegsRelativeLats(params->srcRegsRelativeLats), + opClasses(params->opClasses) +{ } + +namespace Minor +{ + +void +QueuedInst::reportData(std::ostream &os) const +{ + inst->reportData(os); +} + +FUPipeline::FUPipeline(const std::string &name, const MinorFU &description_, + ClockedObject &timeSource_) : + FUPipelineBase(name, "insts", description_.opLat), + description(description_), + timeSource(timeSource_), + nextInsertCycle(Cycles(0)) +{ + /* Issue latencies are set to 1 in calls to addCapability here. 
+ * Issue latencies are associated with the pipeline as a whole, + * rather than instruction classes in Minor */ + + /* All pipelines should be able to execute No_OpClass instructions */ + addCapability(No_OpClass, description.opLat, 1); + + /* Add the capabilities listed in the MinorFU for this functional unit */ + for (unsigned int i = 0; i < description.opClasses->opClasses.size(); + i++) + { + addCapability(description.opClasses->opClasses[i]->opClass, + description.opLat, 1); + } + + for (unsigned int i = 0; i < description.timings.size(); i++) { + MinorFUTiming &timing = *(description.timings[i]); + + if (DTRACE(MinorTiming)) { + std::ostringstream lats; + + unsigned int num_lats = timing.srcRegsRelativeLats.size(); + unsigned int j = 0; + while (j < num_lats) { + lats << timing.srcRegsRelativeLats[j]; + + j++; + if (j != num_lats) + lats << ','; + } + + DPRINTFS(MinorTiming, static_cast<Named *>(this), + "Adding extra timing decode pattern %d to FU" + " mask: %016x match: %016x srcRegLatencies: %s\n", + i, timing.mask, timing.match, lats.str()); + } + } + + const std::vector<unsigned> &cant_forward = + description.cantForwardFromFUIndices; + + /* Setup the bit vector cantForward... with the set indices + * specified in the parameters */ + for (auto i = cant_forward.begin(); i != cant_forward.end(); ++i) { + cantForwardFromFUIndices.resize((*i) + 1, false); + cantForwardFromFUIndices[*i] = true; + } +} + +Cycles +FUPipeline::cyclesBeforeInsert() +{ + if (nextInsertCycle == 0 || timeSource.curCycle() > nextInsertCycle) + return Cycles(0); + else + return nextInsertCycle - timeSource.curCycle(); +} + +bool +FUPipeline::canInsert() const +{ + return nextInsertCycle == 0 || timeSource.curCycle() >= nextInsertCycle; +} + +void +FUPipeline::advance() +{ + bool was_stalled = stalled; + + /* If an instruction was pushed into the pipeline, set the delay before + * the next instruction can follow */ + if (alreadyPushed()) { + if (nextInsertCycle <= timeSource.curCycle()) { + nextInsertCycle = timeSource.curCycle() + description.issueLat; + } + } else if (was_stalled && nextInsertCycle != 0) { + /* Don't count stalled cycles as part of the issue latency */ + ++nextInsertCycle; + } + FUPipelineBase::advance(); +} + +MinorFUTiming * +FUPipeline::findTiming(StaticInstPtr inst) +{ +#if THE_ISA == ARM_ISA + /* This should work for any ISA with a POD mach_inst */ + TheISA::ExtMachInst mach_inst = inst->machInst; +#else + /* Just allow extra decode based on op classes */ + uint64_t mach_inst = 0; +#endif + + const std::vector<MinorFUTiming *> &timings = + description.timings; + unsigned int num_timings = timings.size(); + + for (unsigned int i = 0; i < num_timings; i++) { + MinorFUTiming &timing = *timings[i]; + + if (timing.provides(inst->opClass()) && + (mach_inst & timing.mask) == timing.match) + { + DPRINTFS(MinorTiming, static_cast<Named *>(this), + "Found extra timing match (pattern %d '%s')" + " %s %16x (type %s)\n", + i, timing.description, inst->disassemble(0), mach_inst, + typeid(*inst).name()); + + return &timing; + } + } + + if (num_timings != 0) { + DPRINTFS(MinorTiming, static_cast<Named *>(this), + "No extra timing info. 
found for inst: %s" + " mach_inst: %16x\n", + inst->disassemble(0), mach_inst); + } + + return NULL; +} + +} diff --git a/src/cpu/minor/func_unit.hh b/src/cpu/minor/func_unit.hh new file mode 100644 index 000000000..34da579b6 --- /dev/null +++ b/src/cpu/minor/func_unit.hh @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * Execute function unit descriptions and pipeline implementations. 
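+ */

The two latencies that organise everything in this header compose simply:
opLat is the depth of an FU's pipeline (cycles from issue to result), while
issueLat is the minimum gap before the next instruction may enter. A minimal
standalone sketch of the insertion rule that FUPipeline::canInsert and
FUPipeline::advance implement (toy code with illustrative names, not the
gem5 API):

#include <cstdint>
#include <iostream>

// Toy model of the Minor FU insertion rule: after an instruction is
// pushed in cycle c, the next insertion is allowed at c + issueLat.
// nextInsertCycle == 0 means "free to insert immediately", as in
// FUPipeline below.
struct ToyFU
{
    uint64_t issueLat;
    uint64_t nextInsertCycle;

    bool canInsert(uint64_t now) const
    { return nextInsertCycle == 0 || now >= nextInsertCycle; }

    void insert(uint64_t now) { nextInsertCycle = now + issueLat; }
};

int main()
{
    ToyFU fu{2, 0}; // issueLat = 2: at most one issue every two cycles

    for (uint64_t cycle = 0; cycle < 6; cycle++) {
        bool ok = fu.canInsert(cycle);
        std::cout << "cycle " << cycle << ": "
            << (ok ? "insert" : "stall") << '\n';
        if (ok)
            fu.insert(cycle);
    }
    return 0;
}

With issueLat = 2 the model issues on cycles 0, 2 and 4 and stalls in
between, which is the cadence the real pipeline enforces through
nextInsertCycle.
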
+#ifndef __CPU_MINOR_FUNC_UNIT_HH__
+#define __CPU_MINOR_FUNC_UNIT_HH__
+
+#include "cpu/minor/buffers.hh"
+#include "cpu/minor/dyn_inst.hh"
+#include "cpu/func_unit.hh"
+#include "cpu/timing_expr.hh"
+#include "params/MinorFU.hh"
+#include "params/MinorFUPool.hh"
+#include "params/MinorOpClass.hh"
+#include "params/MinorOpClassSet.hh"
+#include "sim/clocked_object.hh"
+
+/** Boxing for MinorOpClass to get around a build problem with C++11 but
+ * also allow for future additions to op class checking */
+class MinorOpClass : public SimObject
+{
+  public:
+    OpClass opClass;
+
+  public:
+    MinorOpClass(const MinorOpClassParams *params) :
+        SimObject(params),
+        opClass(params->opClass)
+    { }
+};
+
+/** Wrapper for a matchable set of op classes */
+class MinorOpClassSet : public SimObject
+{
+  public:
+    std::vector<MinorOpClass *> opClasses;
+
+    /** Convenience packing of opClasses into a bit vector for easier
+     * testing */
+    std::vector<bool> capabilityList;
+
+  public:
+    MinorOpClassSet(const MinorOpClassSetParams *params);
+
+  public:
+    /** Does this set support the given op class? */
+    bool provides(OpClass op_class) { return capabilityList[op_class]; }
+};
+
+/** Extra timing capability to allow individual ops to have their source
+ * register dependency latencies tweaked based on the ExtMachInst of the
+ * source instruction.
+ */
+class MinorFUTiming : public SimObject
+{
+  public:
+    /** Mask off the ExtMachInst of an instruction before comparing with
+     * match */
+    uint64_t mask;
+    uint64_t match;
+
+    /** Textual description of the decode's purpose */
+    std::string description;
+
+    /** If true, instructions matching this mask/match should *not* be
+     * issued in this FU */
+    bool suppress;
+
+    /** Extra latency that the instruction should spend at the end of
+     * the pipeline */
+    Cycles extraCommitLat;
+    TimingExpr *extraCommitLatExpr;
+
+    /** Extra delay that results should show in the scoreboard after
+     * leaving the pipeline.  If set to Cycles(0) for memory references,
+     * an 'unpredictable' return time will be set in the scoreboard
+     * blocking following dependent instructions from issuing */
+    Cycles extraAssumedLat;
+
+    /** Cycle offsets from the scoreboard delivery times of register values
+     * for each of this instruction's source registers (in srcRegs order).
+     * The offsets are subtracted from the scoreboard returnCycle times.
+     * For example, for an instruction type with 3 source registers,
+     * [2, 1, 2] will allow the instruction to issue up to 2 cycles early
+     * for dependencies on the 1st and 3rd register and up to 1 cycle early
+     * on the 2nd. */
+    std::vector<Cycles> srcRegsRelativeLats;
+
+    /** Extra opClasses check (after the FU one) */
+    MinorOpClassSet *opClasses;
+
+  public:
+    MinorFUTiming(const MinorFUTimingParams *params);
+
+  public:
+    /** Does the extra decode in this object support the given op class? */
+    bool provides(OpClass op_class) { return opClasses->provides(op_class); }
+};
+
+/** A functional unit that can execute any of opClasses operations with a
+ * single op(eration)Lat(ency) and issueLat(ency) associated with the unit
+ * rather than each operation (as in src/FuncUnit).
+ *
+ * This is very similar to cpu/func_unit but replicated here to allow
+ * the Minor functional units to change without having to disturb the common
+ * definition.
+ */
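
MinorFUTiming entries are selected by masked comparison against the raw
instruction encoding: an entry applies when (mach_inst & mask) == match,
which is exactly the test FUPipeline::findTiming performs in func_unit.cc.
A standalone sketch of that matching scheme (the table contents and
encodings here are invented for illustration):

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative mask/match entry mirroring the (mach_inst & mask) == match
// test used by findTiming. The patterns below are made up.
struct ToyTiming
{
    uint64_t mask;
    uint64_t match;
    const char *description;

    bool matches(uint64_t mach_inst) const
    { return (mach_inst & mask) == match; }
};

int main()
{
    // The first matching entry wins, as in findTiming's linear scan
    std::vector<ToyTiming> timings = {
        {0xff000000, 0x0e000000, "example multiply-class pattern"},
        {0xf0000000, 0x00000000, "example catch-all pattern"}
    };

    uint64_t mach_inst = 0x0e12abcd; // invented encoding

    for (const auto &t : timings) {
        if (t.matches(mach_inst)) {
            std::printf("matched: %s\n", t.description);
            break;
        }
    }
    return 0;
}
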
+ */ +class MinorFU : public SimObject +{ + public: + MinorOpClassSet *opClasses; + + /** Delay from issuing the operation, to it reaching the + * end of the associated pipeline */ + Cycles opLat; + + /** Delay after issuing an operation before the next + * operation can be issued */ + Cycles issueLat; + + /** FUs which this pipeline can't receive a forwarded (i.e. relative + * latency != 0) result from */ + std::vector<unsigned int> cantForwardFromFUIndices; + + /** Extra timing info to give timings to individual ops */ + std::vector<MinorFUTiming *> timings; + + public: + MinorFU(const MinorFUParams *params) : + SimObject(params), + opClasses(params->opClasses), + opLat(params->opLat), + issueLat(params->issueLat), + cantForwardFromFUIndices(params->cantForwardFromFUIndices), + timings(params->timings) + { } +}; + +/** A collection of MinorFUs */ +class MinorFUPool : public SimObject +{ + public: + std::vector<MinorFU *> funcUnits; + + public: + MinorFUPool(const MinorFUPoolParams *params) : + SimObject(params), + funcUnits(params->funcUnits) + { } +}; + +namespace Minor +{ + +/** Container class to box instructions in the FUs to make those + * queues have correct bubble behaviour when stepped */ +class QueuedInst +{ + public: + MinorDynInstPtr inst; + + public: + QueuedInst(MinorDynInstPtr inst_ = MinorDynInst::bubble()) : + inst(inst_) + { } + + public: + /** Report and bubble interfaces */ + void reportData(std::ostream &os) const; + bool isBubble() const { return inst->isBubble(); } + + static QueuedInst bubble() + { return QueuedInst(MinorDynInst::bubble()); } +}; + +/** Functional units have pipelines which stall when an inst gets to + * their ends allowing Execute::commit to pick up timing-completed insts + * when it feels like it */ +typedef SelfStallingPipeline<QueuedInst, + ReportTraitsAdaptor<QueuedInst> > FUPipelineBase; + +/** A functional unit configured from a MinorFU object */ +class FUPipeline : public FUPipelineBase, public FuncUnit +{ + public: + /** Functional unit description that this pipeline implements */ + const MinorFU &description; + + /** An FUPipeline needs access to curCycle, use this timing source */ + ClockedObject &timeSource; + + /** Set of operation classes supported by this FU */ + std::bitset<Num_OpClasses> capabilityList; + + /** FUs which this pipeline can't receive a forwarded (i.e. relative + * latency != 0) result from */ + std::vector<bool> cantForwardFromFUIndices; + + public: + /** When can a new instruction be inserted into the pipeline? This is + * an absolute cycle time unless it is 0 in which case the an + * instruction can be pushed straightaway */ + Cycles nextInsertCycle; + + public: + FUPipeline(const std::string &name, const MinorFU &description_, + ClockedObject &timeSource_); + + public: + /** How many cycles must from curCycle before insertion into the + * pipeline is allowed */ + Cycles cyclesBeforeInsert(); + + /** Can an instruction be inserted now? */ + bool canInsert() const; + + /** Find the extra timing information for this instruction. Returns + * NULL if no decode info. is found */ + MinorFUTiming *findTiming(StaticInstPtr inst); + + /** Step the pipeline. Allow multiple steps? 
+    void advance();
+};
+
+}
+
+#endif /* __CPU_MINOR_FUNC_UNIT_HH__ */
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
new file mode 100644
index 000000000..c5e38c78d
--- /dev/null
+++ b/src/cpu/minor/lsq.cc
@@ -0,0 +1,1614 @@
+/*
+ * Copyright (c) 2013-2014 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Bardsley
+ */
+
+#include <iomanip>
+#include <sstream>
+
+#include "arch/locked_mem.hh"
+#include "arch/mmapped_ipr.hh"
+#include "cpu/minor/cpu.hh"
+#include "cpu/minor/exec_context.hh"
+#include "cpu/minor/execute.hh"
+#include "cpu/minor/lsq.hh"
+#include "cpu/minor/pipeline.hh"
+#include "debug/Activity.hh"
+#include "debug/MinorMem.hh"
+
+namespace Minor
+{
+
+/** Returns the offset of addr into an aligned block of size block_size */
+static Addr
+addrBlockOffset(Addr addr, unsigned int block_size)
+{
+    return addr & (block_size - 1);
+}
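
Both helpers here assume block_size is a power of two: masking with
(block_size - 1) extracts the offset within a block, and (as
transferNeedsBurst below shows) a transfer must be split when that offset
plus the access size spills past the block boundary. A standalone worked
example of the arithmetic (sample addresses invented):

#include <cstdint>
#include <cstdio>

// The same arithmetic as addrBlockOffset/transferNeedsBurst, reproduced
// standalone so the numbers can be checked by hand.
static uint64_t blockOffset(uint64_t addr, unsigned int block_size)
{ return addr & (block_size - 1); }

static bool needsBurst(uint64_t addr, unsigned int size,
    unsigned int block_size)
{ return (blockOffset(addr, block_size) + size) > block_size; }

int main()
{
    const unsigned int block_size = 64; // e.g. a 64-byte cache line

    // 8 bytes at 0x103c: offset 60, 60 + 8 = 68 > 64, so it must split
    std::printf("0x103c/8: offset %llu burst %d\n",
        (unsigned long long)blockOffset(0x103c, block_size),
        needsBurst(0x103c, 8, block_size));

    // 8 bytes at 0x1038: offset 56, 56 + 8 = 64, fits the block exactly
    std::printf("0x1038/8: offset %llu burst %d\n",
        (unsigned long long)blockOffset(0x1038, block_size),
        needsBurst(0x1038, 8, block_size));

    return 0;
}
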
+/** Returns true if the given [addr .. addr+size-1] transfer needs to be
+ * fragmented across a block size of block_size */
+static bool
+transferNeedsBurst(Addr addr, unsigned int size, unsigned int block_size)
+{
+    return (addrBlockOffset(addr, block_size) + size) > block_size;
+}
+
+LSQ::LSQRequest::LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
+    PacketDataPtr data_, uint64_t *res_) :
+    SenderState(),
+    port(port_),
+    inst(inst_),
+    isLoad(isLoad_),
+    data(data_),
+    packet(NULL),
+    request(),
+    fault(NoFault),
+    res(res_),
+    skipped(false),
+    issuedToMemory(false),
+    state(NotIssued)
+{ }
+
+LSQ::AddrRangeCoverage
+LSQ::LSQRequest::containsAddrRangeOf(
+    Addr req1_addr, unsigned int req1_size,
+    Addr req2_addr, unsigned int req2_size)
+{
+    /* 'end' here means the address of the byte just past the request
+     * blocks */
+    Addr req2_end_addr = req2_addr + req2_size;
+    Addr req1_end_addr = req1_addr + req1_size;
+
+    AddrRangeCoverage ret;
+
+    /* As the end addresses are one past the last byte, ranges that
+     * merely touch (e.g. req1_addr == req2_end_addr) are disjoint */
+    if (req1_addr >= req2_end_addr || req1_end_addr <= req2_addr)
+        ret = NoAddrRangeCoverage;
+    else if (req1_addr <= req2_addr && req1_end_addr >= req2_end_addr)
+        ret = FullAddrRangeCoverage;
+    else
+        ret = PartialAddrRangeCoverage;
+
+    return ret;
+}
+
+LSQ::AddrRangeCoverage
+LSQ::LSQRequest::containsAddrRangeOf(LSQRequestPtr other_request)
+{
+    return containsAddrRangeOf(request.getPaddr(), request.getSize(),
+        other_request->request.getPaddr(), other_request->request.getSize());
+}
+
+bool
+LSQ::LSQRequest::isBarrier()
+{
+    return inst->isInst() && inst->staticInst->isMemBarrier();
+}
+
+bool
+LSQ::LSQRequest::needsToBeSentToStoreBuffer()
+{
+    return state == StoreToStoreBuffer;
+}
+
+void
+LSQ::LSQRequest::setState(LSQRequestState new_state)
+{
+    DPRINTFS(MinorMem, (&port), "Setting state from %d to %d for request:"
+        " %s\n", state, new_state, *inst);
+    state = new_state;
+}
+
+bool
+LSQ::LSQRequest::isComplete() const
+{
+    /* @todo, There is currently only one 'completed' state.  This
+     * may not be a good choice */
+    return state == Complete;
+}
+
+void
+LSQ::LSQRequest::reportData(std::ostream &os) const
+{
+    os << (isLoad ?
'R' : 'W') << ';'; + inst->reportData(os); + os << ';' << state; +} + +std::ostream & +operator <<(std::ostream &os, LSQ::AddrRangeCoverage coverage) +{ + switch (coverage) { + case LSQ::PartialAddrRangeCoverage: + os << "PartialAddrRangeCoverage"; + break; + case LSQ::FullAddrRangeCoverage: + os << "FullAddrRangeCoverage"; + break; + case LSQ::NoAddrRangeCoverage: + os << "NoAddrRangeCoverage"; + break; + default: + os << "AddrRangeCoverage-" << static_cast<int>(coverage); + break; + } + return os; +} + +std::ostream & +operator <<(std::ostream &os, LSQ::LSQRequest::LSQRequestState state) +{ + switch (state) { + case LSQ::LSQRequest::NotIssued: + os << "NotIssued"; + break; + case LSQ::LSQRequest::InTranslation: + os << "InTranslation"; + break; + case LSQ::LSQRequest::Translated: + os << "Translated"; + break; + case LSQ::LSQRequest::Failed: + os << "Failed"; + break; + case LSQ::LSQRequest::RequestIssuing: + os << "RequestIssuing"; + break; + case LSQ::LSQRequest::StoreToStoreBuffer: + os << "StoreToStoreBuffer"; + break; + case LSQ::LSQRequest::StoreInStoreBuffer: + os << "StoreInStoreBuffer"; + break; + case LSQ::LSQRequest::StoreBufferIssuing: + os << "StoreBufferIssuing"; + break; + case LSQ::LSQRequest::RequestNeedsRetry: + os << "RequestNeedsRetry"; + break; + case LSQ::LSQRequest::StoreBufferNeedsRetry: + os << "StoreBufferNeedsRetry"; + break; + case LSQ::LSQRequest::Complete: + os << "Complete"; + break; + default: + os << "LSQRequestState-" << static_cast<int>(state); + break; + } + return os; +} + +void +LSQ::clearMemBarrier(MinorDynInstPtr inst) +{ + bool is_last_barrier = inst->id.execSeqNum >= lastMemBarrier; + + DPRINTF(MinorMem, "Moving %s barrier out of store buffer inst: %s\n", + (is_last_barrier ? "last" : "a"), *inst); + + if (is_last_barrier) + lastMemBarrier = 0; +} + +void +LSQ::SingleDataRequest::finish(Fault fault_, RequestPtr request_, + ThreadContext *tc, BaseTLB::Mode mode) +{ + fault = fault_; + + port.numAccessesInDTLB--; + + DPRINTFS(MinorMem, (&port), "Received translation response for" + " request: %s\n", *inst); + + makePacket(); + + setState(Translated); + port.tryToSendToTransfers(this); + + /* Let's try and wake up the processor for the next cycle */ + port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); +} + +void +LSQ::SingleDataRequest::startAddrTranslation() +{ + ThreadContext *thread = port.cpu.getContext( + inst->id.threadId); + + port.numAccessesInDTLB++; + + setState(LSQ::LSQRequest::InTranslation); + + DPRINTFS(MinorMem, (&port), "Submitting DTLB request\n"); + /* Submit the translation request. The response will come through + * finish/markDelayed on the LSQRequest as it bears the Translation + * interface */ + thread->getDTBPtr()->translateTiming( + &request, thread, this, (isLoad ? 
BaseTLB::Read : BaseTLB::Write)); +} + +void +LSQ::SingleDataRequest::retireResponse(PacketPtr packet_) +{ + DPRINTFS(MinorMem, (&port), "Retiring packet\n"); + packet = packet_; + packetInFlight = false; + setState(Complete); +} + +void +LSQ::SplitDataRequest::finish(Fault fault_, RequestPtr request_, + ThreadContext *tc, BaseTLB::Mode mode) +{ + fault = fault_; + + port.numAccessesInDTLB--; + + unsigned int M5_VAR_USED expected_fragment_index = + numTranslatedFragments; + + numInTranslationFragments--; + numTranslatedFragments++; + + DPRINTFS(MinorMem, (&port), "Received translation response for fragment" + " %d of request: %s\n", expected_fragment_index, *inst); + + assert(request_ == fragmentRequests[expected_fragment_index]); + + /* Wake up next cycle to get things going again in case the + * tryToSendToTransfers does take */ + port.cpu.wakeupOnEvent(Pipeline::ExecuteStageId); + + if (fault != NoFault) { + /* tryToSendToTransfers will handle the fault */ + + DPRINTFS(MinorMem, (&port), "Faulting translation for fragment:" + " %d of request: %s\n", + expected_fragment_index, *inst); + + setState(Translated); + port.tryToSendToTransfers(this); + } else if (numTranslatedFragments == numFragments) { + makeFragmentPackets(); + + setState(Translated); + port.tryToSendToTransfers(this); + } else { + /* Avoid calling translateTiming from within ::finish */ + assert(!translationEvent.scheduled()); + port.cpu.schedule(translationEvent, curTick()); + } +} + +LSQ::SplitDataRequest::SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_, + bool isLoad_, PacketDataPtr data_, uint64_t *res_) : + LSQRequest(port_, inst_, isLoad_, data_, res_), + translationEvent(*this), + numFragments(0), + numInTranslationFragments(0), + numTranslatedFragments(0), + numIssuedFragments(0), + numRetiredFragments(0), + fragmentRequests(), + fragmentPackets() +{ + /* Don't know how many elements are needed until the request is + * populated by the caller. */ +} + +LSQ::SplitDataRequest::~SplitDataRequest() +{ + for (auto i = fragmentRequests.begin(); + i != fragmentRequests.end(); i++) + { + delete *i; + } + + for (auto i = fragmentPackets.begin(); + i != fragmentPackets.end(); i++) + { + delete *i; + } +} + +void +LSQ::SplitDataRequest::makeFragmentRequests() +{ + Addr base_addr = request.getVaddr(); + unsigned int whole_size = request.getSize(); + unsigned int line_width = port.lineWidth; + + unsigned int fragment_size; + Addr fragment_addr; + + /* Assume that this transfer is across potentially many block snap + * boundaries: + * + * | _|________|________|________|___ | + * | |0| 1 | 2 | 3 | 4 | | + * | |_|________|________|________|___| | + * | | | | | | + * + * The first transfer (0) can be up to lineWidth in size. + * All the middle transfers (1-3) are lineWidth in size + * The last transfer (4) can be from zero to lineWidth - 1 in size + */ + unsigned int first_fragment_offset = + addrBlockOffset(base_addr, line_width); + unsigned int last_fragment_size = + addrBlockOffset(base_addr + whole_size, line_width); + unsigned int first_fragment_size = + line_width - first_fragment_offset; + + unsigned int middle_fragments_total_size = + whole_size - (first_fragment_size + last_fragment_size); + + assert(addrBlockOffset(middle_fragments_total_size, line_width) == 0); + + unsigned int middle_fragment_count = + middle_fragments_total_size / line_width; + + numFragments = 1 /* first */ + middle_fragment_count + + (last_fragment_size == 0 ? 0 : 1); + + DPRINTFS(MinorMem, (&port), "Dividing transfer into %d fragmentRequests." 
+ " First fragment size: %d Last fragment size: %d\n", + numFragments, first_fragment_size, + (last_fragment_size == 0 ? line_width : last_fragment_size)); + + assert(((middle_fragment_count * line_width) + + first_fragment_size + last_fragment_size) == whole_size); + + fragment_addr = base_addr; + fragment_size = first_fragment_size; + + /* Just past the last address in the request */ + Addr end_addr = base_addr + whole_size; + + for (unsigned int fragment_index = 0; fragment_index < numFragments; + fragment_index++) + { + bool M5_VAR_USED is_last_fragment = false; + + if (fragment_addr == base_addr) { + /* First fragment */ + fragment_size = first_fragment_size; + } else { + if ((fragment_addr + line_width) > end_addr) { + /* Adjust size of last fragment */ + fragment_size = end_addr - fragment_addr; + is_last_fragment = true; + } else { + /* Middle fragments */ + fragment_size = line_width; + } + } + + Request *fragment = new Request(); + + fragment->setThreadContext(request.contextId(), /* thread id */ 0); + fragment->setVirt(0 /* asid */, + fragment_addr, fragment_size, request.getFlags(), + request.masterId(), + request.getPC()); + + DPRINTFS(MinorMem, (&port), "Generating fragment addr: 0x%x size: %d" + " (whole request addr: 0x%x size: %d) %s\n", + fragment_addr, fragment_size, base_addr, whole_size, + (is_last_fragment ? "last fragment" : "")); + + fragment_addr += fragment_size; + + fragmentRequests.push_back(fragment); + } +} + +void +LSQ::SplitDataRequest::makeFragmentPackets() +{ + Addr base_addr = request.getVaddr(); + + DPRINTFS(MinorMem, (&port), "Making packets for request: %s\n", *inst); + + for (unsigned int fragment_index = 0; fragment_index < numFragments; + fragment_index++) + { + Request *fragment = fragmentRequests[fragment_index]; + + DPRINTFS(MinorMem, (&port), "Making packet %d for request: %s" + " (%d, 0x%x)\n", + fragment_index, *inst, + (fragment->hasPaddr() ? "has paddr" : "no paddr"), + (fragment->hasPaddr() ? fragment->getPaddr() : 0)); + + Addr fragment_addr = fragment->getVaddr(); + unsigned int fragment_size = fragment->getSize(); + + uint8_t *request_data = NULL; + + if (!isLoad) { + /* Split data for Packets. 
Will become the property of the + * outgoing Packets */ + request_data = new uint8_t[fragment_size]; + std::memcpy(request_data, data + (fragment_addr - base_addr), + fragment_size); + } + + assert(fragment->hasPaddr()); + + PacketPtr fragment_packet = + makePacketForRequest(*fragment, isLoad, this, request_data); + + fragmentPackets.push_back(fragment_packet); + } + + /* Might as well make the overall/response packet here */ + /* Get the physical address for the whole request/packet from the first + * fragment */ + request.setPaddr(fragmentRequests[0]->getPaddr()); + makePacket(); +} + +void +LSQ::SplitDataRequest::startAddrTranslation() +{ + setState(LSQ::LSQRequest::InTranslation); + + makeFragmentRequests(); + + numInTranslationFragments = 0; + numTranslatedFragments = 0; + + /* @todo, just do these in sequence for now with + * a loop of: + * do { + * sendNextFragmentToTranslation ; translateTiming ; finish + * } while (numTranslatedFragments != numFragments); + */ + + /* Do first translation */ + sendNextFragmentToTranslation(); +} + +PacketPtr +LSQ::SplitDataRequest::getHeadPacket() +{ + assert(numIssuedFragments < numFragments); + + return fragmentPackets[numIssuedFragments]; +} + +void +LSQ::SplitDataRequest::stepToNextPacket() +{ + assert(numIssuedFragments < numFragments); + + numIssuedFragments++; +} + +void +LSQ::SplitDataRequest::retireResponse(PacketPtr response) +{ + assert(numRetiredFragments < numFragments); + + DPRINTFS(MinorMem, (&port), "Retiring fragment addr: 0x%x size: %d" + " offset: 0x%x (retired fragment num: %d) %s\n", + response->req->getVaddr(), response->req->getSize(), + request.getVaddr() - response->req->getVaddr(), + numRetiredFragments, + (fault == NoFault ? "" : fault->name())); + + numRetiredFragments++; + + if (skipped) { + /* Skip because we already knew the request had faulted or been + * skipped */ + DPRINTFS(MinorMem, (&port), "Skipping this fragment\n"); + } else if (response->isError()) { + /* Mark up the error and leave to execute to handle it */ + DPRINTFS(MinorMem, (&port), "Fragment has an error, skipping\n"); + setSkipped(); + packet->copyError(response); + } else { + if (isLoad) { + if (!data) { + /* For a split transfer, a Packet must be constructed + * to contain all returning data. 
This is that packet's + * data */ + data = new uint8_t[request.getSize()]; + } + + /* Populate the portion of the overall response data represented + * by the response fragment */ + std::memcpy( + data + (response->req->getVaddr() - request.getVaddr()), + response->getPtr<uint8_t>(), + response->req->getSize()); + } + } + + /* Complete early if we're skipping are no more in-flight accesses */ + if (skipped && !hasPacketsInMemSystem()) { + DPRINTFS(MinorMem, (&port), "Completed skipped burst\n"); + setState(Complete); + if (packet->needsResponse()) + packet->makeResponse(); + } + + if (numRetiredFragments == numFragments) + setState(Complete); + + if (!skipped && isComplete()) { + DPRINTFS(MinorMem, (&port), "Completed burst %d\n", packet != NULL); + + DPRINTFS(MinorMem, (&port), "Retired packet isRead: %d isWrite: %d" + " needsResponse: %d packetSize: %s requestSize: %s responseSize:" + " %s\n", packet->isRead(), packet->isWrite(), + packet->needsResponse(), packet->getSize(), request.getSize(), + response->getSize()); + + /* A request can become complete by several paths, this is a sanity + * check to make sure the packet's data is created */ + if (!data) { + data = new uint8_t[request.getSize()]; + } + + if (isLoad) { + DPRINTFS(MinorMem, (&port), "Copying read data\n"); + std::memcpy(packet->getPtr<uint8_t>(), data, request.getSize()); + } + packet->makeResponse(); + } + + /* Packets are all deallocated together in ~SplitLSQRequest */ +} + +void +LSQ::SplitDataRequest::sendNextFragmentToTranslation() +{ + unsigned int fragment_index = numTranslatedFragments; + + ThreadContext *thread = port.cpu.getContext( + inst->id.threadId); + + DPRINTFS(MinorMem, (&port), "Submitting DTLB request for fragment: %d\n", + fragment_index); + + port.numAccessesInDTLB++; + numInTranslationFragments++; + + thread->getDTBPtr()->translateTiming( + fragmentRequests[fragment_index], thread, this, (isLoad ? 
+
+bool
+LSQ::StoreBuffer::canInsert() const
+{
+    /* @todo, support store amalgamation */
+    return slots.size() < numSlots;
+}
+
+void
+LSQ::StoreBuffer::deleteRequest(LSQRequestPtr request)
+{
+    auto found = std::find(slots.begin(), slots.end(), request);
+
+    if (found != slots.end()) {
+        DPRINTF(MinorMem, "Deleting request: %s %s %s from StoreBuffer\n",
+            request, *found, *(request->inst));
+        slots.erase(found);
+
+        delete request;
+    }
+}
+
+void
+LSQ::StoreBuffer::insert(LSQRequestPtr request)
+{
+    if (!canInsert()) {
+        warn("%s: store buffer insertion without space to insert from"
+            " inst: %s\n", name(), *(request->inst));
+    }
+
+    DPRINTF(MinorMem, "Pushing store: %s into store buffer\n", request);
+
+    numUnissuedAccesses++;
+
+    if (request->state != LSQRequest::Complete)
+        request->setState(LSQRequest::StoreInStoreBuffer);
+
+    slots.push_back(request);
+
+    /* Let's try and wake up the processor for the next cycle to step
+     * the store buffer */
+    lsq.cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
+}
+
+LSQ::AddrRangeCoverage
+LSQ::StoreBuffer::canForwardDataToLoad(LSQRequestPtr request,
+    unsigned int &found_slot)
+{
+    unsigned int slot_index = slots.size() - 1;
+    auto i = slots.rbegin();
+    AddrRangeCoverage ret = NoAddrRangeCoverage;
+
+    /* Traverse the store buffer in reverse order (most to least recent)
+     * and try to find a slot whose address range overlaps this request */
+    while (ret == NoAddrRangeCoverage && i != slots.rend()) {
+        LSQRequestPtr slot = *i;
+
+        if (slot->packet) {
+            AddrRangeCoverage coverage = slot->containsAddrRangeOf(request);
+
+            if (coverage != NoAddrRangeCoverage) {
+                DPRINTF(MinorMem, "Forwarding: slot: %d result: %s thisAddr:"
+                    " 0x%x thisSize: %d slotAddr: 0x%x slotSize: %d\n",
+                    slot_index, coverage,
+                    request->request.getPaddr(), request->request.getSize(),
+                    slot->request.getPaddr(), slot->request.getSize());
+
+                found_slot = slot_index;
+                ret = coverage;
+            }
+        }
+
+        i++;
+        slot_index--;
+    }
+
+    return ret;
+}
+
+/** Fill the given packet with appropriate data from slot slot_number */
+void
+LSQ::StoreBuffer::forwardStoreData(LSQRequestPtr load,
+    unsigned int slot_number)
+{
+    assert(slot_number < slots.size());
+    assert(load->packet);
+    assert(load->isLoad);
+
+    LSQRequestPtr store = slots[slot_number];
+
+    assert(store->packet);
+    assert(store->containsAddrRangeOf(load) == FullAddrRangeCoverage);
+
+    Addr load_addr = load->request.getPaddr();
+    Addr store_addr = store->request.getPaddr();
+    Addr addr_offset = load_addr - store_addr;
+
+    unsigned int load_size = load->request.getSize();
+
+    DPRINTF(MinorMem, "Forwarding %d bytes for addr: 0x%x from store buffer"
+        " slot: %d addr: 0x%x addressOffset: 0x%x\n",
+        load_size, load_addr, slot_number,
+        store_addr, addr_offset);
+
+    void *load_packet_data = load->packet->getPtr<void>();
+    void *store_packet_data = store->packet->getPtr<uint8_t>() + addr_offset;
+
+    std::memcpy(load_packet_data, store_packet_data, load_size);
+}
+
+void
+LSQ::StoreBuffer::step()
+{
+    DPRINTF(MinorMem, "StoreBuffer step numUnissuedAccesses: %d\n",
+        numUnissuedAccesses);
+
+    if (numUnissuedAccesses != 0 && lsq.state == LSQ::MemoryRunning) {
+        /* Clear all the leading barriers */
+        while (!slots.empty() &&
+            slots.front()->isComplete() && slots.front()->isBarrier())
+        {
+            LSQRequestPtr barrier = slots.front();
+
+            DPRINTF(MinorMem, "Clearing barrier for inst: %s\n",
+                *(barrier->inst));
+
+            numUnissuedAccesses--;
+            lsq.clearMemBarrier(barrier->inst);
+            slots.pop_front();
+
+            delete barrier;
+        }
+
+        auto i = slots.begin();
+        bool issued = true;
+        unsigned int issue_count = 0;
+
+        /* Skip trying if the memory system is busy */
+        if (lsq.state == LSQ::MemoryNeedsRetry)
+            issued = false;
+
+        /* Try to issue all stores in order starting from the head
+         * of the queue.  Responses are allowed to be retired
+         * out of order */
+        while (issued &&
+            issue_count < storeLimitPerCycle &&
+            lsq.canSendToMemorySystem() &&
+            i != slots.end())
+        {
+            LSQRequestPtr request = *i;
+
+            DPRINTF(MinorMem, "Considering request: %s, sentAllPackets: %d"
+                " state: %s\n",
+                *(request->inst), request->sentAllPackets(),
+                request->state);
+
+            if (request->isBarrier() && request->isComplete()) {
+                /* Give up at barriers */
+                issued = false;
+            } else if (!(request->state == LSQRequest::StoreBufferIssuing &&
+                request->sentAllPackets()))
+            {
+                DPRINTF(MinorMem, "Trying to send request: %s to memory"
+                    " system\n", *(request->inst));
+
+                if (lsq.tryToSend(request)) {
+                    /* Barriers are accounted for as they are cleared from
+                     * the queue, not after their transfers are complete */
+                    if (!request->isBarrier())
+                        numUnissuedAccesses--;
+                    issue_count++;
+                } else {
+                    /* Don't step on to the next store buffer entry if this
+                     * one hasn't issued all its packets as the store
+                     * buffer must still enforce ordering */
+                    issued = false;
+                }
+            }
+            i++;
+        }
+    }
+}
+
+void
+LSQ::completeMemBarrierInst(MinorDynInstPtr inst,
+    bool committed)
+{
+    if (committed) {
+        /* Not already sent to the store buffer as a store request? */
+        if (!inst->inStoreBuffer) {
+            /* Insert an entry into the store buffer to tick off barriers
+             * until there are none in flight */
+            storeBuffer.insert(new BarrierDataRequest(*this, inst));
+        }
+    } else {
+        /* Clear the barrier anyway if it wasn't actually committed */
+        clearMemBarrier(inst);
+    }
+}
+
+void
+LSQ::StoreBuffer::minorTrace() const
+{
+    unsigned int size = slots.size();
+    unsigned int i = 0;
+    std::ostringstream os;
+
+    while (i < size) {
+        LSQRequestPtr request = slots[i];
+
+        request->reportData(os);
+
+        i++;
+        if (i < numSlots)
+            os << ',';
+    }
+
+    while (i < numSlots) {
+        os << '-';
+
+        i++;
+        if (i < numSlots)
+            os << ',';
+    }
+
+    MINORTRACE("addr=%s num_unissued_stores=%d\n", os.str(),
+        numUnissuedAccesses);
+}
+
+void
+LSQ::tryToSendToTransfers(LSQRequestPtr request)
+{
+    if (state == MemoryNeedsRetry) {
+        DPRINTF(MinorMem, "Request needs retry, not issuing to"
+            " memory until retry arrives\n");
+        return;
+    }
+
+    if (request->state == LSQRequest::InTranslation) {
+        DPRINTF(MinorMem, "Request still in translation, not issuing to"
+            " memory\n");
+        return;
+    }
+
+    assert(request->state == LSQRequest::Translated ||
+        request->state == LSQRequest::RequestIssuing ||
+        request->state == LSQRequest::Failed ||
+        request->state == LSQRequest::Complete);
+
+    if (requests.empty() || requests.front() != request) {
+        DPRINTF(MinorMem, "Request not at front of requests queue, can't"
+            " issue to memory\n");
+        return;
+    }
+
+    if (transfers.unreservedRemainingSpace() == 0) {
+        DPRINTF(MinorMem, "No space to insert request into transfers"
+            " queue\n");
+        return;
+    }
+
+    if (request->isComplete() || request->state == LSQRequest::Failed) {
+        DPRINTF(MinorMem, "Passing a %s transfer on to transfers"
+            " queue\n", (request->isComplete() ?
"completed" : "failed")); + request->setState(LSQRequest::Complete); + request->setSkipped(); + moveFromRequestsToTransfers(request); + return; + } + + if (!execute.instIsRightStream(request->inst)) { + /* Wrong stream, try to abort the transfer but only do so if + * there are no packets in flight */ + if (request->hasPacketsInMemSystem()) { + DPRINTF(MinorMem, "Request's inst. is from the wrong stream," + " waiting for responses before aborting request\n"); + } else { + DPRINTF(MinorMem, "Request's inst. is from the wrong stream," + " aborting request\n"); + request->setState(LSQRequest::Complete); + request->setSkipped(); + moveFromRequestsToTransfers(request); + } + return; + } + + if (request->fault != NoFault) { + if (request->inst->staticInst->isPrefetch()) { + DPRINTF(MinorMem, "Not signalling fault for faulting prefetch\n"); + } + DPRINTF(MinorMem, "Moving faulting request into the transfers" + " queue\n"); + request->setState(LSQRequest::Complete); + request->setSkipped(); + moveFromRequestsToTransfers(request); + return; + } + + bool is_load = request->isLoad; + bool is_llsc = request->request.isLLSC(); + bool is_swap = request->request.isSwap(); + bool bufferable = !(request->request.isUncacheable() || + is_llsc || is_swap); + + if (is_load) { + if (numStoresInTransfers != 0) { + DPRINTF(MinorMem, "Load request with stores still in transfers" + " queue, stalling\n"); + return; + } + } else { + /* Store. Can it be sent to the store buffer? */ + if (bufferable && !request->request.isMmappedIpr()) { + request->setState(LSQRequest::StoreToStoreBuffer); + moveFromRequestsToTransfers(request); + DPRINTF(MinorMem, "Moving store into transfers queue\n"); + return; + } + } + + /* Check if this is the head instruction (and so must be executable as + * its stream sequence number was checked above) for loads which must + * not be speculatively issued and stores which must be issued here */ + if (!bufferable) { + if (!execute.instIsHeadInst(request->inst)) { + DPRINTF(MinorMem, "Memory access not the head inst., can't be" + " sure it can be performed, not issuing\n"); + return; + } + + unsigned int forwarding_slot = 0; + + if (storeBuffer.canForwardDataToLoad(request, forwarding_slot) != + NoAddrRangeCoverage) + { + DPRINTF(MinorMem, "Memory access can receive forwarded data" + " from the store buffer, need to wait for store buffer to" + " drain\n"); + return; + } + } + + /* True: submit this packet to the transfers queue to be sent to the + * memory system. + * False: skip the memory and push a packet for this request onto + * requests */ + bool do_access = true; + + if (!is_llsc) { + /* Check for match in the store buffer */ + if (is_load) { + unsigned int forwarding_slot = 0; + AddrRangeCoverage forwarding_result = + storeBuffer.canForwardDataToLoad(request, + forwarding_slot); + + switch (forwarding_result) { + case FullAddrRangeCoverage: + /* Forward data from the store buffer into this request and + * repurpose this request's packet into a response packet */ + storeBuffer.forwardStoreData(request, forwarding_slot); + request->packet->makeResponse(); + + /* Just move between queues, no access */ + do_access = false; + break; + case PartialAddrRangeCoverage: + DPRINTF(MinorMem, "Load partly satisfied by store buffer" + " data. 
Must wait for the store to complete\n");
+                return;
+                break;
+            case NoAddrRangeCoverage:
+                DPRINTF(MinorMem, "No forwardable data from store buffer\n");
+                /* Fall through to try access */
+                break;
+            }
+        }
+    } else {
+        if (!canSendToMemorySystem()) {
+            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
+            return;
+        }
+
+        SimpleThread &thread = *cpu.threads[request->inst->id.threadId];
+
+        TheISA::PCState old_pc = thread.pcState();
+        ExecContext context(cpu, thread, execute, request->inst);
+
+        /* Handle LLSC requests and tests */
+        if (is_load) {
+            TheISA::handleLockedRead(&context, &request->request);
+        } else {
+            do_access = TheISA::handleLockedWrite(&context,
+                &request->request, cacheBlockMask);
+
+            if (!do_access) {
+                DPRINTF(MinorMem, "Not performing a memory "
+                    "access for store conditional\n");
+            }
+        }
+        thread.pcState(old_pc);
+    }
+
+    /* See the do_access comment above */
+    if (do_access) {
+        if (!canSendToMemorySystem()) {
+            DPRINTF(MinorMem, "Can't send request to memory system yet\n");
+            return;
+        }
+
+        /* Remember if this is an access which can't be idly
+         * discarded by an interrupt */
+        if (!bufferable) {
+            numAccessesIssuedToMemory++;
+            request->issuedToMemory = true;
+        }
+
+        if (tryToSend(request))
+            moveFromRequestsToTransfers(request);
+    } else {
+        request->setState(LSQRequest::Complete);
+        moveFromRequestsToTransfers(request);
+    }
+}
+
+bool
+LSQ::tryToSend(LSQRequestPtr request)
+{
+    bool ret = false;
+
+    if (!canSendToMemorySystem()) {
+        DPRINTF(MinorMem, "Can't send request: %s yet, no space in memory\n",
+            *(request->inst));
+    } else {
+        PacketPtr packet = request->getHeadPacket();
+
+        DPRINTF(MinorMem, "Trying to send request: %s addr: 0x%x\n",
+            *(request->inst), packet->req->getVaddr());
+
+        /* The sender state of the packet *must* be an LSQRequest
+         * so the response can be correctly handled */
+        assert(packet->findNextSenderState<LSQRequest>());
+
+        if (request->request.isMmappedIpr()) {
+            ThreadContext *thread =
+                cpu.getContext(request->request.threadId());
+
+            if (request->isLoad) {
+                DPRINTF(MinorMem, "IPR read inst: %s\n", *(request->inst));
+                TheISA::handleIprRead(thread, packet);
+            } else {
+                DPRINTF(MinorMem, "IPR write inst: %s\n", *(request->inst));
+                TheISA::handleIprWrite(thread, packet);
+            }
+
+            request->stepToNextPacket();
+            ret = request->sentAllPackets();
+
+            if (!ret) {
+                DPRINTF(MinorMem, "IPR access has another packet: %s\n",
+                    *(request->inst));
+            }
+
+            if (ret)
+                request->setState(LSQRequest::Complete);
+            else
+                request->setState(LSQRequest::RequestIssuing);
+        } else if (dcachePort.sendTimingReq(packet)) {
+            DPRINTF(MinorMem, "Sent data memory request\n");
+
+            numAccessesInMemorySystem++;
+
+            request->stepToNextPacket();
+
+            ret = request->sentAllPackets();
+
+            switch (request->state) {
+            case LSQRequest::Translated:
+            case LSQRequest::RequestIssuing:
+                /* Fully or partially issued a request in the transfers
+                 * queue */
+                request->setState(LSQRequest::RequestIssuing);
+                break;
+            case LSQRequest::StoreInStoreBuffer:
+            case LSQRequest::StoreBufferIssuing:
+                /* Fully or partially issued a request in the store
+                 * buffer */
+                request->setState(LSQRequest::StoreBufferIssuing);
+                break;
+            default:
+                assert(false);
+                break;
+            }
+
+            state = MemoryRunning;
+        } else {
+            DPRINTF(MinorMem,
+                "Sending data memory request - needs retry\n");
+
+            /* Needs to be resent, wait for that */
+            state = MemoryNeedsRetry;
+            retryRequest = request;
+
+            switch (request->state) {
+            case LSQRequest::Translated:
+            case LSQRequest::RequestIssuing:
+                request->setState(LSQRequest::RequestNeedsRetry);
+                break;
+            case LSQRequest::StoreInStoreBuffer:
+            case LSQRequest::StoreBufferIssuing:
+                request->setState(LSQRequest::StoreBufferNeedsRetry);
+                break;
+            default:
+                assert(false);
+                break;
+            }
+        }
+    }
+
+    return ret;
+}
+
+void
+LSQ::moveFromRequestsToTransfers(LSQRequestPtr request)
+{
+    assert(!requests.empty() && requests.front() == request);
+    assert(transfers.unreservedRemainingSpace() != 0);
+
+    /* Need to count the number of stores in the transfers
+     * queue so that loads know when their store buffer forwarding
+     * results will be correct (only when all those stores
+     * have reached the store buffer) */
+    if (!request->isLoad)
+        numStoresInTransfers++;
+
+    requests.pop();
+    transfers.push(request);
+}
+
+bool
+LSQ::canSendToMemorySystem()
+{
+    return state == MemoryRunning &&
+        numAccessesInMemorySystem < inMemorySystemLimit;
+}
+
+bool
+LSQ::recvTimingResp(PacketPtr response)
+{
+    LSQRequestPtr request =
+        safe_cast<LSQRequestPtr>(response->popSenderState());
+
+    DPRINTF(MinorMem, "Received response packet inst: %s"
+        " addr: 0x%x cmd: %s\n",
+        *(request->inst), response->getAddr(),
+        response->cmd.toString());
+
+    numAccessesInMemorySystem--;
+
+    if (response->isError()) {
+        DPRINTF(MinorMem, "Received error response packet: %s\n",
+            *request->inst);
+    }
+
+    switch (request->state) {
+    case LSQRequest::RequestIssuing:
+    case LSQRequest::RequestNeedsRetry:
+        /* Response to a request from the transfers queue */
+        request->retireResponse(response);
+
+        DPRINTF(MinorMem, "Has outstanding packets?: %d %d\n",
+            request->hasPacketsInMemSystem(), request->isComplete());
+
+        break;
+    case LSQRequest::StoreBufferIssuing:
+    case LSQRequest::StoreBufferNeedsRetry:
+        /* Response to a request from the store buffer */
+        request->retireResponse(response);
+
+        /* Remove completed requests unless they are barriers (which will
+         * need to be removed in order) */
+        if (request->isComplete()) {
+            if (!request->isBarrier()) {
+                storeBuffer.deleteRequest(request);
+            } else {
+                DPRINTF(MinorMem, "Completed transfer for barrier: %s"
+                    " leaving the request as it is also a barrier\n",
+                    *(request->inst));
+            }
+        }
+        break;
+    default:
+        /* Shouldn't be allowed to receive a response from another
+         * state */
+        assert(false);
+        break;
+    }
+
+    /* We go to idle even if there are more things in the requests queue
+     * as it's the job of step to actually step us on to the next
+     * transaction */
+
+    /* Let's try and wake up the processor for the next cycle */
+    cpu.wakeupOnEvent(Pipeline::ExecuteStageId);
+
+    /* Never busy */
+    return true;
+}
+
+void
+LSQ::recvRetry()
+{
+    DPRINTF(MinorMem, "Received retry request\n");
+
+    assert(state == MemoryNeedsRetry);
+
+    switch (retryRequest->state) {
+    case LSQRequest::RequestNeedsRetry:
+        /* Retry in the requests queue */
+        retryRequest->setState(LSQRequest::Translated);
+        break;
+    case LSQRequest::StoreBufferNeedsRetry:
+        /* Retry in the store buffer */
+        retryRequest->setState(LSQRequest::StoreInStoreBuffer);
+        break;
+    default:
+        assert(false);
+    }
+
+    /* Set state back to MemoryRunning so that the following
+     * tryToSend can actually send.
Note that this won't + * allow another transfer in as tryToSend should + * issue a memory request and either succeed for this + * request or return the LSQ back to MemoryNeedsRetry */ + state = MemoryRunning; + + /* Try to resend the request */ + if (tryToSend(retryRequest)) { + /* Successfully sent, need to move the request */ + switch (retryRequest->state) { + case LSQRequest::RequestIssuing: + /* In the requests queue */ + moveFromRequestsToTransfers(retryRequest); + break; + case LSQRequest::StoreBufferIssuing: + /* In the store buffer */ + storeBuffer.numUnissuedAccesses--; + break; + default: + assert(false); + break; + } + } + + retryRequest = NULL; +} + +LSQ::LSQ(std::string name_, std::string dcache_port_name_, + MinorCPU &cpu_, Execute &execute_, + unsigned int in_memory_system_limit, unsigned int line_width, + unsigned int requests_queue_size, unsigned int transfers_queue_size, + unsigned int store_buffer_size, + unsigned int store_buffer_cycle_store_limit) : + Named(name_), + cpu(cpu_), + execute(execute_), + dcachePort(dcache_port_name_, *this, cpu_), + lastMemBarrier(0), + state(MemoryRunning), + inMemorySystemLimit(in_memory_system_limit), + lineWidth((line_width == 0 ? cpu.cacheLineSize() : line_width)), + requests(name_ + ".requests", "addr", requests_queue_size), + transfers(name_ + ".transfers", "addr", transfers_queue_size), + storeBuffer(name_ + ".storeBuffer", + *this, store_buffer_size, store_buffer_cycle_store_limit), + numAccessesInMemorySystem(0), + numAccessesInDTLB(0), + numStoresInTransfers(0), + numAccessesIssuedToMemory(0), + retryRequest(NULL), + cacheBlockMask(~(cpu_.cacheLineSize() - 1)) +{ + if (in_memory_system_limit < 1) { + fatal("%s: executeMaxAccessesInMemory must be >= 1 (%d)\n", name_, + in_memory_system_limit); + } + + if (store_buffer_cycle_store_limit < 1) { + fatal("%s: executeLSQMaxStoreBufferStoresPerCycle must be" + " >= 1 (%d)\n", name_, store_buffer_cycle_store_limit); + } + + if (requests_queue_size < 1) { + fatal("%s: executeLSQRequestsQueueSize must be" + " >= 1 (%d)\n", name_, requests_queue_size); + } + + if (transfers_queue_size < 1) { + fatal("%s: executeLSQTransfersQueueSize must be" + " >= 1 (%d)\n", name_, transfers_queue_size); + } + + if (store_buffer_size < 1) { + fatal("%s: executeLSQStoreBufferSize must be" + " >= 1 (%d)\n", name_, store_buffer_size); + } + + if ((lineWidth & (lineWidth - 1)) != 0) { + fatal("%s: lineWidth: %d must be a power of 2\n", name(), lineWidth); + } +} + +LSQ::~LSQ() +{ } + +LSQ::LSQRequest::~LSQRequest() +{ + if (packet) + delete packet; + if (data) + delete [] data; +} + +/** + * Step the memory access mechanism on to its next state. In reality, most + * of the stepping is done by the callbacks on the LSQ but this + * function is responsible for issuing memory requests lodged in the + * requests queue. 
+ */ +void +LSQ::step() +{ + /* Try to move address-translated requests between queues and issue + * them */ + if (!requests.empty()) + tryToSendToTransfers(requests.front()); + + storeBuffer.step(); +} + +LSQ::LSQRequestPtr +LSQ::findResponse(MinorDynInstPtr inst) +{ + LSQ::LSQRequestPtr ret = NULL; + + if (!transfers.empty()) { + LSQRequestPtr request = transfers.front(); + + /* Same instruction and complete access or a store that's + * capable of being moved to the store buffer */ + if (request->inst->id == inst->id) { + if (request->isComplete() || + (request->state == LSQRequest::StoreToStoreBuffer && + storeBuffer.canInsert())) + { + ret = request; + } + } + } + + if (ret) { + DPRINTF(MinorMem, "Found matching memory response for inst: %s\n", + *inst); + } else { + DPRINTF(MinorMem, "No matching memory response for inst: %s\n", + *inst); + } + + return ret; +} + +void +LSQ::popResponse(LSQ::LSQRequestPtr response) +{ + assert(!transfers.empty() && transfers.front() == response); + + transfers.pop(); + + if (!response->isLoad) + numStoresInTransfers--; + + if (response->issuedToMemory) + numAccessesIssuedToMemory--; + + if (response->state != LSQRequest::StoreInStoreBuffer) { + DPRINTF(MinorMem, "Deleting %s request: %s\n", + (response->isLoad ? "load" : "store"), + *(response->inst)); + + delete response; + } +} + +void +LSQ::sendStoreToStoreBuffer(LSQRequestPtr request) +{ + assert(request->state == LSQRequest::StoreToStoreBuffer); + + DPRINTF(MinorMem, "Sending store: %s to store buffer\n", + *(request->inst)); + + request->inst->inStoreBuffer = true; + + storeBuffer.insert(request); +} + +bool +LSQ::isDrained() +{ + return requests.empty() && transfers.empty() && + storeBuffer.isDrained(); +} + +bool +LSQ::needsToTick() +{ + bool ret = false; + + if (canSendToMemorySystem()) { + bool have_translated_requests = !requests.empty() && + requests.front()->state != LSQRequest::InTranslation && + transfers.unreservedRemainingSpace() != 0; + + ret = have_translated_requests || + storeBuffer.numUnissuedStores() != 0; + } + + if (ret) + DPRINTF(Activity, "Need to tick\n"); + + return ret; +} + +void +LSQ::pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data, + unsigned int size, Addr addr, unsigned int flags, uint64_t *res) +{ + bool needs_burst = transferNeedsBurst(addr, size, lineWidth); + LSQRequestPtr request; + + /* Copy given data into the request. The request will pass this to the + * packet and then it will own the data */ + uint8_t *request_data = NULL; + + DPRINTF(MinorMem, "Pushing request (%s) addr: 0x%x size: %d flags:" + " 0x%x%s lineWidth : 0x%x\n", + (isLoad ? "load" : "store"), addr, size, flags, + (needs_burst ? 
" (needs burst)" : ""), lineWidth); + + if (!isLoad) { + /* request_data becomes the property of a ...DataRequest (see below) + * and destroyed by its destructor */ + request_data = new uint8_t[size]; + if (flags & Request::CACHE_BLOCK_ZERO) { + /* For cache zeroing, just use zeroed data */ + std::memset(request_data, 0, size); + } else { + std::memcpy(request_data, data, size); + } + } + + if (needs_burst) { + request = new SplitDataRequest( + *this, inst, isLoad, request_data, res); + } else { + request = new SingleDataRequest( + *this, inst, isLoad, request_data, res); + } + + if (inst->traceData) + inst->traceData->setAddr(addr); + + request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0); + request->request.setVirt(0 /* asid */, + addr, size, flags, cpu.instMasterId(), + /* I've no idea why we need the PC, but give it */ + inst->pc.instAddr()); + + requests.push(request); + request->startAddrTranslation(); +} + +void +LSQ::pushFailedRequest(MinorDynInstPtr inst) +{ + LSQRequestPtr request = new FailedDataRequest(*this, inst); + requests.push(request); +} + +void +LSQ::minorTrace() const +{ + MINORTRACE("state=%s in_tlb_mem=%d/%d stores_in_transfers=%d" + " lastMemBarrier=%d\n", + state, numAccessesInDTLB, numAccessesInMemorySystem, + numStoresInTransfers, lastMemBarrier); + requests.minorTrace(); + transfers.minorTrace(); + storeBuffer.minorTrace(); +} + +LSQ::StoreBuffer::StoreBuffer(std::string name_, LSQ &lsq_, + unsigned int store_buffer_size, + unsigned int store_limit_per_cycle) : + Named(name_), lsq(lsq_), + numSlots(store_buffer_size), + storeLimitPerCycle(store_limit_per_cycle), + slots(), + numUnissuedAccesses(0) +{ +} + +PacketPtr +makePacketForRequest(Request &request, bool isLoad, + Packet::SenderState *sender_state, PacketDataPtr data) +{ + MemCmd command; + + /* Make a ret with the right command type to match the request */ + if (request.isLLSC()) { + command = (isLoad ? MemCmd::LoadLockedReq : MemCmd::StoreCondReq); + } else if (request.isSwap()) { + command = MemCmd::SwapReq; + } else { + command = (isLoad ? MemCmd::ReadReq : MemCmd::WriteReq); + } + + PacketPtr ret = new Packet(&request, command); + + if (sender_state) + ret->pushSenderState(sender_state); + + if (isLoad) + ret->allocate(); + else + ret->dataDynamicArray(data); + + return ret; +} + +void +LSQ::issuedMemBarrierInst(MinorDynInstPtr inst) +{ + assert(inst->isInst() && inst->staticInst->isMemBarrier()); + assert(inst->id.execSeqNum > lastMemBarrier); + + /* Remember the barrier. We only have a notion of one + * barrier so this may result in some mem refs being + * delayed if they are between barriers */ + lastMemBarrier = inst->id.execSeqNum; +} + +void +LSQ::LSQRequest::makePacket() +{ + /* Make the function idempotent */ + if (packet) + return; + + packet = makePacketForRequest(request, isLoad, this, data); + /* Null the ret data so we know not to deallocate it when the + * ret is destroyed. The data now belongs to the ret and + * the ret is responsible for its destruction */ + data = NULL; +} + +std::ostream & +operator <<(std::ostream &os, LSQ::MemoryState state) +{ + switch (state) { + case LSQ::MemoryRunning: + os << "MemoryRunning"; + break; + case LSQ::MemoryNeedsRetry: + os << "MemoryNeedsRetry"; + break; + default: + os << "MemoryState-" << static_cast<int>(state); + break; + } + return os; +} + +void +LSQ::recvTimingSnoopReq(PacketPtr pkt) +{ + /* LLSC operations in Minor can't be speculative and are executed from + * the head of the requests queue. 
We shouldn't need to do more than + * this action on snoops. */ + + /* THREAD */ + TheISA::handleLockedSnoop(cpu.getContext(0), pkt, cacheBlockMask); +} + +} diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh new file mode 100644 index 000000000..0998395e0 --- /dev/null +++ b/src/cpu/minor/lsq.hh @@ -0,0 +1,722 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * A load/store queue that allows outstanding reads and writes. + * + */ + +#ifndef __CPU_MINOR_NEW_LSQ_HH__ +#define __CPU_MINOR_NEW_LSQ_HH__ + +#include "cpu/minor/buffers.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/pipe_data.hh" +#include "cpu/minor/trace.hh" + +namespace Minor +{ + +/* Forward declaration */ +class Execute; + +class LSQ : public Named +{ + protected: + /** My owner(s) */ + MinorCPU &cpu; + Execute &execute; + + protected: + /** State of memory access for head access. */ + enum MemoryState + { + MemoryRunning, /* Default. Step dcache queues when possible. 
*/
+ MemoryNeedsRetry /* Request rejected, will be asked to retry */
+ };
+
+ /** Print MemoryState values as shown in the enum definition */
+ friend std::ostream &operator <<(std::ostream &os,
+ MemoryState state);
+
+ /** Coverage of one address range with another */
+ enum AddrRangeCoverage
+ {
+ PartialAddrRangeCoverage, /* Two ranges partly overlap */
+ FullAddrRangeCoverage, /* One range fully covers another */
+ NoAddrRangeCoverage /* Two ranges are disjoint */
+ };
+
+ /** Exposable data port */
+ class DcachePort : public MinorCPU::MinorCPUPort
+ {
+ protected:
+ /** My owner */
+ LSQ &lsq;
+
+ public:
+ DcachePort(std::string name, LSQ &lsq_, MinorCPU &cpu) :
+ MinorCPU::MinorCPUPort(name, cpu), lsq(lsq_)
+ { }
+
+ protected:
+ bool recvTimingResp(PacketPtr pkt)
+ { return lsq.recvTimingResp(pkt); }
+
+ void recvRetry() { lsq.recvRetry(); }
+
+ void recvTimingSnoopReq(PacketPtr pkt)
+ { return lsq.recvTimingSnoopReq(pkt); }
+ };
+
+ DcachePort dcachePort;
+
+ public:
+ /** Derived SenderState to carry data access info. through address
+ * translation, the queues in this port and back from the memory
+ * system. */
+ class LSQRequest :
+ public BaseTLB::Translation, /* For TLB lookups */
+ public Packet::SenderState /* For packing into a Packet */
+ {
+ public:
+ /** Owning port */
+ LSQ &port;
+
+ /** Instruction which made this request */
+ MinorDynInstPtr inst;
+
+ /** Load/store indication used for building packet. This isn't
+ * carried by Request so we need to keep it here */
+ bool isLoad;
+
+ /** Dynamically allocated and populated data carried for
+ * building write packets */
+ PacketDataPtr data;
+
+ /* Requests carry packets on their way to the memory system.
+ * When a Packet returns from the memory system, its
+ * request needs to have its packet updated as this
+ * may have changed in flight */
+ PacketPtr packet;
+
+ /** The underlying request of this LSQRequest */
+ Request request;
+
+ /** Fault generated performing this request */
+ Fault fault;
+
+ /** Res from pushRequest */
+ uint64_t *res;
+
+ /** Was skipped. Set to indicate any reason (faulted, bad
+ * stream sequence number, in a fault shadow) that this
+ * request did not perform a memory transfer */
+ bool skipped;
+
+ /** This is an access other than a normal cacheable load
+ * that's visited the memory system */
+ bool issuedToMemory;
+
+ enum LSQRequestState
+ {
+ NotIssued, /* Newly created */
+ InTranslation, /* TLB accessed, no reply yet */
+ Translated, /* Finished address translation */
+ Failed, /* The starting state of FailedDataRequests */
+ RequestIssuing, /* Load/store issued to memory in the requests
+ queue */
+ StoreToStoreBuffer, /* Store in transfers on its way to the
+ store buffer */
+ RequestNeedsRetry, /* Retry needed for load */
+ StoreInStoreBuffer, /* Store in the store buffer, before issuing
+ a memory transfer */
+ StoreBufferIssuing, /* Store in store buffer and has been
+ issued */
+ StoreBufferNeedsRetry, /* Retry needed for store */
+ /* All completed states. Includes
+ completed loads, TLB faults and skipped requests whose
+ seqNum's no longer match */
+ Complete
+ };
+
+ LSQRequestState state;
+
+ protected:
+ /** BaseTLB::Translation interface */
+ void markDelayed() { }
+
+ public:
+ LSQRequest(LSQ &port_, MinorDynInstPtr inst_, bool isLoad_,
+ PacketDataPtr data_ = NULL, uint64_t *res_ = NULL);
+
+ virtual ~LSQRequest();
+
+ public:
+ /** Make a packet to use with the memory transaction */
+ void makePacket();
+
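An uncontended cacheable load moves through the states above roughly as NotIssued -> InTranslation -> Translated -> RequestIssuing -> Complete. As a reading aid, here is a hedged sketch of a terminal-state test consistent with the enum's comments (the model's actual isComplete(), declared further below, may differ in detail):

    /* Complete covers successful loads, TLB faults and skipped
     * requests, per the enum comment above */
    static bool
    isTerminalState(LSQ::LSQRequest::LSQRequestState state)
    {
        return state == LSQ::LSQRequest::Complete;
    }
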
+ /** Was no memory access attempted for this request? */
+ bool skippedMemAccess() { return skipped; }
+
+ /** Set this request as having been skipped before a memory
+ * transfer was attempted */
+ void setSkipped() { skipped = true; }
+
+ /** Does address range req1 (req1_addr to req1_addr + req1_size - 1)
+ * fully cover, partially cover or not cover at all the range req2 */
+ static AddrRangeCoverage containsAddrRangeOf(
+ Addr req1_addr, unsigned int req1_size,
+ Addr req2_addr, unsigned int req2_size);
+
+ /** Does this request's address range fully cover the range
+ * of other_request? */
+ AddrRangeCoverage containsAddrRangeOf(LSQRequest *other_request);
+
+ /** Start the address translation process for this request. This
+ * will issue a translation request to the TLB. */
+ virtual void startAddrTranslation() = 0;
+
+ /** Get the next packet to issue for this request. For split
+ * transfers, it will be necessary to step through the available
+ * packets by calling do { getHeadPacket ; stepToNextPacket } while
+ * (!sentAllPackets) and by retiring responses using retireResponse */
+ virtual PacketPtr getHeadPacket() = 0;
+
+ /** Step to the next packet for the next call to getHeadPacket */
+ virtual void stepToNextPacket() = 0;
+
+ /** Have all packets been sent? */
+ virtual bool sentAllPackets() = 0;
+
+ /** True if this request has any issued packets in the memory
+ * system and so can't be interrupted until it gets responses */
+ virtual bool hasPacketsInMemSystem() = 0;
+
+ /** Retire a response packet into the LSQRequest packet possibly
+ * completing this transfer */
+ virtual void retireResponse(PacketPtr packet_) = 0;
+
+ /** Is this request a barrier? */
+ virtual bool isBarrier();
+
+ /** This request, once processed by the requests/transfers
+ * queues, will need to go to the store buffer */
+ bool needsToBeSentToStoreBuffer();
+
+ /** Set state and output trace output */
+ void setState(LSQRequestState new_state);
+
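The containsAddrRangeOf test above is fully determined by the two ranges' bounds. A minimal standalone sketch of the comparison the comment describes (illustrative only, not the class's actual body; a local enum is used since AddrRangeCoverage is a class member):

    enum Coverage { Full, Partial, None };

    static Coverage
    coverageOf(Addr req1_addr, unsigned int req1_size,
        Addr req2_addr, unsigned int req2_size)
    {
        Addr req1_end = req1_addr + req1_size; /* one past the last byte */
        Addr req2_end = req2_addr + req2_size;

        if (req1_addr <= req2_addr && req2_end <= req1_end)
            return Full;    /* req1 wholly contains req2 */
        else if (req1_addr < req2_end && req2_addr < req1_end)
            return Partial; /* the ranges overlap somewhere */
        else
            return None;    /* the ranges are disjoint */
    }
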
+ /** Has this request been completed? This includes *all* reasons
+ * for completion: successful transfers, faults, skipped because
+ * of preceding faults */
+ bool isComplete() const;
+
+ /** MinorTrace report interface */
+ void reportData(std::ostream &os) const;
+ };
+
+ typedef LSQRequest *LSQRequestPtr;
+
+ friend std::ostream & operator <<(std::ostream &os,
+ AddrRangeCoverage state);
+
+ friend std::ostream & operator <<(std::ostream &os,
+ LSQRequest::LSQRequestState state);
+
+ protected:
+ /** Special request types that don't actually issue memory requests */
+ class SpecialDataRequest : public LSQRequest
+ {
+ protected:
+ /** TLB interface */
+ void finish(Fault fault_, RequestPtr request_, ThreadContext *tc,
+ BaseTLB::Mode mode)
+ { }
+
+ public:
+ /** No address translation is needed for a special request */
+ void startAddrTranslation() { }
+
+ /** Should never be called; special requests carry no packets */
+ PacketPtr getHeadPacket()
+ { fatal("No packets in a SpecialDataRequest"); }
+
+ /** Nothing to step through */
+ void stepToNextPacket() { }
+
+ /** Has no packets to send */
+ bool sentAllPackets() { return true; }
+
+ /** Never sends any requests */
+ bool hasPacketsInMemSystem() { return false; }
+
+ /** Discard any response; no packets are ever issued */
+ void retireResponse(PacketPtr packet_) { }
+
+ public:
+ SpecialDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
+ /* Say this is a load, not actually relevant */
+ LSQRequest(port_, inst_, true, NULL, 0)
+ { }
+ };
+
+ /** FailedDataRequest represents requests from instructions that
+ * failed their predicates but need to ride the requests/transfers
+ * queues to maintain trace ordering */
+ class FailedDataRequest : public SpecialDataRequest
+ {
+ public:
+ FailedDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
+ SpecialDataRequest(port_, inst_)
+ { state = Failed; }
+ };
+
+ /** Request for doing barrier accounting in the store buffer. Not
+ * for use outside that unit */
+ class BarrierDataRequest : public SpecialDataRequest
+ {
+ public:
+ bool isBarrier() { return true; }
+
+ public:
+ BarrierDataRequest(LSQ &port_, MinorDynInstPtr inst_) :
+ SpecialDataRequest(port_, inst_)
+ { state = Complete; }
+ };
+
+ /** SingleDataRequest is used for requests that don't fragment */
+ class SingleDataRequest : public LSQRequest
+ {
+ protected:
+ /** TLB interface */
+ void finish(Fault fault_, RequestPtr request_, ThreadContext *tc,
+ BaseTLB::Mode mode);
+
+ /** Has my only packet been sent to the memory system but has not
+ * yet been responded to */
+ bool packetInFlight;
+
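packetInFlight and the packetSent flag declared just below separate "currently in the memory system" from "has ever been sent". A hedged sketch of the intended relationship (illustrative, not model code):

    /* stepToNextPacket() sets both flags; retireResponse() lets
     * packetInFlight fall back to false while packetSent stays true,
     * so sentAllPackets() keeps holding. In flight implies sent: */
    bool flagsConsistent() const { return !packetInFlight || packetSent; }
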
+ /** Has the packet been at least sent to the memory system? */
+ bool packetSent;
+
+ public:
+ /** Send single translation request */
+ void startAddrTranslation();
+
+ /** Get the single packet for this request */
+ PacketPtr getHeadPacket() { return packet; }
+
+ /** Remember that the packet has been sent */
+ void stepToNextPacket() { packetInFlight = true; packetSent = true; }
+
+ /** Has packet been sent */
+ bool hasPacketsInMemSystem() { return packetInFlight; }
+
+ /** packetInFlight can become false again, so need to check
+ * packetSent */
+ bool sentAllPackets() { return packetSent; }
+
+ /** Keep the given packet as the response packet
+ * LSQRequest::packet */
+ void retireResponse(PacketPtr packet_);
+
+ public:
+ SingleDataRequest(LSQ &port_, MinorDynInstPtr inst_,
+ bool isLoad_, PacketDataPtr data_ = NULL, uint64_t *res_ = NULL) :
+ LSQRequest(port_, inst_, isLoad_, data_, res_),
+ packetInFlight(false),
+ packetSent(false)
+ { }
+ };
+
+ class SplitDataRequest : public LSQRequest
+ {
+ protected:
+ /** Event to step between translations */
+ class TranslationEvent : public Event
+ {
+ protected:
+ SplitDataRequest &owner;
+
+ public:
+ TranslationEvent(SplitDataRequest &owner_)
+ : owner(owner_) { }
+
+ void process()
+ { owner.sendNextFragmentToTranslation(); }
+ };
+
+ TranslationEvent translationEvent;
+ protected:
+ /** Number of fragments this request is split into */
+ unsigned int numFragments;
+
+ /** Number of fragments in the address translation mechanism */
+ unsigned int numInTranslationFragments;
+
+ /** Number of fragments that have completed address translation,
+ * (numTranslatedFragments + numInTranslationFragments) <=
+ * numFragments. When numTranslatedFragments == numFragments,
+ * translation is complete */
+ unsigned int numTranslatedFragments;
+
+ /** Number of fragments already issued (<= numFragments) */
+ unsigned int numIssuedFragments;
+
+ /** Number of fragments retired back to this request */
+ unsigned int numRetiredFragments;
+
+ /** Fragment Requests corresponding to the address ranges of
+ * each fragment */
+ std::vector<Request *> fragmentRequests;
+
+ /** Packets matching fragmentRequests to issue fragments to memory */
+ std::vector<Packet *> fragmentPackets;
+
+ protected:
+ /** TLB response interface */
+ void finish(Fault fault_, RequestPtr request_, ThreadContext *tc,
+ BaseTLB::Mode mode);
+
+ public:
+ SplitDataRequest(LSQ &port_, MinorDynInstPtr inst_,
+ bool isLoad_, PacketDataPtr data_ = NULL,
+ uint64_t *res_ = NULL);
+
+ ~SplitDataRequest();
+
+ public:
+ /** Make all the Requests for this transfer's fragments so that those
+ * requests can be sent for address translation */
+ void makeFragmentRequests();
+
+ /** Make the packets to go with the requests so they can be sent to
+ * the memory system */
+ void makeFragmentPackets();
+
+ /** Start a loop of do { sendNextFragmentToTranslation ;
+ * translateTiming ; finish } while (numTranslatedFragments !=
+ * numFragments) to complete all this request's fragments' address
+ * translations */
+ void startAddrTranslation();
+
+ /** Get the head packet as counted by numIssuedFragments */
+ PacketPtr getHeadPacket();
+
+ /** Step on numIssuedFragments */
+ void stepToNextPacket();
+
+ bool hasPacketsInMemSystem()
+ { return numIssuedFragments != numRetiredFragments; }
+
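The getHeadPacket/stepToNextPacket/sentAllPackets protocol documented on LSQRequest drives single and split requests alike. A hedged sketch of a sender's loop, with trySend standing in for whatever actually posts a packet to the port (a hypothetical helper, not part of this class):

    /* Issue all remaining packets of a (possibly split) request */
    while (!request->sentAllPackets()) {
        PacketPtr pkt = request->getHeadPacket();
        if (!trySend(pkt))  /* hypothetical: attempt the port send */
            break;          /* blocked; resume on the next retry */
        request->stepToNextPacket();
    }
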
+ /** Have we stepped past the end of fragmentPackets? */
+ bool sentAllPackets() { return numIssuedFragments == numFragments; }
+
+ /** For loads, paste the response data into the main
+ * response packet */
+ void retireResponse(PacketPtr packet_);
+
+ /** Part of the address translation loop, see startAddrTranslation */
+ void sendNextFragmentToTranslation();
+ };
+
+ /** Store buffer. This contains stores which have been committed
+ * but whose memory transfers have not yet been issued. Load data
+ * can be forwarded out of the store buffer */
+ class StoreBuffer : public Named
+ {
+ public:
+ /** My owner */
+ LSQ &lsq;
+
+ /** Number of slots; this bounds the size of slots */
+ const unsigned int numSlots;
+
+ /** Maximum number of stores that can be issued per cycle */
+ const unsigned int storeLimitPerCycle;
+
+ public:
+ /** Queue of store requests on their way to memory */
+ std::deque<LSQRequestPtr> slots;
+
+ /** Number of occupied slots which have not yet issued a
+ * memory access */
+ unsigned int numUnissuedAccesses;
+
+ public:
+ StoreBuffer(std::string name_, LSQ &lsq_,
+ unsigned int store_buffer_size,
+ unsigned int store_limit_per_cycle);
+
+ public:
+ /** Can a new request be inserted into the queue? */
+ bool canInsert() const;
+
+ /** Delete the given request and free the slot it occupied */
+ void deleteRequest(LSQRequestPtr request);
+
+ /** Insert a request at the back of the queue */
+ void insert(LSQRequestPtr request);
+
+ /** Look for a store which satisfies the given load. Returns an
+ * indication whether the forwarding request can be wholly,
+ * partly or not at all satisfied. If the request can be
+ * wholly satisfied, the store buffer slot number which can be used
+ * is returned in found_slot */
+ AddrRangeCoverage canForwardDataToLoad(LSQRequestPtr request,
+ unsigned int &found_slot);
+
+ /** Fill the given packet with appropriate data from slot
+ * slot_number */
+ void forwardStoreData(LSQRequestPtr load, unsigned int slot_number);
+
+ /** Number of stores in the store buffer which have not been
+ * completely issued to the memory system */
+ unsigned int numUnissuedStores() { return numUnissuedAccesses; }
+
+ /** Drained if there is absolutely nothing left in the buffer */
+ bool isDrained() const { return slots.empty(); }
+
+ /** Try to issue more stores to memory */
+ void step();
+
+ /** Report queue contents for MinorTrace */
+ void minorTrace() const;
+ };
+
+ protected:
+ /** Most recent execSeqNum of a memory barrier instruction or
+ * 0 if there are no in-flight barriers. Useful as a
+ * dependency for early-issued memory operations */
+ InstSeqNum lastMemBarrier;
+
+ public:
+ /** Retry state of last issued memory transfer */
+ MemoryState state;
+
+ /** Maximum number of in-flight accesses issued to the memory system */
+ const unsigned int inMemorySystemLimit;
+
+ /** Memory system access width (and snap) in bytes */
+ const unsigned int lineWidth;
+
+ public:
+ /** The LSQ consists of three queues: requests, transfers and the
+ * store buffer storeBuffer. */
+
+ typedef Queue<LSQRequestPtr,
+ ReportTraitsPtrAdaptor<LSQRequestPtr>,
+ NoBubbleTraits<LSQRequestPtr> >
+ LSQQueue;
+
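Before a load leaves for memory, the StoreBuffer above is consulted through canForwardDataToLoad/forwardStoreData. A hedged usage sketch of that check (illustrative; the real caller sits in the lsq.cc issue path):

    /* Try to satisfy a load from buffered stores before issuing it */
    unsigned int slot;
    switch (storeBuffer.canForwardDataToLoad(load, slot)) {
      case FullAddrRangeCoverage:
        storeBuffer.forwardStoreData(load, slot); /* no memory access */
        break;
      case PartialAddrRangeCoverage:
        /* only some of the load's bytes are buffered; can't forward */
        break;
      case NoAddrRangeCoverage:
        /* no overlapping store; issue the load to memory as normal */
        break;
    }
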
+ /** requests contains LSQRequests which have been issued to the TLB by
+ * calling ExecContext::readMem/writeMem (which in turn calls
+ * LSQ::pushRequest and LSQRequest::startAddrTranslation). Once they
+ * have a physical address, requests at the head of requests can be
+ * issued to the memory system. At this stage, it cannot yet be certain
+ * that memory accesses *must* happen (that there are no preceding faults
+ * or changes of flow of control) and so only cacheable reads are issued
+ * to memory.
+ * Cacheable stores are not issued at all (and just pass through
+ * 'transfers' in order) and all other transfers are stalled in requests
+ * until their corresponding instructions are at the head of the
+ * inMemInsts instruction queue and have the right streamSeqNum. */
+ LSQQueue requests;
+
+ /** Once issued to memory (or, for stores, just had their
+ * state changed to StoreToStoreBuffer) LSQRequests pass through
+ * transfers waiting for memory responses. At the head of transfers,
+ * Execute::commitInst can pick up the memory response for a request
+ * using LSQ::findResponse. Responses to be committed can then
+ * have ExecContext::completeAcc on them. Stores can then be pushed
+ * into the store buffer. All other transfers will then be complete. */
+ LSQQueue transfers;
+
+ /* The store buffer contains committed cacheable stores on
+ * their way to memory decoupled from subsequent instruction execution.
+ * Before trying to issue a cacheable read from 'requests' to memory,
+ * the store buffer is checked to see if a previous store contains the
+ * needed data (StoreBuffer::canForwardDataToLoad) which can be
+ * forwarded in lieu of a memory access. If there are outstanding
+ * stores in the transfers queue, they must be promoted to the store
+ * buffer (and so be committed) before they can be correctly checked
+ * for forwarding. */
+ StoreBuffer storeBuffer;
+
+ protected:
+ /** Count of the number of mem. accesses which have left the
+ * requests queue and are in the 'wild' in the memory system. */
+ unsigned int numAccessesInMemorySystem;
+
+ /** Number of requests in the DTLB in the requests queue */
+ unsigned int numAccessesInDTLB;
+
+ /** The number of stores in the transfers queue. Useful when
+ * testing if the store buffer contains all the forwardable stores */
+ unsigned int numStoresInTransfers;
+
+ /** The number of accesses which have been issued to the memory
+ * system but have not been committed/discarded *excluding*
+ * cacheable normal loads which don't need to be tracked */
+ unsigned int numAccessesIssuedToMemory;
+
+ /** The request (from either requests or the store buffer) which is
+ * currently waiting to have its memory access retried */
+ LSQRequestPtr retryRequest;
+
+ /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
+ Addr cacheBlockMask;
+
+ protected:
+ /** Try and issue a memory access for a translated request at the
+ * head of the requests queue. Also tries to move the request
+ * between queues */
+ void tryToSendToTransfers(LSQRequestPtr request);
+
+ /** Try to send (or resend) a memory request's next/only packet to
+ * the memory system. Returns true if the request was successfully
+ * sent to memory (and was also the last packet in a transfer) */
+ bool tryToSend(LSQRequestPtr request);
+
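Seen from the commit side, transfers is drained with the findResponse/popResponse pair declared further below. One plausible ordering, sketched here under the assumption that the response handling itself (e.g. ExecContext::completeAcc) happens in Execute; the actual Execute code may differ:

    LSQ::LSQRequestPtr response = lsq.findResponse(inst);
    if (response) {
        /* act on the response here, e.g. via ExecContext::completeAcc */
        if (response->needsToBeSentToStoreBuffer() &&
            lsq.canPushIntoStoreBuffer())
        {
            lsq.sendStoreToStoreBuffer(response); /* commit the store */
        }
        lsq.popResponse(response); /* remove it from transfers */
    }
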
+ /** Clear a barrier (if it's the last one marked up in lastMemBarrier) */
+ void clearMemBarrier(MinorDynInstPtr inst);
+
+ /** Move a request between queues */
+ void moveFromRequestsToTransfers(LSQRequestPtr request);
+
+ /** Can a request be sent to the memory system */
+ bool canSendToMemorySystem();
+
+ public:
+ LSQ(std::string name_, std::string dcache_port_name_,
+ MinorCPU &cpu_, Execute &execute_,
+ unsigned int max_accesses_in_memory_system, unsigned int line_width,
+ unsigned int requests_queue_size, unsigned int transfers_queue_size,
+ unsigned int store_buffer_size,
+ unsigned int store_buffer_cycle_store_limit);
+
+ virtual ~LSQ();
+
+ public:
+ /** Step checks the queues to see if there are issuable transfers
+ * which were not otherwise picked up by tests at the end of other
+ * events.
+ *
+ * Steppable actions include deferred actions which couldn't be
+ * cascaded on the end of a memory response/TLB response event
+ * because of resource congestion. */
+ void step();
+
+ /** Is there space in the request queue to be able to push a request by
+ * issuing an isMemRef instruction */
+ bool canRequest() { return requests.unreservedRemainingSpace() != 0; }
+
+ /** Returns a response if it's at the head of the transfers queue and
+ * it's either complete or can be sent on to the store buffer. After
+ * calling, the request still remains on the transfer queue until
+ * popResponse is called */
+ LSQRequestPtr findResponse(MinorDynInstPtr inst);
+
+ /** Sanity check and pop the head response */
+ void popResponse(LSQRequestPtr response);
+
+ /** Must check this before trying to insert into the store buffer */
+ bool canPushIntoStoreBuffer() const { return storeBuffer.canInsert(); }
+
+ /** A store has been committed, please move it to the store buffer */
+ void sendStoreToStoreBuffer(LSQRequestPtr request);
+
+ /** Are there any accesses other than normal cached loads in the
+ * memory system or having received responses which need to be
+ * handled for their instructions to be completed */
+ bool accessesInFlight() const
+ { return numAccessesIssuedToMemory != 0; }
+
+ /** A memory barrier instruction has been issued, remember its
+ * execSeqNum so that we can avoid issuing memory ops until it is
+ * committed */
+ void issuedMemBarrierInst(MinorDynInstPtr inst);
+
+ /** Get the execSeqNum of the last issued memory barrier */
+ InstSeqNum getLastMemBarrier() const { return lastMemBarrier; }
+
+ /** Is there nothing left in the LSQ */
+ bool isDrained();
+
+ /** May need to be ticked next cycle as one of the queues contains
+ * an actionable transfer or address translation */
+ bool needsToTick();
+
+ /** Complete a barrier instruction. Where committed, makes a
+ * BarrierDataRequest and pushes it into the store buffer */
+ void completeMemBarrierInst(MinorDynInstPtr inst,
+ bool committed);
+
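lastMemBarrier gives later memory operations something to order against. A hedged sketch of the kind of test an issuer could build from getLastMemBarrier (a hypothetical helper, not part of this class):

    /* A memory op younger than an uncommitted barrier must wait */
    bool
    blockedByBarrier(const LSQ &lsq, MinorDynInstPtr inst)
    {
        InstSeqNum barrier = lsq.getLastMemBarrier();
        return barrier != 0 && inst->id.execSeqNum > barrier;
    }
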
+ /** Single interface for readMem/writeMem to issue requests into
+ * the LSQ */
+ void pushRequest(MinorDynInstPtr inst, bool isLoad, uint8_t *data,
+ unsigned int size, Addr addr, unsigned int flags, uint64_t *res);
+
+ /** Push a request representing a failed predicate into the queues just
+ * to maintain commit order */
+ void pushFailedRequest(MinorDynInstPtr inst);
+
+ /** Memory interface */
+ bool recvTimingResp(PacketPtr pkt);
+ void recvRetry();
+ void recvTimingSnoopReq(PacketPtr pkt);
+
+ /** Return the raw-bindable port */
+ MinorCPU::MinorCPUPort &getDcachePort() { return dcachePort; }
+
+ void minorTrace() const;
+};
+
+/** Make a suitable packet for the given request. If the request is a store,
+ * data will be the payload data. If sender_state is NULL, it won't be
+ * pushed into the packet as senderState */
+PacketPtr makePacketForRequest(Request &request, bool isLoad,
+ Packet::SenderState *sender_state = NULL, PacketDataPtr data = NULL);
+}
+
+#endif /* __CPU_MINOR_NEW_LSQ_HH__ */ diff --git a/src/cpu/minor/pipe_data.cc b/src/cpu/minor/pipe_data.cc new file mode 100644 index 000000000..447f9c0e7 --- /dev/null +++ b/src/cpu/minor/pipe_data.cc @@ -0,0 +1,294 @@ +/*
+ * Copyright (c) 2013-2014 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +#include "cpu/minor/pipe_data.hh" + +namespace Minor +{ + +std::ostream & +operator <<(std::ostream &os, BranchData::Reason reason) +{ + switch (reason) + { + case BranchData::NoBranch: + os << "NoBranch"; + break; + case BranchData::UnpredictedBranch: + os << "UnpredictedBranch"; + break; + case BranchData::BranchPrediction: + os << "BranchPrediction"; + break; + case BranchData::CorrectlyPredictedBranch: + os << "CorrectlyPredictedBranch"; + break; + case BranchData::BadlyPredictedBranch: + os << "BadlyPredictedBranch"; + break; + case BranchData::BadlyPredictedBranchTarget: + os << "BadlyPredictedBranchTarget"; + break; + case BranchData::Interrupt: + os << "Interrupt"; + break; + case BranchData::SuspendThread: + os << "SuspendThread"; + break; + case BranchData::WakeupFetch: + os << "WakeupFetch"; + break; + case BranchData::HaltFetch: + os << "HaltFetch"; + break; + } + + return os; +} + +bool +BranchData::isStreamChange(const BranchData::Reason reason) +{ + bool ret = false; + + switch (reason) + { + /* No change of stream (see the enum comment in pipe_data.hh) */ + case NoBranch: + case CorrectlyPredictedBranch: + ret = false; + break; + + /* Change of stream (Fetch1 should act on) */ + case UnpredictedBranch: + case BranchPrediction: + case BadlyPredictedBranchTarget: + case BadlyPredictedBranch: + case SuspendThread: + case Interrupt: + case WakeupFetch: + case HaltFetch: + ret = true; + break; + } + + return ret; +} + +bool +BranchData::isBranch(const BranchData::Reason reason) +{ + bool ret = false; + + switch (reason) + { + /* No change of stream (see the enum comment in pipe_data.hh) */ + case NoBranch: + case CorrectlyPredictedBranch: + case SuspendThread: + case Interrupt: + case WakeupFetch: + case HaltFetch: + ret = false; + break; + + /* Change of stream (Fetch1 should act on) */ + case UnpredictedBranch: + case BranchPrediction: + case BadlyPredictedBranchTarget: + case BadlyPredictedBranch: + ret = true; + break; + } + + return ret; +} + +void +BranchData::reportData(std::ostream &os) const +{ + if (isBubble()) { + os << '-'; + } else { + os << reason + << ';' << newStreamSeqNum << '.' << newPredictionSeqNum + << ";0x" << std::hex << target.instAddr() << std::dec + << ';'; + inst->reportData(os); + } +} + +std::ostream & +operator <<(std::ostream &os, const BranchData &branch) +{ + os << branch.reason << " target: 0x" + << std::hex << branch.target.instAddr() << std::dec + << ' ' << *branch.inst + << ' ' << branch.newStreamSeqNum << "(stream)." 
+ << branch.newPredictionSeqNum << "(pred)"; + + return os; +} + +void +ForwardLineData::setFault(Fault fault_) +{ + fault = fault_; + if (isFault()) + bubbleFlag = false; +} + +void +ForwardLineData::allocateLine(unsigned int width_) +{ + lineWidth = width_; + bubbleFlag = false; + + assert(!isFault()); + assert(!line); + + line = new uint8_t[width_]; +} + +void +ForwardLineData::adoptPacketData(Packet *packet) +{ + this->packet = packet; + lineWidth = packet->req->getSize(); + bubbleFlag = false; + + assert(!isFault()); + assert(!line); + + line = packet->getPtr<uint8_t>(); +} + +void +ForwardLineData::freeLine() +{ + /* Only free lines in non-faulting, non-bubble lines */ + if (!isFault() && !isBubble()) { + assert(line); + /* If packet is not NULL then the line must belong to the packet so + * we don't need to separately deallocate the line */ + if (packet) { + delete packet; + } else { + delete [] line; + } + line = NULL; + bubbleFlag = true; + } +} + +void +ForwardLineData::reportData(std::ostream &os) const +{ + if (isBubble()) + os << '-'; + else if (fault != NoFault) + os << "F;" << id; + else + os << id; +} + +ForwardInstData::ForwardInstData(unsigned int width) : + numInsts(width) +{ + bubbleFill(); +} + +ForwardInstData::ForwardInstData(const ForwardInstData &src) +{ + *this = src; +} + +ForwardInstData & +ForwardInstData::operator =(const ForwardInstData &src) +{ + numInsts = src.numInsts; + + for (unsigned int i = 0; i < src.numInsts; i++) + insts[i] = src.insts[i]; + + return *this; +} + +bool +ForwardInstData::isBubble() const +{ + return numInsts == 0 || insts[0]->isBubble(); +} + +void +ForwardInstData::bubbleFill() +{ + for (unsigned int i = 0; i < numInsts; i++) + insts[i] = MinorDynInst::bubble(); +} + +void +ForwardInstData::resize(unsigned int width) +{ + assert(width < MAX_FORWARD_INSTS); + numInsts = width; + + bubbleFill(); +} + +void +ForwardInstData::reportData(std::ostream &os) const +{ + if (isBubble()) { + os << '-'; + } else { + unsigned int i = 0; + + os << '('; + while (i != numInsts) { + insts[i]->reportData(os); + i++; + if (i != numInsts) + os << ','; + } + os << ')'; + } +} + +} diff --git a/src/cpu/minor/pipe_data.hh b/src/cpu/minor/pipe_data.hh new file mode 100644 index 000000000..4468cb89e --- /dev/null +++ b/src/cpu/minor/pipe_data.hh @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Bardsley
+ */
+
+/**
+ * @file
+ *
+ * Contains class definitions for data flowing between pipeline stages in
+ * the top-level structure portion of this model. Latch types are also
+ * defined which pair forward/backward flowing data specific to each stage
+ * pair.
+ *
+ * No post-configuration inter-stage communication should *ever* take place
+ * outside these classes (except for reservation!)
+ */
+
+#ifndef __CPU_MINOR_PIPE_DATA_HH__
+#define __CPU_MINOR_PIPE_DATA_HH__
+
+#include "cpu/minor/buffers.hh"
+#include "cpu/minor/dyn_inst.hh"
+#include "cpu/base.hh"
+
+namespace Minor
+{
+
+/** Forward data between Execute and Fetch1 carrying change-of-address/stream
+ * information. */
+class BranchData /* : public ReportIF, public BubbleIF */
+{
+ public:
+ enum Reason
+ {
+ /* *** No change of stream (information to branch prediction) */
+
+ /* Don't branch at all (bubble) */
+ NoBranch,
+ /* Don't branch, but here are the details of a correct prediction
+ * that was executed */
+ CorrectlyPredictedBranch,
+
+ /* *** Change of stream */
+
+ /* Take an unpredicted branch */
+ UnpredictedBranch,
+ /* Take a branch on branch prediction data (from Fetch2) */
+ BranchPrediction,
+ /* Prediction of wrong target PC */
+ BadlyPredictedBranchTarget,
+ /* Bad branch prediction (didn't actually branch). Need to branch
+ * back to correct stream. If the target is wrong, use
+ * BadlyPredictedBranchTarget */
+ BadlyPredictedBranch,
+ /* Suspend fetching for this thread (inst->id.threadId). 
+ * This will be woken up by another stream changing branch so
+ * count it as stream changing itself and expect pc to be the PC
+ * of the next instruction */
+ SuspendThread,
+ /* Wakeup fetching from Halted */
+ WakeupFetch,
+ /* Branch from an interrupt (no instruction) */
+ Interrupt,
+ /* Stop fetching in anticipation of draining */
+ HaltFetch
+ };
+
+ /** Is a request with this reason actually a request to change the
+ * PC rather than a bubble or branch prediction information */
+ static bool isStreamChange(const BranchData::Reason reason);
+
+ /** Is a request with this reason actually a 'real' branch, that is,
+ * a stream change that's not just an instruction to Fetch1 to halt
+ * or wake up */
+ static bool isBranch(const BranchData::Reason reason);
+
+ public:
+ /** Explanation for this branch */
+ Reason reason;
+
+ /** Sequence number of new stream/prediction to be adopted */
+ InstSeqNum newStreamSeqNum;
+ InstSeqNum newPredictionSeqNum;
+
+ /** Starting PC of that stream */
+ TheISA::PCState target;
+
+ /** Instruction which caused this branch */
+ MinorDynInstPtr inst;
+
+ public:
+ BranchData() :
+ reason(NoBranch), newStreamSeqNum(0),
+ newPredictionSeqNum(0), target(TheISA::PCState(0)),
+ inst(MinorDynInst::bubble())
+ { }
+
+ BranchData(
+ Reason reason_,
+ InstSeqNum new_stream_seq_num,
+ InstSeqNum new_prediction_seq_num,
+ TheISA::PCState target,
+ MinorDynInstPtr inst_) :
+ reason(reason_),
+ newStreamSeqNum(new_stream_seq_num),
+ newPredictionSeqNum(new_prediction_seq_num),
+ target(target),
+ inst(inst_)
+ { }
+
+ /** BubbleIF interface */
+ static BranchData bubble() { return BranchData(); }
+ bool isBubble() const { return reason == NoBranch; }
+
+ /** As static isStreamChange but on this branch data */
+ bool isStreamChange() const { return isStreamChange(reason); }
+
+ /** As static isBranch but on this branch data */
+ bool isBranch() const { return isBranch(reason); }
+
+ /** ReportIF interface */
+ void reportData(std::ostream &os) const;
+};
+
+/** Print a branch reason enum */
+std::ostream &operator <<(std::ostream &os, BranchData::Reason reason);
+
+/** Print BranchData contents in a format suitable for DPRINTF comments, not
+ * for MinorTrace */
+std::ostream &operator <<(std::ostream &os, const BranchData &branch);
+
+/** Line fetch data in the forward direction. Contains a single cache line
+ * (or fragment of a line), its address, a sequence number assigned when
+ * that line was fetched and a bubbleFlag that can allow ForwardLineData to
+ * be used to represent the absence of line data in a pipeline. */
+class ForwardLineData /* : public ReportIF, public BubbleIF */
+{
+ private:
+ /** This line is a bubble. No other data member is required to be valid
+ * if this is true */
+ bool bubbleFlag;
+
+ public:
+ /** First byte address in the line. This is allowed to be
+ * <= pc.instAddr() */
+ Addr lineBaseAddr;
+
+ /** PC of the first requested inst within this line */
+ TheISA::PCState pc;
+
+ /** Explicit line width, don't rely on data.size */
+ unsigned int lineWidth;
+
+ public:
+ /** This line has a fault. The bubble flag will be false and seqNums
+ * will be valid but no line data will be */
+ Fault fault;
+
+ /** Thread, stream, prediction ... id of this line */
+ InstId id;
+
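A line flows forward as a bubble, a fault, or real data. A hedged sketch of how a fetch stage might populate one from an ICache response, using the setFault/adoptPacketData members declared just below (illustrative variable names, not the stage's actual code):

    ForwardLineData fwd_line;
    fwd_line.id = line_id;             /* thread/stream/prediction ids */
    fwd_line.lineBaseAddr = line_addr;
    fwd_line.pc = fetch_pc;
    if (fault != NoFault)
        fwd_line.setFault(fault);      /* faulting lines carry no data */
    else
        fwd_line.adoptPacketData(pkt); /* the line now owns pkt's data */
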
+ /** Line data. line[0] is the byte at address pc.instAddr(). Data is
+ * only valid up to lineWidth - 1. */
+ uint8_t *line;
+
+ /** Packet from which the line is taken */
+ Packet *packet;
+
+ public:
+ ForwardLineData() :
+ bubbleFlag(true),
+ lineBaseAddr(0),
+ lineWidth(0),
+ fault(NoFault),
+ line(NULL),
+ packet(NULL)
+ {
+ /* Make lines bubbles by default */
+ }
+
+ ~ForwardLineData() { line = NULL; }
+
+ public:
+ /** This is a fault, not a line */
+ bool isFault() const { return fault != NoFault; }
+
+ /** Set fault and possibly clear the bubble flag */
+ void setFault(Fault fault_);
+
+ /** In-place initialise a ForwardLineData, freeing and overriding the
+ * line */
+ void allocateLine(unsigned int width_);
+
+ /** Use the data from a packet as line instead of allocating new
+ * space. On destruction of this object, the packet will be destroyed */
+ void adoptPacketData(Packet *packet);
+
+ /** Free this ForwardLineData line. Note that these are shared between
+ * line objects and so you must be careful when deallocating them.
+ * Copying of ForwardLineData can, therefore, be done by default copy
+ * constructors/assignment */
+ void freeLine();
+
+ /** BubbleIF interface */
+ static ForwardLineData bubble() { return ForwardLineData(); }
+ bool isBubble() const { return bubbleFlag; }
+
+ /** ReportIF interface */
+ void reportData(std::ostream &os) const;
+};
+
+/** Maximum number of instructions that can be carried by the pipeline. */
+const unsigned int MAX_FORWARD_INSTS = 16;
+
+/** Forward flowing data between Fetch2,Decode,Execute carrying a packet of
+ * instructions of a width appropriate to the configured stage widths.
+ * Also carries exception information where instructions are not valid */
+class ForwardInstData /* : public ReportIF, public BubbleIF */
+{
+ public:
+ /** Array of carried insts, ref counted */
+ MinorDynInstPtr insts[MAX_FORWARD_INSTS];
+
+ /** The number of insts slots that can be expected to be valid insts */
+ unsigned int numInsts;
+
+ public:
+ explicit ForwardInstData(unsigned int width = 0);
+
+ ForwardInstData(const ForwardInstData &src);
+
+ public:
+ /** Number of instructions carried by this object */
+ unsigned int width() const { return numInsts; }
+
+ /** Copy the inst array only as far as numInsts */
+ ForwardInstData &operator =(const ForwardInstData &src);
+
+ /** Resize a bubble/empty ForwardInstData and fill with bubbles */
+ void resize(unsigned int width);
+
+ /** Fill with bubbles from 0 to width() - 1 */
+ void bubbleFill();
+
+ /** BubbleIF interface */
+ bool isBubble() const;
+
+ /** ReportIF interface */
+ void reportData(std::ostream &os) const;
+};
+
+}
+
+#endif /* __CPU_MINOR_PIPE_DATA_HH__ */ diff --git a/src/cpu/minor/pipeline.cc b/src/cpu/minor/pipeline.cc new file mode 100644 index 000000000..9d802234b --- /dev/null +++ b/src/cpu/minor/pipeline.cc @@ -0,0 +1,250 @@ +/*
+ * Copyright (c) 2013-2014 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Bardsley
+ */
+
+#include <algorithm>
+
+#include "cpu/minor/decode.hh"
+#include "cpu/minor/execute.hh"
+#include "cpu/minor/fetch1.hh"
+#include "cpu/minor/fetch2.hh"
+#include "cpu/minor/pipeline.hh"
+#include "debug/Drain.hh"
+#include "debug/MinorCPU.hh"
+#include "debug/MinorTrace.hh"
+#include "debug/Quiesce.hh"
+
+namespace Minor
+{
+
+Pipeline::Pipeline(MinorCPU &cpu_, MinorCPUParams &params) :
+ Ticked(cpu_, &(cpu_.BaseCPU::numCycles)),
+ cpu(cpu_),
+ allow_idling(params.enableIdling),
+ f1ToF2(cpu.name() + ".f1ToF2", "lines",
+ params.fetch1ToFetch2ForwardDelay),
+ f2ToF1(cpu.name() + ".f2ToF1", "prediction",
+ params.fetch1ToFetch2BackwardDelay, true),
+ f2ToD(cpu.name() + ".f2ToD", "insts",
+ params.fetch2ToDecodeForwardDelay),
+ dToE(cpu.name() + ".dToE", "insts",
+ params.decodeToExecuteForwardDelay),
+ eToF1(cpu.name() + ".eToF1", "branch",
+ params.executeBranchDelay),
+ execute(cpu.name() + ".execute", cpu, params,
+ dToE.output(), eToF1.input()),
+ decode(cpu.name() + ".decode", cpu, params,
+ f2ToD.output(), dToE.input(), execute.inputBuffer),
+ fetch2(cpu.name() + ".fetch2", cpu, params,
+ f1ToF2.output(), eToF1.output(), f2ToF1.input(), f2ToD.input(),
+ decode.inputBuffer),
+ fetch1(cpu.name() + ".fetch1", cpu, params,
+ eToF1.output(), f1ToF2.input(), f2ToF1.output(), fetch2.inputBuffer),
+ activityRecorder(cpu.name() + ".activity", Num_StageId,
+ /* The max depth of inter-stage FIFOs */
+ std::max(params.fetch1ToFetch2ForwardDelay,
+ std::max(params.fetch2ToDecodeForwardDelay,
+ std::max(params.decodeToExecuteForwardDelay,
+ params.executeBranchDelay)))),
+ needToSignalDrained(false)
+{
+ if (params.fetch1ToFetch2ForwardDelay < 1) {
+ fatal("%s: fetch1ToFetch2ForwardDelay must be >= 1 (%d)\n",
+ cpu.name(), params.fetch1ToFetch2ForwardDelay);
+ }
+
+ if (params.fetch2ToDecodeForwardDelay < 1) {
+ fatal("%s: fetch2ToDecodeForwardDelay must be >= 1 (%d)\n",
+ cpu.name(), params.fetch2ToDecodeForwardDelay);
+ }
+
+ if (params.decodeToExecuteForwardDelay < 1) {
+ 
fatal("%s: decodeToExecuteForwardDelay must be >= 1 (%d)\n", + cpu.name(), params.decodeToExecuteForwardDelay); + } + + if (params.executeBranchDelay < 1) { + fatal("%s: executeBranchDelay must be >= 1\n", + cpu.name(), params.executeBranchDelay); + } +} + +void +Pipeline::minorTrace() const +{ + fetch1.minorTrace(); + f1ToF2.minorTrace(); + f2ToF1.minorTrace(); + fetch2.minorTrace(); + f2ToD.minorTrace(); + decode.minorTrace(); + dToE.minorTrace(); + execute.minorTrace(); + eToF1.minorTrace(); + activityRecorder.minorTrace(); +} + +void +Pipeline::evaluate() +{ + /* Note that it's important to evaluate the stages in order to allow + * 'immediate', 0-time-offset TimeBuffer activity to be visible from + * later stages to earlier ones in the same cycle */ + execute.evaluate(); + decode.evaluate(); + fetch2.evaluate(); + fetch1.evaluate(); + + if (DTRACE(MinorTrace)) + minorTrace(); + + /* Update the time buffers after the stages */ + f1ToF2.evaluate(); + f2ToF1.evaluate(); + f2ToD.evaluate(); + dToE.evaluate(); + eToF1.evaluate(); + + /* The activity recorder must be be called after all the stages and + * before the idler (which acts on the advice of the activity recorder */ + activityRecorder.evaluate(); + + if (allow_idling) { + /* Become idle if we can but are not draining */ + if (!activityRecorder.active() && !needToSignalDrained) { + DPRINTF(Quiesce, "Suspending as the processor is idle\n"); + stop(); + } + + /* Deactivate all stages. Note that the stages *could* + * activate and deactivate themselves but that's fraught + * with additional difficulty. + * As organised herre */ + activityRecorder.deactivateStage(Pipeline::CPUStageId); + activityRecorder.deactivateStage(Pipeline::Fetch1StageId); + activityRecorder.deactivateStage(Pipeline::Fetch2StageId); + activityRecorder.deactivateStage(Pipeline::DecodeStageId); + activityRecorder.deactivateStage(Pipeline::ExecuteStageId); + } + + if (needToSignalDrained) /* Must be draining */ + { + DPRINTF(Drain, "Still draining\n"); + if (isDrained()) { + DPRINTF(Drain, "Signalling end of draining\n"); + cpu.signalDrainDone(); + needToSignalDrained = false; + stop(); + } + } +} + +MinorCPU::MinorCPUPort & +Pipeline::getInstPort() +{ + return fetch1.getIcachePort(); +} + +MinorCPU::MinorCPUPort & +Pipeline::getDataPort() +{ + return execute.getDcachePort(); +} + +void +Pipeline::wakeupFetch() +{ + execute.wakeupFetch(); +} + +unsigned int +Pipeline::drain(DrainManager *manager) +{ + DPRINTF(MinorCPU, "Draining pipeline by halting inst fetches. " + " Execution should drain naturally\n"); + + execute.drain(); + + /* Make sure that needToSignalDrained isn't accidentally set if we + * are 'pre-drained' */ + bool drained = isDrained(); + needToSignalDrained = !drained; + + return (drained ? 0 : 1); +} + +void +Pipeline::drainResume() +{ + DPRINTF(Drain, "Drain resume\n"); + execute.drainResume(); +} + +bool +Pipeline::isDrained() +{ + bool fetch1_drained = fetch1.isDrained(); + bool fetch2_drained = fetch2.isDrained(); + bool decode_drained = decode.isDrained(); + bool execute_drained = execute.isDrained(); + + bool f1_to_f2_drained = f1ToF2.empty(); + bool f2_to_f1_drained = f2ToF1.empty(); + bool f2_to_d_drained = f2ToD.empty(); + bool d_to_e_drained = dToE.empty(); + + bool ret = fetch1_drained && fetch2_drained && + decode_drained && execute_drained && + f1_to_f2_drained && f2_to_f1_drained && + f2_to_d_drained && d_to_e_drained; + + DPRINTF(MinorCPU, "Pipeline undrained stages state:%s%s%s%s%s%s%s%s\n", + (fetch1_drained ? 
"" : " Fetch1"), + (fetch2_drained ? "" : " Fetch2"), + (decode_drained ? "" : " Decode"), + (execute_drained ? "" : " Execute"), + (f1_to_f2_drained ? "" : " F1->F2"), + (f2_to_f1_drained ? "" : " F2->F1"), + (f2_to_d_drained ? "" : " F2->D"), + (d_to_e_drained ? "" : " D->E") + ); + + return ret; +} + +} diff --git a/src/cpu/minor/pipeline.hh b/src/cpu/minor/pipeline.hh new file mode 100644 index 000000000..893efbf50 --- /dev/null +++ b/src/cpu/minor/pipeline.hh @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * The constructed pipeline. Kept out of MinorCPU to keep the interface + * between the CPU and its grubby implementation details clean. + */ + +#ifndef __CPU_MINOR_PIPELINE_HH__ +#define __CPU_MINOR_PIPELINE_HH__ + +#include "cpu/minor/activity.hh" +#include "cpu/minor/cpu.hh" +#include "cpu/minor/decode.hh" +#include "cpu/minor/execute.hh" +#include "cpu/minor/fetch1.hh" +#include "cpu/minor/fetch2.hh" +#include "params/MinorCPU.hh" +#include "sim/ticked_object.hh" + +namespace Minor +{ + +/** + * @namespace Minor + * + * Minor contains all the definitions within the MinorCPU apart from the CPU + * class itself + */ + +/** The constructed pipeline. Kept out of MinorCPU to keep the interface + * between the CPU and its grubby implementation details clean. 
*/
+class Pipeline : public Ticked
+{
+ protected:
+ MinorCPU &cpu;
+
+ /** Allow cycles to be skipped when the pipeline is idle */
+ bool allow_idling;
+
+ Latch<ForwardLineData> f1ToF2;
+ Latch<BranchData> f2ToF1;
+ Latch<ForwardInstData> f2ToD;
+ Latch<ForwardInstData> dToE;
+ Latch<BranchData> eToF1;
+
+ Execute execute;
+ Decode decode;
+ Fetch2 fetch2;
+ Fetch1 fetch1;
+
+ /** Activity recording for the pipeline. This is accessed through the
+ * CPU by the pipeline stages but belongs to the Pipeline as it is the
+ * cleanest place to initialise it */
+ MinorActivityRecorder activityRecorder;
+
+ public:
+ /** Enumerated ids of the 'stages' for the activity recorder */
+ enum StageId
+ {
+ /* A stage representing wakeup of the whole processor */
+ CPUStageId = 0,
+ /* Real pipeline stages */
+ Fetch1StageId, Fetch2StageId, DecodeStageId, ExecuteStageId,
+ Num_StageId /* Stage count */
+ };
+
+ /** True after drain is called but draining isn't complete */
+ bool needToSignalDrained;
+
+ public:
+ Pipeline(MinorCPU &cpu_, MinorCPUParams &params);
+
+ public:
+ /** Wake up the Fetch unit. This is needed on thread activation esp.
+ * after quiesce wakeup */
+ void wakeupFetch();
+
+ /** Try to drain the CPU */
+ unsigned int drain(DrainManager *manager);
+
+ void drainResume();
+
+ /** Test to see if the CPU is drained */
+ bool isDrained();
+
+ /** A custom evaluate allows report in the right place (between
+ * stages and pipeline advance) */
+ void evaluate();
+
+ void minorTrace() const;
+
+ /** Functions below here are BaseCPU operations passed on to pipeline
+ * stages */
+
+ /** Return the IcachePort belonging to Fetch1 for the CPU */
+ MinorCPU::MinorCPUPort &getInstPort();
+ /** Return the DcachePort belonging to Execute for the CPU */
+ MinorCPU::MinorCPUPort &getDataPort();
+
+ /** To give the activity recorder to the CPU */
+ MinorActivityRecorder *getActivityRecorder() { return &activityRecorder; }
+};
+
+}
+
+#endif /* __CPU_MINOR_PIPELINE_HH__ */ diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc new file mode 100644 index 000000000..f6b1f7944 --- /dev/null +++ b/src/cpu/minor/scoreboard.cc @@ -0,0 +1,323 @@ +/*
+ * Copyright (c) 2013-2014 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Bardsley
+ */
+
+#include "arch/registers.hh"
+#include "cpu/minor/scoreboard.hh"
+#include "cpu/reg_class.hh"
+#include "debug/MinorScoreboard.hh"
+#include "debug/MinorTiming.hh"
+
+namespace Minor
+{
+
+bool
+Scoreboard::findIndex(RegIndex reg, Index &scoreboard_index)
+{
+ RegClass reg_class = regIdxToClass(reg);
+ bool ret = false;
+
+ if (reg == TheISA::ZeroReg) {
+ /* Don't bother with the zero register */
+ ret = false;
+ } else {
+ switch (reg_class)
+ {
+ case IntRegClass:
+ scoreboard_index = reg;
+ ret = true;
+ break;
+ case FloatRegClass:
+ scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs +
+ reg - TheISA::FP_Reg_Base;
+ ret = true;
+ break;
+ case CCRegClass:
+ /* CC regs are indexed from their own base, not the FP base */
+ scoreboard_index = TheISA::NumIntRegs + reg - TheISA::CC_Reg_Base;
+ ret = true;
+ break;
+ case MiscRegClass:
+ /* Don't bother with Misc registers */
+ ret = false;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/** Flatten a RegIndex, irrespective of what reg type it's pointing to */
+static TheISA::RegIndex
+flattenRegIndex(TheISA::RegIndex reg, ThreadContext *thread_context)
+{
+ RegClass reg_class = regIdxToClass(reg);
+ TheISA::RegIndex ret = reg;
+
+ switch (reg_class)
+ {
+ case IntRegClass:
+ ret = thread_context->flattenIntIndex(reg);
+ break;
+ case FloatRegClass:
+ ret = thread_context->flattenFloatIndex(reg);
+ break;
+ case CCRegClass:
+ ret = thread_context->flattenCCIndex(reg);
+ break;
+ case MiscRegClass:
+ /* Don't bother to flatten misc regs as we don't need them here */
+ /* return thread_context->flattenMiscIndex(reg); */
+ ret = reg;
+ break;
+ }
+
+ return ret;
+}
+
+void
+Scoreboard::markupInstDests(MinorDynInstPtr inst, Cycles retire_time,
+ ThreadContext *thread_context, bool mark_unpredictable)
+{
+ if (inst->isFault())
+ return;
+
+ StaticInstPtr staticInst = inst->staticInst;
+ unsigned int num_dests = staticInst->numDestRegs();
+
+ /** Mark each destination register */
+ for (unsigned int dest_index = 0; dest_index < num_dests;
+ dest_index++)
+ {
+ RegIndex reg = flattenRegIndex(
+ staticInst->destRegIdx(dest_index), thread_context);
+ Index index;
+
+ if (findIndex(reg, index)) {
+ if (mark_unpredictable)
+ numUnpredictableResults[index]++;
+
+ inst->flatDestRegIdx[dest_index] = reg;
+
+ numResults[index]++;
+ returnCycle[index] = retire_time;
+ /* We should be able to rely on only being given ascending
+ * execSeqNums, but sanity check */
+ if (inst->id.execSeqNum > writingInst[index]) {
+ writingInst[index] = inst->id.execSeqNum;
+ fuIndices[index] = inst->fuIndex;
+ }
+
+ DPRINTF(MinorScoreboard, "Marking up inst: %s"
+ " regIndex: %d final numResults: %d returnCycle: %d\n",
+ *inst, index, numResults[index], returnCycle[index]);
+ } else {
+ /* Use ZeroReg to mark 
+            inst->flatDestRegIdx[dest_index] = TheISA::ZeroReg;
+        }
+    }
+}
+
+InstSeqNum
+Scoreboard::execSeqNumToWaitFor(MinorDynInstPtr inst,
+    ThreadContext *thread_context)
+{
+    InstSeqNum ret = 0;
+
+    if (inst->isFault())
+        return ret;
+
+    StaticInstPtr staticInst = inst->staticInst;
+    unsigned int num_srcs = staticInst->numSrcRegs();
+
+    for (unsigned int src_index = 0; src_index < num_srcs; src_index++) {
+        RegIndex reg = flattenRegIndex(staticInst->srcRegIdx(src_index),
+            thread_context);
+        unsigned short int index;
+
+        if (findIndex(reg, index)) {
+            if (writingInst[index] > ret)
+                ret = writingInst[index];
+        }
+    }
+
+    DPRINTF(MinorScoreboard, "Inst: %s depends on execSeqNum: %d\n",
+        *inst, ret);
+
+    return ret;
+}
+
+void
+Scoreboard::clearInstDests(MinorDynInstPtr inst, bool clear_unpredictable)
+{
+    if (inst->isFault())
+        return;
+
+    StaticInstPtr staticInst = inst->staticInst;
+    unsigned int num_dests = staticInst->numDestRegs();
+
+    /* Clear the dependency counts for each destination register */
+    for (unsigned int dest_index = 0; dest_index < num_dests;
+        dest_index++)
+    {
+        RegIndex reg = inst->flatDestRegIdx[dest_index];
+        Index index;
+
+        if (findIndex(reg, index)) {
+            if (clear_unpredictable && numUnpredictableResults[index] != 0)
+                numUnpredictableResults[index]--;
+
+            numResults[index]--;
+
+            if (numResults[index] == 0) {
+                returnCycle[index] = Cycles(0);
+                writingInst[index] = 0;
+                fuIndices[index] = -1;
+            }
+
+            DPRINTF(MinorScoreboard, "Clearing inst: %s"
+                " regIndex: %d final numResults: %d\n",
+                *inst, index, numResults[index]);
+        }
+    }
+}
+
+bool
+Scoreboard::canInstIssue(MinorDynInstPtr inst,
+    const std::vector<Cycles> *src_reg_relative_latencies,
+    const std::vector<bool> *cant_forward_from_fu_indices,
+    Cycles now, ThreadContext *thread_context)
+{
+    /* Always allow fault to be issued */
+    if (inst->isFault())
+        return true;
+
+    StaticInstPtr staticInst = inst->staticInst;
+    unsigned int num_srcs = staticInst->numSrcRegs();
+
+    /* Default to saying you can issue */
+    bool ret = true;
+
+    unsigned int num_relative_latencies = 0;
+    Cycles default_relative_latency = Cycles(0);
+
+    /* Where relative latencies are given, the default is the last
+     *  one as that allows the rel. lat. list to be shorter than the
+     *  number of src. regs */
+    if (src_reg_relative_latencies &&
+        src_reg_relative_latencies->size() != 0)
+    {
+        num_relative_latencies = src_reg_relative_latencies->size();
+        default_relative_latency = (*src_reg_relative_latencies)
+            [num_relative_latencies-1];
+    }
+
+    /* For each source register, find the latest result */
+    unsigned int src_index = 0;
+    while (src_index < num_srcs && /* More registers */
+        ret /* Still possible */)
+    {
+        RegIndex reg = flattenRegIndex(staticInst->srcRegIdx(src_index),
+            thread_context);
+        unsigned short int index;
+
+        if (findIndex(reg, index)) {
+            /* A source FU index of -1 means there is no valid in-flight
+             *  writer to (possibly) forward from */
+            bool cant_forward = fuIndices[index] != -1 &&
+                cant_forward_from_fu_indices &&
+                index < cant_forward_from_fu_indices->size() &&
+                (*cant_forward_from_fu_indices)[index];
+
+            Cycles relative_latency = (cant_forward ? Cycles(0) :
+                (src_index >= num_relative_latencies ?
+                    default_relative_latency :
+                    (*src_reg_relative_latencies)[src_index]));
+
+            if (returnCycle[index] > (now + relative_latency) ||
+                numUnpredictableResults[index] != 0)
+            {
+                ret = false;
+            }
+        }
+        src_index++;
+    }
+
+    if (DTRACE(MinorTiming)) {
+        if (ret && num_srcs > num_relative_latencies &&
+            num_relative_latencies != 0)
+        {
+            DPRINTF(MinorTiming, "Warning, inst: %s timing extra decode has"
+                " more src.
regs: %d than relative latencies: %d\n", + staticInst->disassemble(0), num_srcs, num_relative_latencies); + } + } + + return ret; +} + +void +Scoreboard::minorTrace() const +{ + std::ostringstream result_stream; + + bool printed_element = false; + + unsigned int i = 0; + while (i < numRegs) { + unsigned short int num_results = numResults[i]; + unsigned short int num_unpredictable_results = + numUnpredictableResults[i]; + + if (!(num_results == 0 && num_unpredictable_results == Cycles(0))) { + if (printed_element) + result_stream << ','; + + result_stream << '(' << i << ',' + << num_results << '/' + << num_unpredictable_results << '/' + << returnCycle[i] << '/' + << writingInst[i] << ')'; + + printed_element = true; + } + + i++; + } + + MINORTRACE("busy=%s\n", result_stream.str()); +} + +} diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh new file mode 100644 index 000000000..711bcafb2 --- /dev/null +++ b/src/cpu/minor/scoreboard.hh @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * A simple instruction scoreboard for tracking dependencies in Execute. 
+ */
+
+#ifndef __CPU_MINOR_SCOREBOARD_HH__
+#define __CPU_MINOR_SCOREBOARD_HH__
+
+#include "cpu/minor/cpu.hh"
+#include "cpu/minor/dyn_inst.hh"
+#include "cpu/minor/trace.hh"
+
+namespace Minor
+{
+
+/** A scoreboard of register dependencies including, for each register:
+ *  The number of in-flight instructions which will generate a result for
+ *  this register */
+class Scoreboard : public Named
+{
+  public:
+    /** The number of registers in the Scoreboard.  These
+     *  are just the integer, CC and float registers packed
+     *  together with integer regs in the range [0,NumIntRegs-1],
+     *  CC regs in the range [NumIntRegs, NumIntRegs+NumCCRegs-1]
+     *  and float regs in the range
+     *  [NumIntRegs+NumCCRegs, NumFloatRegs+NumIntRegs+NumCCRegs-1] */
+    const unsigned numRegs;
+
+    /** Type to use for thread context registers */
+    typedef TheISA::RegIndex RegIndex;
+
+    /** Type to use when indexing numResults */
+    typedef unsigned short int Index;
+
+    /** Count of the number of in-flight instructions that
+     *  have results for each register */
+    std::vector<Index> numResults;
+
+    /** Count of the number of results which can't be predicted */
+    std::vector<Index> numUnpredictableResults;
+
+    /** Index of the FU generating this result */
+    std::vector<int> fuIndices;
+
+    /** The estimated cycle number at which the result will be presented.
+     *  This can be offset from to allow forwarding to be simulated as
+     *  long as instruction completion is *strictly* in order with
+     *  respect to instructions with unpredictable result timing */
+    std::vector<Cycles> returnCycle;
+
+    /** The execute sequence number of the most recent inst to generate this
+     *  register value */
+    std::vector<InstSeqNum> writingInst;
+
+  public:
+    Scoreboard(const std::string &name) :
+        Named(name),
+        numRegs(TheISA::NumIntRegs + TheISA::NumCCRegs +
+            TheISA::NumFloatRegs),
+        numResults(numRegs, 0),
+        numUnpredictableResults(numRegs, 0),
+        fuIndices(numRegs, -1), /* -1 means no in-flight writer */
+        returnCycle(numRegs, Cycles(0)),
+        writingInst(numRegs, 0)
+    { }
+
+  public:
+    /** Sets scoreboard_index to the index into numResults of the
+     *  given register index.  Returns true if the given register
+     *  is in the scoreboard and false if it isn't */
+    bool findIndex(RegIndex reg, Index &scoreboard_index);
+
+    /** Mark up an instruction's effects by incrementing
+     *  numResults counts.  If mark_unpredictable is true, the inst's
+     *  destination registers are marked as being unpredictable without
+     *  an estimated retire time */
+    void markupInstDests(MinorDynInstPtr inst, Cycles retire_time,
+        ThreadContext *thread_context, bool mark_unpredictable);
+
+    /** Clear down the dependencies for this instruction.  clear_unpredictable
+     *  must match mark_unpredictable for the same inst. */
+    void clearInstDests(MinorDynInstPtr inst, bool clear_unpredictable);
+
+    /** Returns the exec sequence number of the most recent inst on
+     *  which the given inst depends.  Useful for determining which
+     *  inst must actually be committed before a dependent inst
+     *  can call initiateAcc */
+    InstSeqNum execSeqNumToWaitFor(MinorDynInstPtr inst,
+        ThreadContext *thread_context);
+
+    /** Can this instruction be issued.
Are any of its source registers + * due to be written by other marked-up instructions in flight */ + bool canInstIssue(MinorDynInstPtr inst, + const std::vector<Cycles> *src_reg_relative_latencies, + const std::vector<bool> *cant_forward_from_fu_indices, + Cycles now, ThreadContext *thread_context); + + /** MinorTraceIF interface */ + void minorTrace() const; +}; + +} + +#endif /* __CPU_MINOR_SCOREBOARD_HH__ */ diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc new file mode 100644 index 000000000..baa0aa7f3 --- /dev/null +++ b/src/cpu/minor/stats.cc @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Andrew Bardsley + */ + +#include "cpu/minor/stats.hh" + +namespace Minor +{ + +MinorStats::MinorStats() +{ } + +void +MinorStats::regStats(const std::string &name, BaseCPU &baseCpu) +{ + numInsts + .name(name + ".committedInsts") + .desc("Number of instructions committed"); + + numOps + .name(name + ".committedOps") + .desc("Number of ops (including micro ops) committed"); + + numDiscardedOps + .name(name + ".discardedOps") + .desc("Number of ops (including micro ops) which were discarded " + "before commit"); + + numFetchSuspends + .name(name + ".numFetchSuspends") + .desc("Number of times Execute suspended instruction fetching"); + + quiesceCycles + .name(name + ".quiesceCycles") + .desc("Total number of cycles that CPU has spent quiesced or waiting " + "for an interrupt") + .prereq(quiesceCycles); + + cpi + .name(name + ".cpi") + .desc("CPI: cycles per instruction") + .precision(6); + cpi = baseCpu.numCycles / numInsts; + + ipc + .name(name + ".ipc") + .desc("IPC: instructions per cycle") + .precision(6); + ipc = numInsts / baseCpu.numCycles; +} + +}; diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh new file mode 100644 index 000000000..dc246304d --- /dev/null +++ b/src/cpu/minor/stats.hh @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2011-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * The stats for MinorCPU separated from the CPU definition. 
+ */ + +#ifndef __CPU_MINOR_STATS_HH__ +#define __CPU_MINOR_STATS_HH__ + +#include "base/statistics.hh" +#include "cpu/base.hh" +#include "sim/ticked_object.hh" + +namespace Minor +{ + +/** Currently unused stats class. */ +class MinorStats +{ + public: + /** Number of simulated instructions */ + Stats::Scalar numInsts; + + /** Number of simulated insts and microops */ + Stats::Scalar numOps; + + /** Number of ops discarded before committing */ + Stats::Scalar numDiscardedOps; + + /** Number of times fetch was asked to suspend by Execute */ + Stats::Scalar numFetchSuspends; + + /** Number of cycles in quiescent state */ + Stats::Scalar quiesceCycles; + + /** CPI/IPC for total cycle counts and macro insts */ + Stats::Formula cpi; + Stats::Formula ipc; + + public: + MinorStats(); + + public: + void regStats(const std::string &name, BaseCPU &baseCpu); +}; + +} + +#endif /* __CPU_MINOR_STATS_HH__ */ diff --git a/src/cpu/minor/trace.hh b/src/cpu/minor/trace.hh new file mode 100644 index 000000000..9bbe09750 --- /dev/null +++ b/src/cpu/minor/trace.hh @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/** + * @file + * + * This file contains miscellaneous classes and functions for formatting + * general trace information and also MinorTrace information. + * + * MinorTrace is this model's cycle-by-cycle trace information for use by + * minorview. 
+ */ + +#ifndef __CPU_MINOR_TRACE_HH__ +#define __CPU_MINOR_TRACE_HH__ + +#include <string> + +#include "base/trace.hh" +#include "debug/MinorTrace.hh" + +namespace Minor +{ + +/** DPRINTFN for MinorTrace reporting */ +#define MINORTRACE(...) \ + DPRINTF(MinorTrace, "MinorTrace: " __VA_ARGS__) + +/** DPRINTFN for MinorTrace MinorInst line reporting */ +#define MINORINST(sim_object, ...) \ + DPRINTFS(MinorTrace, (sim_object), "MinorInst: " __VA_ARGS__) + +/** DPRINTFN for MinorTrace MinorLine line reporting */ +#define MINORLINE(sim_object, ...) \ + DPRINTFS(MinorTrace, (sim_object), "MinorLine: " __VA_ARGS__) + +} + +#endif /* __CPU_MINOR_TRACE_HH__ */ diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript index 5b2ecceef..bb9342f06 100644 --- a/src/cpu/pred/SConscript +++ b/src/cpu/pred/SConscript @@ -30,7 +30,8 @@ Import('*') -if 'InOrderCPU' in env['CPU_MODELS'] or 'O3CPU' in env['CPU_MODELS']: +if 'InOrderCPU' in env['CPU_MODELS'] or 'O3CPU' in env['CPU_MODELS'] \ + or 'Minor' in env['CPU_MODELS']: SimObject('BranchPredictor.py') Source('bpred_unit.cc') diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index f598c920d..375b7d0ba 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -59,6 +59,11 @@ class CheckerCPU; class AtomicSimpleCPU; class TimingSimpleCPU; class InorderCPU; +namespace Minor +{ + class ExecContext; +}; + class SymbolTable; namespace Trace { diff --git a/src/cpu/timing_expr.cc b/src/cpu/timing_expr.cc new file mode 100644 index 000000000..d6d904956 --- /dev/null +++ b/src/cpu/timing_expr.cc @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +#include "base/intmath.hh" +#include "cpu/timing_expr.hh" + +TimingExprEvalContext::TimingExprEvalContext (StaticInstPtr inst_, + ThreadContext *thread_, + TimingExprLet *let_) : + inst(inst_), thread(thread_), let(let_) +{ + /* Reserve space to hold the results of evaluating the + * let expressions */ + if (let) { + unsigned int num_defns = let->defns.size(); + + results.resize(num_defns, 0); + resultAvailable.resize(num_defns, false); + } +} + +uint64_t TimingExprSrcReg::eval(TimingExprEvalContext &context) +{ + return context.inst->srcRegIdx(index); +} + +uint64_t TimingExprReadIntReg::eval(TimingExprEvalContext &context) +{ + return context.thread->readIntReg(reg->eval(context)); +} + +uint64_t TimingExprLet::eval(TimingExprEvalContext &context) +{ + TimingExprEvalContext new_context(context.inst, + context.thread, this); + + return expr->eval(new_context); +} + +uint64_t TimingExprRef::eval(TimingExprEvalContext &context) +{ + /* Lookup the result, evaluating if necessary. @todo, this + * should have more error checking */ + if (!context.resultAvailable[index]) { + context.results[index] = context.let->defns[index]->eval(context); + context.resultAvailable[index] = true; + } + + return context.results[index]; +} + +uint64_t TimingExprUn::eval(TimingExprEvalContext &context) +{ + uint64_t arg_value = arg->eval(context); + uint64_t ret = 0; + + switch (op) { + case Enums::timingExprSizeInBits: + if (arg_value == 0) + ret = 0; + else + ret = ceilLog2(arg_value); + break; + case Enums::timingExprNot: + ret = arg_value != 0; + break; + case Enums::timingExprInvert: + ret = ~arg_value; + break; + case Enums::timingExprSignExtend32To64: + ret = static_cast<int64_t>( + static_cast<int32_t>(arg_value)); + break; + case Enums::timingExprAbs: + if (static_cast<int64_t>(arg_value) < 0) + ret = -arg_value; + else + ret = arg_value; + break; + default: + break; + } + + return ret; +} + +uint64_t TimingExprBin::eval(TimingExprEvalContext &context) +{ + uint64_t left_value = left->eval(context); + uint64_t right_value = right->eval(context); + uint64_t ret = 0; + + switch (op) { + case Enums::timingExprAdd: + ret = left_value + right_value; + break; + case Enums::timingExprSub: + ret = left_value - right_value; + break; + case Enums::timingExprUMul: + ret = left_value * right_value; + break; + case Enums::timingExprUDiv: + if (right_value != 0) { + ret = left_value / right_value; + } + break; + case Enums::timingExprUCeilDiv: + if (right_value != 0) { + ret = (left_value + (right_value - 1)) / right_value; + } + break; + case Enums::timingExprSMul: + ret = static_cast<int64_t>(left_value) * + static_cast<int64_t>(right_value); + break; + case Enums::timingExprSDiv: + if (right_value != 0) { + ret = static_cast<int64_t>(left_value) / + static_cast<int64_t>(right_value); + } + break; + case Enums::timingExprEqual: + ret = left_value == right_value; + break; + case Enums::timingExprNotEqual: + ret = left_value != right_value; + 
break; + case Enums::timingExprULessThan: + ret = left_value < right_value; + break; + case Enums::timingExprUGreaterThan: + ret = left_value > right_value; + break; + case Enums::timingExprSLessThan: + ret = static_cast<int64_t>(left_value) < + static_cast<int64_t>(right_value); + break; + case Enums::timingExprSGreaterThan: + ret = static_cast<int64_t>(left_value) > + static_cast<int64_t>(right_value); + break; + case Enums::timingExprAnd: + ret = (left_value != 0) && (right_value != 0); + break; + case Enums::timingExprOr: + ret = (left_value != 0) || (right_value != 0); + break; + default: + break; + } + + return ret; +} + +uint64_t TimingExprIf::eval(TimingExprEvalContext &context) +{ + uint64_t cond_value = cond->eval(context); + + if (cond_value != 0) + return trueExpr->eval(context); + else + return falseExpr->eval(context); +} + +TimingExprLiteral * +TimingExprLiteralParams::create() +{ + return new TimingExprLiteral(this); +} + +TimingExprSrcReg * +TimingExprSrcRegParams::create() +{ + return new TimingExprSrcReg(this); +} + +TimingExprReadIntReg * +TimingExprReadIntRegParams::create() +{ + return new TimingExprReadIntReg(this); +} + +TimingExprLet * +TimingExprLetParams::create() +{ + return new TimingExprLet(this); +} + +TimingExprRef * +TimingExprRefParams::create() +{ + return new TimingExprRef(this); +} + +TimingExprUn * +TimingExprUnParams::create() +{ + return new TimingExprUn(this); +} + +TimingExprBin * +TimingExprBinParams::create() +{ + return new TimingExprBin(this); +} + +TimingExprIf * +TimingExprIfParams::create() +{ + return new TimingExprIf(this); +} diff --git a/src/cpu/timing_expr.hh b/src/cpu/timing_expr.hh new file mode 100644 index 000000000..d2c38ea90 --- /dev/null +++ b/src/cpu/timing_expr.hh @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +/* + * These classes define an expression language over uint64_t with only + * a few operators. This can be used to form expressions for the extra + * delay required in variable execution time instructions. + * + * Expressions, in evaluation, will have access to the ThreadContext and + * a StaticInst. + */ + +#ifndef __CPU_TIMING_EXPR_HH__ +#define __CPU_TIMING_EXPR_HH__ + +#include "cpu/static_inst.hh" +#include "cpu/thread_context.hh" +#include "enums/TimingExprOp.hh" +#include "params/TimingExpr.hh" +#include "params/TimingExprBin.hh" +#include "params/TimingExprIf.hh" +#include "params/TimingExprLet.hh" +#include "params/TimingExprLiteral.hh" +#include "params/TimingExprReadIntReg.hh" +#include "params/TimingExprRef.hh" +#include "params/TimingExprSrcReg.hh" +#include "params/TimingExprUn.hh" +#include "sim/sim_object.hh" + +/** These classes are just the C++ counterparts for those in Expr.py and + * are, therefore, documented there */ + +class TimingExprLet; + +/** Object to gather the visible context for evaluation */ +class TimingExprEvalContext +{ + public: + /** Special visible context */ + StaticInstPtr inst; + ThreadContext *thread; + + /** Context visible as sub expressions. results will hold the results + * of (lazily) evaluating let's expressions. 
resultAvailable elements + * are true when a result has actually been evaluated */ + TimingExprLet *let; + std::vector<uint64_t> results; + std::vector<bool > resultAvailable; + + TimingExprEvalContext(StaticInstPtr inst_, + ThreadContext *thread_, TimingExprLet *let_); +}; + +class TimingExpr : public SimObject +{ + public: + TimingExpr(const TimingExprParams *params) : + SimObject(params) + { } + + virtual uint64_t eval(TimingExprEvalContext &context) = 0; +}; + +class TimingExprLiteral : public TimingExpr +{ + public: + uint64_t value; + + TimingExprLiteral(const TimingExprLiteralParams *params) : + TimingExpr(params), + value(params->value) + { } + + uint64_t eval(TimingExprEvalContext &context) { return value; } +}; + +class TimingExprSrcReg : public TimingExpr +{ + public: + unsigned int index; + + TimingExprSrcReg(const TimingExprSrcRegParams *params) : + TimingExpr(params), + index(params->index) + { } + + uint64_t eval(TimingExprEvalContext &context); +}; + +class TimingExprReadIntReg : public TimingExpr +{ + public: + TimingExpr *reg; + + TimingExprReadIntReg(const TimingExprReadIntRegParams *params) : + TimingExpr(params), + reg(params->reg) + { } + + uint64_t eval(TimingExprEvalContext &context); +}; + +class TimingExprLet : public TimingExpr +{ + public: + std::vector<TimingExpr *> defns; + TimingExpr *expr; + + TimingExprLet(const TimingExprLetParams *params) : + TimingExpr(params), + defns(params->defns), + expr(params->expr) + { } + + uint64_t eval(TimingExprEvalContext &context); +}; + +class TimingExprRef : public TimingExpr +{ + public: + unsigned int index; + + TimingExprRef(const TimingExprRefParams *params) : + TimingExpr(params), + index(params->index) + { } + + uint64_t eval(TimingExprEvalContext &context); +}; + +class TimingExprUn : public TimingExpr +{ + public: + Enums::TimingExprOp op; + TimingExpr *arg; + + TimingExprUn(const TimingExprUnParams *params) : + TimingExpr(params), + op(params->op), + arg(params->arg) + { } + + uint64_t eval(TimingExprEvalContext &context); +}; + +class TimingExprBin : public TimingExpr +{ + public: + Enums::TimingExprOp op; + TimingExpr *left; + TimingExpr *right; + + TimingExprBin(const TimingExprBinParams *params) : + TimingExpr(params), + op(params->op), + left(params->left), + right(params->right) + { } + + uint64_t eval(TimingExprEvalContext &context); +}; + +class TimingExprIf : public TimingExpr +{ + public: + TimingExpr *cond; + TimingExpr *trueExpr; + TimingExpr *falseExpr; + + TimingExprIf(const TimingExprIfParams *params) : + TimingExpr(params), + cond(params->cond), + trueExpr(params->trueExpr), + falseExpr(params->falseExpr) + { } + + uint64_t eval(TimingExprEvalContext &context); +}; + +#endif diff --git a/src/doc/inside-minor.doxygen b/src/doc/inside-minor.doxygen new file mode 100644 index 000000000..e55f61c01 --- /dev/null +++ b/src/doc/inside-minor.doxygen @@ -0,0 +1,1091 @@ +# Copyright (c) 2014 ARM Limited +# All rights reserved +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Andrew Bardsley + +namespace Minor +{ + +/*! + +\page minor Inside the Minor CPU model + +\tableofcontents + +This document contains a description of the structure and function of the +Minor gem5 in-order processor model. It is recommended reading for anyone who +wants to understand Minor's internal organisation, design decisions, C++ +implementation and Python configuration. A familiarity with gem5 and some of +its internal structures is assumed. This document is meant to be read +alongside the Minor source code and to explain its general structure without +being too slavish about naming every function and data type. + +\section whatis What is Minor? + +Minor is an in-order processor model with a fixed pipeline but configurable +data structures and execute behaviour. It is intended to be used to model +processors with strict in-order execution behaviour and allows visualisation +of an instruction's position in the pipeline through the +MinorTrace/minorview.py format/tool. The intention is to provide a framework +for micro-architecturally correlating the model with a particular, chosen +processor with similar capabilities. + +\section philo Design philosophy + +\subsection mt Multithreading + +The model isn't currently capable of multithreading but there are THREAD +comments in key places where stage data needs to be arrayed to support +multithreading. + +\subsection structs Data structures + +Decorating data structures with large amounts of life-cycle information is +avoided. Only instructions (MinorDynInst) contain a significant proportion of +their data content whose values are not set at construction. + +All internal structures have fixed sizes on construction. Data held in queues +and FIFOs (MinorBuffer, FUPipeline) should have a BubbleIF interface to +allow a distinct 'bubble'/no data value option for each type. + +Inter-stage 'struct' data is packaged in structures which are passed by value. 
+Only MinorDynInst, the line data in ForwardLineData and the memory-interfacing
+objects Fetch1::FetchRequest and LSQ::LSQRequest are '::new' allocated while
+running the model.
+
+\section model Model structure
+
+Objects of class MinorCPU are provided by the model to gem5.  MinorCPU
+implements the interfaces of BaseCPU (cpu.hh) and can provide data and
+instruction interfaces for connection to a cache system.  The model is
+configured in a similar way to other gem5 models through Python.  That
+configuration is passed on to MinorCPU::pipeline (of class Pipeline) which
+actually implements the processor pipeline.
+
+The hierarchy of major unit ownership from MinorCPU down looks like this:
+
+<ul>
+<li>MinorCPU</li>
+<ul>
+    <li>Pipeline - container for the pipeline, owns the cyclic 'tick'
+        event mechanism and the idling (cycle skipping) mechanism.</li>
+    <ul>
+        <li>Fetch1 - instruction fetch unit responsible for fetching cache
+            lines (or parts of lines) from the I-cache interface</li>
+        <ul>
+            <li>Fetch1::IcachePort - interface to the I-cache from
+                Fetch1</li>
+        </ul>
+        <li>Fetch2 - line to instruction decomposition</li>
+        <li>Decode - instruction to micro-op decomposition</li>
+        <li>Execute - instruction execution and data memory
+            interface</li>
+        <ul>
+            <li>LSQ - load store queue for memory ref. instructions</li>
+            <li>LSQ::DcachePort - interface to the D-cache from
+                Execute</li>
+        </ul>
+    </ul>
+</ul>
+</ul>
+
+\section keystruct Key data structures
+
+\subsection ids Instruction and line identity: InstId (dyn_inst.hh)
+
+An InstId contains the sequence numbers and thread numbers that describe the
+life cycle and instruction stream affiliations of individual fetched cache
+lines and instructions.
+
+An InstId is printed in one of the following forms:
+
+ - T/S.P/L - for fetched cache lines
+ - T/S.P/L/F - for instructions before Decode
+ - T/S.P/L/F.E - for instructions from Decode onwards
+
+for example:
+
+ - 0/10.12/5/6.7
+
+InstId's fields are:
+
+<table>
+<tr>
+    <td><b>Field</b></td>
+    <td><b>Symbol</b></td>
+    <td><b>Generated by</b></td>
+    <td><b>Checked by</b></td>
+    <td><b>Function</b></td>
+</tr>
+
+<tr>
+    <td>InstId::threadId</td>
+    <td>T</td>
+    <td>Fetch1</td>
+    <td>Everywhere the thread number is needed</td>
+    <td>Thread number (currently always 0).</td>
+</tr>
+
+<tr>
+    <td>InstId::streamSeqNum</td>
+    <td>S</td>
+    <td>Execute</td>
+    <td>Fetch1, Fetch2, Execute (to discard lines/insts)</td>
+    <td>Stream sequence number as chosen by Execute.  Stream
+    sequence numbers change after changes of PC (branches, exceptions) in
+    Execute and are used to separate pre and post branch instruction
+    streams.</td>
+</tr>
+
+<tr>
+    <td>InstId::predictionSeqNum</td>
+    <td>P</td>
+    <td>Fetch2</td>
+    <td>Fetch2 (while discarding lines after prediction)</td>
+    <td>Prediction sequence numbers represent branch prediction decisions.
+    This is used by Fetch2 to mark lines/instructions according to the last
+    followed branch prediction made by Fetch2.  Fetch2 can signal to Fetch1
+    that it should change its fetch address and mark lines with a new
+    prediction sequence number (which it will only do if the stream sequence
+    number Fetch1 expects matches that of the request).</td>
+</tr>
+
+<tr>
+<td>InstId::lineSeqNum</td>
+<td>L</td>
+<td>Fetch1</td>
+<td>(Just for debugging)</td>
+<td>Line fetch sequence number of this cache line or the line
+    this instruction was extracted from.
+    </td>
+</tr>
+
+<tr>
+<td>InstId::fetchSeqNum</td>
+<td>F</td>
+<td>Fetch2</td>
+<td>Fetch2 (as the inst. sequence number for branches)</td>
+<td>Instruction fetch order assigned by Fetch2 when lines
+    are decomposed into instructions.
+    </td>
+</tr>
+
+<tr>
+<td>InstId::execSeqNum</td>
+<td>E</td>
+<td>Decode</td>
+<td>Execute (to check instruction identity in queues/FUs/LSQ)</td>
+<td>Instruction order after micro-op decomposition.</td>
+</tr>
+
+</table>
+
+The sequence number fields are all independent of each other and although, for
+instance, InstId::execSeqNum for an instruction will always be >=
+InstId::fetchSeqNum, the comparison is not useful.
+
+The originating stage of each sequence number field keeps a counter for that
+field which can be incremented in order to generate new, unique numbers.
+
+\subsection insts Instructions: MinorDynInst (dyn_inst.hh)
+
+MinorDynInst represents an instruction's progression through the pipeline.  An
+instruction can be three things:
+
+<table>
+<tr>
+    <td><b>Thing</b></td>
+    <td><b>Predicate</b></td>
+    <td><b>Explanation</b></td>
+</tr>
+<tr>
+    <td>A bubble</td>
+    <td>MinorDynInst::isBubble()</td>
+    <td>no instruction at all, just a space-filler</td>
+</tr>
+<tr>
+    <td>A fault</td>
+    <td>MinorDynInst::isFault()</td>
+    <td>a fault to pass down the pipeline in an instruction's clothing</td>
+</tr>
+<tr>
+    <td>A decoded instruction</td>
+    <td>MinorDynInst::isInst()</td>
+    <td>instructions are actually passed to the gem5 decoder in Fetch2 and so
+    are created fully decoded.  MinorDynInst::staticInst is the decoded
+    instruction form.</td>
+</tr>
+</table>
+
+Instructions are reference counted using the gem5 RefCountingPtr
+(base/refcnt.hh) wrapper.  They therefore usually appear as MinorDynInstPtr in
+code.  Note that as RefCountingPtr initialises as nullptr rather than an
+object that supports BubbleIF::isBubble, passing raw MinorDynInstPtrs to
+Queue%s and other similar structures from stage.hh without boxing is
+dangerous.
+
+\subsection fld ForwardLineData (pipe_data.hh)
+
+ForwardLineData is used to pass cache lines from Fetch1 to Fetch2.  Like
+MinorDynInst%s, they can be bubbles (ForwardLineData::isBubble()),
+fault-carrying or can contain a line (partial line) fetched by Fetch1.  The
+data carried by ForwardLineData is owned by a Packet object returned from
+memory and is explicitly memory managed and so must be deleted once processed
+(by Fetch2 deleting the Packet).
+
+\subsection fid ForwardInstData (pipe_data.hh)
+
+ForwardInstData can contain up to ForwardInstData::width() instructions in its
+ForwardInstData::insts vector.  This structure is used to carry instructions
+between Fetch2, Decode and Execute and to store input buffer vectors in Decode
+and Execute.
+
+\subsection fr Fetch1::FetchRequest (fetch1.hh)
+
+FetchRequests represent I-cache line fetch requests.  They are used in the
+memory queues of Fetch1 and are pushed into/popped from Packet::senderState
+while traversing the memory system.
+
+FetchRequests contain a memory system Request (mem/request.hh) for that fetch
+access, a packet (Packet, mem/packet.hh), if the request gets to memory, and a
+fault field that can be populated with a TLB-sourced prefetch fault (if any).
+
+\subsection lsqr LSQ::LSQRequest (execute.hh)
+
+LSQRequests are similar to FetchRequests but for D-cache accesses.  They carry
+the instruction associated with a memory access.
+
+\section pipeline The pipeline
+
+\verbatim
+------------------------------------------------------------------------------
+ Key:
+
+ [] : inter-stage MinorBuffer
+
+ ,--.
+ |  | : pipeline stage
+ `--'
+
+ ---> : forward communication
+ <--- : backward communication
+
+ rv : reservation information for input buffers
+
+               ,------.     ,------.     ,------.     ,-------.
+ (from --[]-v->|Fetch1|-[]->|Fetch2|-[]->|Decode|-[]->|Execute|--> (to Fetch1
+ Execute)   |  |      |<-[]-|      |<-rv-|      |<-rv-|       |    & Fetch2)
+            |  `------'<-rv-|      |     |      |     |       |
+            `-------------->|      |     |      |     |       |
+                            `------'     `------'     `-------'
+------------------------------------------------------------------------------
+\endverbatim
+
+The four pipeline stages are connected together by MinorBuffer FIFO
+(stage.hh, derived ultimately from TimeBuffer) structures which allow
+inter-stage delays to be modelled.  There is a MinorBuffer between adjacent
+stages in the forward direction (for example: passing lines from Fetch1 to
+Fetch2) and, between Fetch2 and Fetch1, a buffer in the backwards direction
+carrying branch predictions.
+
+Stages Fetch2, Decode and Execute have input buffers which, each cycle, can
+accept input data from the previous stage and can hold that data if the stage
+is not ready to process it.  Input buffers store data in the same form as it
+is received and so Decode and Execute's input buffers contain the output
+instruction vector (ForwardInstData (pipe_data.hh)) from their previous stages
+with the instructions and bubbles in the same positions as a single buffer
+entry.
+
+Stage input buffers provide a Reservable (stage.hh) interface to their
+previous stages, to allow slots to be reserved in their input buffers, and
+communicate their input buffer occupancy backwards to allow the previous stage
+to plan whether it should make an output in a given cycle.
+
+\subsection events Event handling: MinorActivityRecorder (activity.hh,
+pipeline.hh)
+
+Minor is essentially a cycle-callable model with some ability to skip cycles
+based on pipeline activity.  External events are mostly received by callbacks
+(e.g. Fetch1::IcachePort::recvTimingResp) and cause the pipeline to be woken
+up to service advancing request queues.
+
+Ticked (sim/ticked.hh) is a base class bringing together an evaluate
+member function and a provided SimObject.  It provides a Ticked::start/stop
+interface to start and pause clock events from being periodically issued.
+Pipeline is a derived class of Ticked.
+
+During evaluate calls, stages can signal that they still have work to do in
+the next cycle by calling either MinorCPU::activityRecorder->activity() (for
+non-callback related activity) or MinorCPU::wakeupOnEvent(<stageId>) (for
+stage callback-related 'wakeup' activity).
+
+Pipeline::evaluate contains calls to evaluate for each unit and a test for
+pipeline idling which can turn off the clock tick if no unit has signalled
+that it may become active next cycle.
+
+Within Pipeline (pipeline.hh), the stages are evaluated in reverse order (and
+so will ::evaluate in reverse order) and their backwards data can be
+read immediately after being written in each cycle allowing output decisions
+to be 'perfect' (allowing synchronous stalling of the whole pipeline).  Branch
+predictions from Fetch2 to Fetch1 can also be transported in 0 cycles making
+fetch1ToFetch2BackwardDelay the only configurable delay which can be set as
+low as 0 cycles.
+
+The MinorCPU::activateContext and MinorCPU::suspendContext interface can be
+called to start and pause threads (threads in the MT sense) and to start and
+pause the pipeline.  Executing instructions can call this interface
+(indirectly through the ThreadContext) to idle the CPU/their threads.
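+
+As a concrete illustration, the body of Pipeline::evaluate has roughly the
+following shape.  This is a simplified sketch for orientation, using the
+member names declared in pipeline.hh above, and omits tracing and draining
+details; it is not the verbatim contents of pipeline.cc:
+
+\verbatim
+    void
+    Pipeline::evaluate()
+    {
+        /* Evaluate the stages in reverse order so that backwards data
+         *  written this cycle is immediately visible to earlier stages */
+        execute.evaluate();
+        decode.evaluate();
+        fetch2.evaluate();
+        fetch1.evaluate();
+
+        /* Advance the inter-stage latches after the stages have run */
+        f1ToF2.evaluate();
+        f2ToF1.evaluate();
+        f2ToD.evaluate();
+        dToE.evaluate();
+        eToF1.evaluate();
+
+        /* Age the recorded activity */
+        activityRecorder.advance();
+
+        /* Idling: pause the Ticked clock event if no stage has signalled
+         *  activity and cycle skipping is allowed */
+        if (allow_idling && !activityRecorder.active())
+            stop();
+    }
+\endverbatim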
+
+\subsection stages Each pipeline stage
+
+In general, the behaviour of a stage (each cycle) is:
+
+\verbatim
+    evaluate:
+        push input to inputBuffer
+        setup references to input/output data slots
+
+        do 'every cycle' 'step' tasks
+
+        if there is input and there is space in the next stage:
+            process and generate a new output
+            maybe re-activate the stage
+
+        send backwards data
+
+        if the stage generated output to the following FIFO:
+            signal pipe activity
+
+        if the stage has more processable input and space in the next stage:
+            re-activate the stage for the next cycle
+
+        commit the push to the inputBuffer if that data hasn't all been used
+\endverbatim
+
+The Execute stage differs from this model as its forward output (branch) data
+is unconditionally sent to Fetch1 and Fetch2.  To allow this behaviour, Fetch1
+and Fetch2 must be unconditionally receptive to that data.
+
+\subsection fetch1 Fetch1 stage
+
+Fetch1 is responsible for fetching cache lines or partial cache lines from the
+I-cache and passing them on to Fetch2 to be decomposed into instructions.  It
+can receive 'change of stream' indications from both Execute and Fetch2 to
+signal that it should change its internal fetch address and tag newly fetched
+lines with new stream or prediction sequence numbers.  When both Execute and
+Fetch2 signal changes of stream at the same time, Fetch1 takes Execute's
+change.
+
+Every line issued by Fetch1 will bear a unique line sequence number which can
+be used for debugging stream changes.
+
+When fetching from the I-cache, Fetch1 will ask for data from the current
+fetch address (Fetch1::pc) up to the end of the 'data snap' size set in the
+parameter fetch1LineSnapWidth.  Subsequent autonomous line fetches will fetch
+whole lines at a snap boundary and of size fetch1LineWidth.
+
+Fetch1 will only initiate a memory fetch if it can reserve space in Fetch2's
+input buffer.  That input buffer serves as the fetch queue/LFL for the system.
+
+Fetch1 contains two queues: requests and transfers to handle the stages of
+translating the address of a line fetch (via the TLB) and accommodating the
+request/response of fetches to/from memory.
+
+Fetch requests from Fetch1 are pushed into the requests queue as newly
+allocated FetchRequest objects once they have been sent to the ITLB with a
+call to itb->translateTiming.
+
+A response from the TLB moves the request from the requests queue to the
+transfers queue.  If there is more than one entry in each queue, it is
+possible to get a TLB response for a request which is not at the head of the
+requests queue.  In that case, the TLB response is marked up as a state change
+to Translated in the request object, and advancing the request to transfers
+(and the memory system) is left to calls to Fetch1::stepQueues which is called
+in the cycle following the receipt of any event.
+
+Fetch1::tryToSendToTransfers is responsible for moving requests between the
+two queues and issuing requests to memory.  Failed TLB lookups (prefetch
+aborts) continue to occupy space in the queues until they are recovered at the
+head of transfers.
+
+Responses from memory change the request object state to Complete and
+Fetch1::evaluate can pick up response data, package it in the ForwardLineData
+object, and forward it to Fetch2%'s input buffer.
+
+As space is always reserved in Fetch2::inputBuffer, setting the input buffer's
+size to 1 results in non-prefetching behaviour.
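+
+The life cycle of a Fetch1::FetchRequest through those two queues can be
+summarised by its state names.  The enumeration below is an illustrative
+sketch of that progression: Translated and Complete are the names used in
+the text above, while InTranslation and RequestIssuing are assumed names
+for the remaining steps and may differ from the exact set in fetch1.hh:
+
+\verbatim
+    enum FetchRequestState
+    {
+        InTranslation,  /* Issued to the ITLB, waiting for a response */
+        Translated,     /* TLB response received; can move from the
+                         *  requests queue to the transfers queue */
+        RequestIssuing, /* Sent on to the memory system */
+        Complete        /* Response (or fault) available; Fetch1::evaluate
+                         *  can package the data as ForwardLineData */
+    };
+\endverbatim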
+ +When a change of stream occurs, translated requests queue members and +completed transfers queue members can be unconditionally discarded to make way +for new transfers. + +\subsection fetch2 Fetch2 stage + +Fetch2 receives a line from Fetch1 into its input buffer. The data in the +head line in that buffer is iterated over and separated into individual +instructions which are packed into a vector of instructions which can be +passed to Decode. Packing instructions can be aborted early if a fault is +found in either the input line as a whole or a decomposed instruction. + +\subsubsection bp Branch prediction + +Fetch2 contains the branch prediction mechanism. This is a wrapper around the +branch predictor interface provided by gem5 (cpu/pred/...). + +Branches are predicted for any control instructions found. If prediction is +attempted for an instruction, the MinorDynInst::triedToPredict flag is set on +that instruction. + +When a branch is predicted to take, the MinorDynInst::predictedTaken flag is +set and MinorDynInst::predictedTarget is set to the predicted target PC value. +The predicted branch instruction is then packed into Fetch2%'s output vector, +the prediction sequence number is incremented, and the branch is communicated +to Fetch1. + +After signalling a prediction, Fetch2 will discard its input buffer contents +and will reject any new lines which have the same stream sequence number as +that branch but have a different prediction sequence number. This allows +following sequentially fetched lines to be rejected without ignoring new lines +generated by a change of stream indicated from a 'real' branch from Execute +(which will have a new stream sequence number). + +The program counter value provided to Fetch2 by Fetch1 packets is only updated +when there is a change of stream. Fetch2::havePC indicates whether the PC +will be picked up from the next processed input line. Fetch2::havePC is +necessary to allow line-wrapping instructions to be tracked through decode. + +Branches (and instructions predicted to branch) which are processed by Execute +will generate BranchData (pipe_data.hh) data explaining the outcome of the +branch which is sent forwards to Fetch1 and Fetch2. Fetch1 uses this data to +change stream (and update its stream sequence number and address for new +lines). Fetch2 uses it to update the branch predictor. Minor does not +communicate branch data to the branch predictor for instructions which are +discarded on the way to commit. + +BranchData::BranchReason (pipe_data.hh) encodes the possible branch scenarios: + +<table> +<tr> + <td>Branch enum val.</td> + <td>In Execute</td> + <td>Fetch1 reaction</td> + <td>Fetch2 reaction</td> +</tr> +<tr> + <td>NoBranch</td> + <td>(output bubble data)</td> + <td>-</td> + <td>-</td> +</tr> +<tr> + <td>CorrectlyPredictedBranch</td> + <td>Predicted, taken</td> + <td>-</td> + <td>Update BP as taken branch</td> +</tr> +<tr> + <td>UnpredictedBranch</td> + <td>Not predicted, taken and was taken</td> + <td>New stream</td> + <td>Update BP as taken branch</td> +</tr> +<tr> + <td>BadlyPredictedBranch</td> + <td>Predicted, not taken</td> + <td>New stream to restore to old inst. source</td> + <td>Update BP as not taken branch</td> +</tr> +<tr> + <td>BadlyPredictedBranchTarget</td> + <td>Predicted, taken, but to a different target than predicted one</td> + <td>New stream</td> + <td>Update BTB to new target</td> +</tr> +<tr> + <td>SuspendThread</td> + <td>Hint to suspend fetching</td> + <td>Suspend fetch for this thread (branch to next inst. 
as wakeup fetch addr)</td>
+  <td>-</td>
+</tr>
+<tr>
+  <td>Interrupt</td>
+  <td>Interrupt detected</td>
+  <td>New stream</td>
+  <td>-</td>
+</tr>
+</table>
+
+The parameter decodeInputWidth sets the number of instructions which can be
+packed into the output per cycle.  If the parameter fetch2CycleInput is true,
+Fetch2 can try to take instructions from more than one entry in its input
+buffer per cycle.
+
+\subsection decode Decode stage
+
+Decode takes a vector of instructions from Fetch2 (via its input buffer) and
+decomposes those instructions into micro-ops (if necessary) and packs them
+into its output instruction vector.
+
+The parameter executeInputWidth sets the number of instructions which can be
+packed into the output per cycle.  If the parameter decodeCycleInput is true,
+Decode can try to take instructions from more than one entry in its input
+buffer per cycle.
+
+\subsection execute Execute stage
+
+Execute provides all the instruction execution and memory access mechanisms.
+An instruction's passage through Execute can take multiple cycles with its
+precise timing modelled by a functional unit pipeline FIFO.
+
+A vector of instructions (possibly including fault 'instructions') is provided
+to Execute by Decode and can be queued in the Execute input buffer before
+being issued.  Setting the parameter executeCycleInput allows Execute to
+examine more than one input buffer entry (more than one instruction vector).
+The number of instructions in the input vector can be set with
+executeInputWidth and the depth of the input buffer can be set with the
+parameter executeInputBufferSize.
+
+\subsubsection fus Functional units
+
+The Execute stage contains pipelines for each functional unit comprising the
+computational core of the CPU.  Functional units are configured via the
+executeFuncUnits parameter.  Each functional unit has a number of instruction
+classes it supports, a stated delay between instruction issues, a delay from
+instruction issue to (possible) commit, and an optional timing annotation
+capable of more complicated timing.
+
+Each active cycle, Execute::evaluate performs this action:
+
+\verbatim
+    Execute::evaluate:
+        push input to inputBuffer
+        setup references to input/output data slots and branch output slot
+
+        step D-cache interface queues (similar to Fetch1)
+
+        if interrupt posted:
+            take interrupt (signalling branch to Fetch1/Fetch2)
+        else
+            commit instructions
+            issue new instructions
+
+        advance functional unit pipelines
+
+        reactivate Execute if the unit is still active
+
+        commit the push to the inputBuffer if that data hasn't all been used
+\endverbatim
+
+\subsubsection fifos Functional unit FIFOs
+
+Functional units are implemented as SelfStallingPipelines (stage.hh).  These
+are TimeBuffer FIFOs with two distinct 'push' and 'pop' wires.  They respond
+to SelfStallingPipeline::advance in the same way as TimeBuffers <b>unless</b>
+there is data at the far, 'pop', end of the FIFO.  A 'stalled' flag is
+provided for signalling stalling and to allow a stall to be cleared.  The
+intention is to provide a pipeline for each functional unit which will never
+advance an instruction out of that pipeline until it has been processed and
+the pipeline is explicitly unstalled.
+
+The actions 'issue', 'commit', and 'advance' act on the functional units.
+
+\subsubsection issue Issue
+
+Issuing instructions involves iterating over both the input buffer
+instructions and the heads of the functional units to try and issue
+instructions in order.
+
+The actions 'issue', 'commit', and 'advance' act on the functional units.
+
+\subsubsection issue Issue
+
+Issuing instructions involves iterating over both the input buffer
+instructions and the heads of the functional units to try to issue
+instructions in order.  The number of instructions which can be issued each
+cycle is limited by the parameter executeIssueLimit, how executeCycleInput is
+set, the availability of pipeline space, and the policy used to choose a
+pipeline in which the instruction can be issued.
+
+At present, the only issue policy is strict round-robin visiting of each
+pipeline with the given instructions in sequence.  For greater flexibility,
+better (and more specific) policies will need to be made possible.
+
+Memory operation instructions traverse their functional units to perform their
+EA calculations.  On 'commit', the ExecContext::initiateAcc execution phase is
+performed and any memory access is issued to the LSQ (via
+ExecContext::{read,write}Mem calling LSQ::pushRequest).
+
+Note that faults are issued as if they are instructions and can (currently) be
+issued to *any* functional unit.
+
+Every issued instruction is also pushed into the Execute::inFlightInsts queue.
+Memory reference instructions are additionally pushed into the
+Execute::inFUMemInsts queue.
+
+\subsubsection commit Commit
+
+Instructions are committed by examining the head of the Execute::inFlightInsts
+queue (which is decorated with the functional unit number to which the
+instruction was issued).  Instructions which can then be found in their
+functional units are executed and popped from Execute::inFlightInsts.
+
+Memory operation instructions are committed into the memory queues (as
+described above) and exit their functional unit pipeline but are not popped
+from the Execute::inFlightInsts queue.  The Execute::inFUMemInsts queue
+provides ordering to memory operations as they pass through the functional
+units (maintaining issue order).  On entering the LSQ, instructions are popped
+from Execute::inFUMemInsts.
+
+If the parameter executeAllowEarlyMemoryIssue is set, memory operations can be
+sent from their FU to the LSQ before reaching the head of
+Execute::inFlightInsts but after their dependencies are met.
+MinorDynInst::instToWaitFor is marked up with the execSeqNum of the latest
+dependent instruction which must be committed before the memory operation can
+progress to the LSQ.
+
+Once a memory response is available (by testing the head of
+Execute::inFlightInsts against LSQ::findResponse), commit will process that
+response (ExecContext::completeAcc) and pop the instruction from
+Execute::inFlightInsts.
+
+Any branch, fault or interrupt will cause a stream sequence number change and
+signal a branch to Fetch1/Fetch2.  Only instructions with the current stream
+sequence number will be issued and/or committed.
+
+\subsubsection advance Advance
+
+All non-stalled pipelines are advanced and may, thereafter, become stalled.
+Potential activity in the next cycle is signalled if there are any
+instructions remaining in any pipeline.
+
+\subsubsection sb Scoreboard
+
+The scoreboard (Scoreboard) is used to control instruction issue.  It contains
+a count of the number of in-flight instructions which will write to each
+general purpose CPU integer or float register.  An instruction will only be
+issued when the scoreboard's count is 0 for every one of that instruction's
+source registers.
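+
+In outline, this issue-gating behaviour is as in the following sketch
+(hypothetical names and simplified register indexing; Minor's actual
+Scoreboard also tracks estimated result delivery times, as described below):
+
+\verbatim
+    #include <vector>
+
+    struct SimpleScoreboard
+    {
+        /* In-flight instructions which will write each register */
+        std::vector<unsigned> numWriters;
+
+        SimpleScoreboard(unsigned numRegs) : numWriters(numRegs, 0) { }
+
+        /* Issue is allowed only if no in-flight instruction will write
+         * any of the candidate instruction's source registers */
+        bool canIssue(const std::vector<unsigned> &srcRegs) const
+        {
+            for (unsigned i = 0; i < srcRegs.size(); i++) {
+                if (numWriters[srcRegs[i]] != 0)
+                    return false;
+            }
+            return true;
+        }
+
+        /* On issue: count the instruction against its destinations */
+        void markupIssue(const std::vector<unsigned> &destRegs)
+        {
+            for (unsigned i = 0; i < destRegs.size(); i++)
+                numWriters[destRegs[i]]++;
+        }
+
+        /* On commit (or memory response commit): release destinations */
+        void clearCommit(const std::vector<unsigned> &destRegs)
+        {
+            for (unsigned i = 0; i < destRegs.size(); i++)
+                numWriters[destRegs[i]]--;
+        }
+    };
+\endverbatim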
+
+Once an instruction is issued, the scoreboard count for each of the
+instruction's destination registers is incremented.
+
+The estimated delivery time of the instruction's result is marked up in the
+scoreboard by adding the length of the issued-to FU to the current time.  The
+timings parameter on each FU provides a list of additional rules for
+calculating the delivery time.  These are documented in the parameter comments
+in MinorCPU.py.
+
+On commit (or, for memory operations, on memory response commit), the
+scoreboard counters for an instruction's destination registers are
+decremented.
+
+\subsubsection ifi Execute::inFlightInsts
+
+The Execute::inFlightInsts queue will always contain all instructions in
+flight in Execute in the correct issue order.  Execute::issue is the only
+process which will push an instruction into the queue.  Execute::commit is the
+only process that can pop an instruction.
+
+\subsubsection lsq LSQ
+
+The LSQ can support multiple outstanding transactions to memory in a number of
+conservative cases.
+
+There are three queues to contain requests: requests, transfers, and the store
+buffer.  The requests and transfers queues operate in a similar manner to the
+queues in Fetch1.  The store buffer is used to decouple the delay of
+completing store operations from following loads.
+
+Requests are issued to the DTLB as their instructions leave their functional
+unit.  At the head of requests, cacheable load requests can be sent to memory
+and on to the transfers queue.  Cacheable stores will be passed to transfers
+unprocessed and progress through that queue maintaining order with other
+transactions.
+
+The conditions in LSQ::tryToSendToTransfers dictate when requests can
+be sent to memory.
+
+All uncacheable transactions, split transactions and locked transactions are
+processed in order at the head of requests.  Additionally, store results
+residing in the store buffer can have their data forwarded to cacheable loads
+(removing the need to perform a read from memory), but no cacheable load can
+be issued to the transfers queue until that queue's stores have drained into
+the store buffer.
+
+At the end of transfers, requests which are LSQ::LSQRequest::Complete (are
+faulting, are cacheable stores, or have been sent to memory and received a
+response) can be picked off by Execute and committed
+(ExecContext::completeAcc) and, for stores, sent to the store buffer.
+
+Barrier instructions do not prevent cacheable loads from progressing to memory
+but do cause a stream change which will discard that load.  Stores will not be
+committed to the store buffer if they are in the shadow of the barrier but
+before the new instruction stream has arrived at Execute.  As all other memory
+transactions are delayed at the end of the requests queue until they are at
+the head of Execute::inFlightInsts, they will be discarded by any barrier
+stream change.
+
+After commit, LSQ::BarrierDataRequest requests are inserted into the
+store buffer to track each barrier until all preceding memory transactions
+have drained from the store buffer.  No further memory transactions will be
+issued from the ends of FUs until after the barrier has drained.
+
+\subsubsection drain Draining
+
+Draining is mostly handled by the Execute stage.  When initiated by calling
+MinorCPU::drain, Pipeline::evaluate checks the draining status of each unit
+each cycle and keeps the pipeline active until draining is complete.  It is
+Pipeline that signals the completion of draining.
Execute is triggered by +MinorCPU::drain and starts stepping through its Execute::DrainState state +machine, starting from state Execute::NotDraining, in this order: + +<table> +<tr> + <td><b>State</b></td> + <td><b>Meaning</b></td> +</tr> +<tr> + <td>Execute::NotDraining</td> + <td>Not trying to drain, normal execution</td> +</tr> +<tr> + <td>Execute::DrainCurrentInst</td> + <td>Draining micro-ops to complete inst.</td> +</tr> +<tr> + <td>Execute::DrainHaltFetch</td> + <td>Halt fetching instructions</td> +</tr> +<tr> + <td>Execute::DrainAllInsts</td> + <td>Discarding all instructions presented</td> +</tr> +</table> + +When complete, a drained Execute unit will be in the Execute::DrainAllInsts +state where it will continue to discard instructions but has no knowledge of +the drained state of the rest of the model. + +\section debug Debug options + +The model provides a number of debug flags which can be passed to gem5 with +the --debug-flags option. + +The available flags are: + +<table> +<tr> + <td><b>Debug flag</b></td> + <td><b>Unit which will generate debugging output</b></td> +</tr> +<tr> + <td>Activity</td> + <td>Debug ActivityMonitor actions</td> +</tr> +<tr> + <td>Branch</td> + <td>Fetch2 and Execute branch prediction decisions</td> +</tr> +<tr> + <td>MinorCPU</td> + <td>CPU global actions such as wakeup/thread suspension</td> +</tr> +<tr> + <td>Decode</td> + <td>Decode</td> +</tr> +<tr> + <td>MinorExec</td> + <td>Execute behaviour</td> +</tr> +<tr> + <td>Fetch</td> + <td>Fetch1 and Fetch2</td> +</tr> +<tr> + <td>MinorInterrupt</td> + <td>Execute interrupt handling</td> +</tr> +<tr> + <td>MinorMem</td> + <td>Execute memory interactions</td> +</tr> +<tr> + <td>MinorScoreboard</td> + <td>Execute scoreboard activity</td> +</tr> +<tr> + <td>MinorTrace</td> + <td>Generate MinorTrace cyclic state trace output (see below)</td> +</tr> +<tr> + <td>MinorTiming</td> + <td>MinorTiming instruction timing modification operations</td> +</tr> +</table> + +The group flag Minor enables all of the flags beginning with Minor. + +\section trace MinorTrace and minorview.py + +The debug flag MinorTrace causes cycle-by-cycle state data to be printed which +can then be processed and viewed by the minorview.py tool. This output is +very verbose and so it is recommended it only be used for small examples. + +\subsection traceformat MinorTrace format + +There are three types of line outputted by MinorTrace: + +\subsubsection state MinorTrace - Ticked unit cycle state + +For example: + +\verbatim + 110000: system.cpu.dcachePort: MinorTrace: state=MemoryRunning in_tlb_mem=0/0 +\endverbatim + +For each time step, the MinorTrace flag will cause one MinorTrace line to be +printed for every named element in the model. + +\subsubsection traceunit MinorInst - summaries of instructions issued by \ + Decode + +For example: + +\verbatim + 140000: system.cpu.execute: MinorInst: id=0/1.1/1/1.1 addr=0x5c \ + inst=" mov r0, #0" class=IntAlu +\endverbatim + +MinorInst lines are currently only generated for instructions which are +committed. + +\subsubsection tracefetch1 MinorLine - summaries of line fetches issued by \ + Fetch1 + +For example: + +\verbatim + 92000: system.cpu.icachePort: MinorLine: id=0/1.1/1 size=36 \ + vaddr=0x5c paddr=0x5c +\endverbatim + +\subsection minorview minorview.py + +Minorview (util/minorview.py) can be used to visualise the data created by +MinorTrace. 
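+
+For example, a trace can be captured and then viewed with a pair of commands
+along these lines (the config script and paths are illustrative;
+--debug-flags and --debug-file are standard gem5 options, and --debug-file
+writes into the output directory, m5out by default):
+
+\verbatim
+    gem5.opt --debug-flags=MinorTrace --debug-file=minor.trace <config script>
+    util/minorview.py m5out/minor.trace
+\endverbatim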
+
+\verbatim
+usage: minorview.py [-h] [--picture picture-file] [--prefix name]
+                    [--start-time time] [--end-time time] [--mini-views]
+                    event-file
+
+Minor visualiser
+
+positional arguments:
+  event-file
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --picture picture-file
+                        markup file containing blob information (default:
+                        <minorview-path>/minor.pic)
+  --prefix name         name prefix in trace for CPU to be visualised
+                        (default: system.cpu)
+  --start-time time     time of first event to load from file
+  --end-time time       time of last event to load from file
+  --mini-views          show tiny views of the next 10 time steps
+\endverbatim
+
+Raw debugging output can be passed to minorview.py as the event-file.  It
+will pick out the MinorTrace lines; other lines in which units in the
+simulation are named (such as system.cpu.dcachePort in the above example)
+will appear as 'comments' when those units are clicked on in the visualiser.
+
+Clicking on a unit which contains instructions or lines will bring up a speech
+bubble giving extra information derived from the MinorInst/MinorLine lines.
+
+--start-time and --end-time allow only sections of debug files to be loaded.
+
+--prefix supplies the name prefix of the CPU to be inspected.  This defaults
+to 'system.cpu'.
+
+In the visualiser, the buttons Start, End, Back, Forward, Play and Stop can be
+used to control the displayed simulation time.
+
+The diagonally striped coloured blocks show the InstId of the instruction or
+line they represent.  Note that lines in Fetch1 and f1ToF2.F only show the id
+fields of a line and that instructions in Fetch2, f2ToD, and
+decode.inputBuffer do not yet have execute sequence numbers.  The T/S.P/L/F.E
+buttons can be used to toggle parts of InstId on and off to make it easier to
+understand the display.  Useful combinations are:
+
+<table>
+<tr>
+  <td><b>Combination</b></td>
+  <td><b>Reason</b></td>
+</tr>
+<tr>
+  <td>E</td>
+  <td>just show the final execute sequence number</td>
+</tr>
+<tr>
+  <td>F/E</td>
+  <td>show the instruction-related numbers</td>
+</tr>
+<tr>
+  <td>S/P</td>
+  <td>show just the stream-related numbers (watch the stream sequence
+  change with branches and not change with predicted branches)</td>
+</tr>
+<tr>
+  <td>S/E</td>
+  <td>show instructions and their stream</td>
+</tr>
+</table>
+
+The key to the right shows all the displayable colours (some of the colour
+choices are quite bad!):
+
+<table>
+<tr>
+  <td><b>Symbol</b></td>
+  <td><b>Meaning</b></td>
+</tr>
+<tr>
+  <td>U</td>
+  <td>Unknown data</td>
+</tr>
+<tr>
+  <td>B</td>
+  <td>Blocked stage</td>
+</tr>
+<tr>
+  <td>-</td>
+  <td>Bubble</td>
+</tr>
+<tr>
+  <td>E</td>
+  <td>Empty queue slot</td>
+</tr>
+<tr>
+  <td>R</td>
+  <td>Reserved queue slot</td>
+</tr>
+<tr>
+  <td>F</td>
+  <td>Fault</td>
+</tr>
+<tr>
+  <td>r</td>
+  <td>Read (used as the leftmost stripe on data in the dcachePort)</td>
+</tr>
+<tr>
+  <td>w</td>
+  <td>Write (used in the same way)</td>
+</tr>
+<tr>
+  <td>0 to 9</td>
+  <td>last decimal digit of the corresponding data</td>
+</tr>
+</table>
+
+\verbatim
+
+    ,---------------.  .--------------.        *U
+    | |=|->|=|->|=| |  ||=|||->||->|| |        *-  <- Fetch queues/LSQ
+    `---------------'  `--------------'        *R
+     === ======                                *w  <- Activity/Stage activity
+                       ,--------------.        *1
+    ,--. ,. ,.         | ============ |        *3  <- Scoreboard
+    | |-\[]-\||-\[]-\||-\[]-\| ============ |  *5  <- Execute::inFlightInsts
+    | | :[] :||-/[]-/||-/[]-/| -.  -------- |  *7
+    | |-/[]-/|| ^ || | |  --------- |          *9
+    | | || | || | |  ------ |
+[]->| | ->|| | || | |  ---- |
+    | |<-[]<-||<-+-<-||<-[]<-| |  ------ |->[]  <- Execute to Fetch1,
+    '--` `' ^ `' | -'  ------ |                    Fetch2 branch data
+     ---. | ---.  `--------------'
+     ---' | ---'     ^            ^
+       | ^ |         `------------ Execute
+    MinorBuffer ----' input  `-------------------- Execute input buffer
+                      buffer
+\endverbatim
+
+Stages show the colours of the instructions currently being
+generated/processed.
+
+Forward FIFOs between stages show the data being pushed into them at the
+current tick (to the left), the data in transit, and the data available at
+their outputs (to the right).
+
+The backwards FIFO between Fetch2 and Fetch1 shows branch prediction data.
+
+In general, all displayed data is correct at the end of a cycle's activity at
+the time indicated but before the inter-stage FIFOs are ticked.  Each FIFO
+has, therefore, an extra slot to show the asserted new input data, as well as
+all the data currently within the FIFO.
+
+Input buffers for each stage are shown below the corresponding stage and show
+the contents of those buffers as horizontal strips.  Strips marked as reserved
+(cyan by default) are reserved to be filled by the previous stage.  An input
+buffer with all reserved or occupied slots will, therefore, block the previous
+stage from generating output.
+
+Fetch queues and the LSQ show the lines/instructions in the queues of each
+interface and show the number of lines/instructions in TLB and memory in the
+two striped colours at the top of their frames.
+
+Inside Execute, the horizontal bars represent the individual FU pipelines.
+The vertical bar to the left is the input buffer and the bar to the right, the
+instructions committed this cycle.  The background of Execute shows
+instructions which are being committed this cycle in their original FU
+pipeline positions.
+
+The strip at the top of the Execute block shows the current streamSeqNum that
+Execute is committing.  A similar stripe at the top of Fetch1 shows that
+stage's expected streamSeqNum and the stripe at the top of Fetch2 shows its
+issuing predictionSeqNum.
+
+The scoreboard shows the number of instructions in flight which will commit a
+result to the register in the position shown.  The scoreboard contains slots
+for each integer and floating point register.
+
+The Execute::inFlightInsts queue shows all the instructions in flight in
+Execute with the oldest instruction (the next instruction to be committed) to
+the right.
+
+'Stage activity' shows the signalled activity (as E/1) for each stage (with
+CPU miscellaneous activity to the left).
+
+'Activity' shows a count of stage and pipe activity.
+
+\subsection picformat minor.pic format
+
+The minor.pic file (src/minor/minor.pic) describes the layout of the
+model's blocks on the visualiser.  Its format is described in the supplied
+minor.pic file.
+ +*/ + +} diff --git a/src/sim/SConscript b/src/sim/SConscript index 5a5c1ab8a..9f9022f30 100644 --- a/src/sim/SConscript +++ b/src/sim/SConscript @@ -32,6 +32,7 @@ Import('*') SimObject('BaseTLB.py') SimObject('ClockedObject.py') +SimObject('TickedObject.py') SimObject('Root.py') SimObject('ClockDomain.py') SimObject('VoltageDomain.py') @@ -51,6 +52,7 @@ Source('serialize.cc') Source('drain.cc') Source('sim_events.cc') Source('sim_object.cc') +Source('ticked_object.cc') Source('simulate.cc') Source('stat_control.cc') Source('clock_domain.cc') diff --git a/src/sim/TickedObject.py b/src/sim/TickedObject.py new file mode 100644 index 000000000..a566aac92 --- /dev/null +++ b/src/sim/TickedObject.py @@ -0,0 +1,43 @@ +# Copyright (c) 2014 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Andrew Bardsley + +from ClockedObject import ClockedObject + +class TickedObject(ClockedObject): + type = 'TickedObject' + abstract = True + cxx_header = "sim/ticked_object.hh" diff --git a/src/sim/ticked_object.cc b/src/sim/ticked_object.cc new file mode 100644 index 000000000..22a149388 --- /dev/null +++ b/src/sim/ticked_object.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2013-2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Andrew Bardsley + */ + +#include "sim/ticked_object.hh" + +Ticked::Ticked(ClockedObject &object_, + Stats::Scalar *imported_num_cycles, + Event::Priority priority) : + object(object_), + event(*this, priority), + running(false), + lastStopped(0), + /* Allocate numCycles if an external stat wasn't passed in */ + numCyclesLocal((imported_num_cycles ? NULL : new Stats::Scalar)), + numCycles((imported_num_cycles ? 
*imported_num_cycles :
+        *numCyclesLocal))
+{ }
+
+void
+Ticked::regStats()
+{
+    if (numCyclesLocal) {
+        numCycles
+            .name(object.name() + ".numCycles")
+            .desc("Number of cycles that the object ticked or was stopped");
+    }
+
+    tickCycles
+        .name(object.name() + ".tickCycles")
+        .desc("Number of cycles that the object actually ticked");
+
+    idleCycles
+        .name(object.name() + ".idleCycles")
+        .desc("Total number of cycles that the object has spent stopped");
+    idleCycles = numCycles - tickCycles;
+}
+
+void
+Ticked::serialize(std::ostream &os)
+{
+    uint64_t lastStoppedUint = lastStopped;
+
+    paramOut(os, "lastStopped", lastStoppedUint);
+}
+
+void
+Ticked::unserialize(Checkpoint *cp, const std::string &section)
+{
+    uint64_t lastStoppedUint;
+
+    paramIn(cp, section, "lastStopped", lastStoppedUint);
+
+    lastStopped = Cycles(lastStoppedUint);
+}
+
+TickedObject::TickedObject(TickedObjectParams *params,
+    Event::Priority priority) :
+    ClockedObject(params),
+    /* Make numCycles in Ticked */
+    Ticked(*this, NULL, priority)
+{ }
+
+void
+TickedObject::regStats()
+{
+    Ticked::regStats();
+}
+
+void
+TickedObject::serialize(std::ostream &os)
+{
+    Ticked::serialize(os);
+    ClockedObject::serialize(os);
+}
+
+void
+TickedObject::unserialize(Checkpoint *cp, const std::string &section)
+{
+    Ticked::unserialize(cp, section);
+    ClockedObject::unserialize(cp, section);
+}
diff --git a/src/sim/ticked_object.hh b/src/sim/ticked_object.hh
new file mode 100644
index 000000000..5bca92443
--- /dev/null
+++ b/src/sim/ticked_object.hh
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2013-2014 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andrew Bardsley
+ */
+
+/**
+ * @file
+ *
+ * Base classes for ClockedObjects which have evaluate functions to
+ * look like clock ticking operations.  TickedObject attaches gem5's event
+ * queue to Ticked to apply actual scheduling.
+ */
+
+#ifndef __SIM_TICKED_OBJECT_HH__
+#define __SIM_TICKED_OBJECT_HH__
+
+#include "params/TickedObject.hh"
+#include "sim/clocked_object.hh"
+
+/** Ticked attaches gem5's event queue/scheduler to evaluate
+ *  calls and provides a start/stop interface to ticking.
+ *
+ *  Ticked is not a ClockedObject but can be attached to one by
+ *  inheritance and by calling regStats, serialize/unserialize */
+class Ticked
+{
+  protected:
+    /** An event to call process periodically */
+    class ClockEvent : public Event
+    {
+      public:
+        Ticked &owner;
+
+        ClockEvent(Ticked &owner_, Priority priority) :
+            Event(priority),
+            owner(owner_)
+        { }
+
+        /** Evaluate and reschedule */
+        void
+        process()
+        {
+            ++owner.tickCycles;
+            ++owner.numCycles;
+            owner.evaluate();
+            if (owner.running) {
+                owner.object.schedule(this,
+                    owner.object.clockEdge(Cycles(1)));
+            }
+        }
+    };
+
+    friend class ClockEvent;
+
+    /** ClockedObject who is responsible for this Ticked's actions/stats */
+    ClockedObject &object;
+
+    /** The single instance of ClockEvent used */
+    ClockEvent event;
+
+    /** Have I been started and not yet stopped? */
+    bool running;
+
+    /** Time of last stop event to calculate run time */
+    Cycles lastStopped;
+
+  private:
+    /** Locally allocated stats */
+    Stats::Scalar *numCyclesLocal;
+
+  protected:
+    /** Total number of cycles either ticked or spent stopped */
+    Stats::Scalar &numCycles;
+
+    /** Number of cycles ticked */
+    Stats::Scalar tickCycles;
+
+    /** Number of cycles stopped */
+    Stats::Formula idleCycles;
+
+  public:
+    Ticked(ClockedObject &object_,
+        Stats::Scalar *imported_num_cycles = NULL,
+        Event::Priority priority = Event::CPU_Tick_Pri);
+
+    virtual ~Ticked() { }
+
+    /** Register {num,tick}Cycles if necessary.  If numCycles is
+     *  imported, be sure to register it *before* calling this regStats */
+    void regStats();
+
+    /** Start ticking */
+    void
+    start()
+    {
+        if (!running) {
+            if (!event.scheduled())
+                object.schedule(event, object.clockEdge(Cycles(1)));
+            running = true;
+            numCycles += cyclesSinceLastStopped();
+        }
+    }
+
+    /** How long have we been stopped for? */
+    Cycles
+    cyclesSinceLastStopped() const
+    {
+        return object.curCycle() - lastStopped;
+    }
+
+    /** Reset stopped time to current time */
+    void
+    resetLastStopped()
+    {
+        lastStopped = object.curCycle();
+    }
+
+    /** Cancel the next tick event and issue no more */
+    void
+    stop()
+    {
+        if (running) {
+            if (event.scheduled())
+                object.deschedule(event);
+            running = false;
+            resetLastStopped();
+        }
+    }
+
+    /** Checkpoint lastStopped */
+    void serialize(std::ostream &os);
+    void unserialize(Checkpoint *cp, const std::string &section);
+
+    /** Action to call on the clock tick */
+    virtual void evaluate() = 0;
+};
+
+/** TickedObject attaches Ticked to ClockedObject and can be used as
+ *  a base class where ticked operation is required */
+class TickedObject : public ClockedObject, public Ticked
+{
+  public:
+    TickedObject(TickedObjectParams *params,
+        Event::Priority priority = Event::CPU_Tick_Pri);
+
+    /** Disambiguate to make these functions overload correctly */
+    using ClockedObject::regStats;
+    using ClockedObject::serialize;
+    using ClockedObject::unserialize;
+
+    /** Pass on regStats, serialize etc. onto Ticked */
+    void regStats();
+    void serialize(std::ostream &os);
+    void unserialize(Checkpoint *cp, const std::string &section);
+};
+
+#endif /* __SIM_TICKED_OBJECT_HH__ */
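
As a usage illustration, a model built on this header would subclass
TickedObject and implement evaluate(), which is then called once per clock
cycle between start() and stop().  A minimal sketch (the MyTicker class is
hypothetical and not part of this commit; startup() is the standard SimObject
hook called when simulation begins):

    #include "sim/ticked_object.hh"

    /* Hypothetical unit which counts its own active cycles */
    class MyTicker : public TickedObject
    {
      public:
        unsigned tickCount;

        MyTicker(TickedObjectParams *params) :
            TickedObject(params),
            tickCount(0)
        { }

        /* Begin ticking once simulation starts */
        void startup() { start(); }

        /* Called on every clock edge while running */
        void evaluate() { tickCount++; }
    };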