From 166da650a3c864b31193ade893ed99e547c67644 Mon Sep 17 00:00:00 2001 From: Rekai Gonzalez-Alberquilla Date: Wed, 5 Apr 2017 13:24:23 -0500 Subject: arch: ISA parser additions of vector registers Reiley's update :) of the isa parser definitions. My addition of the vector element operand concept for the ISA parser. Nathanael's modification creating a hierarchy between vector registers and its constituencies to the isa parser. Some fixes/updates on top to consider instructions as vectors instead of floating when they use the VectorRF. Some counters added to all the models to keep faithful counts. Change-Id: Id8f162a525240dfd7ba884c5a4d9fa69f4050101 Reviewed-by: Andreas Sandberg Reviewed-on: https://gem5-review.googlesource.com/2706 Reviewed-by: Anthony Gutierrez Maintainer: Andreas Sandberg --- src/arch/alpha/faults.cc | 7 + src/arch/alpha/faults.hh | 13 ++ src/arch/alpha/isa/fp.isa | 3 + src/arch/arm/isa/insts/fp64.isa | 8 +- src/arch/arm/isa/insts/neon64.isa | 24 ++- src/arch/arm/isa/operands.isa | 293 ++++++++++++++++++++++++++---------- src/arch/arm/isa/templates/mem.isa | 10 +- src/arch/arm/isa/templates/pred.isa | 6 +- src/arch/isa_parser.py | 268 ++++++++++++++++++++++++++++++++- src/arch/sparc/faults.cc | 6 + src/arch/sparc/faults.hh | 1 + src/arch/sparc/isa/base.isa | 5 + src/cpu/StaticInstFlags.py | 2 + src/cpu/base_dyn_inst.hh | 1 + src/cpu/o3/commit.hh | 2 + src/cpu/o3/commit_impl.hh | 10 ++ src/cpu/o3/inst_queue.hh | 6 +- src/cpu/o3/inst_queue_impl.hh | 50 +++++- src/cpu/simple/base.cc | 16 ++ src/cpu/simple/exec_context.hh | 6 + src/cpu/static_inst.hh | 1 + 21 files changed, 626 insertions(+), 112 deletions(-) diff --git a/src/arch/alpha/faults.cc b/src/arch/alpha/faults.cc index 8a6e469f2..59d95000b 100644 --- a/src/arch/alpha/faults.cc +++ b/src/arch/alpha/faults.cc @@ -98,6 +98,13 @@ FaultName FloatEnableFault::_name = "fen"; FaultVect FloatEnableFault::_vect = 0x0581; FaultStat FloatEnableFault::_count; +/* We use the same fault vector, as for the 
guest system these should be the + * same, but for host purposes, having differentiation is helpful for + * debug/monitorization purposes. */ +FaultName VectorEnableFault::_name = "ven"; +FaultVect VectorEnableFault::_vect = 0x0581; +FaultStat VectorEnableFault::_count; + FaultName PalFault::_name = "pal"; FaultVect PalFault::_vect = 0x2001; FaultStat PalFault::_count; diff --git a/src/arch/alpha/faults.hh b/src/arch/alpha/faults.hh index 80e3ae5e1..07789a22e 100644 --- a/src/arch/alpha/faults.hh +++ b/src/arch/alpha/faults.hh @@ -299,6 +299,19 @@ class FloatEnableFault : public AlphaFault FaultStat & countStat() {return _count;} }; +class VectorEnableFault : public AlphaFault +{ + private: + static FaultName _name; + static FaultVect _vect; + static FaultStat _count; + + public: + FaultName name() const {return _name;} + FaultVect vect() {return _vect;} + FaultStat & countStat() {return _count;} +}; + class PalFault : public AlphaFault { private: diff --git a/src/arch/alpha/isa/fp.isa b/src/arch/alpha/isa/fp.isa index 6213c8e08..ea692aeef 100644 --- a/src/arch/alpha/isa/fp.isa +++ b/src/arch/alpha/isa/fp.isa @@ -50,6 +50,9 @@ output exec {{ } return fault; } + inline Fault checkVectorEnableFault(CPU_EXEC_CONTEXT *xc) { + return std::make_shared<VectorEnableFault>(); + } }}; output header {{ diff --git a/src/arch/arm/isa/insts/fp64.isa b/src/arch/arm/isa/insts/fp64.isa index 706f77fb0..a5e1085de 100644 --- a/src/arch/arm/isa/insts/fp64.isa +++ b/src/arch/arm/isa/insts/fp64.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2012-2013 ARM Limited +// Copyright (c) 2012-2013, 2016 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -123,9 +123,11 @@ let {{ exec_output += BasicExecute.subst(fmovCoreRegXIop); fmovUCoreRegXCode = vfp64EnabledCheckCode + ''' + /* Explicitly merge with previous value */ + AA64FpDestP0_uw = AA64FpDestP0_uw; + AA64FpDestP1_uw = AA64FpDestP1_uw; AA64FpDestP2_uw = XOp1_ud; - AA64FpDestP3_uw = 
XOp1_ud >> 32; - ''' + AA64FpDestP3_uw = XOp1_ud >> 32;''' fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp", { "code": fmovUCoreRegXCode, "op_class": "FloatMiscOp" }, []) diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa index 7c9040be3..4897e7c91 100644 --- a/src/arch/arm/isa/insts/neon64.isa +++ b/src/arch/arm/isa/insts/neon64.isa @@ -1,6 +1,6 @@ // -*- mode: c++ -*- -// Copyright (c) 2012-2013, 2015 ARM Limited +// Copyright (c) 2012-2013, 2015-2016 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -225,11 +225,16 @@ let {{ AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); ''' % { "reg" : reg, "destReg": destReg } destReg += 1 - if destCnt < 4 and not hi: # zero upper half - for reg in range(destCnt, 4): - eWalkCode += ''' - AA64FpDestP%(reg)d_uw = 0; - ''' % { "reg" : reg } + if destCnt < 4: + if hi: # Explicitly merge with lower half + for reg in range(0, destCnt): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = AA64FpDestP%(reg)d_uw;''' % { "reg" : reg } + else: # zero upper half + for reg in range(destCnt, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0;''' % { "reg" : reg } + iop = InstObjParams(name, Name, "DataX2RegImmOp" if byElem else "DataX2RegOp", { "code": eWalkCode, @@ -429,11 +434,16 @@ let {{ AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); ''' % { "reg" : reg, "destReg": destReg } destReg += 1 - if not hi: + if hi: + for reg in range(0, 2): # Explicitly merge with the lower half + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = AA64FpDestP%(reg)d_uw;''' % { "reg" : reg } + else: for reg in range(2, 4): # zero upper half eWalkCode += ''' AA64FpDestP%(reg)d_uw = 0; ''' % { "reg" : reg } + iop = InstObjParams(name, Name, "DataX1RegImmOp" if hasImm else "DataX1RegOp", { "code": eWalkCode, diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa index e48c154d4..5898075ab 100644 --- 
a/src/arch/arm/isa/operands.isa +++ b/src/arch/arm/isa/operands.isa @@ -1,5 +1,5 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2014 ARM Limited +// Copyright (c) 2010-2014, 2016 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -49,7 +49,10 @@ def operand_types {{ 'ud' : 'uint64_t', 'tud' : 'Twin64_t', 'sf' : 'float', - 'df' : 'double' + 'df' : 'double', + 'vc' : 'TheISA::VecRegContainer', + # For operations that are implemented as a template + 'x' : 'TPElem', }}; let {{ @@ -117,6 +120,15 @@ let {{ srtMode = 1 srtEPC = 0 + def vectorElem(idx, elem): + return ('VecElem', 'sf', (idx, elem), 'IsVectorElem', srtNormal) + + def vectorReg(idx, elems = None): + return ('VecReg', 'vc', (idx, elems) , 'IsVector', srtNormal) + + def vectorRegElem(elem, ext = 'sf', zeroing = False): + return (elem, ext, zeroing) + def floatReg(idx): return ('FloatReg', 'sf', idx, 'IsFloating', srtNormal) @@ -297,86 +309,203 @@ def operands {{ 'FpOp2P3': floatReg('(op2 + 3)'), # Create AArch64 unpacked view of the FP registers - 'AA64FpOp1P0': floatReg('((op1 * 4) + 0)'), - 'AA64FpOp1P1': floatReg('((op1 * 4) + 1)'), - 'AA64FpOp1P2': floatReg('((op1 * 4) + 2)'), - 'AA64FpOp1P3': floatReg('((op1 * 4) + 3)'), - 'AA64FpOp2P0': floatReg('((op2 * 4) + 0)'), - 'AA64FpOp2P1': floatReg('((op2 * 4) + 1)'), - 'AA64FpOp2P2': floatReg('((op2 * 4) + 2)'), - 'AA64FpOp2P3': floatReg('((op2 * 4) + 3)'), - 'AA64FpOp3P0': floatReg('((op3 * 4) + 0)'), - 'AA64FpOp3P1': floatReg('((op3 * 4) + 1)'), - 'AA64FpOp3P2': floatReg('((op3 * 4) + 2)'), - 'AA64FpOp3P3': floatReg('((op3 * 4) + 3)'), - 'AA64FpDestP0': floatReg('((dest * 4) + 0)'), - 'AA64FpDestP1': floatReg('((dest * 4) + 1)'), - 'AA64FpDestP2': floatReg('((dest * 4) + 2)'), - 'AA64FpDestP3': floatReg('((dest * 4) + 3)'), - 'AA64FpDest2P0': floatReg('((dest2 * 4) + 0)'), - 'AA64FpDest2P1': floatReg('((dest2 * 4) + 1)'), - 'AA64FpDest2P2': floatReg('((dest2 * 4) + 2)'), - 'AA64FpDest2P3': 
floatReg('((dest2 * 4) + 3)'), - - 'AA64FpOp1P0V0': floatReg('((((op1+0)) * 4) + 0)'), - 'AA64FpOp1P1V0': floatReg('((((op1+0)) * 4) + 1)'), - 'AA64FpOp1P2V0': floatReg('((((op1+0)) * 4) + 2)'), - 'AA64FpOp1P3V0': floatReg('((((op1+0)) * 4) + 3)'), - - 'AA64FpOp1P0V1': floatReg('((((op1+1)) * 4) + 0)'), - 'AA64FpOp1P1V1': floatReg('((((op1+1)) * 4) + 1)'), - 'AA64FpOp1P2V1': floatReg('((((op1+1)) * 4) + 2)'), - 'AA64FpOp1P3V1': floatReg('((((op1+1)) * 4) + 3)'), - - 'AA64FpOp1P0V2': floatReg('((((op1+2)) * 4) + 0)'), - 'AA64FpOp1P1V2': floatReg('((((op1+2)) * 4) + 1)'), - 'AA64FpOp1P2V2': floatReg('((((op1+2)) * 4) + 2)'), - 'AA64FpOp1P3V2': floatReg('((((op1+2)) * 4) + 3)'), - - 'AA64FpOp1P0V3': floatReg('((((op1+3)) * 4) + 0)'), - 'AA64FpOp1P1V3': floatReg('((((op1+3)) * 4) + 1)'), - 'AA64FpOp1P2V3': floatReg('((((op1+3)) * 4) + 2)'), - 'AA64FpOp1P3V3': floatReg('((((op1+3)) * 4) + 3)'), - - 'AA64FpOp1P0V0S': floatReg('((((op1+0)%32) * 4) + 0)'), - 'AA64FpOp1P1V0S': floatReg('((((op1+0)%32) * 4) + 1)'), - 'AA64FpOp1P2V0S': floatReg('((((op1+0)%32) * 4) + 2)'), - 'AA64FpOp1P3V0S': floatReg('((((op1+0)%32) * 4) + 3)'), - - 'AA64FpOp1P0V1S': floatReg('((((op1+1)%32) * 4) + 0)'), - 'AA64FpOp1P1V1S': floatReg('((((op1+1)%32) * 4) + 1)'), - 'AA64FpOp1P2V1S': floatReg('((((op1+1)%32) * 4) + 2)'), - 'AA64FpOp1P3V1S': floatReg('((((op1+1)%32) * 4) + 3)'), - - 'AA64FpOp1P0V2S': floatReg('((((op1+2)%32) * 4) + 0)'), - 'AA64FpOp1P1V2S': floatReg('((((op1+2)%32) * 4) + 1)'), - 'AA64FpOp1P2V2S': floatReg('((((op1+2)%32) * 4) + 2)'), - 'AA64FpOp1P3V2S': floatReg('((((op1+2)%32) * 4) + 3)'), - - 'AA64FpOp1P0V3S': floatReg('((((op1+3)%32) * 4) + 0)'), - 'AA64FpOp1P1V3S': floatReg('((((op1+3)%32) * 4) + 1)'), - 'AA64FpOp1P2V3S': floatReg('((((op1+3)%32) * 4) + 2)'), - 'AA64FpOp1P3V3S': floatReg('((((op1+3)%32) * 4) + 3)'), - - 'AA64FpDestP0V0': floatReg('((((dest+0)) * 4) + 0)'), - 'AA64FpDestP1V0': floatReg('((((dest+0)) * 4) + 1)'), - 'AA64FpDestP2V0': floatReg('((((dest+0)) * 
4) + 2)'), - 'AA64FpDestP3V0': floatReg('((((dest+0)) * 4) + 3)'), - - 'AA64FpDestP0V1': floatReg('((((dest+1)) * 4) + 0)'), - 'AA64FpDestP1V1': floatReg('((((dest+1)) * 4) + 1)'), - 'AA64FpDestP2V1': floatReg('((((dest+1)) * 4) + 2)'), - 'AA64FpDestP3V1': floatReg('((((dest+1)) * 4) + 3)'), - - 'AA64FpDestP0V0L': floatReg('((((dest+0)%32) * 4) + 0)'), - 'AA64FpDestP1V0L': floatReg('((((dest+0)%32) * 4) + 1)'), - 'AA64FpDestP2V0L': floatReg('((((dest+0)%32) * 4) + 2)'), - 'AA64FpDestP3V0L': floatReg('((((dest+0)%32) * 4) + 3)'), - - 'AA64FpDestP0V1L': floatReg('((((dest+1)%32) * 4) + 0)'), - 'AA64FpDestP1V1L': floatReg('((((dest+1)%32) * 4) + 1)'), - 'AA64FpDestP2V1L': floatReg('((((dest+1)%32) * 4) + 2)'), - 'AA64FpDestP3V1L': floatReg('((((dest+1)%32) * 4) + 3)'), + # Name ::= 'AA64Vec' OpSpec [LaneSpec] + # OpSpec ::= IOSpec [Index] [Plus] + # IOSpec ::= 'S' | 'D' + # Index ::= '0' | ... | '9' + # Plus ::= [PlusAmount] ['l'] + # PlusAmount ::= 'p' [PlusAmount] + # LaneSpec ::= 'L' Index + # + # All the constituents are hierarchically defined as part of the Vector + # Register they belong to + + 'AA64FpOp1': vectorReg('op1', + { + 'AA64FpOp1P0': vectorRegElem('0'), + 'AA64FpOp1P1': vectorRegElem('1'), + 'AA64FpOp1P2': vectorRegElem('2'), + 'AA64FpOp1P3': vectorRegElem('3'), + 'AA64FpOp1S': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpOp1D': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpOp1Q': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpOp2': vectorReg('op2', + { + 'AA64FpOp2P0': vectorRegElem('0'), + 'AA64FpOp2P1': vectorRegElem('1'), + 'AA64FpOp2P2': vectorRegElem('2'), + 'AA64FpOp2P3': vectorRegElem('3'), + 'AA64FpOp2S': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpOp2D': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpOp2Q': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpOp3': vectorReg('op3', + { + 'AA64FpOp3P0': vectorRegElem('0'), + 'AA64FpOp3P1': vectorRegElem('1'), + 'AA64FpOp3P2': vectorRegElem('2'), + 
'AA64FpOp3P3': vectorRegElem('3'), + 'AA64FpOp3S': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpOp3D': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpOp3Q': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpDest': vectorReg('dest', + { + 'AA64FpDestP0': vectorRegElem('0'), + 'AA64FpDestP1': vectorRegElem('1'), + 'AA64FpDestP2': vectorRegElem('2'), + 'AA64FpDestP3': vectorRegElem('3'), + 'AA64FpDestS': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpDestD': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpDestQ': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpDest2': vectorReg('dest2', + { + 'AA64FpDest2P0': vectorRegElem('0'), + 'AA64FpDest2P1': vectorRegElem('1'), + 'AA64FpDest2P2': vectorRegElem('2'), + 'AA64FpDest2P3': vectorRegElem('3'), + 'AA64FpDest2S': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpDest2D': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpDest2Q': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpOp1V0': vectorReg('op1', + { + 'AA64FpOp1P0V0': vectorRegElem('0'), + 'AA64FpOp1P1V0': vectorRegElem('1'), + 'AA64FpOp1P2V0': vectorRegElem('2'), + 'AA64FpOp1P3V0': vectorRegElem('3'), + 'AA64FpOp1SV0': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpOp1DV0': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpOp1QV0': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpOp1V1': vectorReg('op1+1', + { + 'AA64FpOp1P0V1': vectorRegElem('0'), + 'AA64FpOp1P1V1': vectorRegElem('1'), + 'AA64FpOp1P2V1': vectorRegElem('2'), + 'AA64FpOp1P3V1': vectorRegElem('3'), + 'AA64FpOp1SV1': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpOp1DV1': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpOp1QV1': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpOp1V2': vectorReg('op1+2', + { + 'AA64FpOp1P0V2': vectorRegElem('0'), + 'AA64FpOp1P1V2': vectorRegElem('1'), + 'AA64FpOp1P2V2': vectorRegElem('2'), + 'AA64FpOp1P3V2': vectorRegElem('3'), + 'AA64FpOp1SV2': vectorRegElem('0', 'sf', zeroing = True), + 
'AA64FpOp1DV2': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpOp1QV2': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpOp1V3': vectorReg('op1+3', + { + 'AA64FpOp1P0V3': vectorRegElem('0'), + 'AA64FpOp1P1V3': vectorRegElem('1'), + 'AA64FpOp1P2V3': vectorRegElem('2'), + 'AA64FpOp1P3V3': vectorRegElem('3'), + 'AA64FpOp1SV3': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpOp1DV3': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpOp1QV3': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpOp1V0S': vectorReg('(op1+0)%32', + { + 'AA64FpOp1P0V0S': vectorRegElem('0'), + 'AA64FpOp1P1V0S': vectorRegElem('1'), + 'AA64FpOp1P2V0S': vectorRegElem('2'), + 'AA64FpOp1P3V0S': vectorRegElem('3'), + 'AA64FpOp1SV0S': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpOp1DV0S': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpOp1QV0S': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpOp1V1S': vectorReg('(op1+1)%32', + { + 'AA64FpOp1P0V1S': vectorRegElem('0'), + 'AA64FpOp1P1V1S': vectorRegElem('1'), + 'AA64FpOp1P2V1S': vectorRegElem('2'), + 'AA64FpOp1P3V1S': vectorRegElem('3'), + 'AA64FpOp1SV1S': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpOp1DV1S': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpOp1QV1S': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpOp1V2S': vectorReg('(op1+2)%32', + { + 'AA64FpOp1P0V2S': vectorRegElem('0'), + 'AA64FpOp1P1V2S': vectorRegElem('1'), + 'AA64FpOp1P2V2S': vectorRegElem('2'), + 'AA64FpOp1P3V2S': vectorRegElem('3'), + 'AA64FpOp1SV2S': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpOp1DV2S': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpOp1QV2S': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpOp1V3S': vectorReg('(op1+3)%32', + { + 'AA64FpOp1P0V3S': vectorRegElem('0'), + 'AA64FpOp1P1V3S': vectorRegElem('1'), + 'AA64FpOp1P2V3S': vectorRegElem('2'), + 'AA64FpOp1P3V3S': vectorRegElem('3'), + 'AA64FpOp1SV3S': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpOp1DV3S': 
vectorRegElem('0', 'df', zeroing = True), + 'AA64FpOp1QV3S': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpDestV0': vectorReg('(dest+0)', + { + 'AA64FpDestP0V0': vectorRegElem('0'), + 'AA64FpDestP1V0': vectorRegElem('1'), + 'AA64FpDestP2V0': vectorRegElem('2'), + 'AA64FpDestP3V0': vectorRegElem('3'), + 'AA64FpDestSV0': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpDestDV0': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpDestQV0': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpDestV1': vectorReg('(dest+1)', + { + 'AA64FpDestP0V1': vectorRegElem('0'), + 'AA64FpDestP1V1': vectorRegElem('1'), + 'AA64FpDestP2V1': vectorRegElem('2'), + 'AA64FpDestP3V1': vectorRegElem('3'), + 'AA64FpDestSV1': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpDestDV1': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpDestQV1': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpDestV0L': vectorReg('(dest+0)%32', + { + 'AA64FpDestP0V0L': vectorRegElem('0'), + 'AA64FpDestP1V0L': vectorRegElem('1'), + 'AA64FpDestP2V0L': vectorRegElem('2'), + 'AA64FpDestP3V0L': vectorRegElem('3'), + 'AA64FpDestSV0L': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpDestDV0L': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpDestQV0L': vectorRegElem('0', 'tud', zeroing = True) + }), + + 'AA64FpDestV1L': vectorReg('(dest+1)%32', + { + 'AA64FpDestP0V1L': vectorRegElem('0'), + 'AA64FpDestP1V1L': vectorRegElem('1'), + 'AA64FpDestP2V1L': vectorRegElem('2'), + 'AA64FpDestP3V1L': vectorRegElem('3'), + 'AA64FpDestSV1L': vectorRegElem('0', 'sf', zeroing = True), + 'AA64FpDestDV1L': vectorRegElem('0', 'df', zeroing = True), + 'AA64FpDestQV1L': vectorRegElem('0', 'tud', zeroing = True) + }), #Abstracted control reg operands 'MiscDest': cntrlReg('dest'), diff --git a/src/arch/arm/isa/templates/mem.isa b/src/arch/arm/isa/templates/mem.isa index 51f598f50..a0942d151 100644 --- a/src/arch/arm/isa/templates/mem.isa +++ b/src/arch/arm/isa/templates/mem.isa @@ -1,6 +1,6 @@ // -*- 
mode:c++ -*- -// Copyright (c) 2010, 2012, 2014 ARM Limited +// Copyright (c) 2010, 2012, 2014, 2016 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -1150,7 +1150,7 @@ def template LoadRegConstructor {{ #if %(use_uops)d assert(numMicroops >= 2); uops = new StaticInstPtr[numMicroops]; - if (_dest == INTREG_PC && !isFloating()) { + if (_dest == INTREG_PC && !isFloating() && !isVector()) { IntRegIndex wbIndexReg = index; uops[0] = new %(acc_name)s(machInst, INTREG_UREG0, _base, _add, _shiftAmt, _shiftType, _index); @@ -1187,7 +1187,7 @@ def template LoadRegConstructor {{ } #else - if (_dest == INTREG_PC && !isFloating()) { + if (_dest == INTREG_PC && !isFloating() && !isVector()) { flags[IsControl] = true; flags[IsIndirectControl] = true; if (conditional) @@ -1216,7 +1216,7 @@ def template LoadImmConstructor {{ #if %(use_uops)d assert(numMicroops >= 2); uops = new StaticInstPtr[numMicroops]; - if (_dest == INTREG_PC && !isFloating()) { + if (_dest == INTREG_PC && !isFloating() && !isVector()) { uops[0] = new %(acc_name)s(machInst, INTREG_UREG0, _base, _add, _imm); uops[0]->setDelayedCommit(); @@ -1250,7 +1250,7 @@ def template LoadImmConstructor {{ uops[1]->setLastMicroop(); } #else - if (_dest == INTREG_PC && !isFloating()) { + if (_dest == INTREG_PC && !isFloating() && !isVector()) { flags[IsControl] = true; flags[IsIndirectControl] = true; if (conditional) diff --git a/src/arch/arm/isa/templates/pred.isa b/src/arch/arm/isa/templates/pred.isa index 752ab8d1e..7b372bdee 100644 --- a/src/arch/arm/isa/templates/pred.isa +++ b/src/arch/arm/isa/templates/pred.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2016 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -77,7 +77,7 @@ def template DataImmConstructor {{ } } - if (%(is_branch)s && !isFloating()){ + if (%(is_branch)s && !isFloating() && 
!isVector()){ flags[IsControl] = true; flags[IsIndirectControl] = true; if (condCode == COND_AL || condCode == COND_UC) @@ -117,7 +117,7 @@ def template DataRegConstructor {{ } } - if (%(is_branch)s && !isFloating()){ + if (%(is_branch)s && !isFloating() && !isVector()){ flags[IsControl] = true; flags[IsIndirectControl] = true; if (condCode == COND_AL || condCode == COND_UC) diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py index 759b50c0d..ac639b413 100755 --- a/src/arch/isa_parser.py +++ b/src/arch/isa_parser.py @@ -493,6 +493,12 @@ class Operand(object): def isControlReg(self): return 0 + def isVecReg(self): + return 0 + + def isVecElem(self): + return 0 + def isPCState(self): return 0 @@ -658,6 +664,200 @@ class FloatRegOperand(Operand): }''' % (self.ctype, self.base_name, wp) return wb +class VecRegOperand(Operand): + reg_class = 'VecRegClass' + + def __init__(self, parser, full_name, ext, is_src, is_dest): + Operand.__init__(self, parser, full_name, ext, is_src, is_dest) + self.elemExt = None + self.parser = parser + + def isReg(self): + return 1 + + def isVecReg(self): + return 1 + + def makeDeclElem(self, elem_op): + (elem_name, elem_ext) = elem_op + (elem_spec, dflt_elem_ext, zeroing) = self.elems[elem_name] + if elem_ext: + ext = elem_ext + else: + ext = dflt_elem_ext + ctype = self.parser.operandTypeMap[ext] + return '\n\t%s %s = 0;' % (ctype, elem_name) + + def makeDecl(self): + if not self.is_dest and self.is_src: + c_decl = '\t/* Vars for %s*/' % (self.base_name) + if hasattr(self, 'active_elems'): + if self.active_elems: + for elem in self.active_elems: + c_decl += self.makeDeclElem(elem) + return c_decl + '\t/* End vars for %s */\n' % (self.base_name) + else: + return '' + + def makeConstructor(self, predRead, predWrite): + c_src = '' + c_dest = '' + + numAccessNeeded = 1 + + if self.is_src: + c_src = src_reg_constructor % (self.reg_class, self.reg_spec) + + if self.is_dest: + c_dest = dst_reg_constructor % (self.reg_class, self.reg_spec) 
+ c_dest += '\n\t_numVecDestRegs++;' + + return c_src + c_dest + + # Read destination register to write + def makeReadWElem(self, elem_op): + (elem_name, elem_ext) = elem_op + (elem_spec, dflt_elem_ext, zeroing) = self.elems[elem_name] + if elem_ext: + ext = elem_ext + else: + ext = dflt_elem_ext + ctype = self.parser.operandTypeMap[ext] + c_read = '\t\t%s& %s = %s[%s];\n' % \ + (ctype, elem_name, self.base_name, elem_spec) + return c_read + + def makeReadW(self, predWrite): + func = 'getWritableVecRegOperand' + if self.read_code != None: + return self.buildReadCode(func) + + if predWrite: + rindex = '_destIndex++' + else: + rindex = '%d' % self.dest_reg_idx + + c_readw = '\t\t%s& tmp_d%s = xc->%s(this, %s);\n'\ + % ('TheISA::VecRegContainer', rindex, func, rindex) + if self.elemExt: + c_readw += '\t\tauto %s = tmp_d%s.as<%s>();\n' % (self.base_name, + rindex, self.parser.operandTypeMap[self.elemExt]) + if self.ext: + c_readw += '\t\tauto %s = tmp_d%s.as<%s>();\n' % (self.base_name, + rindex, self.parser.operandTypeMap[self.ext]) + if hasattr(self, 'active_elems'): + if self.active_elems: + for elem in self.active_elems: + c_readw += self.makeReadWElem(elem) + return c_readw + + # Normal source operand read + def makeReadElem(self, elem_op, name): + (elem_name, elem_ext) = elem_op + (elem_spec, dflt_elem_ext, zeroing) = self.elems[elem_name] + + if elem_ext: + ext = elem_ext + else: + ext = dflt_elem_ext + ctype = self.parser.operandTypeMap[ext] + c_read = '\t\t%s = %s[%s];\n' % \ + (elem_name, name, elem_spec) + return c_read + + def makeRead(self, predRead): + func = 'readVecRegOperand' + if self.read_code != None: + return self.buildReadCode(func) + + if predRead: + rindex = '_sourceIndex++' + else: + rindex = '%d' % self.src_reg_idx + + name = self.base_name + if self.is_dest and self.is_src: + name += '_merger' + + c_read = '\t\t%s& tmp_s%s = xc->%s(this, %s);\n' \ + % ('const TheISA::VecRegContainer', rindex, func, rindex) + # If the parser has detected that 
elements are being access, create + # the appropriate view + if self.elemExt: + c_read += '\t\tauto %s = tmp_s%s.as<%s>();\n' % \ + (name, rindex, self.parser.operandTypeMap[self.elemExt]) + if self.ext: + c_read += '\t\tauto %s = tmp_s%s.as<%s>();\n' % \ + (name, rindex, self.parser.operandTypeMap[self.ext]) + if hasattr(self, 'active_elems'): + if self.active_elems: + for elem in self.active_elems: + c_read += self.makeReadElem(elem, name) + return c_read + + def makeWrite(self, predWrite): + func = 'setVecRegOperand' + if self.write_code != None: + return self.buildWriteCode(func) + + wb = ''' + if (traceData) { + panic("Vectors not supported yet in tracedata"); + /*traceData->setData(final_val);*/ + } + ''' + return wb + + def finalize(self, predRead, predWrite): + super(VecRegOperand, self).finalize(predRead, predWrite) + if self.is_dest: + self.op_rd = self.makeReadW(predWrite) + self.op_rd + +class VecElemOperand(Operand): + reg_class = 'VectorElemClass' + + def isReg(self): + return 1 + + def isVecElem(self): + return 1 + + def makeDecl(self): + if self.is_dest and not self.is_src: + return '\n\t%s %s;' % (self.ctype, self.base_name) + else: + return '' + + def makeConstructor(self, predRead, predWrite): + c_src = '' + c_dest = '' + + numAccessNeeded = 1 + regId = 'RegId(%s, %s * numVecElemPerVecReg + elemIdx, %s)' % \ + (self.reg_class, self.reg_spec) + + if self.is_src: + c_src = ('\n\t_srcRegIdx[_numSrcRegs++] = RegId(%s, %s, %s);' % + (self.reg_class, self.reg_spec, self.elem_spec)) + + if self.is_dest: + c_dest = ('\n\t_destRegIdx[_numDestRegs++] = RegId(%s, %s, %s);' % + (self.reg_class, self.reg_spec, self.elem_spec)) + c_dest += '\n\t_numVecElemDestRegs++;' + return c_src + c_dest + + def makeRead(self, predRead): + c_read = ('\n/* Elem is kept inside the operand description */' + + '\n\tVecElem %s = xc->readVecElemOperand(this, %d);' % + (self.base_name, self.src_reg_idx)) + return c_read + + def makeWrite(self, predWrite): + c_write = ('\n/* Elem 
is kept inside the operand description */' + + '\n\txc->setVecElemOperand(this, %d, %s);' % + (self.dest_reg_idx, self.base_name)) + return c_write + class CCRegOperand(Operand): reg_class = 'CCRegClass' @@ -857,22 +1057,49 @@ class OperandList(object): op = match.groups() # regexp groups are operand full name, base, and extension (op_full, op_base, op_ext) = op + # If is a elem operand, define or update the corresponding + # vector operand + isElem = False + if op_base in parser.elemToVector: + isElem = True + elem_op = (op_base, op_ext) + op_base = parser.elemToVector[op_base] + op_ext = '' # use the default one # if the token following the operand is an assignment, this is # a destination (LHS), else it's a source (RHS) is_dest = (assignRE.match(code, match.end()) != None) is_src = not is_dest + # see if we've already seen this one op_desc = self.find_base(op_base) if op_desc: - if op_desc.ext != op_ext: - error ('Inconsistent extensions for operand %s' % \ - op_base) + if op_ext and op_ext != '' and op_desc.ext != op_ext: + error ('Inconsistent extensions for operand %s: %s - %s' \ + % (op_base, op_desc.ext, op_ext)) op_desc.is_src = op_desc.is_src or is_src op_desc.is_dest = op_desc.is_dest or is_dest + if isElem: + (elem_base, elem_ext) = elem_op + found = False + for ae in op_desc.active_elems: + (ae_base, ae_ext) = ae + if ae_base == elem_base: + if ae_ext != elem_ext: + error('Inconsistent extensions for elem' + ' operand %s' % elem_base) + else: + found = True + if not found: + op_desc.active_elems.append(elem_op) else: # new operand: create new descriptor op_desc = parser.operandNameMap[op_base](parser, op_full, op_ext, is_src, is_dest) + # if operand is a vector elem, add the corresponding vector + # operand if not already done + if isElem: + op_desc.elemExt = elem_op[1] + op_desc.active_elems = [elem_op] self.append(op_desc) # start next search after end of current match next_pos = match.end() @@ -883,6 +1110,7 @@ class OperandList(object): 
self.numDestRegs = 0 self.numFPDestRegs = 0 self.numIntDestRegs = 0 + self.numVecDestRegs = 0 self.numCCDestRegs = 0 self.numMiscDestRegs = 0 self.memOperand = None @@ -904,6 +1132,8 @@ class OperandList(object): self.numFPDestRegs += 1 elif op_desc.isIntReg(): self.numIntDestRegs += 1 + elif op_desc.isVecReg(): + self.numVecDestRegs += 1 elif op_desc.isCCReg(): self.numCCDestRegs += 1 elif op_desc.isControlReg(): @@ -994,6 +1224,11 @@ class SubOperandList(OperandList): op = match.groups() # regexp groups are operand full name, base, and extension (op_full, op_base, op_ext) = op + # If is a elem operand, define or update the corresponding + # vector operand + if op_base in parser.elemToVector: + elem_op = op_base + op_base = parser.elemToVector[elem_op] # find this op in the master list op_desc = master_list.find_base(op_base) if not op_desc: @@ -1105,6 +1340,8 @@ class InstObjParams(object): header += '\n\t_numSrcRegs = 0;' header += '\n\t_numDestRegs = 0;' header += '\n\t_numFPDestRegs = 0;' + header += '\n\t_numVecDestRegs = 0;' + header += '\n\t_numVecElemDestRegs = 0;' header += '\n\t_numIntDestRegs = 0;' header += '\n\t_numCCDestRegs = 0;' @@ -1149,6 +1386,8 @@ class InstObjParams(object): self.op_class = 'MemReadOp' elif 'IsFloating' in self.flags: self.op_class = 'FloatAddOp' + elif 'IsVector' in self.flags: + self.op_class = 'SimdAddOp' else: self.op_class = 'IntAluOp' @@ -1158,8 +1397,12 @@ class InstObjParams(object): # if 'IsFloating' is set, add call to the FP enable check # function (which should be provided by isa_desc via a declare) + # if 'IsVector' is set, add call to the Vector enable check + # function (which should be provided by isa_desc via a declare) if 'IsFloating' in self.flags: self.fp_enable_check = 'fault = checkFpEnableFault(xc);' + elif 'IsVector' in self.flags: + self.fp_enable_check = 'fault = checkVecEnableFault(xc);' else: self.fp_enable_check = '' @@ -2300,6 +2543,16 @@ StaticInstPtr if dflt_ext: dflt_ctype = 
self.operandTypeMap[dflt_ext] attrList.extend(['dflt_ctype', 'dflt_ext']) + # reg_spec is either just a string or a dictionary + # (for elems of vector) + if isinstance(reg_spec, tuple): + (reg_spec, elem_spec) = reg_spec + if isinstance(elem_spec, str): + attrList.append('elem_spec') + else: + assert(isinstance(elem_spec, dict)) + elems = elem_spec + attrList.append('elems') for attr in attrList: tmp_dict[attr] = eval(attr) tmp_dict['base_name'] = op_name @@ -2323,6 +2576,15 @@ StaticInstPtr # Define operand variables. operands = user_dict.keys() + # Add the elems defined in the vector operands and + # build a map elem -> vector (used in OperandList) + elem_to_vec = {} + for op in user_dict.keys(): + if hasattr(self.operandNameMap[op], 'elems'): + for elem in self.operandNameMap[op].elems.keys(): + operands.append(elem) + elem_to_vec[elem] = op + self.elemToVector = elem_to_vec extensions = self.operandTypeMap.keys() operandsREString = r''' diff --git a/src/arch/sparc/faults.cc b/src/arch/sparc/faults.cc index c09bd0da2..13e9c19f6 100644 --- a/src/arch/sparc/faults.cc +++ b/src/arch/sparc/faults.cc @@ -108,6 +108,12 @@ template<> SparcFaultBase::FaultVals SparcFault::vals = {"fp_disabled", 0x020, 800, {P, P, H}, FaultStat()}; +/* SPARCv8 and SPARCv9 define just fp_disabled trap. SIMD is not contemplated + * as a separate part. 
Therefore, we use the same code and TT */ +template<> SparcFaultBase::FaultVals + SparcFault<VecDisabled>::vals = +{"fp_disabled", 0x020, 800, {P, P, H}, FaultStat()}; + template<> SparcFaultBase::FaultVals SparcFault<FpExceptionIEEE754>::vals = {"fp_exception_ieee_754", 0x021, 1110, {P, P, H}, FaultStat()}; diff --git a/src/arch/sparc/faults.hh b/src/arch/sparc/faults.hh index 42c8b7149..aa270fa31 100644 --- a/src/arch/sparc/faults.hh +++ b/src/arch/sparc/faults.hh @@ -122,6 +122,7 @@ class PrivilegedOpcode : public SparcFault<PrivilegedOpcode> {}; // class UnimplementedSTD : public SparcFault<UnimplementedSTD> {}; class FpDisabled : public SparcFault<FpDisabled> {}; +class VecDisabled : public SparcFault<VecDisabled> {}; class FpExceptionIEEE754 : public SparcFault<FpExceptionIEEE754> {}; diff --git a/src/arch/sparc/isa/base.isa b/src/arch/sparc/isa/base.isa index b517d462c..4b61c940c 100644 --- a/src/arch/sparc/isa/base.isa +++ b/src/arch/sparc/isa/base.isa @@ -578,6 +578,11 @@ output exec {{ return NoFault; } } + static inline Fault + checkVecEnableFault(CPU_EXEC_CONTEXT *xc) + { + return std::make_shared<VecDisabled>(); + } }}; diff --git a/src/cpu/StaticInstFlags.py b/src/cpu/StaticInstFlags.py index ef29726fc..55ef456ce 100644 --- a/src/cpu/StaticInstFlags.py +++ b/src/cpu/StaticInstFlags.py @@ -58,6 +58,8 @@ class StaticInstFlags(Enum): 'IsInteger', # References integer regs. 'IsFloating', # References FP regs. 'IsCC', # References CC regs. + 'IsVector', # References Vector regs. + 'IsVectorElem', # References Vector reg elems. 'IsMemRef', # References memory (load, store, or prefetch) 'IsLoad', # Reads from memory (load or prefetch). 
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 132c390b3..d7d32e629 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -517,6 +517,7 @@ class BaseDynInst : public ExecContext, public RefCounted bool isDataPrefetch() const { return staticInst->isDataPrefetch(); } bool isInteger() const { return staticInst->isInteger(); } bool isFloating() const { return staticInst->isFloating(); } + bool isVector() const { return staticInst->isVector(); } bool isControl() const { return staticInst->isControl(); } bool isCall() const { return staticInst->isCall(); } bool isReturn() const { return staticInst->isReturn(); } diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 3cce7f69c..5977f94f3 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -517,6 +517,8 @@ class DefaultCommit Stats::Vector statComMembars; /** Total number of committed branches. */ Stats::Vector statComBranches; + /** Total number of vector instructions */ + Stats::Vector statComVector; /** Total number of floating point instructions */ Stats::Vector statComFloating; /** Total number of integer instructions */ diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index ea77f18fb..aba2696c2 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -260,6 +260,13 @@ DefaultCommit::regStats() .flags(total) ; + statComVector + .init(cpu->numThreads) + .name(name() + ".vec_insts") + .desc("Number of committed Vector instructions.") + .flags(total) + ; + statComInteger .init(cpu->numThreads) .name(name()+".int_insts") @@ -1404,6 +1411,9 @@ DefaultCommit::updateComInstStats(DynInstPtr &inst) // Floating Point Instruction if (inst->isFloating()) statComFloating[tid]++; + // Vector Instruction + if (inst->isVector()) + statComVector[tid]++; // Function Calls if (inst->isCall()) diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 23d8d416c..64f8aa1be 100644 --- a/src/cpu/o3/inst_queue.hh +++ 
b/src/cpu/o3/inst_queue.hh @@ -543,10 +543,14 @@ class InstructionQueue Stats::Scalar intInstQueueWakeupAccesses; Stats::Scalar fpInstQueueReads; Stats::Scalar fpInstQueueWrites; - Stats::Scalar fpInstQueueWakeupQccesses; + Stats::Scalar fpInstQueueWakeupAccesses; + Stats::Scalar vecInstQueueReads; + Stats::Scalar vecInstQueueWrites; + Stats::Scalar vecInstQueueWakeupAccesses; Stats::Scalar intAluAccesses; Stats::Scalar fpAluAccesses; + Stats::Scalar vecAluAccesses; }; #endif //__CPU_O3_INST_QUEUE_HH__ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 2b113ae04..3da72fd86 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -364,7 +364,7 @@ InstructionQueue::regStats() .desc("Number of floating instruction queue writes") .flags(total); - fpInstQueueWakeupQccesses + fpInstQueueWakeupAccesses .name(name() + ".fp_inst_queue_wakeup_accesses") .desc("Number of floating instruction queue wakeup accesses") .flags(total); @@ -567,7 +567,13 @@ template void InstructionQueue::insert(DynInstPtr &new_inst) { - new_inst->isFloating() ? fpInstQueueWrites++ : intInstQueueWrites++; + if (new_inst->isFloating()) { + fpInstQueueWrites++; + } else if (new_inst->isVector()) { + vecInstQueueWrites++; + } else { + intInstQueueWrites++; + } // Make sure the instruction is valid assert(new_inst); @@ -609,7 +615,13 @@ InstructionQueue::insertNonSpec(DynInstPtr &new_inst) { // @todo: Clean up this code; can do it by setting inst as unable // to issue, then calling normal insert on the inst. - new_inst->isFloating() ? 
fpInstQueueWrites++ : intInstQueueWrites++; + if (new_inst->isFloating()) { + fpInstQueueWrites++; + } else if (new_inst->isVector()) { + vecInstQueueWrites++; + } else { + intInstQueueWrites++; + } assert(new_inst); @@ -660,8 +672,10 @@ InstructionQueue::getInstToExecute() assert(!instsToExecute.empty()); DynInstPtr inst = instsToExecute.front(); instsToExecute.pop_front(); - if (inst->isFloating()){ + if (inst->isFloating()) { fpInstQueueReads++; + } else if (inst->isVector()) { + vecInstQueueReads++; } else { intInstQueueReads++; } @@ -783,7 +797,13 @@ InstructionQueue::scheduleReadyInsts() DynInstPtr issuing_inst = readyInsts[op_class].top(); - issuing_inst->isFloating() ? fpInstQueueReads++ : intInstQueueReads++; + if (issuing_inst->isFloating()) { + fpInstQueueReads++; + } else if (issuing_inst->isVector()) { + vecInstQueueReads++; + } else { + intInstQueueReads++; + } assert(issuing_inst->seqNum == (*order_it).oldestInst); @@ -810,7 +830,13 @@ InstructionQueue::scheduleReadyInsts() if (op_class != No_OpClass) { idx = fuPool->getUnit(op_class); - issuing_inst->isFloating() ? fpAluAccesses++ : intAluAccesses++; + if (issuing_inst->isFloating()) { + fpAluAccesses++; + } else if (issuing_inst->isVector()) { + vecAluAccesses++; + } else { + intAluAccesses++; + } if (idx > FUPool::NoFreeFU) { op_latency = fuPool->getOpLatency(op_class); } @@ -955,7 +981,9 @@ InstructionQueue::wakeDependents(DynInstPtr &completed_inst) // The instruction queue here takes care of both floating and int ops if (completed_inst->isFloating()) { - fpInstQueueWakeupQccesses++; + fpInstQueueWakeupAccesses++; + } else if (completed_inst->isVector()) { + vecInstQueueWakeupAccesses++; } else { intInstQueueWakeupAccesses++; } @@ -1189,7 +1217,13 @@ InstructionQueue::doSquash(ThreadID tid) (*squash_it)->seqNum > squashedSeqNum[tid]) { DynInstPtr squashed_inst = (*squash_it); - squashed_inst->isFloating() ? 
fpInstQueueWrites++ : intInstQueueWrites++; + if (squashed_inst->isFloating()) { + fpInstQueueWrites++; + } else if (squashed_inst->isVector()) { + vecInstQueueWrites++; + } else { + intInstQueueWrites++; + } // Only handle the instruction if it actually is in the IQ and // hasn't already been squashed in the IQ. diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 57cea4ba7..783967602 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -252,6 +252,11 @@ BaseSimpleCPU::regStats() .desc("Number of float alu accesses") ; + t_info.numVecAluAccesses + .name(thread_str + ".num_vec_alu_accesses") + .desc("Number of vector alu accesses") + ; + t_info.numCallsReturns .name(thread_str + ".num_func_calls") .desc("number of times a function call or return occured") @@ -272,6 +277,11 @@ BaseSimpleCPU::regStats() .desc("number of float instructions") ; + t_info.numVecInsts + .name(thread_str + ".num_vec_insts") + .desc("number of vector instructions") + ; + t_info.numIntRegReads .name(thread_str + ".num_int_register_reads") .desc("number of times the integer registers were read") @@ -613,6 +623,12 @@ BaseSimpleCPU::postExecute() t_info.numFpInsts++; } + //vector alu accesses + if (curStaticInst->isVector()){ + t_info.numVecAluAccesses++; + t_info.numVecInsts++; + } + //number of function calls/returns to get window accesses if (curStaticInst->isCall() || curStaticInst->isReturn()){ t_info.numCallsReturns++; diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index 0f546407d..6d51e5ed9 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -94,6 +94,9 @@ class SimpleExecContext : public ExecContext { // Number of float alu accesses Stats::Scalar numFpAluAccesses; + // Number of vector alu accesses + Stats::Scalar numVecAluAccesses; + // Number of function calls/returns Stats::Scalar numCallsReturns; @@ -106,6 +109,9 @@ class SimpleExecContext : public ExecContext { // Number of float instructions 
Stats::Scalar numFpInsts; + // Number of vector instructions + Stats::Scalar numVecInsts; + // Number of integer register file accesses Stats::Scalar numIntRegReads; Stats::Scalar numIntRegWrites; diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index e7507c6a6..883c532ac 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -150,6 +150,7 @@ class StaticInst : public RefCounted, public StaticInstFlags bool isInteger() const { return flags[IsInteger]; } bool isFloating() const { return flags[IsFloating]; } + bool isVector() const { return flags[IsVector]; } bool isCC() const { return flags[IsCC]; } bool isControl() const { return flags[IsControl]; } -- cgit v1.2.3