summaryrefslogtreecommitdiff
path: root/src/cpu/o3/cpu.cc
diff options
context:
space:
mode:
authorRekai Gonzalez-Alberquilla <Rekai.GonzalezAlberquilla@arm.com>2017-04-05 13:24:00 -0500
committerAndreas Sandberg <andreas.sandberg@arm.com>2017-07-05 14:43:49 +0000
commit00da08902918da13fccc3f2266b7b2f5d0080708 (patch)
treeb495a0ceba7e073adca005cf84a7575d0aad5f27 /src/cpu/o3/cpu.cc
parent0747a432d25ade2c197ca6393270e12606419872 (diff)
downloadgem5-00da08902918da13fccc3f2266b7b2f5d0080708.tar.xz
cpu: Added interface for vector reg file
This patch adds some more functionality to the cpu model and the arch to interface with the vector register file. This change consists mainly of augmenting ThreadContexts and ExecContexts with calls to get/set full vectors, underlying microarchitectural elements or lanes. Those are meant to interface with the vector register file. All classes that implement this interface also get an appropriate implementation. This requires implementing the vector register file for the different models using the VecRegContainer class. This change set also updates the Result abstraction to contemplate the possibility of having a vector as result. The changes also affect how the remote_gdb connection works. There are some (nasty) side effects, such as the need to define dummy numPhysVecRegs parameter values for architectures that do not implement vector extensions. Nathanael Premillieu's work with an increasing number of fixes and improvements of mine. Change-Id: Iee65f4e8b03abfe1e94e6940a51b68d0977fd5bb Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com> [ Fix RISCV build issues and CC reg free list initialisation ] Signed-off-by: Andreas Sandberg <andreas.sandberg@arm.com> Reviewed-on: https://gem5-review.googlesource.com/2705
Diffstat (limited to 'src/cpu/o3/cpu.cc')
-rw-r--r--src/cpu/o3/cpu.cc143
1 files changed, 139 insertions, 4 deletions
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index a7a39b72a..c249d90ba 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012, 2014 ARM Limited
+ * Copyright (c) 2011-2012, 2014, 2016 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -46,6 +46,7 @@
#include "cpu/o3/cpu.hh"
+#include "arch/generic/traits.hh"
#include "arch/kernel_stats.hh"
#include "config/the_isa.hh"
#include "cpu/activity.hh"
@@ -171,9 +172,14 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
iew(this, params),
commit(this, params),
+ /* It is mandatory that all SMT threads use the same renaming mode as
+ * they are sharing registers and rename */
+ vecMode(initRenameMode<TheISA::ISA>::mode(params->isa[0])),
regFile(params->numPhysIntRegs,
params->numPhysFloatRegs,
- params->numPhysCCRegs),
+ params->numPhysVecRegs,
+ params->numPhysCCRegs,
+ vecMode),
freeList(name() + ".freelist", &regFile),
@@ -270,6 +276,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
//Make Sure That this a Valid Architeture
assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs);
assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs);
+ assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs);
assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs);
rename.setScoreboard(&scoreboard);
@@ -278,6 +285,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
// Setup the rename map for whichever stages need it.
for (ThreadID tid = 0; tid < numThreads; tid++) {
isa[tid] = params->isa[tid];
+ assert(initRenameMode<TheISA::ISA>::equals(isa[tid], isa[0]));
// Only Alpha has an FP zero register, so for other ISAs we
// use an invalid FP register index to avoid special treatment
@@ -287,10 +295,11 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
(THE_ISA == ALPHA_ISA) ? TheISA::ZeroReg : invalidFPReg;
commitRenameMap[tid].init(&regFile, TheISA::ZeroReg, fpZeroReg,
- &freeList);
+ &freeList,
+ vecMode);
renameMap[tid].init(&regFile, TheISA::ZeroReg, fpZeroReg,
- &freeList);
+ &freeList, vecMode);
}
// Initialize rename map to assign physical registers to the
@@ -311,6 +320,30 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
RegId(FloatRegClass, ridx), phys_reg);
}
+ /* Here we need two 'interfaces' the 'whole register' and the
+ * 'register element'. At any point only one of them will be
+ * active. */
+ if (vecMode == Enums::Full) {
+ /* Initialize the full-vector interface */
+ for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) {
+ RegId rid = RegId(VecRegClass, ridx);
+ PhysRegIdPtr phys_reg = freeList.getVecReg();
+ renameMap[tid].setEntry(rid, phys_reg);
+ commitRenameMap[tid].setEntry(rid, phys_reg);
+ }
+ } else {
+ /* Initialize the vector-element interface */
+ for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) {
+ for (ElemIndex ldx = 0; ldx < TheISA::NumVecElemPerVecReg;
+ ++ldx) {
+ RegId lrid = RegId(VecElemClass, ridx, ldx);
+ PhysRegIdPtr phys_elem = freeList.getVecElem();
+ renameMap[tid].setEntry(lrid, phys_elem);
+ commitRenameMap[tid].setEntry(lrid, phys_elem);
+ }
+ }
+ }
+
for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) {
PhysRegIdPtr phys_reg = freeList.getCCReg();
renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg);
@@ -514,6 +547,16 @@ FullO3CPU<Impl>::regStats()
.desc("number of floating regfile writes")
.prereq(fpRegfileWrites);
+ vecRegfileReads
+ .name(name() + ".vec_regfile_reads")
+ .desc("number of vector regfile reads")
+ .prereq(vecRegfileReads);
+
+ vecRegfileWrites
+ .name(name() + ".vec_regfile_writes")
+ .desc("number of vector regfile writes")
+ .prereq(vecRegfileWrites);
+
ccRegfileReads
.name(name() + ".cc_regfile_reads")
.desc("number of cc regfile reads")
@@ -1257,6 +1300,32 @@ FullO3CPU<Impl>::readFloatRegBits(PhysRegIdPtr phys_reg)
}
template <class Impl>
+auto
+FullO3CPU<Impl>::readVecReg(PhysRegIdPtr phys_reg) const
+ -> const VecRegContainer&
+{
+ vecRegfileReads++;
+ return regFile.readVecReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::getWritableVecReg(PhysRegIdPtr phys_reg)
+ -> VecRegContainer&
+{
+ vecRegfileWrites++;
+ return regFile.getWritableVecReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem&
+{
+ vecRegfileReads++;
+ return regFile.readVecElem(phys_reg);
+}
+
+template <class Impl>
CCReg
FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg)
{
@@ -1290,6 +1359,22 @@ FullO3CPU<Impl>::setFloatRegBits(PhysRegIdPtr phys_reg, FloatRegBits val)
template <class Impl>
void
+FullO3CPU<Impl>::setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val)
+{
+ vecRegfileWrites++;
+ regFile.setVecReg(phys_reg, val);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val)
+{
+ vecRegfileWrites++;
+ regFile.setVecElem(phys_reg, val);
+}
+
+template <class Impl>
+void
FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, CCReg val)
{
ccRegfileWrites++;
@@ -1330,6 +1415,36 @@ FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, ThreadID tid)
}
template <class Impl>
+auto
+FullO3CPU<Impl>::readArchVecReg(int reg_idx, ThreadID tid) const
+ -> const VecRegContainer&
+{
+ PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+ RegId(VecRegClass, reg_idx));
+ return readVecReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::getWritableArchVecReg(int reg_idx, ThreadID tid)
+ -> VecRegContainer&
+{
+ PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+ RegId(VecRegClass, reg_idx));
+ return getWritableVecReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
+ ThreadID tid) const -> const VecElem&
+{
+ PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+ RegId(VecRegClass, reg_idx, ldx));
+ return readVecElem(phys_reg);
+}
+
+template <class Impl>
CCReg
FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid)
{
@@ -1375,6 +1490,26 @@ FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, ThreadID tid)
template <class Impl>
void
+FullO3CPU<Impl>::setArchVecReg(int reg_idx, const VecRegContainer& val,
+ ThreadID tid)
+{
+ PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+ RegId(VecRegClass, reg_idx));
+ setVecReg(phys_reg, val);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
+ const VecElem& val, ThreadID tid)
+{
+ PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+ RegId(VecRegClass, reg_idx, ldx));
+ setVecElem(phys_reg, val);
+}
+
+template <class Impl>
+void
FullO3CPU<Impl>::setArchCCReg(int reg_idx, CCReg val, ThreadID tid)
{
ccRegfileWrites++;