summary refs log tree commit diff
path: root/src/cpu
diff options
context:
space:
mode:
author Giacomo Gabrielli <giacomo.gabrielli@arm.com> 2018-10-16 16:04:08 +0100
committer Giacomo Gabrielli <giacomo.gabrielli@arm.com> 2019-01-30 16:57:54 +0000
commit 25474167e5b247d1b91fbf802c5b396a63ae705e (patch)
tree b509597b23d792734f55c33b8125eebfbd9cd3a5 /src/cpu
parent c6f5db8743f19b02a38146d9cf2a829883387008 (diff)
download gem5-25474167e5b247d1b91fbf802c5b396a63ae705e.tar.xz
arch,cpu: Add vector predicate registers
Latest-gen. vector/SIMD extensions, including the Arm Scalable Vector Extension (SVE), introduce the notion of a predicate register file. This changeset adds this feature across architectures and CPU models.

Change-Id: Iebcadbad89c0a582ff8b1b70de353305db603946
Signed-off-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/13715
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Jason Lowe-Power <jason@lowepower.com>
Diffstat (limited to 'src/cpu')
-rw-r--r-- src/cpu/base_dyn_inst.hh 22
-rw-r--r-- src/cpu/checker/cpu.hh 35
-rw-r--r-- src/cpu/checker/thread_context.hh 24
-rw-r--r-- src/cpu/exec_context.hh 19
-rw-r--r-- src/cpu/inst_res.hh 24
-rw-r--r-- src/cpu/minor/exec_context.hh 27
-rw-r--r-- src/cpu/minor/scoreboard.cc 7
-rw-r--r-- src/cpu/minor/scoreboard.hh 5
-rw-r--r-- src/cpu/o3/O3CPU.py 2
-rw-r--r-- src/cpu/o3/comm.hh 5
-rw-r--r-- src/cpu/o3/cpu.cc 84
-rw-r--r-- src/cpu/o3/cpu.hh 19
-rw-r--r-- src/cpu/o3/dyn_inst.hh 25
-rw-r--r-- src/cpu/o3/free_list.hh 23
-rw-r--r-- src/cpu/o3/inst_queue_impl.hh 1
-rw-r--r-- src/cpu/o3/regfile.cc 21
-rw-r--r-- src/cpu/o3/regfile.hh 46
-rw-r--r-- src/cpu/o3/rename.hh 3
-rw-r--r-- src/cpu/o3/rename_impl.hh 11
-rw-r--r-- src/cpu/o3/rename_map.cc 4
-rw-r--r-- src/cpu/o3/rename_map.hh 25
-rw-r--r-- src/cpu/o3/thread_context.hh 23
-rw-r--r-- src/cpu/o3/thread_context_impl.hh 24
-rw-r--r-- src/cpu/reg_class.cc 3
-rw-r--r-- src/cpu/reg_class.hh 7
-rw-r--r-- src/cpu/simple/exec_context.hh 34
-rw-r--r-- src/cpu/simple_thread.hh 59
-rw-r--r-- src/cpu/static_inst.hh 29
-rw-r--r-- src/cpu/thread_context.cc 26
-rw-r--r-- src/cpu/thread_context.hh 34
30 files changed, 635 insertions, 36 deletions
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index d81b58bdf..b87fd8b4e 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -584,6 +584,11 @@ class BaseDynInst : public ExecContext, public RefCounted
{
return staticInst->numVecElemDestRegs();
}
+ int8_t
+ numVecPredDestRegs() const
+ {
+ return staticInst->numVecPredDestRegs();
+ }
/** Returns the logical register index of the i'th destination register. */
const RegId& destRegIdx(int i) const { return staticInst->destRegIdx(i); }
@@ -638,6 +643,16 @@ class BaseDynInst : public ExecContext, public RefCounted
InstResult::ResultType::VecElem));
}
}
+
+ /** Predicate result. */
+ template<typename T>
+ void setVecPredResult(T&& t)
+ {
+ if (instFlags[RecordResult]) {
+ instResult.push(InstResult(std::forward<T>(t),
+ InstResult::ResultType::VecPredReg));
+ }
+ }
/** @} */
/** Records an integer register being set to a value. */
@@ -672,6 +687,13 @@ class BaseDynInst : public ExecContext, public RefCounted
setVecElemResult(val);
}
+ /** Records a vector predicate register being set to a value. */
+ void setVecPredRegOperand(const StaticInst *si, int idx,
+ const VecPredRegContainer& val)
+ {
+ setVecPredResult(val);
+ }
+
/** Records that one of the source registers is ready. */
void markSrcRegReady();
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index 4468689bd..9d6061ad8 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011, 2016 ARM Limited
+ * Copyright (c) 2011, 2016-2017 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -304,6 +304,22 @@ class CheckerCPU : public BaseCPU, public ExecContext
return thread->readVecElem(reg);
}
+ const VecPredRegContainer&
+ readVecPredRegOperand(const StaticInst *si, int idx) const override
+ {
+ const RegId& reg = si->srcRegIdx(idx);
+ assert(reg.isVecPredReg());
+ return thread->readVecPredReg(reg);
+ }
+
+ VecPredRegContainer&
+ getWritableVecPredRegOperand(const StaticInst *si, int idx) override
+ {
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecPredReg());
+ return thread->getWritableVecPredReg(reg);
+ }
+
CCReg
readCCRegOperand(const StaticInst *si, int idx) override
{
@@ -336,6 +352,14 @@ class CheckerCPU : public BaseCPU, public ExecContext
InstResult::ResultType::VecElem));
}
+ template<typename T>
+ void
+ setVecPredResult(T&& t)
+ {
+ result.push(InstResult(std::forward<T>(t),
+ InstResult::ResultType::VecPredReg));
+ }
+
void
setIntRegOperand(const StaticInst *si, int idx, RegVal val) override
{
@@ -383,6 +407,15 @@ class CheckerCPU : public BaseCPU, public ExecContext
setVecElemResult(val);
}
+ void setVecPredRegOperand(const StaticInst *si, int idx,
+ const VecPredRegContainer& val) override
+ {
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecPredReg());
+ thread->setVecPredReg(reg, val);
+ setVecPredResult(val);
+ }
+
bool readPredicate() const override { return thread->readPredicate(); }
void
diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh
index b5a2079ea..8ce5a740d 100644
--- a/src/cpu/checker/thread_context.hh
+++ b/src/cpu/checker/thread_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012, 2016 ARM Limited
+ * Copyright (c) 2011-2012, 2016-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -263,6 +263,12 @@ class CheckerThreadContext : public ThreadContext
const VecElem& readVecElem(const RegId& reg) const
{ return actualTC->readVecElem(reg); }
+ const VecPredRegContainer& readVecPredReg(const RegId& reg) const override
+ { return actualTC->readVecPredReg(reg); }
+
+ VecPredRegContainer& getWritableVecPredReg(const RegId& reg) override
+ { return actualTC->getWritableVecPredReg(reg); }
+
CCReg readCCReg(int reg_idx)
{ return actualTC->readCCReg(reg_idx); }
@@ -295,6 +301,13 @@ class CheckerThreadContext : public ThreadContext
}
void
+ setVecPredReg(const RegId& reg, const VecPredRegContainer& val)
+ {
+ actualTC->setVecPredReg(reg, val);
+ checkerTC->setVecPredReg(reg, val);
+ }
+
+ void
setCCReg(int reg_idx, CCReg val)
{
actualTC->setCCReg(reg_idx, val);
@@ -428,6 +441,15 @@ class CheckerThreadContext : public ThreadContext
const ElemIndex& elem_idx, const VecElem& val)
{ actualTC->setVecElemFlat(idx, elem_idx, val); }
+ const VecPredRegContainer& readVecPredRegFlat(int idx) const override
+ { return actualTC->readVecPredRegFlat(idx); }
+
+ VecPredRegContainer& getWritableVecPredRegFlat(int idx) override
+ { return actualTC->getWritableVecPredRegFlat(idx); }
+
+ void setVecPredRegFlat(int idx, const VecPredRegContainer& val) override
+ { actualTC->setVecPredRegFlat(idx, val); }
+
CCReg readCCRegFlat(int idx)
{ return actualTC->readCCRegFlat(idx); }
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh
index 75f428b87..87af91623 100644
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014, 2016 ARM Limited
+ * Copyright (c) 2014, 2016-2017 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -77,6 +77,7 @@ class ExecContext {
typedef TheISA::CCReg CCReg;
using VecRegContainer = TheISA::VecRegContainer;
using VecElem = TheISA::VecElem;
+ using VecPredRegContainer = TheISA::VecPredRegContainer;
public:
/**
@@ -168,6 +169,22 @@ class ExecContext {
const VecElem val) = 0;
/** @} */
+ /** Predicate registers interface. */
+ /** @{ */
+ /** Reads source predicate register operand. */
+ virtual const VecPredRegContainer&
+ readVecPredRegOperand(const StaticInst *si, int idx) const = 0;
+
+ /** Gets destination predicate register operand for modification. */
+ virtual VecPredRegContainer&
+ getWritableVecPredRegOperand(const StaticInst *si, int idx) = 0;
+
+ /** Sets a destination predicate register operand to a value. */
+ virtual void
+ setVecPredRegOperand(const StaticInst *si, int idx,
+ const VecPredRegContainer& val) = 0;
+ /** @} */
+
/**
* @{
* @name Condition Code Registers
diff --git a/src/cpu/inst_res.hh b/src/cpu/inst_res.hh
index 9b6a23d95..bf9c649ef 100644
--- a/src/cpu/inst_res.hh
+++ b/src/cpu/inst_res.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016 ARM Limited
+ * Copyright (c) 2016-2017 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -48,12 +48,14 @@
class InstResult {
using VecRegContainer = TheISA::VecRegContainer;
using VecElem = TheISA::VecElem;
+ using VecPredRegContainer = TheISA::VecPredRegContainer;
public:
union MultiResult {
uint64_t integer;
double dbl;
VecRegContainer vector;
VecElem vecElem;
+ VecPredRegContainer pred;
MultiResult() {}
};
@@ -61,6 +63,7 @@ class InstResult {
Scalar,
VecElem,
VecReg,
+ VecPredReg,
NumResultTypes,
Invalid
};
@@ -87,6 +90,9 @@ class InstResult {
/** Vector result. */
explicit InstResult(const VecRegContainer& v, const ResultType& t)
: type(t) { result.vector = v; }
+ /** Predicate result. */
+ explicit InstResult(const VecPredRegContainer& v, const ResultType& t)
+ : type(t) { result.pred = v; }
InstResult& operator=(const InstResult& that) {
type = that.type;
@@ -104,6 +110,10 @@ class InstResult {
case ResultType::VecReg:
result.vector = that.result.vector;
break;
+ case ResultType::VecPredReg:
+ result.pred = that.result.pred;
+ break;
+
default:
panic("Assigning result from unknown result type");
break;
@@ -124,6 +134,8 @@ class InstResult {
return result.vecElem == that.result.vecElem;
case ResultType::VecReg:
return result.vector == that.result.vector;
+ case ResultType::VecPredReg:
+ return result.pred == that.result.pred;
case ResultType::Invalid:
return false;
default:
@@ -143,6 +155,8 @@ class InstResult {
bool isVector() const { return type == ResultType::VecReg; }
/** Is this a vector element result?. */
bool isVecElem() const { return type == ResultType::VecElem; }
+ /** Is this a predicate result?. */
+ bool isPred() const { return type == ResultType::VecPredReg; }
/** Is this a valid result?. */
bool isValid() const { return type != ResultType::Invalid; }
/** @} */
@@ -177,6 +191,14 @@ class InstResult {
panic_if(!isVecElem(), "Converting scalar (or invalid) to vector!!");
return result.vecElem;
}
+
+ const VecPredRegContainer&
+ asPred() const
+ {
+ panic_if(!isPred(), "Converting scalar (or invalid) to predicate!!");
+ return result.pred;
+ }
+
/** @} */
};
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index b9ed3971f..4cb67372e 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2014, 2016 ARM Limited
+ * Copyright (c) 2011-2014, 2016-2017 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -161,6 +161,22 @@ class ExecContext : public ::ExecContext
return thread.readVecElem(reg);
}
+ const TheISA::VecPredRegContainer&
+ readVecPredRegOperand(const StaticInst *si, int idx) const override
+ {
+ const RegId& reg = si->srcRegIdx(idx);
+ assert(reg.isVecPredReg());
+ return thread.readVecPredReg(reg);
+ }
+
+ TheISA::VecPredRegContainer&
+ getWritableVecPredRegOperand(const StaticInst *si, int idx) override
+ {
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecPredReg());
+ return thread.getWritableVecPredReg(reg);
+ }
+
void
setIntRegOperand(const StaticInst *si, int idx, RegVal val) override
{
@@ -186,6 +202,15 @@ class ExecContext : public ::ExecContext
thread.setVecReg(reg, val);
}
+ void
+ setVecPredRegOperand(const StaticInst *si, int idx,
+ const TheISA::VecPredRegContainer& val)
+ {
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecPredReg());
+ thread.setVecPredReg(reg, val);
+ }
+
/** Vector Register Lane Interfaces. */
/** @{ */
/** Reads source vector 8bit operand. */
diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc
index 196d035eb..5c0e86a67 100644
--- a/src/cpu/minor/scoreboard.cc
+++ b/src/cpu/minor/scoreboard.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014, 2016 ARM Limited
+ * Copyright (c) 2013-2014, 2016-2017 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -77,6 +77,11 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index)
TheISA::NumFloatRegs + reg.flatIndex();
ret = true;
break;
+ case VecPredRegClass:
+ scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs +
+ TheISA::NumFloatRegs + TheISA::NumVecRegs + reg.index();
+ ret = true;
+ break;
case CCRegClass:
scoreboard_index = TheISA::NumIntRegs + reg.index();
ret = true;
diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh
index 37ae8da0a..b21e14e24 100644
--- a/src/cpu/minor/scoreboard.hh
+++ b/src/cpu/minor/scoreboard.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014, 2016 ARM Limited
+ * Copyright (c) 2013-2014, 2016-2017 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -95,7 +95,8 @@ class Scoreboard : public Named
Named(name),
numRegs(TheISA::NumIntRegs + TheISA::NumCCRegs +
TheISA::NumFloatRegs +
- (TheISA::NumVecRegs * TheISA::NumVecElemPerVecReg)),
+ (TheISA::NumVecRegs * TheISA::NumVecElemPerVecReg) +
+ TheISA::NumVecPredRegs),
numResults(numRegs, 0),
numUnpredictableResults(numRegs, 0),
fuIndices(numRegs, 0),
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index 32cc19010..e73c09334 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -150,6 +150,8 @@ class DerivO3CPU(BaseCPU):
_defaultNumPhysCCRegs = Self.numPhysIntRegs * 5
numPhysVecRegs = Param.Unsigned(256, "Number of physical vector "
"registers")
+ numPhysVecPredRegs = Param.Unsigned(32, "Number of physical predicate "
+ "registers")
numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs,
"Number of physical cc registers")
numIQEntries = Param.Unsigned(64, "Number of instruction queue entries")
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index f5be5a804..df518b1e4 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011, 2016 ARM Limited
+ * Copyright (c) 2011, 2016-2017 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -121,6 +121,9 @@ class PhysRegId : private RegId {
/** @Return true if it is a vector element physical register. */
bool isVectorPhysElem() const { return isVecElem(); }
+ /** @return true if it is a vector predicate physical register. */
+ bool isVecPredPhysReg() const { return isVecPredReg(); }
+
/** @Return true if it is a condition-code physical register. */
bool isMiscPhysReg() const { return isMiscReg(); }
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 5d92d92dc..ef3b17202 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -159,6 +159,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
regFile(params->numPhysIntRegs,
params->numPhysFloatRegs,
params->numPhysVecRegs,
+ params->numPhysVecPredRegs,
params->numPhysCCRegs,
vecMode),
@@ -258,6 +259,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs);
assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs);
assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs);
+ assert(params->numPhysVecPredRegs >= numThreads * TheISA::NumVecPredRegs);
assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs);
rename.setScoreboard(&scoreboard);
@@ -325,6 +327,13 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
}
}
+ for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) {
+ PhysRegIdPtr phys_reg = freeList.getVecPredReg();
+ renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg);
+ commitRenameMap[tid].setEntry(
+ RegId(VecPredRegClass, ridx), phys_reg);
+ }
+
for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) {
PhysRegIdPtr phys_reg = freeList.getCCReg();
renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg);
@@ -538,6 +547,16 @@ FullO3CPU<Impl>::regStats()
.desc("number of vector regfile writes")
.prereq(vecRegfileWrites);
+ vecPredRegfileReads
+ .name(name() + ".pred_regfile_reads")
+ .desc("number of predicate regfile reads")
+ .prereq(vecPredRegfileReads);
+
+ vecPredRegfileWrites
+ .name(name() + ".pred_regfile_writes")
+ .desc("number of predicate regfile writes")
+ .prereq(vecPredRegfileWrites);
+
ccRegfileReads
.name(name() + ".cc_regfile_reads")
.desc("number of cc regfile reads")
@@ -883,6 +902,14 @@ FullO3CPU<Impl>::removeThread(ThreadID tid)
freeList.addReg(phys_reg);
}
+ // Unbind predicate Regs from Rename Map
+ for (unsigned preg = 0; preg < TheISA::NumVecPredRegs; preg++) {
+ PhysRegIdPtr phys_reg = renameMap[tid].lookup(
+ RegId(VecPredRegClass, preg));
+ scoreboard.unsetReg(phys_reg);
+ freeList.addReg(phys_reg);
+ }
+
// Unbind condition-code Regs from Rename Map
for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs;
reg_id.index()++) {
@@ -1334,6 +1361,24 @@ FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem&
}
template <class Impl>
+auto
+FullO3CPU<Impl>::readVecPredReg(PhysRegIdPtr phys_reg) const
+ -> const VecPredRegContainer&
+{
+ vecPredRegfileReads++;
+ return regFile.readVecPredReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::getWritableVecPredReg(PhysRegIdPtr phys_reg)
+ -> VecPredRegContainer&
+{
+ vecPredRegfileWrites++;
+ return regFile.getWritableVecPredReg(phys_reg);
+}
+
+template <class Impl>
CCReg
FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg)
{
@@ -1375,6 +1420,15 @@ FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val)
template <class Impl>
void
+FullO3CPU<Impl>::setVecPredReg(PhysRegIdPtr phys_reg,
+ const VecPredRegContainer& val)
+{
+ vecPredRegfileWrites++;
+ regFile.setVecPredReg(phys_reg, val);
+}
+
+template <class Impl>
+void
FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, CCReg val)
{
ccRegfileWrites++;
@@ -1434,6 +1488,26 @@ FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
}
template <class Impl>
+auto
+FullO3CPU<Impl>::readArchVecPredReg(int reg_idx, ThreadID tid) const
+ -> const VecPredRegContainer&
+{
+ PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+ RegId(VecPredRegClass, reg_idx));
+ return readVecPredReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::getWritableArchVecPredReg(int reg_idx, ThreadID tid)
+ -> VecPredRegContainer&
+{
+ PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+ RegId(VecPredRegClass, reg_idx));
+ return getWritableVecPredReg(phys_reg);
+}
+
+template <class Impl>
CCReg
FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid)
{
@@ -1488,6 +1562,16 @@ FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
template <class Impl>
void
+FullO3CPU<Impl>::setArchVecPredReg(int reg_idx, const VecPredRegContainer& val,
+ ThreadID tid)
+{
+ PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+ RegId(VecPredRegClass, reg_idx));
+ setVecPredReg(phys_reg, val);
+}
+
+template <class Impl>
+void
FullO3CPU<Impl>::setArchCCReg(int reg_idx, CCReg val, ThreadID tid)
{
ccRegfileWrites++;
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index b5f754056..30ed4ef3b 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -107,6 +107,8 @@ class FullO3CPU : public BaseO3CPU
using VecElem = TheISA::VecElem;
using VecRegContainer = TheISA::VecRegContainer;
+ using VecPredRegContainer = TheISA::VecPredRegContainer;
+
typedef O3ThreadState<Impl> ImplState;
typedef O3ThreadState<Impl> Thread;
@@ -457,6 +459,10 @@ class FullO3CPU : public BaseO3CPU
const VecElem& readVecElem(PhysRegIdPtr reg_idx) const;
+ const VecPredRegContainer& readVecPredReg(PhysRegIdPtr reg_idx) const;
+
+ VecPredRegContainer& getWritableVecPredReg(PhysRegIdPtr reg_idx);
+
TheISA::CCReg readCCReg(PhysRegIdPtr phys_reg);
void setIntReg(PhysRegIdPtr phys_reg, RegVal val);
@@ -467,6 +473,8 @@ class FullO3CPU : public BaseO3CPU
void setVecElem(PhysRegIdPtr reg_idx, const VecElem& val);
+ void setVecPredReg(PhysRegIdPtr reg_idx, const VecPredRegContainer& val);
+
void setCCReg(PhysRegIdPtr phys_reg, TheISA::CCReg val);
RegVal readArchIntReg(int reg_idx, ThreadID tid);
@@ -501,6 +509,11 @@ class FullO3CPU : public BaseO3CPU
const VecElem& readArchVecElem(const RegIndex& reg_idx,
const ElemIndex& ldx, ThreadID tid) const;
+ const VecPredRegContainer& readArchVecPredReg(int reg_idx,
+ ThreadID tid) const;
+
+ VecPredRegContainer& getWritableArchVecPredReg(int reg_idx, ThreadID tid);
+
TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid);
/** Architectural register accessors. Looks up in the commit
@@ -512,6 +525,9 @@ class FullO3CPU : public BaseO3CPU
void setArchFloatRegBits(int reg_idx, RegVal val, ThreadID tid);
+ void setArchVecPredReg(int reg_idx, const VecPredRegContainer& val,
+ ThreadID tid);
+
void setArchVecReg(int reg_idx, const VecRegContainer& val, ThreadID tid);
void setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
@@ -805,6 +821,9 @@ class FullO3CPU : public BaseO3CPU
//number of vector register file accesses
mutable Stats::Scalar vecRegfileReads;
Stats::Scalar vecRegfileWrites;
+ //number of predicate register file accesses
+ mutable Stats::Scalar vecPredRegfileReads;
+ Stats::Scalar vecPredRegfileWrites;
//number of CC register file accesses
Stats::Scalar ccRegfileReads;
Stats::Scalar ccRegfileWrites;
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index 5bd0f8e47..9793f4ead 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -70,6 +70,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
using VecRegContainer = TheISA::VecRegContainer;
using VecElem = TheISA::VecElem;
static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg;
+ using VecPredRegContainer = TheISA::VecPredRegContainer;
enum {
MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs
@@ -231,6 +232,10 @@ class BaseO3DynInst : public BaseDynInst<Impl>
this->setVecElemOperand(this->staticInst.get(), idx,
this->cpu->readVecElem(prev_phys_reg));
break;
+ case VecPredRegClass:
+ this->setVecPredRegOperand(this->staticInst.get(), idx,
+ this->cpu->readVecPredReg(prev_phys_reg));
+ break;
case CCRegClass:
this->setCCRegOperand(this->staticInst.get(), idx,
this->cpu->readCCReg(prev_phys_reg));
@@ -361,6 +366,18 @@ class BaseO3DynInst : public BaseDynInst<Impl>
return this->cpu->readVecElem(this->_srcRegIdx[idx]);
}
+ const VecPredRegContainer&
+ readVecPredRegOperand(const StaticInst *si, int idx) const override
+ {
+ return this->cpu->readVecPredReg(this->_srcRegIdx[idx]);
+ }
+
+ VecPredRegContainer&
+ getWritableVecPredRegOperand(const StaticInst *si, int idx) override
+ {
+ return this->cpu->getWritableVecPredReg(this->_destRegIdx[idx]);
+ }
+
CCReg readCCRegOperand(const StaticInst *si, int idx)
{
return this->cpu->readCCReg(this->_srcRegIdx[idx]);
@@ -399,6 +416,14 @@ class BaseO3DynInst : public BaseDynInst<Impl>
BaseDynInst<Impl>::setVecElemOperand(si, idx, val);
}
+ void
+ setVecPredRegOperand(const StaticInst *si, int idx,
+ const VecPredRegContainer& val) override
+ {
+ this->cpu->setVecPredReg(this->_destRegIdx[idx], val);
+ BaseDynInst<Impl>::setVecPredRegOperand(si, idx, val);
+ }
+
void setCCRegOperand(const StaticInst *si, int idx, CCReg val)
{
this->cpu->setCCReg(this->_destRegIdx[idx], val);
diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh
index e7a899cdf..46bebf30d 100644
--- a/src/cpu/o3/free_list.hh
+++ b/src/cpu/o3/free_list.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016 ARM Limited
+ * Copyright (c) 2016-2017 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -138,6 +138,9 @@ class UnifiedFreeList
SimpleFreeList vecElemList;
/** @} */
+ /** The list of free predicate registers. */
+ SimpleFreeList predList;
+
/** The list of free condition-code registers. */
SimpleFreeList ccList;
@@ -183,6 +186,9 @@ class UnifiedFreeList
/** Gets a free vector elemenet register. */
PhysRegIdPtr getVecElem() { return vecElemList.getReg(); }
+ /** Gets a free predicate register. */
+ PhysRegIdPtr getVecPredReg() { return predList.getReg(); }
+
/** Gets a free cc register. */
PhysRegIdPtr getCCReg() { return ccList.getReg(); }
@@ -207,6 +213,9 @@ class UnifiedFreeList
vecElemList.addReg(freed_reg);
}
+ /** Adds a predicate register back to the free list. */
+ void addVecPredReg(PhysRegIdPtr freed_reg) { predList.addReg(freed_reg); }
+
/** Adds a cc register back to the free list. */
void addCCReg(PhysRegIdPtr freed_reg) { ccList.addReg(freed_reg); }
@@ -222,6 +231,9 @@ class UnifiedFreeList
/** Checks if there are any free vector registers. */
bool hasFreeVecElems() const { return vecElemList.hasFreeRegs(); }
+ /** Checks if there are any free predicate registers. */
+ bool hasFreeVecPredRegs() const { return predList.hasFreeRegs(); }
+
/** Checks if there are any free cc registers. */
bool hasFreeCCRegs() const { return ccList.hasFreeRegs(); }
@@ -237,6 +249,9 @@ class UnifiedFreeList
/** Returns the number of free vector registers. */
unsigned numFreeVecElems() const { return vecElemList.numFreeRegs(); }
+ /** Returns the number of free predicate registers. */
+ unsigned numFreeVecPredRegs() const { return predList.numFreeRegs(); }
+
/** Returns the number of free cc registers. */
unsigned numFreeCCRegs() const { return ccList.numFreeRegs(); }
};
@@ -267,6 +282,9 @@ UnifiedFreeList::addRegs(InputIt first, InputIt last)
case VecElemClass:
vecElemList.addRegs(first, last);
break;
+ case VecPredRegClass:
+ predList.addRegs(first, last);
+ break;
case CCRegClass:
ccList.addRegs(first, last);
break;
@@ -297,6 +315,9 @@ UnifiedFreeList::addReg(PhysRegIdPtr freed_reg)
case VecElemClass:
vecElemList.addReg(freed_reg);
break;
+ case VecPredRegClass:
+ predList.addReg(freed_reg);
+ break;
case CCRegClass:
ccList.addReg(freed_reg);
break;
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 4a55a91ea..ddd7b6d5f 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -104,6 +104,7 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs +
params->numPhysVecRegs +
params->numPhysVecRegs * TheISA::NumVecElemPerVecReg +
+ params->numPhysVecPredRegs +
params->numPhysCCRegs;
//Create an entry for each physical register within the
diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc
index 2f41e2ac2..cc4bba6b0 100644
--- a/src/cpu/o3/regfile.cc
+++ b/src/cpu/o3/regfile.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016 ARM Limited
+ * Copyright (c) 2016-2017 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -52,22 +52,26 @@
PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
unsigned _numPhysicalFloatRegs,
unsigned _numPhysicalVecRegs,
+ unsigned _numPhysicalVecPredRegs,
unsigned _numPhysicalCCRegs,
VecMode vmode)
: intRegFile(_numPhysicalIntRegs),
floatRegFile(_numPhysicalFloatRegs),
vectorRegFile(_numPhysicalVecRegs),
+ vecPredRegFile(_numPhysicalVecPredRegs),
ccRegFile(_numPhysicalCCRegs),
numPhysicalIntRegs(_numPhysicalIntRegs),
numPhysicalFloatRegs(_numPhysicalFloatRegs),
numPhysicalVecRegs(_numPhysicalVecRegs),
numPhysicalVecElemRegs(_numPhysicalVecRegs *
NumVecElemPerVecReg),
+ numPhysicalVecPredRegs(_numPhysicalVecPredRegs),
numPhysicalCCRegs(_numPhysicalCCRegs),
totalNumRegs(_numPhysicalIntRegs
+ _numPhysicalFloatRegs
+ _numPhysicalVecRegs
+ _numPhysicalVecRegs * NumVecElemPerVecReg
+ + _numPhysicalVecPredRegs
+ _numPhysicalCCRegs),
vecMode(vmode)
{
@@ -108,6 +112,12 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
}
}
+ // The next batch of the registers are the predicate physical
+ // registers; create their physical register ids.
+ for (phys_reg = 0; phys_reg < numPhysicalVecPredRegs; phys_reg++) {
+ vecPredRegIds.emplace_back(VecPredRegClass, phys_reg, flat_reg_idx++);
+ }
+
// The rest of the registers are the condition-code physical
// registers; put them onto the condition-code free list.
for (phys_reg = 0; phys_reg < numPhysicalCCRegs; phys_reg++) {
@@ -159,6 +169,13 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList)
else
freeList->addRegs(vecElemIds.begin(), vecElemIds.end());
+ // The next batch of the registers are the predicate physical
+ // registers; put them onto the predicate free list.
+ for (reg_idx = 0; reg_idx < numPhysicalVecPredRegs; reg_idx++) {
+ assert(vecPredRegIds[reg_idx].index() == reg_idx);
+ }
+ freeList->addRegs(vecPredRegIds.begin(), vecPredRegIds.end());
+
// The rest of the registers are the condition-code physical
// registers; put them onto the condition-code free list.
for (reg_idx = 0; reg_idx < numPhysicalCCRegs; reg_idx++) {
@@ -191,6 +208,8 @@ PhysRegFile::getRegIds(RegClass cls) -> IdRange
return std::make_pair(vecRegIds.begin(), vecRegIds.end());
case VecElemClass:
return std::make_pair(vecElemIds.begin(), vecElemIds.end());
+ case VecPredRegClass:
+ return std::make_pair(vecPredRegIds.begin(), vecPredRegIds.end());
case CCRegClass:
return std::make_pair(ccRegIds.begin(), ccRegIds.end());
case MiscRegClass:
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index 9d9113240..4077c99a4 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016 ARM Limited
+ * Copyright (c) 2016-2017 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -70,6 +70,7 @@ class PhysRegFile
using VecRegContainer = TheISA::VecRegContainer;
using PhysIds = std::vector<PhysRegId>;
using VecMode = Enums::VecRegRenameMode;
+ using VecPredRegContainer = TheISA::VecPredRegContainer;
public:
using IdRange = std::pair<PhysIds::const_iterator,
PhysIds::const_iterator>;
@@ -89,6 +90,10 @@ class PhysRegFile
std::vector<PhysRegId> vecRegIds;
std::vector<PhysRegId> vecElemIds;
+ /** Predicate register file. */
+ std::vector<VecPredRegContainer> vecPredRegFile;
+ std::vector<PhysRegId> vecPredRegIds;
+
/** Condition-code register file. */
std::vector<CCReg> ccRegFile;
std::vector<PhysRegId> ccRegIds;
@@ -117,6 +122,11 @@ class PhysRegFile
unsigned numPhysicalVecElemRegs;
/**
+ * Number of physical predicate registers
+ */
+ unsigned numPhysicalVecPredRegs;
+
+ /**
* Number of physical CC registers
*/
unsigned numPhysicalCCRegs;
@@ -135,6 +145,7 @@ class PhysRegFile
PhysRegFile(unsigned _numPhysicalIntRegs,
unsigned _numPhysicalFloatRegs,
unsigned _numPhysicalVecRegs,
+ unsigned _numPhysicalVecPredRegs,
unsigned _numPhysicalCCRegs,
VecMode vmode
);
@@ -154,6 +165,8 @@ class PhysRegFile
unsigned numFloatPhysRegs() const { return numPhysicalFloatRegs; }
/** @return the number of vector physical registers. */
unsigned numVecPhysRegs() const { return numPhysicalVecRegs; }
+ /** @return the number of predicate physical registers. */
+ unsigned numPredPhysRegs() const { return numPhysicalVecPredRegs; }
/** @return the number of vector physical registers. */
unsigned numVecElemPhysRegs() const { return numPhysicalVecElemRegs; }
@@ -201,7 +214,7 @@ class PhysRegFile
DPRINTF(IEW, "RegFile: Access to vector register %i, has "
"data %s\n", int(phys_reg->index()),
- vectorRegFile[phys_reg->index()].as<VecElem>().print());
+ vectorRegFile[phys_reg->index()].print());
return vectorRegFile[phys_reg->index()];
}
@@ -258,6 +271,24 @@ class PhysRegFile
return val;
}
+ /** Reads a predicate register. */
+ const VecPredRegContainer& readVecPredReg(PhysRegIdPtr phys_reg) const
+ {
+ assert(phys_reg->isVecPredPhysReg());
+
+ DPRINTF(IEW, "RegFile: Access to predicate register %i, has "
+ "data %s\n", int(phys_reg->index()),
+ vecPredRegFile[phys_reg->index()].print());
+
+ return vecPredRegFile[phys_reg->index()];
+ }
+
+ VecPredRegContainer& getWritableVecPredReg(PhysRegIdPtr phys_reg)
+ {
+ /* const_cast for not duplicating code above. */
+ return const_cast<VecPredRegContainer&>(readVecPredReg(phys_reg));
+ }
+
/** Reads a condition-code register. */
CCReg
readCCReg(PhysRegIdPtr phys_reg)
@@ -321,6 +352,17 @@ class PhysRegFile
val;
}
+ /** Sets a predicate register to the given value (val is copied in). */
+ void setVecPredReg(PhysRegIdPtr phys_reg, const VecPredRegContainer& val)
+ {
+ assert(phys_reg->isVecPredPhysReg()); // phys_reg must name a predicate register
+
+ DPRINTF(IEW, "RegFile: Setting predicate register %i to %s\n",
+ int(phys_reg->index()), val.print());
+
+ vecPredRegFile[phys_reg->index()] = val;
+ }
+
/** Sets a condition-code register to the given value. */
void
setCCReg(PhysRegIdPtr phys_reg, CCReg val)
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index bd5e72dec..a091c0908 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 ARM Limited
+ * Copyright (c) 2012, 2017 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -514,6 +514,7 @@ class DefaultRename
Stats::Scalar intRenameLookups;
Stats::Scalar fpRenameLookups;
Stats::Scalar vecRenameLookups;
+ Stats::Scalar vecPredRenameLookups;
/** Stat for total number of committed renaming mappings. */
Stats::Scalar renameCommittedMaps;
/** Stat for total number of mappings that were undone due to a squash. */
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index ed5dfb6eb..b63163f04 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -196,6 +196,10 @@ DefaultRename<Impl>::regStats()
.name(name() + ".vec_rename_lookups")
.desc("Number of vector rename lookups")
.prereq(vecRenameLookups);
+ vecPredRenameLookups
+ .name(name() + ".vec_pred_rename_lookups")
+ .desc("Number of vector predicate rename lookups")
+ .prereq(vecPredRenameLookups);
}
template <class Impl>
@@ -659,6 +663,7 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
inst->numFPDestRegs(),
inst->numVecDestRegs(),
inst->numVecElemDestRegs(),
+ inst->numVecPredDestRegs(),
inst->numCCDestRegs())) {
DPRINTF(Rename, "Blocking due to lack of free "
"physical registers to rename to.\n");
@@ -1041,6 +1046,9 @@ DefaultRename<Impl>::renameSrcRegs(const DynInstPtr &inst, ThreadID tid)
case VecElemClass:
vecRenameLookups++;
break;
+ case VecPredRegClass:
+ vecPredRenameLookups++;
+ break;
case CCRegClass:
case MiscRegClass:
break;
@@ -1256,7 +1264,7 @@ DefaultRename<Impl>::readFreeEntries(ThreadID tid)
}
DPRINTF(Rename, "[tid:%i]: Free IQ: %i, Free ROB: %i, "
- "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i)\n",
+ "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i %i)\n",
tid,
freeEntries[tid].iqEntries,
freeEntries[tid].robEntries,
@@ -1266,6 +1274,7 @@ DefaultRename<Impl>::readFreeEntries(ThreadID tid)
renameMap[tid]->numFreeIntEntries(),
renameMap[tid]->numFreeFloatEntries(),
renameMap[tid]->numFreeVecEntries(),
+ renameMap[tid]->numFreePredEntries(),
renameMap[tid]->numFreeCCEntries());
DPRINTF(Rename, "[tid:%i]: %i instructions not yet in ROB\n",
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index 86c43932c..603f1ff36 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016,2019 ARM Limited
+ * Copyright (c) 2016-2017,2019 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -120,6 +120,8 @@ UnifiedRenameMap::init(PhysRegFile *_regFile,
vecElemMap.init(TheISA::NumVecRegs * NVecElems,
&(freeList->vecElemList), (RegIndex)-1);
+ predMap.init(TheISA::NumVecPredRegs, &(freeList->predList), (RegIndex)-1);
+
ccMap.init(TheISA::NumCCRegs, &(freeList->ccList), (RegIndex)-1);
}
diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh
index d30668027..5424633e5 100644
--- a/src/cpu/o3/rename_map.hh
+++ b/src/cpu/o3/rename_map.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015-2016 ARM Limited
+ * Copyright (c) 2015-2017 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
@@ -172,6 +172,7 @@ class UnifiedRenameMap
private:
static constexpr uint32_t NVecElems = TheISA::NumVecElemPerVecReg;
using VecReg = TheISA::VecReg;
+ using VecPredReg = TheISA::VecPredReg;
/** The integer register rename map */
SimpleRenameMap intMap;
@@ -188,6 +189,9 @@ class UnifiedRenameMap
/** The vector element register rename map */
SimpleRenameMap vecElemMap;
+ /** The predicate register rename map */
+ SimpleRenameMap predMap;
+
using VecMode = Enums::VecRegRenameMode;
VecMode vecMode;
@@ -235,6 +239,8 @@ class UnifiedRenameMap
case VecElemClass:
assert(vecMode == Enums::Elem);
return vecElemMap.rename(arch_reg);
+ case VecPredRegClass:
+ return predMap.rename(arch_reg);
case CCRegClass:
return ccMap.rename(arch_reg);
case MiscRegClass:
@@ -276,6 +282,9 @@ class UnifiedRenameMap
assert(vecMode == Enums::Elem);
return vecElemMap.lookup(arch_reg);
+ case VecPredRegClass:
+ return predMap.lookup(arch_reg);
+
case CCRegClass:
return ccMap.lookup(arch_reg);
@@ -319,6 +328,10 @@ class UnifiedRenameMap
assert(vecMode == Enums::Elem);
return vecElemMap.setEntry(arch_reg, phys_reg);
+ case VecPredRegClass:
+ assert(phys_reg->isVecPredPhysReg());
+ return predMap.setEntry(arch_reg, phys_reg);
+
case CCRegClass:
assert(phys_reg->isCCPhysReg());
return ccMap.setEntry(arch_reg, phys_reg);
@@ -345,10 +358,11 @@ class UnifiedRenameMap
*/
unsigned numFreeEntries() const
{
- return std::min(
+ return std::min(std::min(
std::min(intMap.numFreeEntries(), floatMap.numFreeEntries()),
vecMode == Enums::Full ? vecMap.numFreeEntries()
- : vecElemMap.numFreeEntries());
+ : vecElemMap.numFreeEntries()),
+ predMap.numFreeEntries());
}
unsigned numFreeIntEntries() const { return intMap.numFreeEntries(); }
@@ -359,18 +373,21 @@ class UnifiedRenameMap
? vecMap.numFreeEntries()
: vecElemMap.numFreeEntries();
}
+ unsigned numFreePredEntries() const { return predMap.numFreeEntries(); }
unsigned numFreeCCEntries() const { return ccMap.numFreeEntries(); }
/**
* Return whether there are enough registers to serve the request.
*/
bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t vectorRegs,
- uint32_t vecElemRegs, uint32_t ccRegs) const
+ uint32_t vecElemRegs, uint32_t vecPredRegs,
+ uint32_t ccRegs) const
{
return intRegs <= intMap.numFreeEntries() &&
floatRegs <= floatMap.numFreeEntries() &&
vectorRegs <= vecMap.numFreeEntries() &&
vecElemRegs <= vecElemMap.numFreeEntries() &&
+ vecPredRegs <= predMap.numFreeEntries() &&
ccRegs <= ccMap.numFreeEntries();
}
/**
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
index c74936469..7858f5a0a 100644
--- a/src/cpu/o3/thread_context.hh
+++ b/src/cpu/o3/thread_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012, 2016 ARM Limited
+ * Copyright (c) 2011-2012, 2016-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -263,6 +263,14 @@ class O3ThreadContext : public ThreadContext
return readVecElemFlat(flattenRegId(reg).index(), reg.elemIndex());
}
+ virtual const VecPredRegContainer& readVecPredReg(const RegId& id) const {
+ return readVecPredRegFlat(flattenRegId(id).index()); // flatten id, then read by flat index
+ }
+
+ virtual VecPredRegContainer& getWritableVecPredReg(const RegId& id) {
+ return getWritableVecPredRegFlat(flattenRegId(id).index()); // flatten id, then fetch by flat index
+ }
+
virtual CCReg readCCReg(int reg_idx) {
return readCCRegFlat(flattenRegId(RegId(CCRegClass,
reg_idx)).index());
@@ -295,6 +303,13 @@ class O3ThreadContext : public ThreadContext
}
virtual void
+ setVecPredReg(const RegId& reg,
+ const VecPredRegContainer& val)
+ {
+ setVecPredRegFlat(flattenRegId(reg).index(), val); // flatten reg, then write by flat index
+ }
+
+ virtual void
setCCReg(int reg_idx, CCReg val)
{
setCCRegFlat(flattenRegId(RegId(CCRegClass, reg_idx)).index(), val);
@@ -403,6 +418,12 @@ class O3ThreadContext : public ThreadContext
virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx,
const VecElem& val);
+ virtual const VecPredRegContainer& readVecPredRegFlat(int idx)
+ const override;
+ virtual VecPredRegContainer& getWritableVecPredRegFlat(int idx) override;
+ virtual void setVecPredRegFlat(int idx,
+ const VecPredRegContainer& val) override;
+
virtual CCReg readCCRegFlat(int idx);
virtual void setCCRegFlat(int idx, CCReg val);
};
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index bd2bf63b0..59562ba3b 100644
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -233,6 +233,20 @@ O3ThreadContext<Impl>::readVecElemFlat(const RegIndex& idx,
}
template <class Impl>
+const TheISA::VecPredRegContainer&
+O3ThreadContext<Impl>::readVecPredRegFlat(int reg_id) const
+{
+ return cpu->readArchVecPredReg(reg_id, thread->threadId()); // delegate to the CPU for this thread's id
+}
+
+template <class Impl>
+TheISA::VecPredRegContainer&
+O3ThreadContext<Impl>::getWritableVecPredRegFlat(int reg_id)
+{
+ return cpu->getWritableArchVecPredReg(reg_id, thread->threadId()); // delegate to the CPU for this thread's id
+}
+
+template <class Impl>
TheISA::CCReg
O3ThreadContext<Impl>::readCCRegFlat(int reg_idx)
{
@@ -277,6 +291,16 @@ O3ThreadContext<Impl>::setVecElemFlat(const RegIndex& idx,
template <class Impl>
void
+O3ThreadContext<Impl>::setVecPredRegFlat(int reg_idx,
+ const VecPredRegContainer& val)
+{
+ cpu->setArchVecPredReg(reg_idx, val, thread->threadId()); // write goes through the CPU for this thread's id
+
+ conditionalSquash(); // NOTE(review): presumably squashes in-flight state after the write -- confirm
+}
+
+template <class Impl>
+void
O3ThreadContext<Impl>::setCCRegFlat(int reg_idx, TheISA::CCReg val)
{
cpu->setArchCCReg(reg_idx, val, thread->threadId());
diff --git a/src/cpu/reg_class.cc b/src/cpu/reg_class.cc
index 16c1949ee..eeb06adcc 100644
--- a/src/cpu/reg_class.cc
+++ b/src/cpu/reg_class.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016 ARM Limited
+ * Copyright (c) 2016-2017 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -47,6 +47,7 @@ const char *RegId::regClassStrings[] = {
"FloatRegClass",
"VecRegClass",
"VecElemClass",
+ "VecPredRegClass",
"CCRegClass",
"MiscRegClass"
};
diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh
index 69da9cf7e..70cfab39e 100644
--- a/src/cpu/reg_class.hh
+++ b/src/cpu/reg_class.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016 ARM Limited
+ * Copyright (c) 2016-2017 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -60,6 +60,7 @@ enum RegClass {
VecRegClass,
/** Vector Register Native Elem lane. */
VecElemClass,
+ VecPredRegClass,
CCRegClass, ///< Condition-code register
MiscRegClass ///< Control (misc) register
};
@@ -151,6 +152,9 @@ class RegId {
/** @Return true if it is a vector element physical register. */
bool isVecElem() const { return regClass == VecElemClass; }
+ /** @Return true if it is a predicate physical register. */
+ bool isVecPredReg() const { return regClass == VecPredRegClass; }
+
/** @Return true if it is a condition-code physical register. */
bool isCCReg() const { return regClass == CCRegClass; }
@@ -179,6 +183,7 @@ class RegId {
case IntRegClass:
case FloatRegClass:
case VecRegClass:
+ case VecPredRegClass:
case CCRegClass:
case MiscRegClass:
return regIdx;
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index cbca34123..d2107b89a 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014-2016 ARM Limited
+ * Copyright (c) 2014-2017 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -121,6 +121,10 @@ class SimpleExecContext : public ExecContext {
mutable Stats::Scalar numVecRegReads;
Stats::Scalar numVecRegWrites;
+ // Number of predicate register file accesses
+ mutable Stats::Scalar numVecPredRegReads;
+ Stats::Scalar numVecPredRegWrites;
+
// Number of condition code register file accesses
Stats::Scalar numCCRegReads;
Stats::Scalar numCCRegWrites;
@@ -333,6 +337,34 @@ class SimpleExecContext : public ExecContext {
thread->setVecElem(reg, val);
}
+ const VecPredRegContainer&
+ readVecPredRegOperand(const StaticInst *si, int idx) const override
+ {
+ numVecPredRegReads++; // the stat is declared mutable so this const method can count the read
+ const RegId& reg = si->srcRegIdx(idx); // idx selects one of si's source registers
+ assert(reg.isVecPredReg());
+ return thread->readVecPredReg(reg);
+ }
+
+ VecPredRegContainer&
+ getWritableVecPredRegOperand(const StaticInst *si, int idx) override
+ {
+ numVecPredRegWrites++; // handing out a writable reference is counted as a write
+ const RegId& reg = si->destRegIdx(idx); // idx selects one of si's destination registers
+ assert(reg.isVecPredReg());
+ return thread->getWritableVecPredReg(reg);
+ }
+
+ void
+ setVecPredRegOperand(const StaticInst *si, int idx,
+ const VecPredRegContainer& val) override
+ {
+ numVecPredRegWrites++; // count the write before forwarding it to the thread
+ const RegId& reg = si->destRegIdx(idx); // idx selects one of si's destination registers
+ assert(reg.isVecPredReg());
+ thread->setVecPredReg(reg, val);
+ }
+
CCReg
readCCRegOperand(const StaticInst *si, int idx) override
{
diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh
index 211a4c89f..00355c602 100644
--- a/src/cpu/simple_thread.hh
+++ b/src/cpu/simple_thread.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012, 2016 ARM Limited
+ * Copyright (c) 2011-2012, 2016-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -58,6 +58,7 @@
#include "debug/CCRegs.hh"
#include "debug/FloatRegs.hh"
#include "debug/IntRegs.hh"
+#include "debug/VecPredRegs.hh"
#include "debug/VecRegs.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
@@ -102,6 +103,7 @@ class SimpleThread : public ThreadState
typedef TheISA::CCReg CCReg;
using VecRegContainer = TheISA::VecRegContainer;
using VecElem = TheISA::VecElem;
+ using VecPredRegContainer = TheISA::VecPredRegContainer;
public:
typedef ThreadContext::Status Status;
@@ -109,6 +111,7 @@ class SimpleThread : public ThreadState
RegVal floatRegs[TheISA::NumFloatRegs];
RegVal intRegs[TheISA::NumIntRegs];
VecRegContainer vecRegs[TheISA::NumVecRegs];
+ VecPredRegContainer vecPredRegs[TheISA::NumVecPredRegs];
#ifdef ISA_HAS_CC_REGS
TheISA::CCReg ccRegs[TheISA::NumCCRegs];
#endif
@@ -228,6 +231,9 @@ class SimpleThread : public ThreadState
for (int i = 0; i < TheISA::NumVecRegs; i++) {
vecRegs[i].zero();
}
+ for (int i = 0; i < TheISA::NumVecPredRegs; i++) {
+ vecPredRegs[i].reset();
+ }
#ifdef ISA_HAS_CC_REGS
memset(ccRegs, 0, sizeof(ccRegs));
#endif
@@ -266,7 +272,7 @@ class SimpleThread : public ThreadState
assert(flatIndex < TheISA::NumVecRegs);
const VecRegContainer& regVal = readVecRegFlat(flatIndex);
DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s.\n",
- reg.index(), flatIndex, regVal.as<TheISA::VecElem>().print());
+ reg.index(), flatIndex, regVal.print());
return regVal;
}
@@ -277,7 +283,7 @@ class SimpleThread : public ThreadState
assert(flatIndex < TheISA::NumVecRegs);
VecRegContainer& regVal = getWritableVecRegFlat(flatIndex);
DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s for modify.\n",
- reg.index(), flatIndex, regVal.as<TheISA::VecElem>().print());
+ reg.index(), flatIndex, regVal.print());
return regVal;
}
@@ -350,6 +356,28 @@ class SimpleThread : public ThreadState
return regVal;
}
+ const VecPredRegContainer&
+ readVecPredReg(const RegId& reg) const
+ {
+ int flatIndex = isa->flattenVecPredIndex(reg.index()); // the ISA object maps reg's index to a flat one
+ assert(flatIndex < TheISA::NumVecPredRegs); // checks the upper bound only
+ const VecPredRegContainer& regVal = readVecPredRegFlat(flatIndex);
+ DPRINTF(VecPredRegs, "Reading predicate reg %d (%d) as %s.\n",
+ reg.index(), flatIndex, regVal.print());
+ return regVal;
+ }
+
+ VecPredRegContainer&
+ getWritableVecPredReg(const RegId& reg)
+ {
+ int flatIndex = isa->flattenVecPredIndex(reg.index()); // the ISA object maps reg's index to a flat one
+ assert(flatIndex < TheISA::NumVecPredRegs); // checks the upper bound only
+ VecPredRegContainer& regVal = getWritableVecPredRegFlat(flatIndex);
+ DPRINTF(VecPredRegs,
+ "Reading predicate reg %d (%d) as %s for modify.\n",
+ reg.index(), flatIndex, regVal.print());
+ return regVal; // mutable reference: the trace above shows the value before the caller modifies it
+ }
CCReg readCCReg(int reg_idx)
{
@@ -411,6 +439,16 @@ class SimpleThread : public ThreadState
}
void
+ setVecPredReg(const RegId& reg, const VecPredRegContainer& val)
+ {
+ int flatIndex = isa->flattenVecPredIndex(reg.index()); // the ISA object maps reg's index to a flat one
+ assert(flatIndex < TheISA::NumVecPredRegs); // checks the upper bound only
+ setVecPredRegFlat(flatIndex, val); // the write happens before the trace line below
+ DPRINTF(VecPredRegs, "Setting predicate reg %d (%d) to %s.\n",
+ reg.index(), flatIndex, val.print());
+ }
+
+ void
setCCReg(int reg_idx, CCReg val)
{
#ifdef ISA_HAS_CC_REGS
@@ -568,6 +606,21 @@ class SimpleThread : public ThreadState
vecRegs[reg].as<TheISA::VecElem>()[elemIndex] = val;
}
+ const VecPredRegContainer& readVecPredRegFlat(const RegIndex& reg) const
+ {
+ return vecPredRegs[reg]; // reg indexes vecPredRegs directly; no bounds check here
+ }
+
+ VecPredRegContainer& getWritableVecPredRegFlat(const RegIndex& reg)
+ {
+ return vecPredRegs[reg]; // mutable reference into vecPredRegs; no bounds check here
+ }
+
+ void setVecPredRegFlat(const RegIndex& reg, const VecPredRegContainer& val)
+ {
+ vecPredRegs[reg] = val; // copies val into vecPredRegs; no bounds check here
+ }
+
#ifdef ISA_HAS_CC_REGS
CCReg readCCRegFlat(int idx) { return ccRegs[idx]; }
void setCCRegFlat(int idx, CCReg val) { ccRegs[idx] = val; }
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh
index 70d933c31..bcb53f5ea 100644
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -1,4 +1,16 @@
/*
+ * Copyright (c) 2017 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
* Copyright (c) 2003-2005 The Regents of The University of Michigan
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved.
@@ -105,16 +117,17 @@ class StaticInst : public RefCounted, public StaticInstFlags
/** @{ */
int8_t _numVecDestRegs;
int8_t _numVecElemDestRegs;
+ int8_t _numVecPredDestRegs;
/** @} */
public:
/// @name Register information.
- /// The sum of numFPDestRegs(), numIntDestRegs(), numVecDestRegs() and
- /// numVecelemDestRegs() equals numDestRegs(). The former two functions
- /// are used to track physical register usage for machines with separate
- /// int & FP reg files, the next two is for machines with vector register
- /// file.
+ /// The sum of numFPDestRegs(), numIntDestRegs(), numVecDestRegs(),
+ /// numVecElemDestRegs() and numVecPredDestRegs() equals numDestRegs().
+ /// The former two functions are used to track physical register usage for
+ /// machines with separate int & FP reg files, the next three are for
+ /// machines with vector and predicate register files.
//@{
/// Number of source registers.
int8_t numSrcRegs() const { return _numSrcRegs; }
@@ -128,6 +141,8 @@ class StaticInst : public RefCounted, public StaticInstFlags
int8_t numVecDestRegs() const { return _numVecDestRegs; }
/// Number of vector element destination regs.
int8_t numVecElemDestRegs() const { return _numVecElemDestRegs; }
+ /// Number of predicate destination regs.
+ int8_t numVecPredDestRegs() const { return _numVecPredDestRegs; }
/// Number of condition-code destination regs.
int8_t numCCDestRegs() const { return _numCCDestRegs; }
//@}
@@ -248,8 +263,8 @@ class StaticInst : public RefCounted, public StaticInstFlags
StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass)
: _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0),
_numFPDestRegs(0), _numIntDestRegs(0), _numCCDestRegs(0),
- _numVecDestRegs(0), _numVecElemDestRegs(0), machInst(_machInst),
- mnemonic(_mnemonic), cachedDisassembly(0)
+ _numVecDestRegs(0), _numVecElemDestRegs(0), _numVecPredDestRegs(0),
+ machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0)
{ }
public:
diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc
index 2d907a055..7597dbfb2 100644
--- a/src/cpu/thread_context.cc
+++ b/src/cpu/thread_context.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2016 ARM Limited
+ * Copyright (c) 2012, 2016-2017 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -43,6 +43,7 @@
#include "cpu/thread_context.hh"
+#include "arch/generic/vec_pred_reg.hh"
#include "arch/kernel_stats.hh"
#include "base/logging.hh"
#include "base/trace.hh"
@@ -86,6 +87,17 @@ ThreadContext::compare(ThreadContext *one, ThreadContext *two)
panic("Vec reg idx %d doesn't match, one: %#x, two: %#x",
i, t1, t2);
}
+
+ // Then loop through the predicate registers.
+ for (int i = 0; i < TheISA::NumVecPredRegs; ++i) {
+ RegId rid(VecPredRegClass, i);
+ const TheISA::VecPredRegContainer& t1 = one->readVecPredReg(rid);
+ const TheISA::VecPredRegContainer& t2 = two->readVecPredReg(rid);
+ if (t1 != t2)
+ panic("Pred reg idx %d doesn't match, one: %#x, two: %#x",
+ i, t1, t2);
+ }
+
for (int i = 0; i < TheISA::NumMiscRegs; ++i) {
RegVal t1 = one->readMiscRegNoEffect(i);
RegVal t2 = two->readMiscRegNoEffect(i);
@@ -168,6 +180,12 @@ serialize(ThreadContext &tc, CheckpointOut &cp)
}
SERIALIZE_CONTAINER(vecRegs);
+ std::vector<TheISA::VecPredRegContainer> vecPredRegs(NumVecPredRegs);
+ for (int i = 0; i < NumVecPredRegs; ++i) {
+ vecPredRegs[i] = tc.readVecPredRegFlat(i);
+ }
+ SERIALIZE_CONTAINER(vecPredRegs);
+
RegVal intRegs[NumIntRegs];
for (int i = 0; i < NumIntRegs; ++i)
intRegs[i] = tc.readIntRegFlat(i);
@@ -203,6 +221,12 @@ unserialize(ThreadContext &tc, CheckpointIn &cp)
tc.setVecRegFlat(i, vecRegs[i]);
}
+ std::vector<TheISA::VecPredRegContainer> vecPredRegs(NumVecPredRegs);
+ UNSERIALIZE_CONTAINER(vecPredRegs);
+ for (int i = 0; i < NumVecPredRegs; ++i) {
+ tc.setVecPredRegFlat(i, vecPredRegs[i]);
+ }
+
RegVal intRegs[NumIntRegs];
UNSERIALIZE_ARRAY(intRegs, NumIntRegs);
for (int i = 0; i < NumIntRegs; ++i)
diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh
index cad073b4f..6dde68650 100644
--- a/src/cpu/thread_context.hh
+++ b/src/cpu/thread_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012, 2016 ARM Limited
+ * Copyright (c) 2011-2012, 2016-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -98,6 +98,8 @@ class ThreadContext
typedef TheISA::CCReg CCReg;
using VecRegContainer = TheISA::VecRegContainer;
using VecElem = TheISA::VecElem;
+ using VecPredRegContainer = TheISA::VecPredRegContainer;
+
public:
enum Status
@@ -242,6 +244,10 @@ class ThreadContext
virtual const VecElem& readVecElem(const RegId& reg) const = 0;
+ virtual const VecPredRegContainer& readVecPredReg(const RegId& reg)
+ const = 0;
+ virtual VecPredRegContainer& getWritableVecPredReg(const RegId& reg) = 0;
+
virtual CCReg readCCReg(int reg_idx) = 0;
virtual void setIntReg(int reg_idx, RegVal val) = 0;
@@ -252,6 +258,9 @@ class ThreadContext
virtual void setVecElem(const RegId& reg, const VecElem& val) = 0;
+ virtual void setVecPredReg(const RegId& reg,
+ const VecPredRegContainer& val) = 0;
+
virtual void setCCReg(int reg_idx, CCReg val) = 0;
virtual TheISA::PCState pcState() = 0;
@@ -341,6 +350,11 @@ class ThreadContext
virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx,
const VecElem& val) = 0;
+ virtual const VecPredRegContainer& readVecPredRegFlat(int idx) const = 0;
+ virtual VecPredRegContainer& getWritableVecPredRegFlat(int idx) = 0;
+ virtual void setVecPredRegFlat(int idx,
+ const VecPredRegContainer& val) = 0;
+
virtual CCReg readCCRegFlat(int idx) = 0;
virtual void setCCRegFlat(int idx, CCReg val) = 0;
/** @} */
@@ -502,6 +516,12 @@ class ProxyThreadContext : public ThreadContext
const VecElem& readVecElem(const RegId& reg) const
{ return actualTC->readVecElem(reg); }
+ const VecPredRegContainer& readVecPredReg(const RegId& reg) const
+ { return actualTC->readVecPredReg(reg); } // forwards unchanged to actualTC
+
+ VecPredRegContainer& getWritableVecPredReg(const RegId& reg)
+ { return actualTC->getWritableVecPredReg(reg); } // forwards unchanged to actualTC
+
CCReg readCCReg(int reg_idx)
{ return actualTC->readCCReg(reg_idx); }
@@ -514,6 +534,9 @@ class ProxyThreadContext : public ThreadContext
void setVecReg(const RegId& reg, const VecRegContainer& val)
{ actualTC->setVecReg(reg, val); }
+ void setVecPredReg(const RegId& reg, const VecPredRegContainer& val)
+ { actualTC->setVecPredReg(reg, val); } // forwards unchanged to actualTC
+
void setVecElem(const RegId& reg, const VecElem& val)
{ actualTC->setVecElem(reg, val); }
@@ -590,6 +613,15 @@ class ProxyThreadContext : public ThreadContext
const VecElem& val)
{ actualTC->setVecElemFlat(id, elemIndex, val); }
+ const VecPredRegContainer& readVecPredRegFlat(int id) const
+ { return actualTC->readVecPredRegFlat(id); } // forwards unchanged to actualTC
+
+ VecPredRegContainer& getWritableVecPredRegFlat(int id)
+ { return actualTC->getWritableVecPredRegFlat(id); } // forwards unchanged to actualTC
+
+ void setVecPredRegFlat(int idx, const VecPredRegContainer& val)
+ { actualTC->setVecPredRegFlat(idx, val); } // forwards unchanged to actualTC
+
CCReg readCCRegFlat(int idx)
{ return actualTC->readCCRegFlat(idx); }