summaryrefslogtreecommitdiff
path: root/src/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu')
-rw-r--r--src/cpu/base_dyn_inst.hh43
-rw-r--r--src/cpu/checker/cpu.hh138
-rw-r--r--src/cpu/checker/cpu_impl.hh28
-rw-r--r--src/cpu/checker/thread_context.hh83
-rw-r--r--src/cpu/exec_context.hh61
-rw-r--r--src/cpu/inst_res.hh52
-rw-r--r--src/cpu/minor/dyn_inst.cc11
-rw-r--r--src/cpu/minor/exec_context.hh122
-rw-r--r--src/cpu/minor/scoreboard.cc15
-rw-r--r--src/cpu/minor/scoreboard.hh4
-rw-r--r--src/cpu/o3/O3CPU.py14
-rw-r--r--src/cpu/o3/comm.hh25
-rw-r--r--src/cpu/o3/cpu.cc143
-rw-r--r--src/cpu/o3/cpu.hh89
-rw-r--r--src/cpu/o3/dyn_inst.hh132
-rw-r--r--src/cpu/o3/free_list.hh100
-rw-r--r--src/cpu/o3/inst_queue_impl.hh5
-rw-r--r--src/cpu/o3/regfile.cc120
-rw-r--r--src/cpu/o3/regfile.hh161
-rw-r--r--src/cpu/o3/rename.hh3
-rw-r--r--src/cpu/o3/rename_impl.hh19
-rw-r--r--src/cpu/o3/rename_map.cc91
-rw-r--r--src/cpu/o3/rename_map.hh82
-rwxr-xr-xsrc/cpu/o3/thread_context.hh97
-rwxr-xr-xsrc/cpu/o3/thread_context_impl.hh42
-rw-r--r--src/cpu/reg_class.cc14
-rw-r--r--src/cpu/reg_class.hh44
-rw-r--r--src/cpu/reg_class_impl.hh5
-rw-r--r--src/cpu/simple/base.cc10
-rw-r--r--src/cpu/simple/exec_context.hh126
-rw-r--r--src/cpu/simple_thread.hh158
-rw-r--r--src/cpu/static_inst.hh23
-rw-r--r--src/cpu/thread_context.cc24
-rw-r--r--src/cpu/thread_context.hh120
34 files changed, 2142 insertions, 62 deletions
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index a8e619cd9..132c390b3 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -48,8 +48,8 @@
#include <array>
#include <bitset>
+#include <deque>
#include <list>
-#include <queue>
#include <string>
#include "arch/generic/tlb.hh"
@@ -82,6 +82,7 @@ class BaseDynInst : public ExecContext, public RefCounted
// Typedef for the CPU.
typedef typename Impl::CPUType ImplCPU;
typedef typename ImplCPU::ImplState ImplState;
+ using VecRegContainer = TheISA::VecRegContainer;
// The DynInstPtr type.
typedef typename Impl::DynInstPtr DynInstPtr;
@@ -591,6 +592,10 @@ class BaseDynInst : public ExecContext, public RefCounted
int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); }
int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); }
int8_t numCCDestRegs() const { return staticInst->numCCDestRegs(); }
+ int8_t numVecDestRegs() const { return staticInst->numVecDestRegs(); }
+ int8_t numVecElemDestRegs() const {
+ return staticInst->numVecElemDestRegs();
+ }
/** Returns the logical register index of the i'th destination register. */
const RegId& destRegIdx(int i) const { return staticInst->destRegIdx(i); }
@@ -615,6 +620,8 @@ class BaseDynInst : public ExecContext, public RefCounted
}
/** Pushes a result onto the instResult queue. */
+ /** @{ */
+ /** Scalar result. */
template<typename T>
void setScalarResult(T&& t)
{
@@ -624,6 +631,27 @@ class BaseDynInst : public ExecContext, public RefCounted
}
}
+ /** Full vector result. */
+ template<typename T>
+ void setVecResult(T&& t)
+ {
+ if (instFlags[RecordResult]) {
+ instResult.push(InstResult(std::forward<T>(t),
+ InstResult::ResultType::VecReg));
+ }
+ }
+
+ /** Vector element result. */
+ template<typename T>
+ void setVecElemResult(T&& t)
+ {
+ if (instFlags[RecordResult]) {
+ instResult.push(InstResult(std::forward<T>(t),
+ InstResult::ResultType::VecElem));
+ }
+ }
+ /** @} */
+
/** Records an integer register being set to a value. */
void setIntRegOperand(const StaticInst *si, int idx, IntReg val)
{
@@ -642,6 +670,13 @@ class BaseDynInst : public ExecContext, public RefCounted
setScalarResult(val);
}
+ /** Record a vector register being set to a value */
+ void setVecRegOperand(const StaticInst *si, int idx,
+ const VecRegContainer& val)
+ {
+ setVecResult(val);
+ }
+
/** Records an fp register being set to an integer value. */
void
setFloatRegOperandBits(const StaticInst *si, int idx, FloatRegBits val)
@@ -649,6 +684,12 @@ class BaseDynInst : public ExecContext, public RefCounted
setScalarResult(val);
}
+ /** Record a vector register element being set to a value */
+ void setVecElemOperand(const StaticInst *si, int idx, const VecElem val)
+ {
+ setVecElemResult(val);
+ }
+
/** Records that one of the source registers is ready. */
void markSrcRegReady();
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index 6571d034a..213106bd2 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -96,6 +96,7 @@ class CheckerCPU : public BaseCPU, public ExecContext
typedef TheISA::FloatReg FloatReg;
typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::MiscReg MiscReg;
+ using VecRegContainer = TheISA::VecRegContainer;
/** id attached to all issued requests */
MasterID masterId;
@@ -225,6 +226,111 @@ class CheckerCPU : public BaseCPU, public ExecContext
return thread->readFloatRegBits(reg.index());
}
+ /**
+ * Read source vector register operand.
+ */
+ const VecRegContainer& readVecRegOperand(const StaticInst *si,
+ int idx) const override
+ {
+ const RegId& reg = si->srcRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread->readVecReg(reg);
+ }
+
+ /**
+ * Read destination vector register operand for modification.
+ */
+ VecRegContainer& getWritableVecRegOperand(const StaticInst *si,
+ int idx) override
+ {
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread->getWritableVecReg(reg);
+ }
+
+ /** Vector Register Lane Interfaces. */
+ /** @{ */
+    /**
+     * Reads a source vector operand through its 8-bit lane view.
+     *
+     * @param si  Static instruction whose operand list is consulted.
+     * @param idx Position of the operand in the source operand list.
+     * @return Whatever thread->readVec8BitLaneReg() yields for the operand.
+     */
+    virtual ConstVecLane8
+    readVec8BitLaneOperand(const StaticInst *si, int idx) const
+                            override
+    {
+        // A source operand must be resolved with srcRegIdx(), exactly as
+        // readVecRegOperand() does; destRegIdx() would interpret idx
+        // against the destination list and pick the wrong register.
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread->readVec8BitLaneReg(reg);
+    }
+
+    /**
+     * Reads a source vector operand through its 16-bit lane view.
+     *
+     * @param si  Static instruction whose operand list is consulted.
+     * @param idx Position of the operand in the source operand list.
+     * @return Whatever thread->readVec16BitLaneReg() yields for the operand.
+     */
+    virtual ConstVecLane16
+    readVec16BitLaneOperand(const StaticInst *si, int idx) const
+                            override
+    {
+        // A source operand must be resolved with srcRegIdx(), exactly as
+        // readVecRegOperand() does; destRegIdx() would interpret idx
+        // against the destination list and pick the wrong register.
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread->readVec16BitLaneReg(reg);
+    }
+
+    /**
+     * Reads a source vector operand through its 32-bit lane view.
+     *
+     * @param si  Static instruction whose operand list is consulted.
+     * @param idx Position of the operand in the source operand list.
+     * @return Whatever thread->readVec32BitLaneReg() yields for the operand.
+     */
+    virtual ConstVecLane32
+    readVec32BitLaneOperand(const StaticInst *si, int idx) const
+                            override
+    {
+        // A source operand must be resolved with srcRegIdx(), exactly as
+        // readVecRegOperand() does; destRegIdx() would interpret idx
+        // against the destination list and pick the wrong register.
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread->readVec32BitLaneReg(reg);
+    }
+
+    /**
+     * Reads a source vector operand through its 64-bit lane view.
+     *
+     * @param si  Static instruction whose operand list is consulted.
+     * @param idx Position of the operand in the source operand list.
+     * @return Whatever thread->readVec64BitLaneReg() yields for the operand.
+     */
+    virtual ConstVecLane64
+    readVec64BitLaneOperand(const StaticInst *si, int idx) const
+                            override
+    {
+        // A source operand must be resolved with srcRegIdx(), exactly as
+        // readVecRegOperand() does; destRegIdx() would interpret idx
+        // against the destination list and pick the wrong register.
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread->readVec64BitLaneReg(reg);
+    }
+
+ /** Write a lane of the destination vector operand. */
+ template <typename LD>
+ void
+ setVecLaneOperandT(const StaticInst *si, int idx, const LD& val)
+ {
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread->setVecLane(reg, val);
+ }
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::Byte>& val) override
+ {
+ setVecLaneOperandT(si, idx, val);
+ }
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::TwoByte>& val) override
+ {
+ setVecLaneOperandT(si, idx, val);
+ }
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::FourByte>& val) override
+ {
+ setVecLaneOperandT(si, idx, val);
+ }
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::EightByte>& val) override
+ {
+ setVecLaneOperandT(si, idx, val);
+ }
+ /** @} */
+
+ VecElem readVecElemOperand(const StaticInst *si, int idx) const override
+ {
+ const RegId& reg = si->srcRegIdx(idx);
+ return thread->readVecElem(reg);
+ }
+
CCReg readCCRegOperand(const StaticInst *si, int idx) override
{
const RegId& reg = si->srcRegIdx(idx);
@@ -239,6 +345,20 @@ class CheckerCPU : public BaseCPU, public ExecContext
InstResult::ResultType::Scalar));
}
+ template<typename T>
+ void setVecResult(T&& t)
+ {
+ result.push(InstResult(std::forward<T>(t),
+ InstResult::ResultType::VecReg));
+ }
+
+ template<typename T>
+ void setVecElemResult(T&& t)
+ {
+ result.push(InstResult(std::forward<T>(t),
+ InstResult::ResultType::VecElem));
+ }
+
void setIntRegOperand(const StaticInst *si, int idx,
IntReg val) override
{
@@ -274,6 +394,24 @@ class CheckerCPU : public BaseCPU, public ExecContext
setScalarResult((uint64_t)val);
}
+ void setVecRegOperand(const StaticInst *si, int idx,
+ const VecRegContainer& val) override
+ {
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecReg());
+ thread->setVecReg(reg, val);
+ setVecResult(val);
+ }
+
+ void setVecElemOperand(const StaticInst *si, int idx,
+ const VecElem val) override
+ {
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecElem());
+ thread->setVecElem(reg, val);
+ setVecElemResult(val);
+ }
+
bool readPredicate() override { return thread->readPredicate(); }
void setPredicate(bool val) override
{
diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh
index ed86aec84..d81858c14 100644
--- a/src/cpu/checker/cpu_impl.hh
+++ b/src/cpu/checker/cpu_impl.hh
@@ -486,6 +486,7 @@ Checker<Impl>::validateExecution(DynInstPtr &inst)
int idx = -1;
bool result_mismatch = false;
bool scalar_mismatch = false;
+ bool vector_mismatch = false;
if (inst->isUnverifiable()) {
// Unverifiable instructions assume they were executed
@@ -503,8 +504,10 @@ Checker<Impl>::validateExecution(DynInstPtr &inst)
if (checker_val != inst_val) {
result_mismatch = true;
idx = i;
- scalar_mismatch = true;
- break;
+ scalar_mismatch = checker_val.isScalar();
+ vector_mismatch = checker_val.isVector();
+ panic_if(!(scalar_mismatch || vector_mismatch),
+ "Unknown type of result\n");
}
}
} // Checker CPU checks all the saved results in the dyninst passed by
@@ -610,6 +613,15 @@ Checker<Impl>::copyResult(DynInstPtr &inst, const InstResult& mismatch_val,
panic_if(!mismatch_val.isScalar(), "Unexpected type of result");
thread->setFloatRegBits(idx.index(), mismatch_val.asInteger());
break;
+ case VecRegClass:
+ panic_if(!mismatch_val.isVector(), "Unexpected type of result");
+ thread->setVecReg(idx, mismatch_val.asVector());
+ break;
+ case VecElemClass:
+ panic_if(!mismatch_val.isVecElem(),
+ "Unexpected type of result");
+ thread->setVecElem(idx, mismatch_val.asVectorElem());
+ break;
case CCRegClass:
panic_if(!mismatch_val.isScalar(), "Unexpected type of result");
thread->setCCReg(idx.index(), mismatch_val.asInteger());
@@ -618,6 +630,8 @@ Checker<Impl>::copyResult(DynInstPtr &inst, const InstResult& mismatch_val,
panic_if(!mismatch_val.isScalar(), "Unexpected type of result");
thread->setMiscReg(idx.index(), mismatch_val.asInteger());
break;
+ default:
+ panic("Unknown register class: %d", (int)idx.classValue());
}
}
start_idx++;
@@ -634,6 +648,14 @@ Checker<Impl>::copyResult(DynInstPtr &inst, const InstResult& mismatch_val,
panic_if(!res.isScalar(), "Unexpected type of result");
thread->setFloatRegBits(idx.index(), res.asInteger());
break;
+ case VecRegClass:
+ panic_if(!res.isVector(), "Unexpected type of result");
+ thread->setVecReg(idx, res.asVector());
+ break;
+ case VecElemClass:
+ panic_if(!res.isVecElem(), "Unexpected type of result");
+ thread->setVecElem(idx, res.asVectorElem());
+ break;
case CCRegClass:
panic_if(!res.isScalar(), "Unexpected type of result");
thread->setCCReg(idx.index(), res.asInteger());
@@ -644,6 +666,8 @@ Checker<Impl>::copyResult(DynInstPtr &inst, const InstResult& mismatch_val,
thread->setMiscReg(idx.index(), 0);
break;
// else Register is out of range...
+ default:
+ panic("Unknown register class: %d", (int)idx.classValue());
}
}
}
diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh
index e48f5936b..5208932de 100644
--- a/src/cpu/checker/thread_context.hh
+++ b/src/cpu/checker/thread_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2011-2012, 2016 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -215,6 +215,55 @@ class CheckerThreadContext : public ThreadContext
FloatRegBits readFloatRegBits(int reg_idx)
{ return actualTC->readFloatRegBits(reg_idx); }
+ const VecRegContainer& readVecReg(const RegId& reg) const
+ { return actualTC->readVecReg(reg); }
+
+ /**
+ * Read vector register for modification, hierarchical indexing.
+ */
+ VecRegContainer& getWritableVecReg(const RegId& reg)
+ { return actualTC->getWritableVecReg(reg); }
+
+ /** Vector Register Lane Interfaces. */
+ /** @{ */
+ /** Reads source vector 8bit operand. */
+ ConstVecLane8
+ readVec8BitLaneReg(const RegId& reg) const
+ { return actualTC->readVec8BitLaneReg(reg); }
+
+ /** Reads source vector 16bit operand. */
+ ConstVecLane16
+ readVec16BitLaneReg(const RegId& reg) const
+ { return actualTC->readVec16BitLaneReg(reg); }
+
+ /** Reads source vector 32bit operand. */
+ ConstVecLane32
+ readVec32BitLaneReg(const RegId& reg) const
+ { return actualTC->readVec32BitLaneReg(reg); }
+
+ /** Reads source vector 64bit operand. */
+ ConstVecLane64
+ readVec64BitLaneReg(const RegId& reg) const
+ { return actualTC->readVec64BitLaneReg(reg); }
+
+ /** Write a lane of the destination vector register. */
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::Byte>& val)
+ { return actualTC->setVecLane(reg, val); }
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::TwoByte>& val)
+ { return actualTC->setVecLane(reg, val); }
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::FourByte>& val)
+ { return actualTC->setVecLane(reg, val); }
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::EightByte>& val)
+ { return actualTC->setVecLane(reg, val); }
+ /** @} */
+
+ const VecElem& readVecElem(const RegId& reg) const
+ { return actualTC->readVecElem(reg); }
+
CCReg readCCReg(int reg_idx)
{ return actualTC->readCCReg(reg_idx); }
@@ -236,6 +285,18 @@ class CheckerThreadContext : public ThreadContext
checkerTC->setFloatRegBits(reg_idx, val);
}
+ void setVecReg(const RegId& reg, const VecRegContainer& val)
+ {
+ actualTC->setVecReg(reg, val);
+ checkerTC->setVecReg(reg, val);
+ }
+
+ void setVecElem(const RegId& reg, const VecElem& val)
+ {
+ actualTC->setVecElem(reg, val);
+ checkerTC->setVecElem(reg, val);
+ }
+
void setCCReg(int reg_idx, CCReg val)
{
actualTC->setCCReg(reg_idx, val);
@@ -333,6 +394,26 @@ class CheckerThreadContext : public ThreadContext
void setFloatRegBitsFlat(int idx, FloatRegBits val)
{ actualTC->setFloatRegBitsFlat(idx, val); }
+ const VecRegContainer& readVecRegFlat(int idx) const
+ { return actualTC->readVecRegFlat(idx); }
+
+ /**
+ * Read vector register for modification, flat indexing.
+ */
+ VecRegContainer& getWritableVecRegFlat(int idx)
+ { return actualTC->getWritableVecRegFlat(idx); }
+
+ void setVecRegFlat(int idx, const VecRegContainer& val)
+ { actualTC->setVecRegFlat(idx, val); }
+
+ const VecElem& readVecElemFlat(const RegIndex& idx,
+ const ElemIndex& elem_idx) const
+ { return actualTC->readVecElemFlat(idx, elem_idx); }
+
+ void setVecElemFlat(const RegIndex& idx,
+ const ElemIndex& elem_idx, const VecElem& val)
+ { actualTC->setVecElemFlat(idx, elem_idx, val); }
+
CCReg readCCRegFlat(int idx)
{ return actualTC->readCCRegFlat(idx); }
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh
index d33147240..e7f5d37ac 100644
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 ARM Limited
+ * Copyright (c) 2014, 2016 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -79,6 +79,8 @@ class ExecContext {
typedef TheISA::MiscReg MiscReg;
typedef TheISA::CCReg CCReg;
+ using VecRegContainer = TheISA::VecRegContainer;
+ using VecElem = TheISA::VecElem;
public:
/**
@@ -121,6 +123,63 @@ class ExecContext {
/** @} */
+ /** Vector Register Interfaces. */
+ /** @{ */
+ /** Reads source vector register operand. */
+ virtual const VecRegContainer&
+ readVecRegOperand(const StaticInst *si, int idx) const = 0;
+
+ /** Gets destination vector register operand for modification. */
+ virtual VecRegContainer&
+ getWritableVecRegOperand(const StaticInst *si, int idx) = 0;
+
+ /** Sets a destination vector register operand to a value. */
+ virtual void
+ setVecRegOperand(const StaticInst *si, int idx,
+ const VecRegContainer& val) = 0;
+ /** @} */
+
+ /** Vector Register Lane Interfaces. */
+ /** @{ */
+ /** Reads source vector 8bit operand. */
+ virtual ConstVecLane8
+ readVec8BitLaneOperand(const StaticInst *si, int idx) const = 0;
+
+ /** Reads source vector 16bit operand. */
+ virtual ConstVecLane16
+ readVec16BitLaneOperand(const StaticInst *si, int idx) const = 0;
+
+ /** Reads source vector 32bit operand. */
+ virtual ConstVecLane32
+ readVec32BitLaneOperand(const StaticInst *si, int idx) const = 0;
+
+ /** Reads source vector 64bit operand. */
+ virtual ConstVecLane64
+ readVec64BitLaneOperand(const StaticInst *si, int idx) const = 0;
+
+ /** Write a lane of the destination vector operand. */
+ /** @{ */
+ virtual void setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::Byte>& val) = 0;
+ virtual void setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::TwoByte>& val) = 0;
+ virtual void setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::FourByte>& val) = 0;
+ virtual void setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::EightByte>& val) = 0;
+ /** @} */
+
+ /** Vector Elem Interfaces. */
+ /** @{ */
+ /** Reads an element of a vector register. */
+ virtual VecElem readVecElemOperand(const StaticInst *si,
+ int idx) const = 0;
+
+ /** Sets an element of a vector register to a value. */
+ virtual void setVecElemOperand(const StaticInst *si, int idx,
+ const VecElem val) = 0;
+ /** @} */
+
/**
* @{
* @name Condition Code Registers
diff --git a/src/cpu/inst_res.hh b/src/cpu/inst_res.hh
index f6f14fe19..9b6a23d95 100644
--- a/src/cpu/inst_res.hh
+++ b/src/cpu/inst_res.hh
@@ -43,17 +43,24 @@
#include <type_traits>
#include "arch/generic/types.hh"
+#include "arch/generic/vec_reg.hh"
class InstResult {
+ using VecRegContainer = TheISA::VecRegContainer;
+ using VecElem = TheISA::VecElem;
public:
union MultiResult {
uint64_t integer;
double dbl;
+ VecRegContainer vector;
+ VecElem vecElem;
MultiResult() {}
};
enum class ResultType {
Scalar,
+ VecElem,
+ VecReg,
NumResultTypes,
Invalid
};
@@ -77,7 +84,32 @@ class InstResult {
result.dbl = i;
}
}
+ /** Vector result. */
+ explicit InstResult(const VecRegContainer& v, const ResultType& t)
+ : type(t) { result.vector = v; }
+ InstResult& operator=(const InstResult& that) {
+ type = that.type;
+ switch (type) {
+ /* Given that misc regs are not written to, there may be invalids in
+ * the result stack. */
+ case ResultType::Invalid:
+ break;
+ case ResultType::Scalar:
+ result.integer = that.result.integer;
+ break;
+ case ResultType::VecElem:
+ result.vecElem = that.result.vecElem;
+ break;
+ case ResultType::VecReg:
+ result.vector = that.result.vector;
+ break;
+ default:
+ panic("Assigning result from unknown result type");
+ break;
+ }
+ return *this;
+ }
/**
* Result comparison
* Two invalid results always differ.
@@ -88,6 +120,10 @@ class InstResult {
switch (type) {
case ResultType::Scalar:
return result.integer == that.result.integer;
+ case ResultType::VecElem:
+ return result.vecElem == that.result.vecElem;
+ case ResultType::VecReg:
+ return result.vector == that.result.vector;
case ResultType::Invalid:
return false;
default:
@@ -103,6 +139,10 @@ class InstResult {
/** @{ */
/** Is this a scalar result?. */
bool isScalar() const { return type == ResultType::Scalar; }
+ /** Is this a vector result?. */
+ bool isVector() const { return type == ResultType::VecReg; }
+ /** Is this a vector element result?. */
+ bool isVecElem() const { return type == ResultType::VecElem; }
/** Is this a valid result?. */
bool isValid() const { return type != ResultType::Invalid; }
/** @} */
@@ -125,6 +165,18 @@ class InstResult {
{
return result.integer;
}
+ const VecRegContainer&
+ asVector() const
+ {
+ panic_if(!isVector(), "Converting scalar (or invalid) to vector!!");
+ return result.vector;
+ }
+    /**
+     * Returns the stored vector-element value.
+     * Panics unless this result is tagged as a vector element
+     * (isVecElem()), since any other union member would be read otherwise.
+     */
+    const VecElem&
+    asVectorElem() const
+    {
+        // The message names the element conversion explicitly so it can be
+        // told apart from the asVector() failure in a log.
+        panic_if(!isVecElem(),
+                 "Converting non-element (or invalid) result to vector "
+                 "element!!");
+        return result.vecElem;
+    }
/** @} */
};
diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc
index 1ed598833..756b214bd 100644
--- a/src/cpu/minor/dyn_inst.cc
+++ b/src/cpu/minor/dyn_inst.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014, 2016 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -153,6 +153,13 @@ printRegName(std::ostream &os, const RegId& reg)
case FloatRegClass:
os << 'f' << static_cast<unsigned int>(reg.index());
break;
+ case VecRegClass:
+ os << 'v' << static_cast<unsigned int>(reg.index());
+ break;
+ case VecElemClass:
+ os << 'v' << static_cast<unsigned int>(reg.index()) << '[' <<
+ static_cast<unsigned int>(reg.elemIndex()) << ']';
+ break;
case IntRegClass:
if (reg.isZeroReg()) {
os << 'z';
@@ -162,6 +169,8 @@ printRegName(std::ostream &os, const RegId& reg)
break;
case CCRegClass:
os << 'c' << static_cast<unsigned int>(reg.index());
+ // Leave the switch here; otherwise the CC case runs into the panic below.
+ break;
+ default:
+ panic("Unknown register class: %d", (int)reg.classValue());
}
}
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index e91b7a6dd..4b3a02fca 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2014 ARM Limited
+ * Copyright (c) 2011-2014, 2016 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -145,6 +145,30 @@ class ExecContext : public ::ExecContext
return thread.readFloatRegBits(reg.index());
}
+ const TheISA::VecRegContainer&
+ readVecRegOperand(const StaticInst *si, int idx) const override
+ {
+ const RegId& reg = si->srcRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread.readVecReg(reg);
+ }
+
+ TheISA::VecRegContainer&
+ getWritableVecRegOperand(const StaticInst *si, int idx) override
+ {
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread.getWritableVecReg(reg);
+ }
+
+    /**
+     * Reads a source vector-element operand from the thread state.
+     *
+     * @param si  Static instruction whose source operand list is consulted.
+     * @param idx Position of the operand in the source operand list.
+     * @return The element value returned by thread.readVecElem().
+     */
+    TheISA::VecElem
+    readVecElemOperand(const StaticInst *si, int idx) const override
+    {
+        const RegId& reg = si->srcRegIdx(idx);
+        // This accessor deals with vector *element* operands, so the
+        // sanity check must test the element class; isVecReg() is the
+        // whole-register predicate and would reject a valid element id.
+        assert(reg.isVecElem());
+        return thread.readVecElem(reg);
+    }
+
void
setIntRegOperand(const StaticInst *si, int idx, IntReg val) override
{
@@ -171,6 +195,102 @@ class ExecContext : public ::ExecContext
thread.setFloatRegBits(reg.index(), val);
}
+ void
+ setVecRegOperand(const StaticInst *si, int idx,
+ const TheISA::VecRegContainer& val) override
+ {
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecReg());
+ thread.setVecReg(reg, val);
+ }
+
+ /** Vector Register Lane Interfaces. */
+ /** @{ */
+ /** Reads source vector 8bit operand. */
+ ConstVecLane8
+ readVec8BitLaneOperand(const StaticInst *si, int idx) const
+ override
+ {
+ const RegId& reg = si->srcRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread.readVec8BitLaneReg(reg);
+ }
+
+ /** Reads source vector 16bit operand. */
+ ConstVecLane16
+ readVec16BitLaneOperand(const StaticInst *si, int idx) const
+ override
+ {
+ const RegId& reg = si->srcRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread.readVec16BitLaneReg(reg);
+ }
+
+ /** Reads source vector 32bit operand. */
+ ConstVecLane32
+ readVec32BitLaneOperand(const StaticInst *si, int idx) const
+ override
+ {
+ const RegId& reg = si->srcRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread.readVec32BitLaneReg(reg);
+ }
+
+ /** Reads source vector 64bit operand. */
+ ConstVecLane64
+ readVec64BitLaneOperand(const StaticInst *si, int idx) const
+ override
+ {
+ const RegId& reg = si->srcRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread.readVec64BitLaneReg(reg);
+ }
+
+ /** Write a lane of the destination vector operand. */
+ template <typename LD>
+ void
+ setVecLaneOperandT(const StaticInst *si, int idx,
+ const LD& val)
+ {
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread.setVecLane(reg, val);
+ }
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::Byte>& val) override
+ {
+ setVecLaneOperandT(si, idx, val);
+ }
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::TwoByte>& val) override
+ {
+ setVecLaneOperandT(si, idx, val);
+ }
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::FourByte>& val) override
+ {
+ setVecLaneOperandT(si, idx, val);
+ }
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::EightByte>& val) override
+ {
+ setVecLaneOperandT(si, idx, val);
+ }
+ /** @} */
+
+    /**
+     * Writes a destination vector-element operand into the thread state.
+     *
+     * @param si  Static instruction whose destination list is consulted.
+     * @param idx Position of the operand in the destination operand list.
+     * @param val Element value forwarded to thread.setVecElem().
+     */
+    void
+    setVecElemOperand(const StaticInst *si, int idx,
+                      const TheISA::VecElem val) override
+    {
+        const RegId& reg = si->destRegIdx(idx);
+        // This accessor deals with vector *element* operands, so the
+        // sanity check must test the element class; isVecReg() is the
+        // whole-register predicate and would reject a valid element id.
+        assert(reg.isVecElem());
+        thread.setVecElem(reg, val);
+    }
+
bool
readPredicate() override
{
diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc
index e3497a5cf..c56d3b303 100644
--- a/src/cpu/minor/scoreboard.cc
+++ b/src/cpu/minor/scoreboard.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014, 2016 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -67,6 +67,16 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index)
reg.index();
ret = true;
break;
+ case VecRegClass:
+ scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs +
+ TheISA::NumFloatRegs + reg.index();
+ ret = true;
+ break;
+ case VecElemClass:
+ scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs +
+ TheISA::NumFloatRegs + TheISA::NumVecRegs + reg.index();
+ ret = true;
+ break;
case CCRegClass:
scoreboard_index = TheISA::NumIntRegs + reg.index();
ret = true;
@@ -75,6 +85,9 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index)
/* Don't bother with Misc registers */
ret = false;
break;
+ default:
+ panic("Unknown register class: %d",
+ static_cast<int>(reg.classValue()));
}
}
diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh
index 7fe5002f9..9e42c2a6b 100644
--- a/src/cpu/minor/scoreboard.hh
+++ b/src/cpu/minor/scoreboard.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014, 2016 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -94,7 +94,7 @@ class Scoreboard : public Named
Scoreboard(const std::string &name) :
Named(name),
numRegs(TheISA::NumIntRegs + TheISA::NumCCRegs +
- TheISA::NumFloatRegs),
+ TheISA::NumFloatRegs + TheISA::NumVecRegs),
numResults(numRegs, 0),
numUnpredictableResults(numRegs, 0),
fuIndices(numRegs, 0),
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index cab2cf34f..8507ab6ff 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -1,3 +1,15 @@
+# Copyright (c) 2016 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
# Copyright (c) 2005-2007 The Regents of The University of Michigan
# All rights reserved.
#
@@ -125,6 +137,8 @@ class DerivO3CPU(BaseCPU):
# (it's a side effect of int reg renaming), so they should
# never be the bottleneck here.
_defaultNumPhysCCRegs = Self.numPhysIntRegs * 5
+ numPhysVecRegs = Param.Unsigned(256, "Number of physical vector "
+ "registers")
numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs,
"Number of physical cc registers")
numIQEntries = Param.Unsigned(64, "Number of instruction queue entries")
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index 49e153a52..f5be5a804 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -75,12 +75,18 @@ class PhysRegId : private RegId {
: RegId(_regClass, _regIdx), flatIdx(_flatIdx)
{}
+ /** Vector PhysRegId constructor (w/ elemIndex). */
+ explicit PhysRegId(RegClass _regClass, PhysRegIndex _regIdx,
+ ElemIndex elem_idx, PhysRegIndex flat_idx)
+ : RegId(_regClass, _regIdx, elem_idx), flatIdx(flat_idx) { }
+
/** Visible RegId methods */
/** @{ */
using RegId::index;
using RegId::classValue;
using RegId::isZeroReg;
using RegId::className;
+ using RegId::elemIndex;
/** @} */
/**
* Explicit forward methods, to prevent comparisons of PhysRegId with
@@ -109,6 +115,12 @@ class PhysRegId : private RegId {
/** @Return true if it is a condition-code physical register. */
bool isCCPhysReg() const { return isCCReg(); }
+ /** @Return true if it is a vector physical register. */
+ bool isVectorPhysReg() const { return isVecReg(); }
+
+ /** @Return true if it is a vector element physical register. */
+ bool isVectorPhysElem() const { return isVecElem(); }
+
/** @Return true if it is a miscellaneous physical register. */
bool isMiscPhysReg() const { return isMiscReg(); }
@@ -123,11 +135,18 @@ class PhysRegId : private RegId {
/** Flat index accessor */
const PhysRegIndex& flatIndex() const { return flatIdx; }
+ /** Builds a VecElemClass PhysRegId for element `elem` of the vector register `vid`; asserts that `vid` is a vector physical register. */
+ static PhysRegId elemId(const PhysRegId* vid, ElemIndex elem)
+ {
+ assert(vid->isVectorPhysReg());
+ return PhysRegId(VecElemClass, vid->index(), elem); // NOTE(review): a three-argument call appears to select the (class, index, flat index) constructor, so `elem` may be stored as flatIdx rather than elemIndex - confirm
+ }
};
-// PhysRegIds only need to be created once and then we can use the following
-// to work with them
-typedef const PhysRegId* PhysRegIdPtr;
+/** Constant pointer definition.
+ * PhysRegIds only need to be created once and then we can just share
+ * pointers */
+using PhysRegIdPtr = const PhysRegId*;
/** Struct that defines the information passed from fetch to decode. */
template<class Impl>
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index a7a39b72a..c249d90ba 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012, 2014 ARM Limited
+ * Copyright (c) 2011-2012, 2014, 2016 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -46,6 +46,7 @@
#include "cpu/o3/cpu.hh"
+#include "arch/generic/traits.hh"
#include "arch/kernel_stats.hh"
#include "config/the_isa.hh"
#include "cpu/activity.hh"
@@ -171,9 +172,14 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
iew(this, params),
commit(this, params),
+ /* It is mandatory that all SMT threads use the same renaming mode as
+ * they are sharing registers and rename */
+ vecMode(initRenameMode<TheISA::ISA>::mode(params->isa[0])),
regFile(params->numPhysIntRegs,
params->numPhysFloatRegs,
- params->numPhysCCRegs),
+ params->numPhysVecRegs,
+ params->numPhysCCRegs,
+ vecMode),
freeList(name() + ".freelist", &regFile),
@@ -270,6 +276,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
//Make sure that this is a valid architecture
assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs);
assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs);
+ assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs);
assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs);
rename.setScoreboard(&scoreboard);
@@ -278,6 +285,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
// Setup the rename map for whichever stages need it.
for (ThreadID tid = 0; tid < numThreads; tid++) {
isa[tid] = params->isa[tid];
+ assert(initRenameMode<TheISA::ISA>::equals(isa[tid], isa[0]));
// Only Alpha has an FP zero register, so for other ISAs we
// use an invalid FP register index to avoid special treatment
@@ -287,10 +295,11 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
(THE_ISA == ALPHA_ISA) ? TheISA::ZeroReg : invalidFPReg;
commitRenameMap[tid].init(&regFile, TheISA::ZeroReg, fpZeroReg,
- &freeList);
+ &freeList,
+ vecMode);
renameMap[tid].init(&regFile, TheISA::ZeroReg, fpZeroReg,
- &freeList);
+ &freeList, vecMode);
}
// Initialize rename map to assign physical registers to the
@@ -311,6 +320,30 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
RegId(FloatRegClass, ridx), phys_reg);
}
+ /* Here we need two 'interfaces' the 'whole register' and the
+ * 'register element'. At any point only one of them will be
+ * active. */
+ if (vecMode == Enums::Full) {
+ /* Initialize the full-vector interface */
+ for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) {
+ RegId rid = RegId(VecRegClass, ridx);
+ PhysRegIdPtr phys_reg = freeList.getVecReg();
+ renameMap[tid].setEntry(rid, phys_reg);
+ commitRenameMap[tid].setEntry(rid, phys_reg);
+ }
+ } else {
+ /* Initialize the vector-element interface */
+ for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) {
+ for (ElemIndex ldx = 0; ldx < TheISA::NumVecElemPerVecReg;
+ ++ldx) {
+ RegId lrid = RegId(VecElemClass, ridx, ldx);
+ PhysRegIdPtr phys_elem = freeList.getVecElem();
+ renameMap[tid].setEntry(lrid, phys_elem);
+ commitRenameMap[tid].setEntry(lrid, phys_elem);
+ }
+ }
+ }
+
for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) {
PhysRegIdPtr phys_reg = freeList.getCCReg();
renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg);
@@ -514,6 +547,16 @@ FullO3CPU<Impl>::regStats()
.desc("number of floating regfile writes")
.prereq(fpRegfileWrites);
+ vecRegfileReads
+ .name(name() + ".vec_regfile_reads")
+ .desc("number of vector regfile reads")
+ .prereq(vecRegfileReads);
+
+ vecRegfileWrites
+ .name(name() + ".vec_regfile_writes")
+ .desc("number of vector regfile writes")
+ .prereq(vecRegfileWrites);
+
ccRegfileReads
.name(name() + ".cc_regfile_reads")
.desc("number of cc regfile reads")
@@ -1257,6 +1300,32 @@ FullO3CPU<Impl>::readFloatRegBits(PhysRegIdPtr phys_reg)
}
template <class Impl>
+auto
+FullO3CPU<Impl>::readVecReg(PhysRegIdPtr phys_reg) const
+ -> const VecRegContainer&
+{
+ vecRegfileReads++;
+ return regFile.readVecReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::getWritableVecReg(PhysRegIdPtr phys_reg)
+ -> VecRegContainer&
+{
+ vecRegfileWrites++;
+ return regFile.getWritableVecReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem&
+{
+ vecRegfileReads++;
+ return regFile.readVecElem(phys_reg);
+}
+
+template <class Impl>
CCReg
FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg)
{
@@ -1290,6 +1359,22 @@ FullO3CPU<Impl>::setFloatRegBits(PhysRegIdPtr phys_reg, FloatRegBits val)
template <class Impl>
void
+FullO3CPU<Impl>::setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val)
+{
+ vecRegfileWrites++;
+ regFile.setVecReg(phys_reg, val);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val)
+{
+ vecRegfileWrites++;
+ regFile.setVecElem(phys_reg, val);
+}
+
+template <class Impl>
+void
FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, CCReg val)
{
ccRegfileWrites++;
@@ -1330,6 +1415,36 @@ FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, ThreadID tid)
}
template <class Impl>
+auto
+FullO3CPU<Impl>::readArchVecReg(int reg_idx, ThreadID tid) const
+ -> const VecRegContainer&
+{
+ PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+ RegId(VecRegClass, reg_idx));
+ return readVecReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::getWritableArchVecReg(int reg_idx, ThreadID tid)
+ -> VecRegContainer&
+{
+ PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+ RegId(VecRegClass, reg_idx));
+ return getWritableVecReg(phys_reg);
+}
+
+/**
+ * Reads one element of an architectural vector register.
+ *
+ * The architectural (register, element) pair is translated through the
+ * commit rename map of the given thread and the resulting physical
+ * element is read with readVecElem().
+ *
+ * @param reg_idx Architectural vector register index.
+ * @param ldx Index of the element inside the vector register.
+ * @param tid Thread whose commit rename map is consulted.
+ * @return Reference to the element as returned by readVecElem().
+ */
+template <class Impl>
+auto
+FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
+                                 ThreadID tid) const -> const VecElem&
+{
+    // Vector elements are entered in the rename maps under VecElemClass,
+    // and the register file asserts that it is handed a vector-element
+    // physical id, so the lookup has to use VecElemClass as well.
+    PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+                                RegId(VecElemClass, reg_idx, ldx));
+    return readVecElem(phys_reg);
+}
+
+template <class Impl>
CCReg
FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid)
{
@@ -1375,6 +1490,26 @@ FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, ThreadID tid)
template <class Impl>
void
+FullO3CPU<Impl>::setArchVecReg(int reg_idx, const VecRegContainer& val,
+ ThreadID tid)
+{
+ PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+ RegId(VecRegClass, reg_idx));
+ setVecReg(phys_reg, val);
+}
+
+/**
+ * Writes one element of an architectural vector register.
+ *
+ * The architectural (register, element) pair is translated through the
+ * commit rename map of the given thread and the resulting physical
+ * element is written with setVecElem().
+ *
+ * @param reg_idx Architectural vector register index.
+ * @param ldx Index of the element inside the vector register.
+ * @param val Value to store in the element.
+ * @param tid Thread whose commit rename map is consulted.
+ */
+template <class Impl>
+void
+FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
+                                const VecElem& val, ThreadID tid)
+{
+    // Vector elements are entered in the rename maps under VecElemClass,
+    // and the register file asserts that it is handed a vector-element
+    // physical id, so the lookup has to use VecElemClass as well.
+    PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+                                RegId(VecElemClass, reg_idx, ldx));
+    setVecElem(phys_reg, val);
+}
+
+template <class Impl>
+void
FullO3CPU<Impl>::setArchCCReg(int reg_idx, CCReg val, ThreadID tid)
{
ccRegfileWrites++;
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index b5cbc5fe2..d78d1b9d3 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2013 ARM Limited
+ * Copyright (c) 2011-2013, 2016 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -53,6 +53,7 @@
#include <set>
#include <vector>
+#include "arch/generic/types.hh"
#include "arch/types.hh"
#include "base/statistics.hh"
#include "config/the_isa.hh"
@@ -103,6 +104,9 @@ class FullO3CPU : public BaseO3CPU
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::O3CPU O3CPU;
+ using VecElem = TheISA::VecElem;
+ using VecRegContainer = TheISA::VecRegContainer;
+
typedef O3ThreadState<Impl> ImplState;
typedef O3ThreadState<Impl> Thread;
@@ -417,6 +421,46 @@ class FullO3CPU : public BaseO3CPU
TheISA::FloatRegBits readFloatRegBits(PhysRegIdPtr phys_reg);
+ const VecRegContainer& readVecReg(PhysRegIdPtr reg_idx) const;
+
+ /**
+ * Read physical vector register for modification.
+ */
+ VecRegContainer& getWritableVecReg(PhysRegIdPtr reg_idx);
+
+ /**
+ * Read physical vector register lane
+ */
+ template<typename VecElem, int LaneIdx>
+ VecLaneT<VecElem, true>
+ readVecLane(PhysRegIdPtr phys_reg) const
+ {
+ vecRegfileReads++;
+ return regFile.readVecLane<VecElem, LaneIdx>(phys_reg);
+ }
+
+ /**
+ * Read physical vector register lane
+ */
+ template<typename VecElem>
+ VecLaneT<VecElem, true>
+ readVecLane(PhysRegIdPtr phys_reg) const
+ {
+ vecRegfileReads++;
+ return regFile.readVecLane<VecElem>(phys_reg);
+ }
+
+ /** Write a lane of the destination vector register. */
+ template<typename LD>
+ void
+ setVecLane(PhysRegIdPtr phys_reg, const LD& val)
+ {
+ vecRegfileWrites++;
+ return regFile.setVecLane(phys_reg, val);
+ }
+
+ const VecElem& readVecElem(PhysRegIdPtr reg_idx) const;
+
TheISA::CCReg readCCReg(PhysRegIdPtr phys_reg);
void setIntReg(PhysRegIdPtr phys_reg, uint64_t val);
@@ -425,6 +469,10 @@ class FullO3CPU : public BaseO3CPU
void setFloatRegBits(PhysRegIdPtr phys_reg, TheISA::FloatRegBits val);
+ void setVecReg(PhysRegIdPtr reg_idx, const VecRegContainer& val);
+
+ void setVecElem(PhysRegIdPtr reg_idx, const VecElem& val);
+
void setCCReg(PhysRegIdPtr phys_reg, TheISA::CCReg val);
uint64_t readArchIntReg(int reg_idx, ThreadID tid);
@@ -433,6 +481,34 @@ class FullO3CPU : public BaseO3CPU
uint64_t readArchFloatRegInt(int reg_idx, ThreadID tid);
+ const VecRegContainer& readArchVecReg(int reg_idx, ThreadID tid) const;
+ /** Read architectural vector register for modification. */
+ VecRegContainer& getWritableArchVecReg(int reg_idx, ThreadID tid);
+
+ /** Read architectural vector register lane. */
+ template<typename VecElem>
+ VecLaneT<VecElem, true>
+ readArchVecLane(int reg_idx, int lId, ThreadID tid) const
+ {
+ PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+ RegId(VecRegClass, reg_idx));
+ return readVecLane<VecElem>(phys_reg);
+ }
+
+
+ /** Write a lane of the destination vector register. */
+ template<typename LD>
+ void
+ setArchVecLane(int reg_idx, int lId, ThreadID tid, const LD& val)
+ {
+ PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+ RegId(VecRegClass, reg_idx));
+ setVecLane(phys_reg, val);
+ }
+
+ const VecElem& readArchVecElem(const RegIndex& reg_idx,
+ const ElemIndex& ldx, ThreadID tid) const;
+
TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid);
/** Architectural register accessors. Looks up in the commit
@@ -446,6 +522,11 @@ class FullO3CPU : public BaseO3CPU
void setArchFloatRegInt(int reg_idx, uint64_t val, ThreadID tid);
+ void setArchVecReg(int reg_idx, const VecRegContainer& val, ThreadID tid);
+
+ void setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
+ const VecElem& val, ThreadID tid);
+
void setArchCCReg(int reg_idx, TheISA::CCReg val, ThreadID tid);
/** Sets the commit PC state of a specific thread. */
@@ -540,6 +621,9 @@ class FullO3CPU : public BaseO3CPU
/** The commit stage. */
typename CPUPolicy::Commit commit;
+ /** The rename mode of the vector registers */
+ Enums::VecRegRenameMode vecMode;
+
/** The register file. */
PhysRegFile regFile;
@@ -722,6 +806,9 @@ class FullO3CPU : public BaseO3CPU
//number of float register file accesses
Stats::Scalar fpRegfileReads;
Stats::Scalar fpRegfileWrites;
+ //number of vector register file accesses
+ mutable Stats::Scalar vecRegfileReads;
+ Stats::Scalar vecRegfileWrites;
//number of CC register file accesses
Stats::Scalar ccRegfileReads;
Stats::Scalar ccRegfileWrites;
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index a6adb4c20..0643e7e30 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010, 2016 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -72,6 +72,9 @@ class BaseO3DynInst : public BaseDynInst<Impl>
typedef TheISA::FloatReg FloatReg;
typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::CCReg CCReg;
+ using VecRegContainer = TheISA::VecRegContainer;
+ using VecElem = TheISA::VecElem;
+ static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg;
/** Misc register type. */
typedef TheISA::MiscReg MiscReg;
@@ -83,9 +86,9 @@ class BaseO3DynInst : public BaseDynInst<Impl>
public:
/** BaseDynInst constructor given a binary instruction. */
- BaseO3DynInst(const StaticInstPtr &staticInst, const StaticInstPtr &macroop,
- TheISA::PCState pc, TheISA::PCState predPC,
- InstSeqNum seq_num, O3CPU *cpu);
+ BaseO3DynInst(const StaticInstPtr &staticInst, const StaticInstPtr
+ &macroop, TheISA::PCState pc, TheISA::PCState predPC,
+ InstSeqNum seq_num, O3CPU *cpu);
/** BaseDynInst constructor given a static inst pointer. */
BaseO3DynInst(const StaticInstPtr &_staticInst,
@@ -107,6 +110,11 @@ class BaseO3DynInst : public BaseDynInst<Impl>
void initVars();
protected:
+ /** Explicitation of dependent names. */
+ using BaseDynInst<Impl>::cpu;
+ using BaseDynInst<Impl>::_srcRegIdx;
+ using BaseDynInst<Impl>::_destRegIdx;
+
/** Values to be written to the destination misc. registers. */
std::array<MiscReg, TheISA::MaxMiscDestRegs> _destMiscRegVal;
@@ -213,19 +221,30 @@ class BaseO3DynInst : public BaseDynInst<Impl>
switch (original_dest_reg.classValue()) {
case IntRegClass:
this->setIntRegOperand(this->staticInst.get(), idx,
- this->cpu->readIntReg(prev_phys_reg));
+ this->cpu->readIntReg(prev_phys_reg));
break;
case FloatRegClass:
this->setFloatRegOperandBits(this->staticInst.get(), idx,
- this->cpu->readFloatRegBits(prev_phys_reg));
+ this->cpu->readFloatRegBits(prev_phys_reg));
+ break;
+ case VecRegClass:
+ this->setVecRegOperand(this->staticInst.get(), idx,
+ this->cpu->readVecReg(prev_phys_reg));
+ break;
+ case VecElemClass:
+ this->setVecElemOperand(this->staticInst.get(), idx,
+ this->cpu->readVecElem(prev_phys_reg));
break;
case CCRegClass:
this->setCCRegOperand(this->staticInst.get(), idx,
- this->cpu->readCCReg(prev_phys_reg));
+ this->cpu->readCCReg(prev_phys_reg));
break;
case MiscRegClass:
// no need to forward misc reg values
break;
+ default:
+ panic("Unknown register class: %d",
+ (int)original_dest_reg.classValue());
}
}
}
@@ -266,6 +285,89 @@ class BaseO3DynInst : public BaseDynInst<Impl>
return this->cpu->readFloatRegBits(this->_srcRegIdx[idx]);
}
+ const VecRegContainer&
+ readVecRegOperand(const StaticInst *si, int idx) const
+ {
+ return this->cpu->readVecReg(this->_srcRegIdx[idx]);
+ }
+
+ /**
+ * Read destination vector register operand for modification.
+ */
+ VecRegContainer&
+ getWritableVecRegOperand(const StaticInst *si, int idx)
+ {
+ return this->cpu->getWritableVecReg(this->_destRegIdx[idx]);
+ }
+
+ /** Vector Register Lane Interfaces. */
+ /** @{ */
+ /** Reads source vector 8bit operand. */
+ ConstVecLane8
+ readVec8BitLaneOperand(const StaticInst *si, int idx) const
+ {
+ return cpu->template readVecLane<uint8_t>(_srcRegIdx[idx]);
+ }
+
+ /** Reads source vector 16bit operand. */
+ ConstVecLane16
+ readVec16BitLaneOperand(const StaticInst *si, int idx) const
+ {
+ return cpu->template readVecLane<uint16_t>(_srcRegIdx[idx]);
+ }
+
+ /** Reads source vector 32bit operand. */
+ ConstVecLane32
+ readVec32BitLaneOperand(const StaticInst *si, int idx) const
+ {
+ return cpu->template readVecLane<uint32_t>(_srcRegIdx[idx]);
+ }
+
+ /** Reads source vector 64bit operand. */
+ ConstVecLane64
+ readVec64BitLaneOperand(const StaticInst *si, int idx) const
+ {
+ return cpu->template readVecLane<uint64_t>(_srcRegIdx[idx]);
+ }
+
+ /** Write a lane of the destination vector operand. */
+ template <typename LD>
+ void
+ setVecLaneOperandT(const StaticInst *si, int idx, const LD& val)
+ {
+ return cpu->template setVecLane(_destRegIdx[idx], val);
+ }
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::Byte>& val)
+ {
+ return setVecLaneOperandT(si, idx, val);
+ }
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::TwoByte>& val)
+ {
+ return setVecLaneOperandT(si, idx, val);
+ }
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::FourByte>& val)
+ {
+ return setVecLaneOperandT(si, idx, val);
+ }
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::EightByte>& val)
+ {
+ return setVecLaneOperandT(si, idx, val);
+ }
+ /** @} */
+
+ VecElem readVecElemOperand(const StaticInst *si, int idx) const
+ {
+ return this->cpu->readVecElem(this->_srcRegIdx[idx]);
+ }
+
CCReg readCCRegOperand(const StaticInst *si, int idx)
{
return this->cpu->readCCReg(this->_srcRegIdx[idx]);
@@ -293,6 +395,22 @@ class BaseO3DynInst : public BaseDynInst<Impl>
BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
}
+ void
+ setVecRegOperand(const StaticInst *si, int idx,
+ const VecRegContainer& val)
+ {
+ this->cpu->setVecReg(this->_destRegIdx[idx], val);
+ BaseDynInst<Impl>::setVecRegOperand(si, idx, val);
+ }
+
+ void setVecElemOperand(const StaticInst *si, int idx,
+ const VecElem val)
+ {
+ int reg_idx = idx;
+ this->cpu->setVecElem(this->_destRegIdx[reg_idx], val);
+ BaseDynInst<Impl>::setVecElemOperand(si, idx, val);
+ }
+
void setCCRegOperand(const StaticInst *si, int idx, CCReg val)
{
this->cpu->setCCReg(this->_destRegIdx[idx], val);
diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh
index 6fc6cc909..f4c26a697 100644
--- a/src/cpu/o3/free_list.hh
+++ b/src/cpu/o3/free_list.hh
@@ -1,4 +1,16 @@
/*
+ * Copyright (c) 2016 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved.
@@ -63,6 +75,16 @@ class SimpleFreeList
/** Add a physical register to the free list */
void addReg(PhysRegIdPtr reg) { freeRegs.push(reg); }
+ /** Add physical registers to the free list */
+ template<class InputIt>
+ void
+ addRegs(InputIt first, InputIt last) {
+ std::for_each(first, last,
+ [this](const typename InputIt::value_type& reg) {
+ this->freeRegs.push(&reg);
+ });
+ }
+
/** Get the next available register from the free list */
PhysRegIdPtr getReg()
{
@@ -107,6 +129,15 @@ class UnifiedFreeList
/** The list of free floating point registers. */
SimpleFreeList floatList;
+ /** The following two are exclusive interfaces. */
+ /** @{ */
+ /** The list of free vector registers. */
+ SimpleFreeList vecList;
+
+ /** The list of free vector element registers. */
+ SimpleFreeList vecElemList;
+ /** @} */
+
/** The list of free condition-code registers. */
SimpleFreeList ccList;
@@ -146,18 +177,36 @@ class UnifiedFreeList
/** Gets a free fp register. */
PhysRegIdPtr getFloatReg() { return floatList.getReg(); }
+ /** Gets a free vector register. */
+ PhysRegIdPtr getVecReg() { return vecList.getReg(); }
+
+ /** Gets a free vector element register. */
+ PhysRegIdPtr getVecElem() { return vecElemList.getReg(); }
+
/** Gets a free cc register. */
PhysRegIdPtr getCCReg() { return ccList.getReg(); }
/** Adds a register back to the free list. */
void addReg(PhysRegIdPtr freed_reg);
+ /** Adds a range of registers back to the free list. */
+ template<class InputIt>
+ void addRegs(InputIt first, InputIt last);
+
/** Adds an integer register back to the free list. */
void addIntReg(PhysRegIdPtr freed_reg) { intList.addReg(freed_reg); }
/** Adds a fp register back to the free list. */
void addFloatReg(PhysRegIdPtr freed_reg) { floatList.addReg(freed_reg); }
+ /** Adds a vector register back to the free list. */
+ void addVecReg(PhysRegIdPtr freed_reg) { vecList.addReg(freed_reg); }
+
+ /** Adds a vector element register back to the free list. */
+ void addVecElem(PhysRegIdPtr freed_reg) {
+ vecElemList.addReg(freed_reg);
+ }
+
/** Adds a cc register back to the free list. */
void addCCReg(PhysRegIdPtr freed_reg) { ccList.addReg(freed_reg); }
@@ -167,6 +216,12 @@ class UnifiedFreeList
/** Checks if there are any free fp registers. */
bool hasFreeFloatRegs() const { return floatList.hasFreeRegs(); }
+ /** Checks if there are any free vector registers. */
+ bool hasFreeVecRegs() const { return vecList.hasFreeRegs(); }
+
+ /** Checks if there are any free vector element registers. */
+ bool hasFreeVecElems() const { return vecElemList.hasFreeRegs(); }
+
/** Checks if there are any free cc registers. */
bool hasFreeCCRegs() const { return ccList.hasFreeRegs(); }
@@ -176,10 +231,49 @@ class UnifiedFreeList
/** Returns the number of free fp registers. */
unsigned numFreeFloatRegs() const { return floatList.numFreeRegs(); }
+ /** Returns the number of free vector registers. */
+ unsigned numFreeVecRegs() const { return vecList.numFreeRegs(); }
+
/** Returns the number of free cc registers. */
unsigned numFreeCCRegs() const { return ccList.numFreeRegs(); }
};
+/**
+ * Adds a range of physical registers back to the free list.
+ *
+ * The register class of the first element selects the destination list,
+ * so the range [first, last) is expected to hold registers of a single
+ * class. Only the first and the last element are compared to detect a
+ * mixed range.
+ *
+ * @param first Iterator to the first register to add.
+ * @param last Iterator one past the last register to add.
+ */
+template<class InputIt>
+inline void
+UnifiedFreeList::addRegs(InputIt first, InputIt last)
+{
+    // Are there any registers to add?
+    if (first == last)
+        return;
+
+    // The range is non-empty from here on, so (last-1) is a valid element.
+    panic_if(first->classValue() != (last-1)->classValue(),
+             "Attempt to add mixed type regs: %s and %s",
+             first->className(),
+             (last-1)->className());
+    switch (first->classValue()) {
+      case IntRegClass:
+        intList.addRegs(first, last);
+        break;
+      case FloatRegClass:
+        floatList.addRegs(first, last);
+        break;
+      case VecRegClass:
+        vecList.addRegs(first, last);
+        break;
+      case VecElemClass:
+        vecElemList.addRegs(first, last);
+        break;
+      case CCRegClass:
+        ccList.addRegs(first, last);
+        break;
+      default:
+        panic("Unexpected RegClass (%s)",
+              first->className());
+    }
+}
+
inline void
UnifiedFreeList::addReg(PhysRegIdPtr freed_reg)
{
@@ -194,6 +288,12 @@ UnifiedFreeList::addReg(PhysRegIdPtr freed_reg)
case FloatRegClass:
floatList.addReg(freed_reg);
break;
+ case VecRegClass:
+ vecList.addReg(freed_reg);
+ break;
+ case VecElemClass:
+ vecElemList.addReg(freed_reg);
+ break;
case CCRegClass:
ccList.addReg(freed_reg);
break;
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index f52cf2d6c..2b113ae04 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -98,8 +98,11 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
numThreads = params->numThreads;
// Set the number of total physical registers
+ // As the vector registers have two addressing modes, they are added twice
numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs +
- params->numPhysCCRegs;
+ params->numPhysVecRegs +
+ params->numPhysVecRegs * TheISA::NumVecElemPerVecReg +
+ params->numPhysCCRegs;
//Create an entry for each physical register within the
//dependency graph.
diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc
index ea4370f48..2f41e2ac2 100644
--- a/src/cpu/o3/regfile.cc
+++ b/src/cpu/o3/regfile.cc
@@ -1,4 +1,16 @@
/*
+ * Copyright (c) 2016 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved.
@@ -34,19 +46,30 @@
#include "cpu/o3/regfile.hh"
#include "cpu/o3/free_list.hh"
+#include "arch/generic/types.hh"
+#include "cpu/o3/free_list.hh"
PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
unsigned _numPhysicalFloatRegs,
- unsigned _numPhysicalCCRegs)
+ unsigned _numPhysicalVecRegs,
+ unsigned _numPhysicalCCRegs,
+ VecMode vmode)
: intRegFile(_numPhysicalIntRegs),
floatRegFile(_numPhysicalFloatRegs),
+ vectorRegFile(_numPhysicalVecRegs),
ccRegFile(_numPhysicalCCRegs),
numPhysicalIntRegs(_numPhysicalIntRegs),
numPhysicalFloatRegs(_numPhysicalFloatRegs),
+ numPhysicalVecRegs(_numPhysicalVecRegs),
+ numPhysicalVecElemRegs(_numPhysicalVecRegs *
+ NumVecElemPerVecReg),
numPhysicalCCRegs(_numPhysicalCCRegs),
totalNumRegs(_numPhysicalIntRegs
+ _numPhysicalFloatRegs
- + _numPhysicalCCRegs)
+ + _numPhysicalVecRegs
+ + _numPhysicalVecRegs * NumVecElemPerVecReg
+ + _numPhysicalCCRegs),
+ vecMode(vmode)
{
PhysRegIndex phys_reg;
PhysRegIndex flat_reg_idx = 0;
@@ -68,6 +91,23 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
floatRegIds.emplace_back(FloatRegClass, phys_reg, flat_reg_idx++);
}
+ // The next batch of the registers are the vector physical
+ // registers; put them onto the vector free list.
+ for (phys_reg = 0; phys_reg < numPhysicalVecRegs; phys_reg++) {
+ vectorRegFile[phys_reg].zero();
+ vecRegIds.emplace_back(VecRegClass, phys_reg, flat_reg_idx++);
+ }
+ // The next batch of the registers are the vector element physical
+ // registers; they refer to the same containers as the vector
+ // registers, just a different (and incompatible) way to access
+ // them; put them onto the vector free list.
+ for (phys_reg = 0; phys_reg < numPhysicalVecRegs; phys_reg++) {
+ for (ElemIndex eIdx = 0; eIdx < NumVecElemPerVecReg; eIdx++) {
+ vecElemIds.emplace_back(VecElemClass, phys_reg,
+ eIdx, flat_reg_idx++);
+ }
+ }
+
// The rest of the registers are the condition-code physical
// registers; put them onto the condition-code free list.
for (phys_reg = 0; phys_reg < numPhysicalCCRegs; phys_reg++) {
@@ -90,20 +130,90 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList)
// The initial batch of registers are the integer ones
for (reg_idx = 0; reg_idx < numPhysicalIntRegs; reg_idx++) {
assert(intRegIds[reg_idx].index() == reg_idx);
- freeList->addIntReg(&intRegIds[reg_idx]);
}
+ freeList->addRegs(intRegIds.begin(), intRegIds.end());
// The next batch of the registers are the floating-point physical
// registers; put them onto the floating-point free list.
for (reg_idx = 0; reg_idx < numPhysicalFloatRegs; reg_idx++) {
assert(floatRegIds[reg_idx].index() == reg_idx);
- freeList->addFloatReg(&floatRegIds[reg_idx]);
}
+ freeList->addRegs(floatRegIds.begin(), floatRegIds.end());
+
+ /* The next batch of the registers are the vector physical
+ * registers; put them onto the vector free list. */
+ for (reg_idx = 0; reg_idx < numPhysicalVecRegs; reg_idx++) {
+ assert(vecRegIds[reg_idx].index() == reg_idx);
+ for (ElemIndex elemIdx = 0; elemIdx < NumVecElemPerVecReg; elemIdx++) {
+ assert(vecElemIds[reg_idx * NumVecElemPerVecReg +
+ elemIdx].index() == reg_idx);
+ assert(vecElemIds[reg_idx * NumVecElemPerVecReg +
+ elemIdx].elemIndex() == elemIdx);
+ }
+ }
+
+ /* depending on the mode we add the vector registers as whole units or
+ * as different elements. */
+ if (vecMode == Enums::Full)
+ freeList->addRegs(vecRegIds.begin(), vecRegIds.end());
+ else
+ freeList->addRegs(vecElemIds.begin(), vecElemIds.end());
// The rest of the registers are the condition-code physical
// registers; put them onto the condition-code free list.
for (reg_idx = 0; reg_idx < numPhysicalCCRegs; reg_idx++) {
assert(ccRegIds[reg_idx].index() == reg_idx);
- freeList->addCCReg(&ccRegIds[reg_idx]);
}
+ freeList->addRegs(ccRegIds.begin(), ccRegIds.end());
}
+
+auto
+PhysRegFile::getRegElemIds(PhysRegIdPtr reg) -> IdRange
+{
+ panic_if(!reg->isVectorPhysReg(),
+ "Trying to get elems of a %s register", reg->className());
+ auto idx = reg->index();
+ return std::make_pair(
+ vecElemIds.begin() + idx * NumVecElemPerVecReg,
+ vecElemIds.begin() + (idx+1) * NumVecElemPerVecReg);
+}
+
+auto
+PhysRegFile::getRegIds(RegClass cls) -> IdRange
+{
+ switch (cls)
+ {
+ case IntRegClass:
+ return std::make_pair(intRegIds.begin(), intRegIds.end());
+ case FloatRegClass:
+ return std::make_pair(floatRegIds.begin(), floatRegIds.end());
+ case VecRegClass:
+ return std::make_pair(vecRegIds.begin(), vecRegIds.end());
+ case VecElemClass:
+ return std::make_pair(vecElemIds.begin(), vecElemIds.end());
+ case CCRegClass:
+ return std::make_pair(ccRegIds.begin(), ccRegIds.end());
+ case MiscRegClass:
+ return std::make_pair(miscRegIds.begin(), miscRegIds.end());
+ }
+ /* There is no way to make an empty iterator */
+ return std::make_pair(PhysIds::const_iterator(),
+ PhysIds::const_iterator());
+}
+
+PhysRegIdPtr
+PhysRegFile::getTrueId(PhysRegIdPtr reg)
+{
+ switch (reg->classValue()) {
+ case VecRegClass:
+ return &vecRegIds[reg->index()];
+ case VecElemClass:
+ return &vecElemIds[reg->index() * NumVecElemPerVecReg +
+ reg->elemIndex()];
+ default:
+ panic_if(!reg->isVectorPhysElem(),
+ "Trying to get the register of a %s register", reg->className());
+ }
+ return nullptr;
+}
+
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index c353b2746..7feec933f 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -1,4 +1,16 @@
/*
+ * Copyright (c) 2016 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved.
@@ -42,6 +54,7 @@
#include "config/the_isa.hh"
#include "cpu/o3/comm.hh"
#include "debug/IEW.hh"
+#include "enums/VecRegRenameMode.hh"
class UnifiedFreeList;
@@ -56,6 +69,15 @@ class PhysRegFile
typedef TheISA::FloatReg FloatReg;
typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::CCReg CCReg;
+ using VecElem = TheISA::VecElem;
+ using VecRegContainer = TheISA::VecRegContainer;
+ using PhysIds = std::vector<PhysRegId>;
+ using VecMode = Enums::VecRegRenameMode;
+ public:
+ using IdRange = std::pair<PhysIds::const_iterator,
+ PhysIds::const_iterator>;
+ private:
+ static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg;
typedef union {
FloatReg d;
@@ -70,6 +92,11 @@ class PhysRegFile
std::vector<PhysFloatReg> floatRegFile;
std::vector<PhysRegId> floatRegIds;
+ /** Vector register file. */
+ std::vector<VecRegContainer> vectorRegFile;
+ std::vector<PhysRegId> vecRegIds;
+ std::vector<PhysRegId> vecElemIds;
+
/** Condition-code register file. */
std::vector<CCReg> ccRegFile;
std::vector<PhysRegId> ccRegIds;
@@ -83,18 +110,31 @@ class PhysRegFile
unsigned numPhysicalIntRegs;
/**
- * Number of physical general purpose registers
+ * Number of physical floating point registers
*/
unsigned numPhysicalFloatRegs;
/**
- * Number of physical general purpose registers
+ * Number of physical vector registers
+ */
+ unsigned numPhysicalVecRegs;
+
+ /**
+ * Number of physical vector element registers
+ */
+ unsigned numPhysicalVecElemRegs;
+
+ /**
+ * Number of physical CC registers
*/
unsigned numPhysicalCCRegs;
/** Total number of physical registers. */
unsigned totalNumRegs;
+ /** Mode in which vector registers are addressed. */
+ VecMode vecMode;
+
public:
/**
* Constructs a physical register file with the specified amount of
@@ -102,7 +142,10 @@ class PhysRegFile
*/
PhysRegFile(unsigned _numPhysicalIntRegs,
unsigned _numPhysicalFloatRegs,
- unsigned _numPhysicalCCRegs);
+ unsigned _numPhysicalVecRegs,
+ unsigned _numPhysicalCCRegs,
+ VecMode vmode
+ );
/**
* Destructor to free resources
@@ -117,6 +160,11 @@ class PhysRegFile
/** @return the number of floating-point physical registers. */
unsigned numFloatPhysRegs() const { return numPhysicalFloatRegs; }
+ /** @return the number of vector physical registers. */
+ unsigned numVecPhysRegs() const { return numPhysicalVecRegs; }
+
+ /** @return the number of vector element physical registers. */
+ unsigned numVecElemPhysRegs() const { return numPhysicalVecElemRegs; }
/** @return the number of condition-code physical registers. */
unsigned numCCPhysRegs() const { return numPhysicalCCRegs; }
@@ -164,6 +212,68 @@ class PhysRegFile
return floatRegBits;
}
+ /** Reads a vector register. */
+ const VecRegContainer& readVecReg(PhysRegIdPtr phys_reg) const
+ {
+ assert(phys_reg->isVectorPhysReg());
+
+ DPRINTF(IEW, "RegFile: Access to vector register %i, has "
+ "data %s\n", int(phys_reg->index()),
+ vectorRegFile[phys_reg->index()].as<VecElem>().print());
+
+ return vectorRegFile[phys_reg->index()];
+ }
+
+ /** Reads a vector register for modification. */
+ VecRegContainer& getWritableVecReg(PhysRegIdPtr phys_reg)
+ {
+ /* const_cast for not duplicating code above. */
+ return const_cast<VecRegContainer&>(readVecReg(phys_reg));
+ }
+
+ /** Reads a vector register lane. */
+ template <typename VecElem, int LaneIdx>
+ VecLaneT<VecElem, true>
+ readVecLane(PhysRegIdPtr phys_reg) const
+ {
+ return readVecReg(phys_reg).laneView<VecElem, LaneIdx>();
+ }
+
+ /** Reads a vector register lane. */
+ template <typename VecElem>
+ VecLaneT<VecElem, true>
+ readVecLane(PhysRegIdPtr phys_reg) const
+ {
+ return readVecReg(phys_reg).laneView<VecElem>(phys_reg->elemIndex());
+ }
+
+ /** Writes a lane of a vector register. */
+ template <typename LD>
+ void
+ setVecLane(PhysRegIdPtr phys_reg, const LD& val)
+ {
+ assert(phys_reg->isVectorPhysReg());
+
+ DPRINTF(IEW, "RegFile: Setting vector register %i[%d] to %lx\n",
+ int(phys_reg->index()), phys_reg->elemIndex(), val);
+
+ vectorRegFile[phys_reg->index()].laneView<typename LD::UnderlyingType>(
+ phys_reg->elemIndex()) = val;
+ }
+
+ /** Reads a vector element. */
+ const VecElem& readVecElem(PhysRegIdPtr phys_reg) const
+ {
+ assert(phys_reg->isVectorPhysElem());
+ auto ret = vectorRegFile[phys_reg->index()].as<VecElem>();
+ const VecElem& val = ret[phys_reg->elemIndex()];
+ DPRINTF(IEW, "RegFile: Access to element %d of vector register %i,"
+ " has data %#x\n", phys_reg->elemIndex(),
+ int(phys_reg->index()), val);
+
+ return val;
+ }
+
/** Reads a condition-code register. */
CCReg readCCReg(PhysRegIdPtr phys_reg)
{
@@ -207,7 +317,31 @@ class PhysRegFile
DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
phys_reg->index(), (uint64_t)val);
- floatRegFile[phys_reg->index()].q = val;
+ if (!phys_reg->isZeroReg())
+ floatRegFile[phys_reg->index()].q = val;
+ }
+
+ /** Sets a vector register to the given value. */
+ void setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val)
+ {
+ assert(phys_reg->isVectorPhysReg());
+
+ DPRINTF(IEW, "RegFile: Setting vector register %i to %s\n",
+ int(phys_reg->index()), val.print());
+
+ vectorRegFile[phys_reg->index()] = val;
+ }
+
+ /** Sets a vector element to the given value. */
+ void setVecElem(PhysRegIdPtr phys_reg, const VecElem val)
+ {
+ assert(phys_reg->isVectorPhysElem());
+
+ DPRINTF(IEW, "RegFile: Setting element %d of vector register %i to"
+ " %#x\n", phys_reg->elemIndex(), int(phys_reg->index()), val);
+
+ vectorRegFile[phys_reg->index()].as<VecElem>()[phys_reg->elemIndex()] =
+ val;
}
/** Sets a condition-code register to the given value. */
@@ -220,6 +354,25 @@ class PhysRegFile
ccRegFile[phys_reg->index()] = val;
}
+
+ /** Get the PhysRegIds of the elems of a vector register.
+ * Auxiliary function to transition from Full vector mode to Elem mode.
+ */
+ IdRange getRegElemIds(PhysRegIdPtr reg);
+
+ /**
+ * Get the PhysRegIds of the elems of all vector registers.
+ * Auxiliary function to transition from Full vector mode to Elem mode
+ * and to initialise the rename map.
+ */
+ IdRange getRegIds(RegClass cls);
+
+ /**
+ * Get the true physical register id.
+ * As many parts work with PhysRegIdPtr, we need to be able to produce
+ * the pointer out of just class and register idx.
+ */
+ PhysRegIdPtr getTrueId(PhysRegIdPtr reg);
};
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index 6d3861ba6..d0f6ba13d 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -464,8 +464,6 @@ class DefaultRename
/** The maximum skid buffer size. */
unsigned skidBufferMax;
- PhysRegIndex maxPhysicalRegs;
-
/** Enum to record the source of a structure full stall. Can come from
* either ROB, IQ, LSQ, and it is priortized in that order.
*/
@@ -515,6 +513,7 @@ class DefaultRename
Stats::Scalar renameRenameLookups;
Stats::Scalar intRenameLookups;
Stats::Scalar fpRenameLookups;
+ Stats::Scalar vecRenameLookups;
/** Stat for total number of committed renaming mappings. */
Stats::Scalar renameCommittedMaps;
/** Stat for total number of mappings that were undone due to a squash. */
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 9c9b030f5..b9adcdff7 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -67,9 +67,7 @@ DefaultRename<Impl>::DefaultRename(O3CPU *_cpu, DerivO3CPUParams *params)
commitToRenameDelay(params->commitToRenameDelay),
renameWidth(params->renameWidth),
commitWidth(params->commitWidth),
- numThreads(params->numThreads),
- maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs
- + params->numPhysCCRegs)
+ numThreads(params->numThreads)
{
if (renameWidth > Impl::MaxWidth)
fatal("renameWidth (%d) is larger than compiled limit (%d),\n"
@@ -182,6 +180,10 @@ DefaultRename<Impl>::regStats()
.name(name() + ".fp_rename_lookups")
.desc("Number of floating rename lookups")
.prereq(fpRenameLookups);
+ vecRenameLookups
+ .name(name() + ".vec_rename_lookups")
+ .desc("Number of vector rename lookups")
+ .prereq(vecRenameLookups);
}
template <class Impl>
@@ -645,6 +647,8 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
// to rename to. Otherwise block.
if (!renameMap[tid]->canRename(inst->numIntDestRegs(),
inst->numFPDestRegs(),
+ inst->numVecDestRegs(),
+ inst->numVecElemDestRegs(),
inst->numCCDestRegs())) {
DPRINTF(Rename, "Blocking due to lack of free "
"physical registers to rename to.\n");
@@ -1239,12 +1243,17 @@ DefaultRename<Impl>::readFreeEntries(ThreadID tid)
}
DPRINTF(Rename, "[tid:%i]: Free IQ: %i, Free ROB: %i, "
- "Free LQ: %i, Free SQ: %i\n",
+ "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i)\n",
tid,
freeEntries[tid].iqEntries,
freeEntries[tid].robEntries,
freeEntries[tid].lqEntries,
- freeEntries[tid].sqEntries);
+ freeEntries[tid].sqEntries,
+ renameMap[tid]->numFreeEntries(),
+ renameMap[tid]->numFreeIntEntries(),
+ renameMap[tid]->numFreeFloatEntries(),
+ renameMap[tid]->numFreeVecEntries(),
+ renameMap[tid]->numFreeCCEntries());
DPRINTF(Rename, "[tid:%i]: %i instructions not yet in ROB\n",
tid, instsInProgress[tid]);
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index 38ccc7ec9..bde2a6921 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -1,4 +1,16 @@
/*
+ * Copyright (c) 2016 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved.
@@ -93,15 +105,92 @@ void
UnifiedRenameMap::init(PhysRegFile *_regFile,
RegIndex _intZeroReg,
RegIndex _floatZeroReg,
- UnifiedFreeList *freeList)
+ UnifiedFreeList *freeList,
+ VecMode _mode)
{
regFile = _regFile;
+ vecMode = _mode;
intMap.init(TheISA::NumIntRegs, &(freeList->intList), _intZeroReg);
floatMap.init(TheISA::NumFloatRegs, &(freeList->floatList), _floatZeroReg);
+ vecMap.init(TheISA::NumVecRegs, &(freeList->vecList), (RegIndex)-1);
+
+ vecElemMap.init(TheISA::NumVecRegs * NVecElems,
+ &(freeList->vecElemList), (RegIndex)-1);
+
ccMap.init(TheISA::NumCCRegs, &(freeList->ccList), (RegIndex)-1);
}
+void
+UnifiedRenameMap::switchMode(VecMode newVecMode, UnifiedFreeList* freeList)
+{
+ if (newVecMode == Enums::Elem && vecMode == Enums::Full) {
+ /* Switch to vector element rename mode. */
+ /* The free list should currently be tracking full registers. */
+ panic_if(freeList->hasFreeVecElems(),
+ "The free list is already tracking Vec elems");
+ panic_if(freeList->numFreeVecRegs() !=
+ regFile->numVecPhysRegs() - TheISA::NumVecRegs,
+ "The free list has lost vector registers");
+ /* Split the mapping of each arch reg into per-element mappings. */
+ int reg = 0;
+ for (auto &e: vecMap) {
+ PhysRegFile::IdRange range = this->regFile->getRegElemIds(e);
+ uint32_t i;
+ for (i = 0; range.first != range.second; i++, range.first++) {
+ vecElemMap.setEntry(RegId(VecElemClass, reg, i),
+ &(*range.first));
+ }
+ panic_if(i != NVecElems,
+ "Wrong number of elems: expecting %u, got %u\n",
+ TheISA::NumVecElemPerVecReg, i);
+ reg++;
+ }
+ /* Split the free regs: each free vector becomes its free elems. */
+ while (freeList->hasFreeVecRegs()) {
+ auto vr = freeList->getVecReg();
+ auto range = this->regFile->getRegElemIds(vr);
+ freeList->addRegs(range.first, range.second);
+ }
+ vecMode = Enums::Elem;
+ } else if (newVecMode == Enums::Full && vecMode == Enums::Elem) {
+ /* Switch to full vector register rename mode. */
+ /* The free list should currently be tracking register elems. */
+ panic_if(freeList->hasFreeVecRegs(),
+ "The free list is already tracking full Vec");
+ panic_if(vecElemMap.numFreeEntries() !=
+ regFile->numVecElemPhysRegs() - TheISA::NumVecRegs * NVecElems,
+ "The free list has lost vector register elements");
+ /* To rebuild the arch regs we take the easy road:
+ * 1.- Stitch the elems together into vectors.
+ * 2.- Replace the contents of the register file with the vectors
+ * 3.- Set the remaining registers as free
+ */
+ TheISA::VecRegContainer new_RF[TheISA::NumVecRegs];
+ for (uint32_t i = 0; i < TheISA::NumVecRegs; i++) {
+ VecReg dst = new_RF[i].as<TheISA::VecElem>();
+ for (uint32_t l = 0; l < NVecElems; l++) {
+ RegId s_rid(VecElemClass, i, l);
+ PhysRegIdPtr s_prid = vecElemMap.lookup(s_rid);
+ dst[l] = regFile->readVecElem(s_prid);
+ }
+ }
+
+ for (uint32_t i = 0; i < TheISA::NumVecRegs; i++) {
+ PhysRegId pregId(VecRegClass, i, 0);
+ regFile->setVecReg(regFile->getTrueId(&pregId), new_RF[i]);
+ }
+
+ auto range = regFile->getRegIds(VecRegClass);
+ freeList->addRegs(range.first + TheISA::NumVecRegs, range.second);
+
+ /* We remove the elems from the free list. */
+ while (freeList->hasFreeVecElems())
+ freeList->getVecElem();
+ vecMode = Enums::Full;
+ }
+}
+
diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh
index 028c32e3a..ab909f090 100644
--- a/src/cpu/o3/rename_map.hh
+++ b/src/cpu/o3/rename_map.hh
@@ -54,6 +54,7 @@
#include "cpu/o3/free_list.hh"
#include "cpu/o3/regfile.hh"
#include "cpu/reg_class.hh"
+#include "enums/VecRegRenameMode.hh"
/**
* Register rename map for a single class of registers (e.g., integer
@@ -68,6 +69,10 @@ class SimpleRenameMap
using Arch2PhysMap = std::vector<PhysRegIdPtr>;
/** The acutal arch-to-phys register map */
Arch2PhysMap map;
+ public:
+ using iterator = Arch2PhysMap::iterator;
+ using const_iterator = Arch2PhysMap::const_iterator;
+ private:
/**
* Pointer to the free list from which new physical registers
@@ -139,6 +144,20 @@ class SimpleRenameMap
/** Return the number of free entries on the associated free list. */
unsigned numFreeEntries() const { return freeList->numFreeRegs(); }
+
+ /** Forward begin/cbegin to the map. */
+ /** @{ */
+ iterator begin() { return map.begin(); }
+ const_iterator begin() const { return map.begin(); }
+ const_iterator cbegin() const { return map.cbegin(); }
+ /** @} */
+
+ /** Forward end/cend to the map. */
+ /** @{ */
+ iterator end() { return map.end(); }
+ const_iterator end() const { return map.end(); }
+ const_iterator cend() const { return map.cend(); }
+ /** @} */
};
@@ -152,6 +171,8 @@ class SimpleRenameMap
class UnifiedRenameMap
{
private:
+ static constexpr uint32_t NVecElems = TheISA::NumVecElemPerVecReg;
+ using VecReg = TheISA::VecReg;
/** The integer register rename map */
SimpleRenameMap intMap;
@@ -162,6 +183,15 @@ class UnifiedRenameMap
/** The condition-code register rename map */
SimpleRenameMap ccMap;
+ /** The vector register rename map */
+ SimpleRenameMap vecMap;
+
+ /** The vector element register rename map */
+ SimpleRenameMap vecElemMap;
+
+ using VecMode = Enums::VecRegRenameMode;
+ VecMode vecMode;
+
/**
* The register file object is used only to get PhysRegIdPtr
* on MiscRegs, as they are stored in it.
@@ -182,7 +212,8 @@ class UnifiedRenameMap
void init(PhysRegFile *_regFile,
RegIndex _intZeroReg,
RegIndex _floatZeroReg,
- UnifiedFreeList *freeList);
+ UnifiedFreeList *freeList,
+ VecMode _mode);
/**
* Tell rename map to get a new free physical register to remap
@@ -199,6 +230,12 @@ class UnifiedRenameMap
return intMap.rename(arch_reg);
case FloatRegClass:
return floatMap.rename(arch_reg);
+ case VecRegClass:
+ assert(vecMode == Enums::Full);
+ return vecMap.rename(arch_reg);
+ case VecElemClass:
+ assert(vecMode == Enums::Elem);
+ return vecElemMap.rename(arch_reg);
case CCRegClass:
return ccMap.rename(arch_reg);
case MiscRegClass:
@@ -232,6 +269,14 @@ class UnifiedRenameMap
case FloatRegClass:
return floatMap.lookup(arch_reg);
+ case VecRegClass:
+ assert(vecMode == Enums::Full);
+ return vecMap.lookup(arch_reg);
+
+ case VecElemClass:
+ assert(vecMode == Enums::Elem);
+ return vecElemMap.lookup(arch_reg);
+
case CCRegClass:
return ccMap.lookup(arch_reg);
@@ -265,6 +310,16 @@ class UnifiedRenameMap
assert(phys_reg->isFloatPhysReg());
return floatMap.setEntry(arch_reg, phys_reg);
+ case VecRegClass:
+ assert(phys_reg->isVectorPhysReg());
+ assert(vecMode == Enums::Full);
+ return vecMap.setEntry(arch_reg, phys_reg);
+
+ case VecElemClass:
+ assert(phys_reg->isVectorPhysElem());
+ assert(vecMode == Enums::Elem);
+ return vecElemMap.setEntry(arch_reg, phys_reg);
+
case CCRegClass:
assert(phys_reg->isCCPhysReg());
return ccMap.setEntry(arch_reg, phys_reg);
@@ -291,18 +346,39 @@ class UnifiedRenameMap
*/
unsigned numFreeEntries() const
{
- return std::min(intMap.numFreeEntries(), floatMap.numFreeEntries());
+ return std::min(
+ std::min(intMap.numFreeEntries(), floatMap.numFreeEntries()),
+ vecMode == Enums::Full ? vecMap.numFreeEntries()
+ : vecElemMap.numFreeEntries());
}
+ unsigned numFreeIntEntries() const { return intMap.numFreeEntries(); }
+ unsigned numFreeFloatEntries() const { return floatMap.numFreeEntries(); }
+ unsigned numFreeVecEntries() const
+ {
+ return vecMode == Enums::Full
+ ? vecMap.numFreeEntries()
+ : vecElemMap.numFreeEntries();
+ }
+ unsigned numFreeCCEntries() const { return ccMap.numFreeEntries(); }
+
/**
* Return whether there are enough registers to serve the request.
*/
- bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t ccRegs) const
+ bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t vectorRegs,
+ uint32_t vecElemRegs, uint32_t ccRegs) const
{
return intRegs <= intMap.numFreeEntries() &&
floatRegs <= floatMap.numFreeEntries() &&
+ vectorRegs <= vecMap.numFreeEntries() &&
+ vecElemRegs <= vecElemMap.numFreeEntries() &&
ccRegs <= ccMap.numFreeEntries();
}
+ /**
+ * Set vector mode to Full or Elem.
+ * Ignore 'silent' modifications.
+ */
+ void switchMode(VecMode newVecMode, UnifiedFreeList* freeList);
};
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
index 161d70b28..ac4ceed02 100755
--- a/src/cpu/o3/thread_context.hh
+++ b/src/cpu/o3/thread_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2011-2012, 2016 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -194,6 +194,70 @@ class O3ThreadContext : public ThreadContext
reg_idx)).index());
}
+ virtual const VecRegContainer& readVecReg(const RegId& id) const {
+ return readVecRegFlat(flattenRegId(id).index());
+ }
+
+ /**
+ * Read vector register operand for modification, hierarchical indexing.
+ */
+ virtual VecRegContainer& getWritableVecReg(const RegId& id) {
+ return getWritableVecRegFlat(flattenRegId(id).index());
+ }
+
+ /** Vector Register Lane Interfaces. */
+ /** @{ */
+ /** Reads source vector 8bit operand. */
+ virtual ConstVecLane8
+ readVec8BitLaneReg(const RegId& id) const
+ {
+ return readVecLaneFlat<uint8_t>(flattenRegId(id).index(),
+ id.elemIndex());
+ }
+
+ /** Reads source vector 16bit operand. */
+ virtual ConstVecLane16
+ readVec16BitLaneReg(const RegId& id) const
+ {
+ return readVecLaneFlat<uint16_t>(flattenRegId(id).index(),
+ id.elemIndex());
+ }
+
+ /** Reads source vector 32bit operand. */
+ virtual ConstVecLane32
+ readVec32BitLaneReg(const RegId& id) const
+ {
+ return readVecLaneFlat<uint32_t>(flattenRegId(id).index(),
+ id.elemIndex());
+ }
+
+ /** Reads source vector 64bit operand. */
+ virtual ConstVecLane64
+ readVec64BitLaneReg(const RegId& id) const
+ {
+ return readVecLaneFlat<uint64_t>(flattenRegId(id).index(),
+ id.elemIndex());
+ }
+
+ /** Write a lane of the destination vector register. */
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::Byte>& val)
+ { return setVecLaneFlat(flattenRegId(reg).index(), reg.elemIndex(), val); }
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::TwoByte>& val)
+ { return setVecLaneFlat(flattenRegId(reg).index(), reg.elemIndex(), val); }
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::FourByte>& val)
+ { return setVecLaneFlat(flattenRegId(reg).index(), reg.elemIndex(), val); }
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::EightByte>& val)
+ { return setVecLaneFlat(flattenRegId(reg).index(), reg.elemIndex(), val); }
+ /** @} */
+
+ virtual const VecElem& readVecElem(const RegId& reg) const {
+ return readVecElemFlat(flattenRegId(reg).index(), reg.elemIndex());
+ }
+
virtual CCReg readCCReg(int reg_idx) {
return readCCRegFlat(flattenRegId(RegId(CCRegClass,
reg_idx)).index());
@@ -214,6 +278,14 @@ class O3ThreadContext : public ThreadContext
reg_idx)).index(), val);
}
+ virtual void setVecReg(const RegId& reg, const VecRegContainer& val) {
+ setVecRegFlat(flattenRegId(reg).index(), val);
+ }
+
+ virtual void setVecElem(const RegId& reg, const VecElem& val) {
+ setVecElemFlat(flattenRegId(reg).index(), reg.elemIndex(), val);
+ }
+
virtual void setCCReg(int reg_idx, CCReg val) {
setCCRegFlat(flattenRegId(RegId(CCRegClass, reg_idx)).index(), val);
}
@@ -298,6 +370,29 @@ class O3ThreadContext : public ThreadContext
virtual FloatRegBits readFloatRegBitsFlat(int idx);
virtual void setFloatRegBitsFlat(int idx, FloatRegBits val);
+ virtual const VecRegContainer& readVecRegFlat(int idx) const;
+ /** Read vector register operand for modification, flat indexing. */
+ virtual VecRegContainer& getWritableVecRegFlat(int idx);
+ virtual void setVecRegFlat(int idx, const VecRegContainer& val);
+
+ template <typename VecElem>
+ VecLaneT<VecElem, true> readVecLaneFlat(int idx, int lId) const
+ {
+ return cpu->template readArchVecLane<VecElem>(idx, lId,
+ thread->threadId());
+ }
+
+ template <typename LD>
+ void setVecLaneFlat(int idx, int lId, const LD& val)
+ {
+ cpu->template setArchVecLane(idx, lId, thread->threadId(), val);
+ }
+
+ virtual const VecElem& readVecElemFlat(const RegIndex& idx,
+ const ElemIndex& elemIndex) const;
+ virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx,
+ const VecElem& val);
+
virtual CCReg readCCRegFlat(int idx);
virtual void setCCRegFlat(int idx, CCReg val);
};
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index c3f894275..2d109aea9 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2012 ARM Limited
+ * Copyright (c) 2010-2012, 2016 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -209,6 +209,28 @@ O3ThreadContext<Impl>::readFloatRegBitsFlat(int reg_idx)
}
template <class Impl>
+const TheISA::VecRegContainer&
+O3ThreadContext<Impl>::readVecRegFlat(int reg_id) const
+{
+ return cpu->readArchVecReg(reg_id, thread->threadId());
+}
+
+template <class Impl>
+TheISA::VecRegContainer&
+O3ThreadContext<Impl>::getWritableVecRegFlat(int reg_id)
+{
+ return cpu->getWritableArchVecReg(reg_id, thread->threadId());
+}
+
+template <class Impl>
+const TheISA::VecElem&
+O3ThreadContext<Impl>::readVecElemFlat(const RegIndex& idx,
+ const ElemIndex& elemIndex) const
+{
+ return cpu->readArchVecElem(idx, elemIndex, thread->threadId());
+}
+
+template <class Impl>
TheISA::CCReg
O3ThreadContext<Impl>::readCCRegFlat(int reg_idx)
{
@@ -244,6 +266,24 @@ O3ThreadContext<Impl>::setFloatRegBitsFlat(int reg_idx, FloatRegBits val)
template <class Impl>
void
+O3ThreadContext<Impl>::setVecRegFlat(int reg_idx, const VecRegContainer& val)
+{
+ cpu->setArchVecReg(reg_idx, val, thread->threadId());
+
+ conditionalSquash();
+}
+
+template <class Impl>
+void
+O3ThreadContext<Impl>::setVecElemFlat(const RegIndex& idx,
+ const ElemIndex& elemIndex, const VecElem& val)
+{
+ cpu->setArchVecElem(idx, elemIndex, val, thread->threadId());
+ conditionalSquash();
+}
+
+template <class Impl>
+void
O3ThreadContext<Impl>::setCCRegFlat(int reg_idx, TheISA::CCReg val)
{
cpu->setArchCCReg(reg_idx, val, thread->threadId());
diff --git a/src/cpu/reg_class.cc b/src/cpu/reg_class.cc
index 53a50ce8e..16c1949ee 100644
--- a/src/cpu/reg_class.cc
+++ b/src/cpu/reg_class.cc
@@ -1,4 +1,16 @@
/*
+ * Copyright (c) 2016 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -33,6 +45,8 @@
const char *RegId::regClassStrings[] = {
"IntRegClass",
"FloatRegClass",
+ "VecRegClass",
+ "VecElemClass",
"CCRegClass",
"MiscRegClass"
};
diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh
index 05869e8fb..27bf59b19 100644
--- a/src/cpu/reg_class.hh
+++ b/src/cpu/reg_class.hh
@@ -39,6 +39,7 @@
*
* Authors: Steve Reinhardt
* Nathanael Premillieu
+ * Rekai Gonzalez
*/
#ifndef __CPU__REG_CLASS_HH__
@@ -55,6 +56,10 @@
enum RegClass {
IntRegClass, ///< Integer register
FloatRegClass, ///< Floating-point register
+ /** Vector Register. */
+ VecRegClass,
+ /** Vector Register Native Elem lane. */
+ VecElemClass,
CCRegClass, ///< Condition-code register
MiscRegClass ///< Control (misc) register
};
@@ -75,14 +80,27 @@ class RegId {
static const char* regClassStrings[];
RegClass regClass;
RegIndex regIdx;
+ ElemIndex elemIdx;
+ static constexpr size_t Scale = TheISA::NumVecElemPerVecReg;
public:
RegId() {};
RegId(RegClass reg_class, RegIndex reg_idx)
- : regClass(reg_class), regIdx(reg_idx)
- {}
+ : regClass(reg_class), regIdx(reg_idx), elemIdx(-1)
+ {
+ panic_if(regClass == VecElemClass,
+ "Creating vector physical index w/o element index");
+ }
+
+ explicit RegId(RegClass reg_class, RegIndex reg_idx, ElemIndex elem_idx)
+ : regClass(reg_class), regIdx(reg_idx), elemIdx(elem_idx)
+ {
+ panic_if(regClass != VecElemClass,
+ "Creating non-vector physical index w/ element index");
+ }
bool operator==(const RegId& that) const {
- return regClass == that.classValue() && regIdx == that.index();
+ return regClass == that.classValue() && regIdx == that.index()
+ && elemIdx == that.elemIndex();
}
bool operator!=(const RegId& that) const {
@@ -94,7 +112,9 @@ class RegId {
*/
bool operator<(const RegId& that) const {
return regClass < that.classValue() ||
- (regClass == that.classValue() && regIdx < that.index());
+ (regClass == that.classValue() && (
+ regIdx < that.index() ||
+ (regIdx == that.index() && elemIdx < that.elemIndex())));
}
/**
@@ -120,11 +140,25 @@ class RegId {
bool isFloatReg() const { return regClass == FloatRegClass; }
/** @Return true if it is a condition-code physical register. */
+ bool isVecReg() const { return regClass == VecRegClass; }
+
+ /** @Return true if it is a vector element physical register. */
+ bool isVecElem() const { return regClass == VecElemClass; }
+
+ /** @Return true if it is a condition-code physical register. */
bool isCCReg() const { return regClass == CCRegClass; }
/** @Return true if it is a condition-code physical register. */
bool isMiscReg() const { return regClass == MiscRegClass; }
+ /**
+ * @return true if this register can be renamed (i.e. not a MiscReg)
+ */
+ bool isRenameable() const
+ {
+ return regClass != MiscRegClass;
+ }
+
/** Index accessors */
/** @{ */
const RegIndex& index() const { return regIdx; }
@@ -136,6 +170,8 @@ class RegId {
inline RegIndex flatIndex() const;
/** @} */
+ /** Elem accessor (element index; only meaningful for VecElemClass) */
+ const ElemIndex& elemIndex() const { return elemIdx; }
/** Class accessor */
const RegClass& classValue() const { return regClass; }
/** Return a const char* with the register class name. */
diff --git a/src/cpu/reg_class_impl.hh b/src/cpu/reg_class_impl.hh
index a47328b10..98b341e86 100644
--- a/src/cpu/reg_class_impl.hh
+++ b/src/cpu/reg_class_impl.hh
@@ -55,13 +55,18 @@ bool RegId::isZeroReg() const
regIdx == TheISA::ZeroReg));
}
+static constexpr size_t Scale = TheISA::NumVecElemPerVecReg;
+
RegIndex RegId::flatIndex() const {
switch (regClass) {
case IntRegClass:
case FloatRegClass:
+ case VecRegClass:
case CCRegClass:
case MiscRegClass:
return regIdx;
+ case VecElemClass:
+ return Scale*regIdx + elemIdx;
}
panic("Trying to flatten a register without class!");
return -1;
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 77d2fb4ac..57cea4ba7 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -292,6 +292,16 @@ BaseSimpleCPU::regStats()
.desc("number of times the floating registers were written")
;
+ t_info.numVecRegReads
+ .name(thread_str + ".num_vec_register_reads")
+ .desc("number of times the vector registers were read")
+ ;
+
+ t_info.numVecRegWrites
+ .name(thread_str + ".num_vec_register_writes")
+ .desc("number of times the vector registers were written")
+ ;
+
t_info.numCCRegReads
.name(thread_str + ".num_cc_register_reads")
.desc("number of times the CC registers were read")
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index f221d6c93..0f546407d 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014-2015 ARM Limited
+ * Copyright (c) 2014-2016 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -64,6 +64,8 @@ class SimpleExecContext : public ExecContext {
typedef TheISA::FloatReg FloatReg;
typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::CCReg CCReg;
+ using VecRegContainer = TheISA::VecRegContainer;
+ using VecElem = TheISA::VecElem;
public:
BaseSimpleCPU *cpu;
@@ -112,6 +114,10 @@ class SimpleExecContext : public ExecContext {
Stats::Scalar numFpRegReads;
Stats::Scalar numFpRegWrites;
+ // Number of vector register file accesses
+ mutable Stats::Scalar numVecRegReads;
+ Stats::Scalar numVecRegWrites;
+
// Number of condition code register file accesses
Stats::Scalar numCCRegReads;
Stats::Scalar numCCRegWrites;
@@ -219,6 +225,124 @@ class SimpleExecContext : public ExecContext {
thread->setFloatRegBits(reg.index(), val);
}
+ /** Reads a vector register. */
+ const VecRegContainer&
+ readVecRegOperand(const StaticInst *si, int idx) const override
+ {
+ numVecRegReads++;
+ const RegId& reg = si->srcRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread->readVecReg(reg);
+ }
+
+ /** Reads a vector register for modification. */
+ VecRegContainer&
+ getWritableVecRegOperand(const StaticInst *si, int idx) override
+ {
+ numVecRegWrites++;
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread->getWritableVecReg(reg);
+ }
+
+ /** Sets a vector register to a value. */
+ void setVecRegOperand(const StaticInst *si, int idx,
+ const VecRegContainer& val) override
+ {
+ numVecRegWrites++;
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecReg());
+ thread->setVecReg(reg, val);
+ }
+
+ /** Vector Register Lane Interfaces. */
+ /** @{ */
+ /** Reads source vector lane. */
+ template <typename VecElem>
+ VecLaneT<VecElem, true>
+ readVecLaneOperand(const StaticInst *si, int idx) const
+ {
+ numVecRegReads++;
+ const RegId& reg = si->srcRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread->readVecLane<VecElem>(reg);
+ }
+ /** Reads source vector 8bit operand. */
+ virtual ConstVecLane8
+ readVec8BitLaneOperand(const StaticInst *si, int idx) const
+ override
+ { return readVecLaneOperand<uint8_t>(si, idx); }
+
+ /** Reads source vector 16bit operand. */
+ virtual ConstVecLane16
+ readVec16BitLaneOperand(const StaticInst *si, int idx) const
+ override
+ { return readVecLaneOperand<uint16_t>(si, idx); }
+
+ /** Reads source vector 32bit operand. */
+ virtual ConstVecLane32
+ readVec32BitLaneOperand(const StaticInst *si, int idx) const
+ override
+ { return readVecLaneOperand<uint32_t>(si, idx); }
+
+ /** Reads source vector 64bit operand. */
+ virtual ConstVecLane64
+ readVec64BitLaneOperand(const StaticInst *si, int idx) const
+ override
+ { return readVecLaneOperand<uint64_t>(si, idx); }
+
+ /** Write a lane of the destination vector operand. */
+ template <typename LD>
+ void
+ setVecLaneOperandT(const StaticInst *si, int idx,
+ const LD& val)
+ {
+ numVecRegWrites++;
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecReg());
+ return thread->setVecLane(reg, val);
+ }
+ /** Write a lane of the destination vector operand. */
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::Byte>& val) override
+ { return setVecLaneOperandT(si, idx, val); }
+ /** Write a lane of the destination vector operand. */
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::TwoByte>& val) override
+ { return setVecLaneOperandT(si, idx, val); }
+ /** Write a lane of the destination vector operand. */
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::FourByte>& val) override
+ { return setVecLaneOperandT(si, idx, val); }
+ /** Write a lane of the destination vector operand. */
+ virtual void
+ setVecLaneOperand(const StaticInst *si, int idx,
+ const LaneData<LaneSize::EightByte>& val) override
+ { return setVecLaneOperandT(si, idx, val); }
+ /** @} */
+
+ /** Reads an element of a vector register (source operand idx of si). */
+ VecElem readVecElemOperand(const StaticInst *si, int idx) const override
+ {
+ numVecRegReads++;
+ const RegId& reg = si->srcRegIdx(idx);
+ assert(reg.isVecElem());
+ return thread->readVecElem(reg);
+ }
+
+ /** Sets an element of a vector register to a value. */
+ void setVecElemOperand(const StaticInst *si, int idx,
+ const VecElem val) override
+ {
+ numVecRegWrites++;
+ const RegId& reg = si->destRegIdx(idx);
+ assert(reg.isVecElem());
+ thread->setVecElem(reg, val);
+ }
+
CCReg readCCRegOperand(const StaticInst *si, int idx) override
{
numCCRegReads++;
diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh
index 286d91766..4ea8b91ba 100644
--- a/src/cpu/simple_thread.hh
+++ b/src/cpu/simple_thread.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2011-2012, 2016 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -58,6 +58,7 @@
#include "debug/CCRegs.hh"
#include "debug/FloatRegs.hh"
#include "debug/IntRegs.hh"
+#include "debug/VecRegs.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
#include "sim/byteswap.hh"
@@ -102,6 +103,8 @@ class SimpleThread : public ThreadState
typedef TheISA::FloatReg FloatReg;
typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::CCReg CCReg;
+ using VecRegContainer = TheISA::VecRegContainer;
+ using VecElem = TheISA::VecElem;
public:
typedef ThreadContext::Status Status;
@@ -111,6 +114,7 @@ class SimpleThread : public ThreadState
FloatRegBits i[TheISA::NumFloatRegs];
} floatRegs;
TheISA::IntReg intRegs[TheISA::NumIntRegs];
+ VecRegContainer vecRegs[TheISA::NumVecRegs];
#ifdef ISA_HAS_CC_REGS
TheISA::CCReg ccRegs[TheISA::NumCCRegs];
#endif
@@ -227,6 +231,9 @@ class SimpleThread : public ThreadState
_pcState = 0;
memset(intRegs, 0, sizeof(intRegs));
memset(floatRegs.i, 0, sizeof(floatRegs.i));
+ for (int i = 0; i < TheISA::NumVecRegs; i++) {
+ vecRegs[i].zero();
+ }
#ifdef ISA_HAS_CC_REGS
memset(ccRegs, 0, sizeof(ccRegs));
#endif
@@ -266,6 +273,98 @@ class SimpleThread : public ThreadState
return regVal;
}
+ /**
+  * Read-only access to a vector register.
+  *
+  * The register index is flattened through the ISA and asserted to be
+  * below NumVecRegs; the value read is traced under the VecRegs flag.
+  */
+ const VecRegContainer&
+ readVecReg(const RegId& reg) const
+ {
+     const int flat_idx = isa->flattenVecIndex(reg.index());
+     assert(flat_idx < TheISA::NumVecRegs);
+     const VecRegContainer& vec = readVecRegFlat(flat_idx);
+     DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s.\n",
+             reg.index(), flat_idx, vec.as<TheISA::VecElem>().print());
+     return vec;
+ }
+
+ /**
+  * Mutable access to a vector register.
+  *
+  * Same index handling as the read-only accessor, but the returned
+  * reference may be written through by the caller.
+  */
+ VecRegContainer&
+ getWritableVecReg(const RegId& reg)
+ {
+     const int flat_idx = isa->flattenVecIndex(reg.index());
+     assert(flat_idx < TheISA::NumVecRegs);
+     VecRegContainer& vec = getWritableVecRegFlat(flat_idx);
+     DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s for modify.\n",
+             reg.index(), flat_idx, vec.as<TheISA::VecElem>().print());
+     return vec;
+ }
+
+ /** Vector Register Lane Interfaces. */
+ /** @{ */
+ /**
+  * Reads a lane of a source vector register as a VecLaneT<T, true>.
+  *
+  * The register index is flattened through the ISA; the lane is the one
+  * selected by reg.elemIndex().
+  */
+ template <typename T>
+ VecLaneT<T, true>
+ readVecLane(const RegId& reg) const
+ {
+     const int flat_idx = isa->flattenVecIndex(reg.index());
+     assert(flat_idx < TheISA::NumVecRegs);
+     auto lane = readVecLaneFlat<T>(flat_idx, reg.elemIndex());
+     DPRINTF(VecRegs, "Reading vector lane %d (%d)[%d] as %lx.\n",
+             reg.index(), flat_idx, reg.elemIndex(), lane);
+     return lane;
+ }
+
+ /** Reads source vector 8bit operand; forwards to readVecLane<uint8_t>. */
+ virtual ConstVecLane8
+ readVec8BitLaneReg(const RegId& reg) const
+ { return readVecLane<uint8_t>(reg); }
+
+ /** Reads source vector 16bit operand; forwards to readVecLane<uint16_t>. */
+ virtual ConstVecLane16
+ readVec16BitLaneReg(const RegId& reg) const
+ { return readVecLane<uint16_t>(reg); }
+
+ /** Reads source vector 32bit operand; forwards to readVecLane<uint32_t>. */
+ virtual ConstVecLane32
+ readVec32BitLaneReg(const RegId& reg) const
+ { return readVecLane<uint32_t>(reg); }
+
+ /** Reads source vector 64bit operand; forwards to readVecLane<uint64_t>. */
+ virtual ConstVecLane64
+ readVec64BitLaneReg(const RegId& reg) const
+ { return readVecLane<uint64_t>(reg); }
+
+ /**
+  * Write a lane of the destination vector register.
+  *
+  * The register index is flattened through the ISA and the lane chosen
+  * by reg.elemIndex() is overwritten with val. The write is traced under
+  * the VecRegs debug flag.
+  *
+  * @tparam LD Lane data type accepted by setVecLaneFlat().
+  */
+ template <typename LD>
+ void setVecLaneT(const RegId& reg, const LD& val)
+ {
+     int flatIndex = isa->flattenVecIndex(reg.index());
+     assert(flatIndex < TheISA::NumVecRegs);
+     setVecLaneFlat(flatIndex, reg.elemIndex(), val);
+     // This is the write path, so the trace must say "Setting".
+     DPRINTF(VecRegs, "Setting vector lane %d (%d)[%d] to %lx.\n",
+             reg.index(), flatIndex, reg.elemIndex(), val);
+ }
+ /**
+  * Lane-size-specific entry points: one overload per LaneSize, each
+  * forwarding its arguments unchanged to setVecLaneT().
+  */
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::Byte>& val)
+ { return setVecLaneT(reg, val); }
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::TwoByte>& val)
+ { return setVecLaneT(reg, val); }
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::FourByte>& val)
+ { return setVecLaneT(reg, val); }
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::EightByte>& val)
+ { return setVecLaneT(reg, val); }
+ /** @} */
+
+ /**
+  * Reads a single element of a vector register.
+  *
+  * The register index goes through the ISA's element-index flattening;
+  * the element within the register is reg.elemIndex().
+  */
+ const VecElem& readVecElem(const RegId& reg) const
+ {
+     const int flat_idx = isa->flattenVecElemIndex(reg.index());
+     assert(flat_idx < TheISA::NumVecRegs);
+     const VecElem& elem = readVecElemFlat(flat_idx, reg.elemIndex());
+     DPRINTF(VecRegs, "Reading element %d of vector reg %d (%d) as"
+             " %#x.\n", reg.elemIndex(), reg.index(), flat_idx, elem);
+     return elem;
+ }
+
+
CCReg readCCReg(int reg_idx)
{
#ifdef ISA_HAS_CC_REGS
@@ -312,6 +411,24 @@ class SimpleThread : public ThreadState
reg_idx, flatIndex, val, floatRegs.f[flatIndex]);
}
+ /**
+  * Overwrites a whole vector register with val.
+  *
+  * The register index is flattened through the ISA before the store and
+  * the new value is traced under the VecRegs flag.
+  */
+ void setVecReg(const RegId& reg, const VecRegContainer& val)
+ {
+     const int flat_idx = isa->flattenVecIndex(reg.index());
+     assert(flat_idx < TheISA::NumVecRegs);
+     setVecRegFlat(flat_idx, val);
+     DPRINTF(VecRegs, "Setting vector reg %d (%d) to %s.\n",
+             reg.index(), flat_idx, val.print());
+ }
+
+ /**
+  * Overwrites a single element of a vector register with val.
+  *
+  * The register index goes through the ISA's element-index flattening;
+  * the element written is reg.elemIndex().
+  */
+ void setVecElem(const RegId& reg, const VecElem& val)
+ {
+     const int flat_idx = isa->flattenVecElemIndex(reg.index());
+     assert(flat_idx < TheISA::NumVecRegs);
+     setVecElemFlat(flat_idx, reg.elemIndex(), val);
+     DPRINTF(VecRegs, "Setting element %d of vector reg %d (%d) to"
+             " %#x.\n", reg.elemIndex(), reg.index(), flat_idx, val);
+ }
+
void setCCReg(int reg_idx, CCReg val)
{
#ifdef ISA_HAS_CC_REGS
@@ -428,6 +545,45 @@ class SimpleThread : public ThreadState
floatRegs.i[idx] = val;
}
+ /** Returns a read-only reference to vecRegs[reg]; the index is used as-is. */
+ const VecRegContainer& readVecRegFlat(const RegIndex& reg) const
+ {
+ return vecRegs[reg];
+ }
+
+ /** Returns a mutable reference to vecRegs[reg]; the index is used as-is. */
+ VecRegContainer& getWritableVecRegFlat(const RegIndex& reg)
+ {
+ return vecRegs[reg];
+ }
+
+ /** Copies val into vecRegs[reg]; the index is used as-is. */
+ void setVecRegFlat(const RegIndex& reg, const VecRegContainer& val)
+ {
+ vecRegs[reg] = val;
+ }
+
+ /** Returns lane lId of vecRegs[reg], viewed with element type T. */
+ template <typename T>
+ VecLaneT<T, true> readVecLaneFlat(const RegIndex& reg, int lId) const
+ {
+ return vecRegs[reg].laneView<T>(lId);
+ }
+
+ /**
+  * Assigns val to lane lId of vecRegs[reg]. The lane view's element
+  * type is taken from LD::UnderlyingType.
+  */
+ template <typename LD>
+ void setVecLaneFlat(const RegIndex& reg, int lId, const LD& val)
+ {
+ vecRegs[reg].laneView<typename LD::UnderlyingType>(lId) = val;
+ }
+
+ /**
+  * Returns a read-only reference to element elemIndex of vecRegs[reg],
+  * with the register viewed as an array of TheISA::VecElem.
+  */
+ const VecElem& readVecElemFlat(const RegIndex& reg,
+ const ElemIndex& elemIndex) const
+ {
+ return vecRegs[reg].as<TheISA::VecElem>()[elemIndex];
+ }
+
+ /**
+  * Stores val into element elemIndex of vecRegs[reg], with the register
+  * viewed as an array of TheISA::VecElem.
+  */
+ void setVecElemFlat(const RegIndex& reg, const ElemIndex& elemIndex,
+ const VecElem val)
+ {
+ vecRegs[reg].as<TheISA::VecElem>()[elemIndex] = val;
+ }
+
#ifdef ISA_HAS_CC_REGS
CCReg readCCRegFlat(int idx) { return ccRegs[idx]; }
void setCCRegFlat(int idx, CCReg val) { ccRegs[idx] = val; }
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh
index d60afc019..e7507c6a6 100644
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -100,13 +100,20 @@ class StaticInst : public RefCounted, public StaticInstFlags
int8_t _numCCDestRegs;
//@}
+ /** To use in architectures with vector register file. */
+ /** @{ */
+ int8_t _numVecDestRegs; // value reported by numVecDestRegs()
+ int8_t _numVecElemDestRegs; // value reported by numVecElemDestRegs()
+ /** @} */
+
public:
/// @name Register information.
- /// The sum of numFPDestRegs() and numIntDestRegs() equals
- /// numDestRegs(). The former two functions are used to track
- /// physical register usage for machines with separate int & FP
- /// reg files.
+ /// The sum of numFPDestRegs(), numIntDestRegs(), numVecDestRegs() and
+ /// numVecElemDestRegs() equals numDestRegs(). The first two functions
+ /// are used to track physical register usage for machines with separate
+ /// int & FP reg files; the last two are for machines with a vector
+ /// register file.
//@{
/// Number of source registers.
int8_t numSrcRegs() const { return _numSrcRegs; }
@@ -116,7 +123,10 @@ class StaticInst : public RefCounted, public StaticInstFlags
int8_t numFPDestRegs() const { return _numFPDestRegs; }
/// Number of integer destination regs.
int8_t numIntDestRegs() const { return _numIntDestRegs; }
- //@}
+ /// Number of vector destination regs (returns _numVecDestRegs).
+ int8_t numVecDestRegs() const { return _numVecDestRegs; }
+ /// Number of vector element destination regs (returns _numVecElemDestRegs).
+ int8_t numVecElemDestRegs() const { return _numVecElemDestRegs; }
/// Number of coprocesor destination regs.
int8_t numCCDestRegs() const { return _numCCDestRegs; }
//@}
@@ -252,7 +262,8 @@ class StaticInst : public RefCounted, public StaticInstFlags
StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass)
: _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0),
_numFPDestRegs(0), _numIntDestRegs(0), _numCCDestRegs(0),
- machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0)
+ _numVecDestRegs(0), _numVecElemDestRegs(0), machInst(_machInst),
+ mnemonic(_mnemonic), cachedDisassembly(0)
{ }
public:
diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc
index cea21e790..0d288de6f 100644
--- a/src/cpu/thread_context.cc
+++ b/src/cpu/thread_context.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 ARM Limited
+ * Copyright (c) 2012, 2016 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -76,6 +76,16 @@ ThreadContext::compare(ThreadContext *one, ThreadContext *two)
panic("Float reg idx %d doesn't match, one: %#x, two: %#x",
i, t1, t2);
}
+
+ // Then loop through the vector registers.
+ for (int i = 0; i < TheISA::NumVecRegs; ++i) {
+ RegId rid(VecRegClass, i);
+ const TheISA::VecRegContainer& t1 = one->readVecReg(rid);
+ const TheISA::VecRegContainer& t2 = two->readVecReg(rid);
+ if (t1 != t2)
+ panic("Vec reg idx %d doesn't match, one: %#x, two: %#x",
+ i, t1, t2);
+ }
for (int i = 0; i < TheISA::NumMiscRegs; ++i) {
TheISA::MiscReg t1 = one->readMiscRegNoEffect(i);
TheISA::MiscReg t2 = two->readMiscRegNoEffect(i);
@@ -152,6 +162,12 @@ serialize(ThreadContext &tc, CheckpointOut &cp)
// compatibility.
arrayParamOut(cp, "floatRegs.i", floatRegs, NumFloatRegs);
+ std::vector<TheISA::VecRegContainer> vecRegs(NumVecRegs);
+ for (int i = 0; i < NumVecRegs; ++i) {
+ vecRegs[i] = tc.readVecRegFlat(i);
+ }
+ SERIALIZE_CONTAINER(vecRegs);
+
IntReg intRegs[NumIntRegs];
for (int i = 0; i < NumIntRegs; ++i)
intRegs[i] = tc.readIntRegFlat(i);
@@ -181,6 +197,12 @@ unserialize(ThreadContext &tc, CheckpointIn &cp)
for (int i = 0; i < NumFloatRegs; ++i)
tc.setFloatRegBitsFlat(i, floatRegs[i]);
+ std::vector<TheISA::VecRegContainer> vecRegs(NumVecRegs);
+ UNSERIALIZE_CONTAINER(vecRegs);
+ for (int i = 0; i < NumVecRegs; ++i) {
+ tc.setVecRegFlat(i, vecRegs[i]);
+ }
+
IntReg intRegs[NumIntRegs];
UNSERIALIZE_ARRAY(intRegs, NumIntRegs);
for (int i = 0; i < NumIntRegs; ++i)
diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh
index 43c40481e..66b2f7554 100644
--- a/src/cpu/thread_context.hh
+++ b/src/cpu/thread_context.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2011-2012, 2016 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -100,6 +100,8 @@ class ThreadContext
typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::CCReg CCReg;
typedef TheISA::MiscReg MiscReg;
+ using VecRegContainer = TheISA::VecRegContainer;
+ using VecElem = TheISA::VecElem;
public:
enum Status
@@ -212,6 +214,40 @@ class ThreadContext
virtual FloatRegBits readFloatRegBits(int reg_idx) = 0;
+ /** Read-only access to the vector register identified by reg. */
+ virtual const VecRegContainer& readVecReg(const RegId& reg) const = 0;
+ /** Mutable access to the vector register identified by reg. */
+ virtual VecRegContainer& getWritableVecReg(const RegId& reg) = 0;
+
+ /** Vector Register Lane Interfaces. */
+ /** @{ */
+ /** Reads source vector 8bit operand. */
+ virtual ConstVecLane8
+ readVec8BitLaneReg(const RegId& reg) const = 0;
+
+ /** Reads source vector 16bit operand. */
+ virtual ConstVecLane16
+ readVec16BitLaneReg(const RegId& reg) const = 0;
+
+ /** Reads source vector 32bit operand. */
+ virtual ConstVecLane32
+ readVec32BitLaneReg(const RegId& reg) const = 0;
+
+ /** Reads source vector 64bit operand. */
+ virtual ConstVecLane64
+ readVec64BitLaneReg(const RegId& reg) const = 0;
+
+ /** Write a lane of the destination vector register. */
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::Byte>& val) = 0;
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::TwoByte>& val) = 0;
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::FourByte>& val) = 0;
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::EightByte>& val) = 0;
+ /** @} */
+
+ /** Read-only access to the vector element identified by reg. */
+ virtual const VecElem& readVecElem(const RegId& reg) const = 0;
+
virtual CCReg readCCReg(int reg_idx) = 0;
virtual void setIntReg(int reg_idx, uint64_t val) = 0;
@@ -220,6 +256,10 @@ class ThreadContext
virtual void setFloatRegBits(int reg_idx, FloatRegBits val) = 0;
+ /** Writes val to the vector register identified by reg. */
+ virtual void setVecReg(const RegId& reg, const VecRegContainer& val) = 0;
+
+ /** Writes val to the vector element identified by reg. */
+ virtual void setVecElem(const RegId& reg, const VecElem& val) = 0;
+
virtual void setCCReg(int reg_idx, CCReg val) = 0;
virtual TheISA::PCState pcState() = 0;
@@ -303,6 +343,15 @@ class ThreadContext
virtual FloatRegBits readFloatRegBitsFlat(int idx) = 0;
virtual void setFloatRegBitsFlat(int idx, FloatRegBits val) = 0;
+ /** Vector register accessors that take a flat index instead of a RegId. */
+ virtual const VecRegContainer& readVecRegFlat(int idx) const = 0;
+ virtual VecRegContainer& getWritableVecRegFlat(int idx) = 0;
+ virtual void setVecRegFlat(int idx, const VecRegContainer& val) = 0;
+
+ /** Vector element accessors addressed by register index and element index. */
+ virtual const VecElem& readVecElemFlat(const RegIndex& idx,
+ const ElemIndex& elemIdx) const = 0;
+ virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx,
+ const VecElem& val) = 0;
+
virtual CCReg readCCRegFlat(int idx) = 0;
virtual void setCCRegFlat(int idx, CCReg val) = 0;
/** @} */
@@ -421,6 +470,52 @@ class ProxyThreadContext : public ThreadContext
FloatRegBits readFloatRegBits(int reg_idx)
{ return actualTC->readFloatRegBits(reg_idx); }
+ /** Forwards to actualTC->readVecReg(). */
+ const VecRegContainer& readVecReg(const RegId& reg) const
+ { return actualTC->readVecReg(reg); }
+
+ /** Forwards to actualTC->getWritableVecReg(). */
+ VecRegContainer& getWritableVecReg(const RegId& reg)
+ { return actualTC->getWritableVecReg(reg); }
+
+ /** Vector Register Lane Interfaces. */
+ /** @{ */
+ /** Reads source vector 8bit operand. */
+ ConstVecLane8
+ readVec8BitLaneReg(const RegId& reg) const
+ { return actualTC->readVec8BitLaneReg(reg); }
+
+ /** Reads source vector 16bit operand. */
+ ConstVecLane16
+ readVec16BitLaneReg(const RegId& reg) const
+ { return actualTC->readVec16BitLaneReg(reg); }
+
+ /** Reads source vector 32bit operand. */
+ ConstVecLane32
+ readVec32BitLaneReg(const RegId& reg) const
+ { return actualTC->readVec32BitLaneReg(reg); }
+
+ /** Reads source vector 64bit operand. */
+ ConstVecLane64
+ readVec64BitLaneReg(const RegId& reg) const
+ { return actualTC->readVec64BitLaneReg(reg); }
+
+ /** Write a lane of the destination vector register. */
+ // Each overload hands its arguments unchanged to actualTC.
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::Byte>& val)
+ { return actualTC->setVecLane(reg, val); }
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::TwoByte>& val)
+ { return actualTC->setVecLane(reg, val); }
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::FourByte>& val)
+ { return actualTC->setVecLane(reg, val); }
+ virtual void setVecLane(const RegId& reg,
+ const LaneData<LaneSize::EightByte>& val)
+ { return actualTC->setVecLane(reg, val); }
+ /** @} */
+
+ /** Forwards to actualTC->readVecElem(). */
+ const VecElem& readVecElem(const RegId& reg) const
+ { return actualTC->readVecElem(reg); }
+
CCReg readCCReg(int reg_idx)
{ return actualTC->readCCReg(reg_idx); }
@@ -433,6 +528,12 @@ class ProxyThreadContext : public ThreadContext
void setFloatRegBits(int reg_idx, FloatRegBits val)
{ actualTC->setFloatRegBits(reg_idx, val); }
+ /** Forwards to actualTC->setVecReg(). */
+ void setVecReg(const RegId& reg, const VecRegContainer& val)
+ { actualTC->setVecReg(reg, val); }
+
+ /** Forwards to actualTC->setVecElem(). */
+ void setVecElem(const RegId& reg, const VecElem& val)
+ { actualTC->setVecElem(reg, val); }
+
void setCCReg(int reg_idx, CCReg val)
{ actualTC->setCCReg(reg_idx, val); }
@@ -495,6 +596,23 @@ class ProxyThreadContext : public ThreadContext
void setFloatRegBitsFlat(int idx, FloatRegBits val)
{ actualTC->setFloatRegBitsFlat(idx, val); }
+ /** Forwards to actualTC->readVecRegFlat(). */
+ const VecRegContainer& readVecRegFlat(int id) const
+ { return actualTC->readVecRegFlat(id); }
+
+ /** Forwards to actualTC->getWritableVecRegFlat(). */
+ VecRegContainer& getWritableVecRegFlat(int id)
+ { return actualTC->getWritableVecRegFlat(id); }
+
+ /** Forwards to actualTC->setVecRegFlat(). */
+ void setVecRegFlat(int idx, const VecRegContainer& val)
+ { actualTC->setVecRegFlat(idx, val); }
+
+ /** Forwards to actualTC->readVecElemFlat(). */
+ const VecElem& readVecElemFlat(const RegIndex& id,
+ const ElemIndex& elemIndex) const
+ { return actualTC->readVecElemFlat(id, elemIndex); }
+
+ /** Forwards to actualTC->setVecElemFlat(). */
+ void setVecElemFlat(const RegIndex& id, const ElemIndex& elemIndex,
+ const VecElem& val)
+ { actualTC->setVecElemFlat(id, elemIndex, val); }
+
CCReg readCCRegFlat(int idx)
{ return actualTC->readCCRegFlat(idx); }