50 files changed, 1370 insertions, 101 deletions
diff --git a/src/arch/SConscript b/src/arch/SConscript
index 5ea7a6a75..ed583aa5a 100644
--- a/src/arch/SConscript
+++ b/src/arch/SConscript
@@ -1,6 +1,6 @@
 # -*- mode:python -*-
 
-# Copyright (c) 2016 ARM Limited
+# Copyright (c) 2016-2017 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -226,6 +226,8 @@ Export('ISADesc')
 DebugFlag('IntRegs')
 DebugFlag('FloatRegs')
 DebugFlag('VecRegs')
+DebugFlag('VecPredRegs')
 DebugFlag('CCRegs')
 DebugFlag('MiscRegs')
-CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'CCRegs', 'MiscRegs' ])
+CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'VecRegs', 'VecPredRegs',
+                            'CCRegs', 'MiscRegs' ])
diff --git a/src/arch/alpha/isa.hh b/src/arch/alpha/isa.hh
index 54e12022a..2b183f0e3 100644
--- a/src/arch/alpha/isa.hh
+++ b/src/arch/alpha/isa.hh
@@ -121,6 +121,12 @@ namespace AlphaISA
             return reg;
         }
 
+        int
+        flattenVecPredIndex(int reg) const
+        {
+            return reg;  // dummy: predicate regs not applicable to Alpha
+        }
+
         // dummy
         int
         flattenCCIndex(int reg) const
diff --git a/src/arch/alpha/registers.hh b/src/arch/alpha/registers.hh
index 6c71320b6..218390597 100644
--- a/src/arch/alpha/registers.hh
+++ b/src/arch/alpha/registers.hh
@@ -34,6 +34,7 @@
 #include "arch/alpha/generated/max_inst_regs.hh"
 #include "arch/alpha/ipr.hh"
 #include "arch/generic/types.hh"
+#include "arch/generic/vec_pred_reg.hh"
 #include "arch/generic/vec_reg.hh"
 #include "base/types.hh"
 
@@ -56,14 +57,20 @@ typedef RegVal MiscReg;
 // dummy typedef since we don't have CC regs
 typedef uint8_t CCReg;
 
-// dummy typedefs since we don't have vector regs
-constexpr unsigned NumVecElemPerVecReg = 2;
-using VecElem = uint32_t;
-using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
-using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
-using VecRegContainer = VecReg::Container;
-// This has to be one to prevent warnings that are treated as errors
-constexpr unsigned NumVecRegs = 1;
+// Not applicable to Alpha
+using VecElem = ::DummyVecElem;
+using VecReg = ::DummyVecReg;
+using ConstVecReg = ::DummyConstVecReg;
+using VecRegContainer = ::DummyVecRegContainer;
+constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg;
+constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes;
+
+// Not applicable to Alpha
+using VecPredReg = ::DummyVecPredReg;
+using ConstVecPredReg = ::DummyConstVecPredReg;
+using VecPredRegContainer = ::DummyVecPredRegContainer;
+constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits;
+constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr;
 
 enum MiscRegIndex
 {
@@ -96,6 +103,10 @@ const int NumFloatArchRegs = 32;
 
 const int NumIntRegs = NumIntArchRegs + NumPALShadowRegs;
 const int NumFloatRegs = NumFloatArchRegs;
+const int NumVecRegs = 1;  // Not applicable to Alpha
+                           // (1 to prevent warnings)
+const int NumVecPredRegs = 1;  // Not applicable to Alpha
+                               // (1 to prevent warnings)
 const int NumCCRegs = 0;
 const int NumMiscRegs = NUM_MISCREGS;
 
diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh
index a3e89b544..b98610bfc 100644
--- a/src/arch/arm/isa.hh
+++ b/src/arch/arm/isa.hh
@@ -446,6 +446,9 @@ namespace ArmISA
               case VecElemClass:
                 return RegId(VecElemClass, flattenVecElemIndex(regId.index()),
                              regId.elemIndex());
+              case VecPredRegClass:
+                return RegId(VecPredRegClass,
+                             flattenVecPredIndex(regId.index()));
               case CCRegClass:
                 return RegId(CCRegClass, flattenCCIndex(regId.index()));
               case MiscRegClass:
@@ -508,6 +511,13 @@ namespace ArmISA
         }
 
         int
+        flattenVecPredIndex(int reg) const
+        {
+            assert(reg >= 0);
+            return reg;  // predicate register indices are not remapped
+        }
+
+        int
         flattenCCIndex(int reg) const
         {
             assert(reg >= 0);
diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh
index 8346f454b..8960f9f92 100644
--- a/src/arch/arm/registers.hh
+++ b/src/arch/arm/registers.hh
@@ -47,6 +47,8 @@
 #include "arch/arm/generated/max_inst_regs.hh"
 #include "arch/arm/intregs.hh"
 #include "arch/arm/miscregs.hh"
+#include "arch/arm/types.hh"
+#include "arch/generic/vec_pred_reg.hh"
 #include "arch/generic/vec_reg.hh"
 
 namespace ArmISA {
@@ -66,6 +68,15 @@ using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
 using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
 using VecRegContainer = VecReg::Container;
 
+constexpr size_t VecRegSizeBytes = NumVecElemPerVecReg * sizeof(VecElem);
+
+// Dummy typedefs
+using VecPredReg = ::DummyVecPredReg;
+using ConstVecPredReg = ::DummyConstVecPredReg;
+using VecPredRegContainer = ::DummyVecPredRegContainer;
+constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits;
+constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr;
+
 // condition code register; must be at least 32 bits for FpCondCodes
 typedef uint64_t CCReg;
 
@@ -82,12 +93,14 @@ const int NumVecSpecialRegs = 8;
 const int NumIntRegs = NUM_INTREGS;
 const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs;
 const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs;
+const int NumVecPredRegs = 1;
 const int NumCCRegs = NUM_CCREGS;
 const int NumMiscRegs = NUM_MISCREGS;
 
 #define ISA_HAS_CC_REGS
 
-const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumVecRegs + NumMiscRegs;
+const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumVecRegs +
+    NumVecPredRegs + NumMiscRegs;
 
 // semantically meaningful register indices
 const int ReturnValueReg = 0;
diff --git a/src/arch/generic/vec_pred_reg.hh b/src/arch/generic/vec_pred_reg.hh
new file mode 100644
index 000000000..9ff9915ef
--- /dev/null
+++ b/src/arch/generic/vec_pred_reg.hh
@@ -0,0 +1,404 @@
+// Copyright (c) 2017 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder.  You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Authors: Giacomo Gabrielli
+//          Rekai Gonzalez
+//          Javier Setoain
+
+#ifndef __ARCH_GENERIC_VEC_PRED_REG_HH__
+#define __ARCH_GENERIC_VEC_PRED_REG_HH__
+
+#include <array>
+#include <cassert>
+#include <vector>
+
+#include "arch/generic/vec_reg.hh"
+#include "base/cprintf.hh"
+
+template <size_t NumBits, bool Packed>
+class VecPredRegContainer;
+
+/// Predicate register view.
+///
+/// This generic class implements the View in an MVC pattern, similarly to
+/// @see VecRegT. Since predicates are mainly used in conjunction with vectors
+/// to specify which lanes are active in a vector operation, the class is
+/// templated on the vector element type to simplify ISA definitions.
+/// @tparam VecElem Type of the vector elements.
+/// @tparam NumElems Number of vector elements making up the view.
+/// @tparam Packed True if the predicate register relies on a packed
+/// representation, i.e. adjacent bits refer to different vector elements
+/// irrespective of the vector element size (e.g. this is the case for
+/// AVX-512). If false, the predicate register relies on an unpacked
+/// representation, where each bit refers to the corresponding byte in a vector
+/// register (e.g. this is the case for ARM SVE).
+/// @tparam Const True if the view is read-only, i.e. the underlying container
+/// cannot be modified through the view.
+template <typename VecElem, size_t NumElems, bool Packed, bool Const>
+class VecPredRegT
+{
+  protected:
+    /// Size of the register in bits.
+    static constexpr size_t NUM_BITS = Packed ? NumElems :
+                                                sizeof(VecElem) * NumElems;
+
+  public:
+    /// Container type alias (const-qualified when Const is true).
+    using Container = typename std::conditional<
+        Const,
+        const VecPredRegContainer<NUM_BITS, Packed>,
+        VecPredRegContainer<NUM_BITS, Packed>>::type;
+
+  protected:
+    // Alias for this type
+    using MyClass = VecPredRegT<VecElem, NumElems, Packed, Const>;
+    /// Container corresponding to this view.
+    Container& container;
+
+  public:
+    VecPredRegT(Container& c) : container(c) {}
+
+    /// Reset the register to an all-false value.
+    template<bool Condition = !Const>
+    typename std::enable_if<Condition, void>::type
+    reset() { container.reset(); }
+
+    /// Set the register to an all-true value.
+    template<bool Condition = !Const>
+    typename std::enable_if<Condition, void>::type
+    set() { container.set(); }
+
+    /// Copy the content of another view's container (non-const views only).
+    template<bool Condition = !Const>
+    typename std::enable_if<Condition, MyClass&>::type
+    operator=(const MyClass& that)
+    {
+        container = that.container;
+        return *this;
+    }
+
+    /// Read-only access to the (first) predicate bit of element idx.
+    const bool&
+    operator[](size_t idx) const
+    {
+        return container[idx * (Packed ? 1 : sizeof(VecElem))];
+    }
+
+    /// Writable access to the (first) predicate bit of element idx.
+    template<bool Condition = !Const>
+    typename std::enable_if<Condition, bool&>::type
+    operator[](size_t idx)
+    {
+        return container[idx * (Packed ? 1 : sizeof(VecElem))];
+    }
+
+    /// Return an element of the predicate register as it appears
+    /// in the raw (untyped) internal representation.
+    uint8_t
+    get_raw(size_t idx) const
+    {
+        return container.get_bits(idx * (Packed ? 1 : sizeof(VecElem)),
+                (Packed ? 1 : sizeof(VecElem)));
+    }
+
+    /// Write a raw value in an element of the predicate register.
+    template<bool Condition = !Const>
+    typename std::enable_if<Condition, void>::type
+    set_raw(size_t idx, uint8_t val)
+    {
+        container.set_bits(idx * (Packed ? 1 : sizeof(VecElem)),
+                (Packed ? 1 : sizeof(VecElem)), val);
+    }
+
+    /// Equality operator, required to compare thread contexts.
+    template<typename VE2, size_t NE2, bool P2, bool C2>
+    bool
+    operator==(const VecPredRegT<VE2, NE2, P2, C2>& that) const
+    {
+        return container == that.container;
+    }
+
+    /// Inequality operator, required to compare thread contexts.
+    template<typename VE2, size_t NE2, bool P2, bool C2>
+    bool
+    operator!=(const VecPredRegT<VE2, NE2, P2, C2>& that) const
+    {
+        return !operator==(that);
+    }
+
+    friend std::ostream&
+    operator<<(std::ostream& os, const MyClass& p)
+    {
+        // 0-sized is not allowed, so bit 0 always exists. It is emitted
+        // here without a separator; the loop therefore starts at bit 1 so
+        // that every bit of the container is printed exactly once.
+        os << '[' << (p.container[0] ? 1 : 0);
+        for (size_t i = 1; i < p.NUM_BITS; ++i)
+            os << " " << (p.container[i] ? 1 : 0);
+        return os << ']';
+    }
+
+    /// Returns a string representation of the register content.
+    const std::string print() const { return csprintf("%s", *this); }
+
+    /// Returns true if the first active element of the register is true.
+    /// @param mask Input mask used to filter the predicates to be tested.
+    /// @param actual_num_elems Actual number of vector elements considered for
+    /// the test (corresponding to the current vector length).
+    template <bool MC>
+    bool
+    firstActive(const VecPredRegT<VecElem, NumElems, Packed, MC>& mask,
+                size_t actual_num_elems) const
+    {
+        assert(actual_num_elems <= NumElems);
+        for (size_t i = 0; i < actual_num_elems; ++i) {
+            if (mask[i])
+                return (*this)[i];
+        }
+        return false;
+    }
+
+    /// Returns true if there are no active elements in the register.
+    /// @param mask Input mask used to filter the predicates to be tested.
+    /// @param actual_num_elems Actual number of vector elements considered for
+    /// the test (corresponding to the current vector length).
+    template <bool MC>
+    bool
+    noneActive(const VecPredRegT<VecElem, NumElems, Packed, MC>& mask,
+               size_t actual_num_elems) const
+    {
+        assert(actual_num_elems <= NumElems);
+        for (size_t i = 0; i < actual_num_elems; ++i) {
+            if (mask[i] && (*this)[i])
+                return false;
+        }
+        return true;
+    }
+
+    /// Returns true if the last active element of the register is true.
+    /// @param mask Input mask used to filter the predicates to be tested.
+    /// @param actual_num_elems Actual number of vector elements considered for
+    /// the test (corresponding to the current vector length).
+    template <bool MC>
+    bool
+    lastActive(const VecPredRegT<VecElem, NumElems, Packed, MC>& mask,
+               size_t actual_num_elems) const
+    {
+        assert(actual_num_elems <= NumElems);
+        for (size_t i = actual_num_elems; i-- > 0;) {
+            if (mask[i])
+                return (*this)[i];
+        }
+        return false;
+    }
+};
+
+/// Generic predicate register container.
+///
+/// This generic class implements the Model in an MVC pattern, similarly to
+/// @see VecRegContainer.
+/// @tparam NumBits Size of the container in bits.
+/// @tparam Packed Representation of the predicate bits, @see VecPredRegT.
+template <size_t NumBits, bool Packed>
+class VecPredRegContainer
+{
+    static_assert(NumBits > 0,
+                  "Size of a predicate register must be > 0");
+
+  public:
+    static constexpr size_t NUM_BITS = NumBits;
+    using Container = std::array<bool, NumBits>;
+
+  private:
+    Container container;
+    // Alias for this type
+    using MyClass = VecPredRegContainer<NumBits, Packed>;
+
+  public:
+    VecPredRegContainer() {}  // NOTE: does not initialize the bits
+
+    MyClass&
+    operator=(const MyClass& that)
+    {
+        if (&that == this)
+            return *this;
+        container = that.container;
+        return *this;
+    }
+
+    /// Assign from a vector holding one byte per bit (de-serialization).
+    MyClass&
+    operator=(const std::vector<uint8_t>& that)
+    {
+        assert(that.size() == NUM_BITS);
+        std::copy(that.begin(), that.end(), container.begin());
+        return *this;
+    }
+
+    /// Resets the predicate register to an all-false register.
+    void
+    reset()
+    {
+        container.fill(false);
+    }
+
+    /// Sets the predicate register to an all-true value.
+    void
+    set()
+    {
+        container.fill(true);
+    }
+
+    /// Equality operator, required to compare thread contexts.
+    template<size_t N2, bool P2>
+    inline bool
+    operator==(const VecPredRegContainer<N2, P2>& that) const
+    {
+        return NumBits == N2 && Packed == P2 && container == that.container;
+    }
+
+    /// Inequality operator, required to compare thread contexts.
+    template<size_t N2, bool P2>
+    bool
+    operator!=(const VecPredRegContainer<N2, P2>& that) const
+    {
+        return !operator==(that);
+    }
+
+    /// Returns a reference to a specific element of the internal container.
+    bool& operator[](size_t idx) { return container[idx]; }
+
+    /// Returns a const reference to a specific element of the internal
+    /// container.
+    const bool& operator[](size_t idx) const { return container[idx]; }
+
+    /// Returns nbits (1 to 8) consecutive bits packed into a byte, with
+    /// container[idx] as the least significant bit.
+    uint8_t
+    get_bits(size_t idx, uint8_t nbits) const
+    {
+        assert(nbits > 0 && nbits <= 8 && (idx + nbits - 1) < NumBits);
+        uint8_t v = 0;
+        idx = idx + nbits - 1;
+        for (int i = 0; i < nbits; ++i, --idx) {
+            v <<= 1;
+            v |= container[idx];
+        }
+        return v;
+    }
+
+    /// Sets nbits (1 to 8) consecutive bits from bval, with the least
+    /// significant bit of bval written to container[idx].
+    void
+    set_bits(size_t idx, uint8_t nbits, uint8_t bval)
+    {
+        assert(nbits > 0 && nbits <= 8 && (idx + nbits - 1) < NumBits);
+        for (int i = 0; i < nbits; ++i, ++idx) {
+            container[idx] = bval & 1;
+            bval >>= 1;
+        }
+    }
+
+    /// Returns a string representation of the register content.
+    const std::string print() const { return csprintf("%s", *this); }
+
+    friend std::ostream&
+    operator<<(std::ostream& os, const MyClass& v)
+    {
+        for (auto b: v.container) {
+            os << csprintf("%d", b);
+        }
+        return os;
+    }
+
+    /// Create a view of this container.
+    ///
+    /// If NumElems is provided, the size of the container is bounds-checked,
+    /// otherwise the size is inferred from the container size.
+    /// @tparam VecElem Type of the vector elements.
+    /// @tparam NumElems Number of vector elements making up the view.
+    /// @{
+    template <typename VecElem,
+              size_t NumElems = (Packed ? NumBits : NumBits / sizeof(VecElem))>
+    VecPredRegT<VecElem, NumElems, Packed, true> as() const
+    {
+        static_assert((Packed && NumElems <= NumBits) ||
+                      (!Packed &&
+                       NumBits % sizeof(VecElem) == 0 &&
+                       sizeof(VecElem) * NumElems <= NumBits),
+                      "Container size incompatible with view size");
+        return VecPredRegT<VecElem, NumElems, Packed, true>(*this);
+    }
+
+    template <typename VecElem,
+              size_t NumElems = (Packed ? NumBits : NumBits / sizeof(VecElem))>
+    VecPredRegT<VecElem, NumElems, Packed, false> as()
+    {
+        static_assert((Packed && NumElems <= NumBits) ||
+                      (!Packed &&
+                       NumBits % sizeof(VecElem) == 0 &&
+                       sizeof(VecElem) * NumElems <= NumBits),
+                      "Container size incompatible with view size");
+        return VecPredRegT<VecElem, NumElems, Packed, false>(*this);
+    }
+    /// @}
+};
+
+/// De-serialization helper: sets p[i] to (value[i] == '1') for each
+/// character of value that fits in the register. Returns false if value
+/// holds more characters than the register has bits, true otherwise.
+template <size_t NumBits, bool Packed>
+inline bool
+to_number(const std::string& value, VecPredRegContainer<NumBits, Packed>& p)
+{
+    for (size_t i = 0; i < value.size() && i < NumBits; ++i)
+        p[i] = (value[i] == '1');
+    return value.size() <= NumBits;
+}
+
+/// Dummy type aliases and constants for architectures that do not implement
+/// vector predicate registers.
+/// @{
+constexpr bool DummyVecPredRegHasPackedRepr = false;
+using DummyVecPredReg = VecPredRegT<DummyVecElem, DummyNumVecElemPerVecReg,
+                                    DummyVecPredRegHasPackedRepr, false>;
+using DummyConstVecPredReg = VecPredRegT<DummyVecElem,
+                                         DummyNumVecElemPerVecReg,
+                                         DummyVecPredRegHasPackedRepr, true>;
+using DummyVecPredRegContainer = DummyVecPredReg::Container;
+constexpr size_t DummyVecPredRegSizeBits = 8;
+/// @}
+
+#endif  // __ARCH_GENERIC_VEC_PRED_REG_HH__
diff --git a/src/arch/generic/vec_reg.hh b/src/arch/generic/vec_reg.hh
index 7145af4cf..f26a8c8ad 100644
--- a/src/arch/generic/vec_reg.hh
+++ b/src/arch/generic/vec_reg.hh
@@ -648,4 +648,18 @@ to_number(const std::string& value, VecRegContainer<Sz>& v)
 }
 /** @} */
 
+/**
+ * Dummy type aliases and constants for architectures that do not implement
+ * vector registers.
+ */
+/** @{ */
+using DummyVecElem = uint32_t;
+constexpr unsigned DummyNumVecElemPerVecReg = 2;
+using DummyVecReg = VecRegT<DummyVecElem, DummyNumVecElemPerVecReg, false>;
+using DummyConstVecReg = VecRegT<DummyVecElem, DummyNumVecElemPerVecReg, true>;
+using DummyVecRegContainer = DummyVecReg::Container;
+constexpr size_t DummyVecRegSizeBytes = DummyNumVecElemPerVecReg *
+    sizeof(DummyVecElem);
+/** @} */
+
 #endif /* __ARCH_GENERIC_VEC_REG_HH__ */
diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py
index 755f966eb..16004c009 100755
--- a/src/arch/isa_parser.py
+++ b/src/arch/isa_parser.py
@@ -490,6 +490,9 @@ class Operand(object):
     def isVecElem(self):
         return 0
 
+    def isVecPredReg(self):
+        return 0
+
     def isPCState(self):
         return 0
 
@@ -795,10 +798,9 @@ class VecRegOperand(Operand):
 
         wb = '''
         if (traceData) {
-            warn_once("Vectors not supported yet in tracedata");
-            /*traceData->setData(final_val);*/
+            traceData->setData(tmp_d%d);
         }
-        '''
+        ''' % self.dest_reg_idx
         return wb
 
     def finalize(self, predRead, predWrite):
@@ -860,6 +862,88 @@ class VecElemOperand(Operand):
 
         return c_write
 
+class VecPredRegOperand(Operand):
+    reg_class = 'VecPredRegClass'
+
+    def __init__(self, parser, full_name, ext, is_src, is_dest):
+        Operand.__init__(self, parser, full_name, ext, is_src, is_dest)
+        self.parser = parser
+
+    def isReg(self):
+        return 1
+
+    def isVecPredReg(self):
+        return 1
+
+    def makeDecl(self):
+        return ''
+
+    def makeConstructor(self, predRead, predWrite):
+        c_src = ''
+        c_dest = ''
+
+        if self.is_src:
+            c_src = src_reg_constructor % (self.reg_class, self.reg_spec)
+
+        if self.is_dest:
+            c_dest = dst_reg_constructor % (self.reg_class, self.reg_spec)
+            c_dest += '\n\t_numVecPredDestRegs++;'
+
+        return c_src + c_dest
+
+    def makeRead(self, predRead):
+        func = 'readVecPredRegOperand'
+        if self.read_code != None:
+            return self.buildReadCode(func)
+
+        if predRead:
+            rindex = '_sourceIndex++'
+        else:
+            rindex = '%d' % self.src_reg_idx
+
+        c_read =  '\t\t%s& tmp_s%s = xc->%s(this, %s);\n' % (
+                'const TheISA::VecPredRegContainer', rindex, func, rindex)
+        if self.ext:
+            c_read += '\t\tauto %s = tmp_s%s.as<%s>();\n' % (
+                    self.base_name, rindex,
+                    self.parser.operandTypeMap[self.ext])
+        return c_read
+
+    def makeReadW(self, predWrite):
+        func = 'getWritableVecPredRegOperand'
+        if self.read_code != None:
+            return self.buildReadCode(func)
+
+        if predWrite:
+            rindex = '_destIndex++'
+        else:
+            rindex = '%d' % self.dest_reg_idx
+
+        c_readw = '\t\t%s& tmp_d%s = xc->%s(this, %s);\n' % (
+                'TheISA::VecPredRegContainer', rindex, func, rindex)
+        if self.ext:
+            c_readw += '\t\tauto %s = tmp_d%s.as<%s>();\n' % (
+                    self.base_name, rindex,
+                    self.parser.operandTypeMap[self.ext])
+        return c_readw
+
+    def makeWrite(self, predWrite):
+        func = 'setVecPredRegOperand'
+        if self.write_code != None:
+            return self.buildWriteCode(func)
+
+        wb = '''
+        if (traceData) {
+            traceData->setData(tmp_d%d);
+        }
+        ''' % self.dest_reg_idx
+        return wb
+
+    def finalize(self, predRead, predWrite):
+        super(VecPredRegOperand, self).finalize(predRead, predWrite)
+        if self.is_dest:
+            self.op_rd = self.makeReadW(predWrite) + self.op_rd
+
 class CCRegOperand(Operand):
     reg_class = 'CCRegClass'
 
@@ -1113,6 +1197,7 @@ class OperandList(object):
         self.numFPDestRegs = 0
         self.numIntDestRegs = 0
         self.numVecDestRegs = 0
+        self.numVecPredDestRegs = 0
         self.numCCDestRegs = 0
         self.numMiscDestRegs = 0
         self.memOperand = None
@@ -1136,6 +1221,8 @@ class OperandList(object):
                         self.numIntDestRegs += 1
                     elif op_desc.isVecReg():
                         self.numVecDestRegs += 1
+                    elif op_desc.isVecPredReg():
+                        self.numVecPredDestRegs += 1
                     elif op_desc.isCCReg():
                         self.numCCDestRegs += 1
                     elif op_desc.isControlReg():
@@ -1344,6 +1431,7 @@ class InstObjParams(object):
         header += '\n\t_numFPDestRegs = 0;'
         header += '\n\t_numVecDestRegs = 0;'
         header += '\n\t_numVecElemDestRegs = 0;'
+        header += '\n\t_numVecPredDestRegs = 0;'
         header += '\n\t_numIntDestRegs = 0;'
         header += '\n\t_numCCDestRegs = 0;'
 
diff --git a/src/arch/mips/isa.hh b/src/arch/mips/isa.hh
index ffcb3f1dc..cea2d5412 100644
--- a/src/arch/mips/isa.hh
+++ b/src/arch/mips/isa.hh
@@ -165,6 +165,12 @@ namespace MipsISA
             return reg;
         }
 
+        int
+        flattenVecPredIndex(int reg) const
+        {
+            return reg;  // dummy: predicate regs not applicable to MIPS
+        }
+
         // dummy
         int
         flattenCCIndex(int reg) const
diff --git a/src/arch/mips/registers.hh b/src/arch/mips/registers.hh
index 6f7097b08..633199c94 100644
--- a/src/arch/mips/registers.hh
+++ b/src/arch/mips/registers.hh
@@ -32,6 +32,7 @@
 #ifndef __ARCH_MIPS_REGISTERS_HH__
 #define __ARCH_MIPS_REGISTERS_HH__
 
+#include "arch/generic/vec_pred_reg.hh"
 #include "arch/generic/vec_reg.hh"
 #include "arch/mips/generated/max_inst_regs.hh"
 #include "base/logging.hh"
@@ -55,6 +56,10 @@ const int NumFloatSpecialRegs = 5;
 const int MaxShadowRegSets = 16; // Maximum number of shadow register sets
 const int NumIntRegs = NumIntArchRegs + NumIntSpecialRegs;        //HI & LO Regs
 const int NumFloatRegs = NumFloatArchRegs + NumFloatSpecialRegs;//
+const int NumVecRegs = 1;  // Not applicable to MIPS
+                           // (1 to prevent warnings)
+const int NumVecPredRegs = 1;  // Not applicable to MIPS
+                               // (1 to prevent warnings)
 const int NumCCRegs = 0;
 
 const uint32_t MIPS32_QNAN = 0x7fbfffff;
@@ -289,14 +294,20 @@ typedef RegVal MiscReg;
 // dummy typedef since we don't have CC regs
 typedef uint8_t CCReg;
 
-// dummy typedefs since we don't have vector regs
-constexpr unsigned NumVecElemPerVecReg = 2;
-using VecElem = uint32_t;
-using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
-using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
-using VecRegContainer = VecReg::Container;
-// This has to be one to prevent warnings that are treated as errors
-constexpr unsigned NumVecRegs = 1;
+// Not applicable to MIPS
+using VecElem = ::DummyVecElem;
+using VecReg = ::DummyVecReg;
+using ConstVecReg = ::DummyConstVecReg;
+using VecRegContainer = ::DummyVecRegContainer;
+constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg;
+constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes;
+
+// Not applicable to MIPS
+using VecPredReg = ::DummyVecPredReg;
+using ConstVecPredReg = ::DummyConstVecPredReg;
+using VecPredRegContainer = ::DummyVecPredRegContainer;
+constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits;
+constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr;
 
 } // namespace MipsISA
 
diff --git a/src/arch/null/registers.hh b/src/arch/null/registers.hh
index fb815af4a..ff9e0cda6 100644
--- a/src/arch/null/registers.hh
+++ b/src/arch/null/registers.hh
@@ -40,6 +40,7 @@
 #ifndef __ARCH_NULL_REGISTERS_HH__
 #define __ARCH_NULL_REGISTERS_HH__
 
+#include "arch/generic/vec_pred_reg.hh"
 #include "arch/generic/vec_reg.hh"
 #include "arch/types.hh"
 #include "base/types.hh"
@@ -52,14 +53,20 @@ typedef uint8_t CCReg;
 typedef RegVal MiscReg;
 const RegIndex ZeroReg = 0;
 
-// dummy typedefs since we don't have vector regs
-constexpr unsigned NumVecElemPerVecReg = 2;
-using VecElem = uint32_t;
-using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
-using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
-using VecRegContainer = VecReg::Container;
-// This has to be one to prevent warnings that are treated as errors
-constexpr unsigned NumVecRegs = 1;
+// Not applicable to null
+using VecElem = ::DummyVecElem;
+using VecReg = ::DummyVecReg;
+using ConstVecReg = ::DummyConstVecReg;
+using VecRegContainer = ::DummyVecRegContainer;
+constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg;
+constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes;
+
+// Not applicable to null
+using VecPredReg = ::DummyVecPredReg;
+using ConstVecPredReg = ::DummyConstVecPredReg;
+using VecPredRegContainer = ::DummyVecPredRegContainer;
+constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits;
+constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr;
 
 }
 
diff --git a/src/arch/power/isa.hh b/src/arch/power/isa.hh
index 4e9fdb00a..3f26f57de 100644
--- a/src/arch/power/isa.hh
+++ b/src/arch/power/isa.hh
@@ -113,6 +113,12 @@ class ISA : public SimObject
         return reg;
     }
 
+    int
+    flattenVecPredIndex(int reg) const
+    {
+        return reg;  // dummy: predicate regs not applicable to Power
+    }
+
     // dummy
     int
     flattenCCIndex(int reg) const
diff --git a/src/arch/power/registers.hh b/src/arch/power/registers.hh
index 989b4c52a..e8de218e7 100644
--- a/src/arch/power/registers.hh
+++ b/src/arch/power/registers.hh
@@ -31,6 +31,7 @@
 #ifndef __ARCH_POWER_REGISTERS_HH__
 #define __ARCH_POWER_REGISTERS_HH__
 
+#include "arch/generic/vec_pred_reg.hh"
 #include "arch/generic/vec_reg.hh"
 #include "arch/power/generated/max_inst_regs.hh"
 #include "arch/power/miscregs.hh"
@@ -54,14 +55,20 @@ typedef RegVal MiscReg;
 // dummy typedef since we don't have CC regs
 typedef uint8_t CCReg;
 
-// dummy typedefs since we don't have vector regs
-constexpr unsigned NumVecElemPerVecReg = 2;
-using VecElem = uint32_t;
-using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
-using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
-using VecRegContainer = VecReg::Container;
-// This has to be one to prevent warnings that are treated as errors
-constexpr unsigned NumVecRegs = 1;
+// Not applicable to Power
+using VecElem = ::DummyVecElem;
+using VecReg = ::DummyVecReg;
+using ConstVecReg = ::DummyConstVecReg;
+using VecRegContainer = ::DummyVecRegContainer;
+constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg;
+constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes;
+
+// Not applicable to Power
+using VecPredReg = ::DummyVecPredReg;
+using ConstVecPredReg = ::DummyConstVecPredReg;
+using VecPredRegContainer = ::DummyVecPredRegContainer;
+constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits;
+constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr;
 
 // Constants Related to the number of registers
 const int NumIntArchRegs = 32;
@@ -75,6 +82,10 @@ const int NumInternalProcRegs = 0;
 
 const int NumIntRegs = NumIntArchRegs + NumIntSpecialRegs;
 const int NumFloatRegs = NumFloatArchRegs + NumFloatSpecialRegs;
+const int NumVecRegs = 1;  // Not applicable to Power
+                           // (1 to prevent warnings)
+const int NumVecPredRegs = 1;  // Not applicable to Power
+                               // (1 to prevent warnings)
 const int NumCCRegs = 0;
 const int NumMiscRegs = NUM_MISCREGS;
 
diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh
index 2602f6dde..0107f8e92 100644
--- a/src/arch/riscv/isa.hh
+++ b/src/arch/riscv/isa.hh
@@ -84,6 +84,7 @@ class ISA : public SimObject
     int flattenFloatIndex(int reg) const { return reg; }
     int flattenVecIndex(int reg) const { return reg; }
     int flattenVecElemIndex(int reg) const { return reg; }
+    int flattenVecPredIndex(int reg) const { return reg; }
     int flattenCCIndex(int reg) const { return reg; }
     int flattenMiscIndex(int reg) const { return reg; }
 
diff --git a/src/arch/riscv/registers.hh b/src/arch/riscv/registers.hh
index 2de154e22..a67274221 100644
--- a/src/arch/riscv/registers.hh
+++ b/src/arch/riscv/registers.hh
@@ -52,6 +52,7 @@
 #include <vector>
 
 #include "arch/generic/types.hh"
+#include "arch/generic/vec_pred_reg.hh"
 #include "arch/generic/vec_reg.hh"
 #include "arch/isa_traits.hh"
 #include "arch/riscv/generated/max_inst_regs.hh"
@@ -68,19 +69,31 @@ typedef RegVal FloatRegBits;
 typedef uint8_t CCReg; // Not applicable to Riscv
 typedef RegVal MiscReg;
 
-// dummy typedefs since we don't have vector regs
-const unsigned NumVecElemPerVecReg = 2;
-using VecElem = uint32_t;
-using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
-using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
-using VecRegContainer = VecReg::Container;
+// Not applicable to RISC-V
+using VecElem = ::DummyVecElem;
+using VecReg = ::DummyVecReg;
+using ConstVecReg = ::DummyConstVecReg;
+using VecRegContainer = ::DummyVecRegContainer;
+constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg;
+constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes;
+
+// Not applicable to RISC-V
+using VecPredReg = ::DummyVecPredReg;
+using ConstVecPredReg = ::DummyConstVecPredReg;
+using VecPredRegContainer = ::DummyVecPredRegContainer;
+constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits;
+constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr;
 
 const int NumIntArchRegs = 32;
 const int NumMicroIntRegs = 1;
 const int NumIntRegs = NumIntArchRegs + NumMicroIntRegs;
 const int NumFloatRegs = 32;
-// This has to be one to prevent warnings that are treated as errors
-const unsigned NumVecRegs = 1;
+
+const unsigned NumVecRegs = 1;  // Not applicable to RISC-V
+                                // (1 to prevent warnings)
+const int NumVecPredRegs = 1;  // Not applicable to RISC-V
+                               // (1 to prevent warnings)
+
 const int NumCCRegs = 0;
 
 // Semantically meaningful register indices
diff --git a/src/arch/sparc/isa.hh b/src/arch/sparc/isa.hh
index 8ad729862..6cda32038 100644
--- a/src/arch/sparc/isa.hh
+++ b/src/arch/sparc/isa.hh
@@ -234,6 +234,12 @@ class ISA : public SimObject
         return reg;
     }
 
+    int
+    flattenVecPredIndex(int reg) const
+    {
+        return reg;
+    }
+
     // dummy
     int
     flattenCCIndex(int reg) const
diff --git a/src/arch/sparc/registers.hh b/src/arch/sparc/registers.hh
index 5f12b98cb..d9b182e7f 100644
--- a/src/arch/sparc/registers.hh
+++ b/src/arch/sparc/registers.hh
@@ -32,6 +32,7 @@
 #ifndef __ARCH_SPARC_REGISTERS_HH__
 #define __ARCH_SPARC_REGISTERS_HH__
 
+#include "arch/generic/vec_pred_reg.hh"
 #include "arch/generic/vec_reg.hh"
 #include "arch/sparc/generated/max_inst_regs.hh"
 #include "arch/sparc/miscregs.hh"
@@ -48,14 +49,20 @@ using SparcISAInst::MaxMiscDestRegs;
 // dummy typedef since we don't have CC regs
 typedef uint8_t CCReg;
 
-// dummy typedefs since we don't have vector regs
-constexpr unsigned NumVecElemPerVecReg = 2;
-using VecElem = uint32_t;
-using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
-using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
-using VecRegContainer = VecReg::Container;
-// This has to be one to prevent warnings that are treated as errors
-constexpr unsigned NumVecRegs = 1;
+// Not applicable to SPARC
+using VecElem = ::DummyVecElem;
+using VecReg = ::DummyVecReg;
+using ConstVecReg = ::DummyConstVecReg;
+using VecRegContainer = ::DummyVecRegContainer;
+constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg;
+constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes;
+
+// Not applicable to SPARC
+using VecPredReg = ::DummyVecPredReg;
+using ConstVecPredReg = ::DummyConstVecPredReg;
+using VecPredRegContainer = ::DummyVecPredRegContainer;
+constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits;
+constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr;
 
 // semantically meaningful register indices
 const int ZeroReg = 0;      // architecturally meaningful
@@ -70,6 +77,10 @@ const int SyscallPseudoReturnReg = 9;
 
 const int NumIntArchRegs = 32;
 const int NumIntRegs = (MaxGL + 1) * 8 + NWindows * 16 + NumMicroIntRegs;
+const int NumVecRegs = 1;  // Not applicable to SPARC
+                           // (1 to prevent warnings)
+const int NumVecPredRegs = 1;  // Not applicable to SPARC
+                               // (1 to prevent warnings)
 const int NumCCRegs = 0;
 
 const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumMiscRegs;
diff --git a/src/arch/x86/isa.hh b/src/arch/x86/isa.hh
index b61face09..7ad464643 100644
--- a/src/arch/x86/isa.hh
+++ b/src/arch/x86/isa.hh
@@ -117,6 +117,12 @@ namespace X86ISA
         }
 
         int
+        flattenVecPredIndex(int reg) const
+        {
+            return reg;
+        }
+
+        int
         flattenCCIndex(int reg) const
         {
             return reg;
diff --git a/src/arch/x86/registers.hh b/src/arch/x86/registers.hh
index 509f7a111..893822263 100644
--- a/src/arch/x86/registers.hh
+++ b/src/arch/x86/registers.hh
@@ -41,6 +41,7 @@
 #ifndef __ARCH_X86_REGISTERS_HH__
 #define __ARCH_X86_REGISTERS_HH__
 
+#include "arch/generic/vec_pred_reg.hh"
 #include "arch/generic/vec_reg.hh"
 #include "arch/x86/generated/max_inst_regs.hh"
 #include "arch/x86/regs/int.hh"
@@ -77,6 +78,11 @@ enum DependenceTags {
     Max_Reg_Index = Misc_Reg_Base + NumMiscRegs
 };
 
+const int NumVecRegs = 1;  // Not applicable to x86
+                           // (1 to prevent warnings)
+const int NumVecPredRegs = 1;  // Not applicable to x86
+                               // (1 to prevent warnings)
+
 // semantically meaningful register indices
 //There is no such register in X86
 const int ZeroReg = NUM_INTREGS;
@@ -94,14 +100,20 @@ typedef RegVal IntReg;
 typedef uint64_t CCReg;
 typedef RegVal MiscReg;
 
-// dummy typedefs since we don't have vector regs
-constexpr unsigned NumVecElemPerVecReg = 2;
-using VecElem = uint32_t;
-using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
-using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
-using VecRegContainer = VecReg::Container;
-// This has to be one to prevent warnings that are treated as errors
-constexpr unsigned NumVecRegs = 1;
+// Not applicable to x86
+using VecElem = ::DummyVecElem;
+using VecReg = ::DummyVecReg;
+using ConstVecReg = ::DummyConstVecReg;
+using VecRegContainer = ::DummyVecRegContainer;
+constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg;
+constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes;
+
+// Not applicable to x86
+using VecPredReg = ::DummyVecPredReg;
+using ConstVecPredReg = ::DummyConstVecPredReg;
+using VecPredRegContainer = ::DummyVecPredRegContainer;
+constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits;
+constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr;
 
 //These floating point types are correct for mmx, but not
 //technically for x87 (80 bits) or at all for xmm (128 bits)
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index d81b58bdf..b87fd8b4e 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -584,6 +584,11 @@ class BaseDynInst : public ExecContext, public RefCounted
     {
         return staticInst->numVecElemDestRegs();
     }
+    int8_t
+    numVecPredDestRegs() const
+    {
+        return staticInst->numVecPredDestRegs();
+    }
 
     /** Returns the logical register index of the i'th destination register. */
     const RegId& destRegIdx(int i) const { return staticInst->destRegIdx(i); }
@@ -638,6 +643,16 @@ class BaseDynInst : public ExecContext, public RefCounted
                         InstResult::ResultType::VecElem));
         }
     }
+
+    /** Predicate result. */
+    template<typename T>
+    void setVecPredResult(T&& t)
+    {
+        if (instFlags[RecordResult]) {
+            instResult.push(InstResult(std::forward<T>(t),
+                            InstResult::ResultType::VecPredReg));
+        }
+    }
     /** @} */
 
     /** Records an integer register being set to a value. */
@@ -672,6 +687,13 @@ class BaseDynInst : public ExecContext, public RefCounted
         setVecElemResult(val);
     }
 
+    /** Record a vector predicate register being set to a value */
+    void setVecPredRegOperand(const StaticInst *si, int idx,
+                              const VecPredRegContainer& val)
+    {
+        setVecPredResult(val);
+    }
+
     /** Records that one of the source registers is ready. */
     void markSrcRegReady();
 
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index 4468689bd..9d6061ad8 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2016 ARM Limited
+ * Copyright (c) 2011, 2016-2017 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -304,6 +304,22 @@ class CheckerCPU : public BaseCPU, public ExecContext
         return thread->readVecElem(reg);
     }
 
+    const VecPredRegContainer&
+    readVecPredRegOperand(const StaticInst *si, int idx) const override
+    {
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecPredReg());
+        return thread->readVecPredReg(reg);
+    }
+
+    VecPredRegContainer&
+    getWritableVecPredRegOperand(const StaticInst *si, int idx) override
+    {
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecPredReg());
+        return thread->getWritableVecPredReg(reg);
+    }
+
     CCReg
     readCCRegOperand(const StaticInst *si, int idx) override
     {
@@ -336,6 +352,14 @@ class CheckerCPU : public BaseCPU, public ExecContext
                                InstResult::ResultType::VecElem));
     }
 
+    template<typename T>
+    void
+    setVecPredResult(T&& t)
+    {
+        result.push(InstResult(std::forward<T>(t),
+                               InstResult::ResultType::VecPredReg));
+    }
+
     void
     setIntRegOperand(const StaticInst *si, int idx, RegVal val) override
     {
@@ -383,6 +407,15 @@ class CheckerCPU : public BaseCPU, public ExecContext
         setVecElemResult(val);
     }
 
+    void setVecPredRegOperand(const StaticInst *si, int idx,
+                              const VecPredRegContainer& val) override
+    {
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecPredReg());
+        thread->setVecPredReg(reg, val);
+        setVecPredResult(val);
+    }
+
     bool readPredicate() const override { return thread->readPredicate(); }
 
     void
diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh
index b5a2079ea..8ce5a740d 100644
--- a/src/cpu/checker/thread_context.hh
+++ b/src/cpu/checker/thread_context.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012, 2016 ARM Limited
+ * Copyright (c) 2011-2012, 2016-2018 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -263,6 +263,12 @@ class CheckerThreadContext : public ThreadContext
     const VecElem& readVecElem(const RegId& reg) const
     { return actualTC->readVecElem(reg); }
 
+    const VecPredRegContainer& readVecPredReg(const RegId& reg) const override
+    { return actualTC->readVecPredReg(reg); }
+
+    VecPredRegContainer& getWritableVecPredReg(const RegId& reg) override
+    { return actualTC->getWritableVecPredReg(reg); }
+
     CCReg readCCReg(int reg_idx)
     { return actualTC->readCCReg(reg_idx); }
 
@@ -295,6 +301,13 @@ class CheckerThreadContext : public ThreadContext
     }
 
     void
+    setVecPredReg(const RegId& reg, const VecPredRegContainer& val)
+    {
+        actualTC->setVecPredReg(reg, val);
+        checkerTC->setVecPredReg(reg, val);
+    }
+
+    void
     setCCReg(int reg_idx, CCReg val)
     {
         actualTC->setCCReg(reg_idx, val);
@@ -428,6 +441,15 @@ class CheckerThreadContext : public ThreadContext
                         const ElemIndex& elem_idx, const VecElem& val)
     { actualTC->setVecElemFlat(idx, elem_idx, val); }
 
+    const VecPredRegContainer& readVecPredRegFlat(int idx) const override
+    { return actualTC->readVecPredRegFlat(idx); }
+
+    VecPredRegContainer& getWritableVecPredRegFlat(int idx) override
+    { return actualTC->getWritableVecPredRegFlat(idx); }
+
+    void setVecPredRegFlat(int idx, const VecPredRegContainer& val) override
+    { actualTC->setVecPredRegFlat(idx, val); }
+
     CCReg readCCRegFlat(int idx)
     { return actualTC->readCCRegFlat(idx); }
 
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh
index 75f428b87..87af91623 100644
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2016 ARM Limited
+ * Copyright (c) 2014, 2016-2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -77,6 +77,7 @@ class ExecContext {
     typedef TheISA::CCReg CCReg;
     using VecRegContainer = TheISA::VecRegContainer;
     using VecElem = TheISA::VecElem;
+    using VecPredRegContainer = TheISA::VecPredRegContainer;
 
   public:
     /**
@@ -168,6 +169,22 @@ class ExecContext {
                                    const VecElem val) = 0;
     /** @} */
 
+    /** Predicate registers interface. */
+    /** @{ */
+    /** Reads source predicate register operand. */
+    virtual const VecPredRegContainer&
+    readVecPredRegOperand(const StaticInst *si, int idx) const = 0;
+
+    /** Gets destination predicate register operand for modification. */
+    virtual VecPredRegContainer&
+    getWritableVecPredRegOperand(const StaticInst *si, int idx) = 0;
+
+    /** Sets a destination predicate register operand to a value. */
+    virtual void
+    setVecPredRegOperand(const StaticInst *si, int idx,
+                         const VecPredRegContainer& val) = 0;
+    /** @} */
+
     /**
      * @{
      * @name Condition Code Registers
diff --git a/src/cpu/inst_res.hh b/src/cpu/inst_res.hh
index 9b6a23d95..bf9c649ef 100644
--- a/src/cpu/inst_res.hh
+++ b/src/cpu/inst_res.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016 ARM Limited
+ * Copyright (c) 2016-2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -48,12 +48,14 @@
 class InstResult {
     using VecRegContainer = TheISA::VecRegContainer;
     using VecElem = TheISA::VecElem;
+    using VecPredRegContainer = TheISA::VecPredRegContainer;
   public:
     union MultiResult {
         uint64_t integer;
         double dbl;
         VecRegContainer vector;
         VecElem vecElem;
+        VecPredRegContainer pred;
         MultiResult() {}
     };
 
@@ -61,6 +63,7 @@ class InstResult {
         Scalar,
         VecElem,
         VecReg,
+        VecPredReg,
         NumResultTypes,
         Invalid
     };
@@ -87,6 +90,9 @@ class InstResult {
     /** Vector result. */
     explicit InstResult(const VecRegContainer& v, const ResultType& t)
         : type(t) { result.vector = v; }
+    /** Predicate result. */
+    explicit InstResult(const VecPredRegContainer& v, const ResultType& t)
+        : type(t) { result.pred = v; }
 
     InstResult& operator=(const InstResult& that) {
         type = that.type;
@@ -104,6 +110,10 @@ class InstResult {
         case ResultType::VecReg:
             result.vector = that.result.vector;
             break;
+        case ResultType::VecPredReg:
+            result.pred = that.result.pred;
+            break;
+
         default:
             panic("Assigning result from unknown result type");
             break;
@@ -124,6 +134,8 @@ class InstResult {
             return result.vecElem == that.result.vecElem;
         case ResultType::VecReg:
             return result.vector == that.result.vector;
+        case ResultType::VecPredReg:
+            return result.pred == that.result.pred;
         case ResultType::Invalid:
             return false;
         default:
@@ -143,6 +155,8 @@ class InstResult {
     bool isVector() const { return type == ResultType::VecReg; }
     /** Is this a vector element result?. */
     bool isVecElem() const { return type == ResultType::VecElem; }
+    /** Is this a predicate result?. */
+    bool isPred() const { return type == ResultType::VecPredReg; }
     /** Is this a valid result?. */
     bool isValid() const { return type != ResultType::Invalid; }
     /** @} */
@@ -177,6 +191,14 @@ class InstResult {
         panic_if(!isVecElem(), "Converting scalar (or invalid) to vector!!");
         return result.vecElem;
     }
+
+    const VecPredRegContainer&
+    asPred() const
+    {
+        panic_if(!isPred(), "Converting scalar (or invalid) to predicate!!");
+        return result.pred;
+    }
+
     /** @} */
 };
 
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index b9ed3971f..4cb67372e 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2014, 2016 ARM Limited
+ * Copyright (c) 2011-2014, 2016-2017 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -161,6 +161,22 @@ class ExecContext : public ::ExecContext
         return thread.readVecElem(reg);
     }
 
+    const TheISA::VecPredRegContainer&
+    readVecPredRegOperand(const StaticInst *si, int idx) const override
+    {
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecPredReg());
+        return thread.readVecPredReg(reg);
+    }
+
+    TheISA::VecPredRegContainer&
+    getWritableVecPredRegOperand(const StaticInst *si, int idx) override
+    {
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecPredReg());
+        return thread.getWritableVecPredReg(reg);
+    }
+
     void
     setIntRegOperand(const StaticInst *si, int idx, RegVal val) override
     {
@@ -186,6 +202,16 @@ class ExecContext : public ::ExecContext
         thread.setVecReg(reg, val);
     }
 
+    /** Sets a destination vector predicate register operand to a value. */
+    void
+    setVecPredRegOperand(const StaticInst *si, int idx,
+                         const TheISA::VecPredRegContainer& val) override
+    {
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecPredReg());
+        thread.setVecPredReg(reg, val);
+    }
+
     /** Vector Register Lane Interfaces. */
     /** @{ */
     /** Reads source vector 8bit operand. */
diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc
index 196d035eb..5c0e86a67 100644
--- a/src/cpu/minor/scoreboard.cc
+++ b/src/cpu/minor/scoreboard.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, 2016 ARM Limited
+ * Copyright (c) 2013-2014, 2016-2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -77,6 +77,16 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index)
                 TheISA::NumFloatRegs + reg.flatIndex();
             ret = true;
             break;
+          case VecPredRegClass:
+            // Predicate registers are placed after the
+            // NumVecRegs * NumVecElemPerVecReg vector entries that the
+            // Scoreboard constructor reserves, so they get their own slots.
+            scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs +
+                TheISA::NumFloatRegs +
+                (TheISA::NumVecRegs * TheISA::NumVecElemPerVecReg) +
+                reg.index();
+            ret = true;
+            break;
           case CCRegClass:
             scoreboard_index = TheISA::NumIntRegs + reg.index();
             ret = true;
diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh
index 37ae8da0a..b21e14e24 100644
--- a/src/cpu/minor/scoreboard.hh
+++ b/src/cpu/minor/scoreboard.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, 2016 ARM Limited
+ * Copyright (c) 2013-2014, 2016-2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -95,7 +95,8 @@ class Scoreboard : public Named
         Named(name),
         numRegs(TheISA::NumIntRegs + TheISA::NumCCRegs +
             TheISA::NumFloatRegs +
-            (TheISA::NumVecRegs * TheISA::NumVecElemPerVecReg)),
+            (TheISA::NumVecRegs * TheISA::NumVecElemPerVecReg) +
+            TheISA::NumVecPredRegs),
         numResults(numRegs, 0),
         numUnpredictableResults(numRegs, 0),
         fuIndices(numRegs, 0),
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index 32cc19010..e73c09334 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -150,6 +150,8 @@ class DerivO3CPU(BaseCPU):
         _defaultNumPhysCCRegs = Self.numPhysIntRegs * 5
     numPhysVecRegs = Param.Unsigned(256, "Number of physical vector "
                                       "registers")
+    numPhysVecPredRegs = Param.Unsigned(32, "Number of physical predicate "
+                                      "registers")
     numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs,
                                    "Number of physical cc registers")
     numIQEntries = Param.Unsigned(64, "Number of instruction queue entries")
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index f5be5a804..df518b1e4 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2016 ARM Limited
+ * Copyright (c) 2011, 2016-2017 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -121,6 +121,9 @@ class PhysRegId : private RegId {
     /** @Return true if it is a vector element physical register. */
     bool isVectorPhysElem() const { return isVecElem(); }
 
+    /** @return true if it is a vector predicate physical register. */
+    bool isVecPredPhysReg() const { return isVecPredReg(); }
+
     /** @Return true if it is a  condition-code physical register. */
     bool isMiscPhysReg() const { return isMiscReg(); }
 
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 5d92d92dc..ef3b17202 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -159,6 +159,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
       regFile(params->numPhysIntRegs,
               params->numPhysFloatRegs,
               params->numPhysVecRegs,
+              params->numPhysVecPredRegs,
               params->numPhysCCRegs,
               vecMode),
 
@@ -258,6 +259,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
     assert(params->numPhysIntRegs   >= numThreads * TheISA::NumIntRegs);
     assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs);
     assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs);
+    assert(params->numPhysVecPredRegs >= numThreads * TheISA::NumVecPredRegs);
     assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs);
 
     rename.setScoreboard(&scoreboard);
@@ -325,6 +327,13 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
             }
         }
 
+        for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) {
+            PhysRegIdPtr phys_reg = freeList.getVecPredReg();
+            renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg);
+            commitRenameMap[tid].setEntry(
+                    RegId(VecPredRegClass, ridx), phys_reg);
+        }
+
         for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) {
             PhysRegIdPtr phys_reg = freeList.getCCReg();
             renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg);
@@ -538,6 +547,16 @@ FullO3CPU<Impl>::regStats()
         .desc("number of vector regfile writes")
         .prereq(vecRegfileWrites);
 
+    vecPredRegfileReads
+        .name(name() + ".pred_regfile_reads")
+        .desc("number of predicate regfile reads")
+        .prereq(vecPredRegfileReads);
+
+    vecPredRegfileWrites
+        .name(name() + ".pred_regfile_writes")
+        .desc("number of predicate regfile writes")
+        .prereq(vecPredRegfileWrites);
+
     ccRegfileReads
         .name(name() + ".cc_regfile_reads")
         .desc("number of cc regfile reads")
@@ -883,6 +902,14 @@ FullO3CPU<Impl>::removeThread(ThreadID tid)
         freeList.addReg(phys_reg);
     }
 
+    // Unbind vector predicate Regs from Rename Map
+    for (unsigned preg = 0; preg < TheISA::NumVecPredRegs; preg++) {
+        PhysRegIdPtr phys_reg = renameMap[tid].lookup(
+            RegId(VecPredRegClass, preg));
+        scoreboard.unsetReg(phys_reg);
+        freeList.addReg(phys_reg);
+    }
+
     // Unbind condition-code Regs from Rename Map
     for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs;
          reg_id.index()++) {
@@ -1334,6 +1361,24 @@ FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem&
 }
 
 template <class Impl>
+auto
+FullO3CPU<Impl>::readVecPredReg(PhysRegIdPtr phys_reg) const
+        -> const VecPredRegContainer&
+{
+    vecPredRegfileReads++;
+    return regFile.readVecPredReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::getWritableVecPredReg(PhysRegIdPtr phys_reg)
+        -> VecPredRegContainer&
+{
+    vecPredRegfileWrites++;
+    return regFile.getWritableVecPredReg(phys_reg);
+}
+
+template <class Impl>
 CCReg
 FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg)
 {
@@ -1375,6 +1420,15 @@ FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val)
 
 template <class Impl>
 void
+FullO3CPU<Impl>::setVecPredReg(PhysRegIdPtr phys_reg,
+                               const VecPredRegContainer& val)
+{
+    vecPredRegfileWrites++;
+    regFile.setVecPredReg(phys_reg, val);
+}
+
+template <class Impl>
+void
 FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, CCReg val)
 {
     ccRegfileWrites++;
@@ -1434,6 +1488,26 @@ FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
 }
 
 template <class Impl>
+auto
+FullO3CPU<Impl>::readArchVecPredReg(int reg_idx, ThreadID tid) const
+        -> const VecPredRegContainer&
+{
+    PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+                RegId(VecPredRegClass, reg_idx));
+    return readVecPredReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::getWritableArchVecPredReg(int reg_idx, ThreadID tid)
+        -> VecPredRegContainer&
+{
+    PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+                RegId(VecPredRegClass, reg_idx));
+    return getWritableVecPredReg(phys_reg);
+}
+
+template <class Impl>
 CCReg
 FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid)
 {
@@ -1488,6 +1562,16 @@ FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
 
 template <class Impl>
 void
+FullO3CPU<Impl>::setArchVecPredReg(int reg_idx, const VecPredRegContainer& val,
+                                   ThreadID tid)
+{
+    PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+                RegId(VecPredRegClass, reg_idx));
+    setVecPredReg(phys_reg, val);
+}
+
+template <class Impl>
+void
 FullO3CPU<Impl>::setArchCCReg(int reg_idx, CCReg val, ThreadID tid)
 {
     ccRegfileWrites++;
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index b5f754056..30ed4ef3b 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -107,6 +107,8 @@ class FullO3CPU : public BaseO3CPU
     using VecElem =  TheISA::VecElem;
     using VecRegContainer =  TheISA::VecRegContainer;
 
+    using VecPredRegContainer = TheISA::VecPredRegContainer;
+
     typedef O3ThreadState<Impl> ImplState;
     typedef O3ThreadState<Impl> Thread;
 
@@ -457,6 +459,10 @@ class FullO3CPU : public BaseO3CPU
 
     const VecElem& readVecElem(PhysRegIdPtr reg_idx) const;
 
+    const VecPredRegContainer& readVecPredReg(PhysRegIdPtr reg_idx) const;
+
+    VecPredRegContainer& getWritableVecPredReg(PhysRegIdPtr reg_idx);
+
     TheISA::CCReg readCCReg(PhysRegIdPtr phys_reg);
 
     void setIntReg(PhysRegIdPtr phys_reg, RegVal val);
@@ -467,6 +473,8 @@ class FullO3CPU : public BaseO3CPU
 
     void setVecElem(PhysRegIdPtr reg_idx, const VecElem& val);
 
+    void setVecPredReg(PhysRegIdPtr reg_idx, const VecPredRegContainer& val);
+
     void setCCReg(PhysRegIdPtr phys_reg, TheISA::CCReg val);
 
     RegVal readArchIntReg(int reg_idx, ThreadID tid);
@@ -501,6 +509,11 @@ class FullO3CPU : public BaseO3CPU
     const VecElem& readArchVecElem(const RegIndex& reg_idx,
                                    const ElemIndex& ldx, ThreadID tid) const;
 
+    const VecPredRegContainer& readArchVecPredReg(int reg_idx,
+                                                  ThreadID tid) const;
+
+    VecPredRegContainer& getWritableArchVecPredReg(int reg_idx, ThreadID tid);
+
     TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid);
 
     /** Architectural register accessors.  Looks up in the commit
@@ -512,6 +525,9 @@ class FullO3CPU : public BaseO3CPU
 
     void setArchFloatRegBits(int reg_idx, RegVal val, ThreadID tid);
 
+    void setArchVecPredReg(int reg_idx, const VecPredRegContainer& val,
+                           ThreadID tid);
+
     void setArchVecReg(int reg_idx, const VecRegContainer& val, ThreadID tid);
 
     void setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
@@ -805,6 +821,9 @@ class FullO3CPU : public BaseO3CPU
     //number of vector register file accesses
     mutable Stats::Scalar vecRegfileReads;
     Stats::Scalar vecRegfileWrites;
+    //number of predicate register file accesses
+    mutable Stats::Scalar vecPredRegfileReads;
+    Stats::Scalar vecPredRegfileWrites;
     //number of CC register file accesses
     Stats::Scalar ccRegfileReads;
     Stats::Scalar ccRegfileWrites;
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index 5bd0f8e47..9793f4ead 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -70,6 +70,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
     using VecRegContainer = TheISA::VecRegContainer;
     using VecElem = TheISA::VecElem;
     static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg;
+    using VecPredRegContainer = TheISA::VecPredRegContainer;
 
     enum {
         MaxInstSrcRegs = TheISA::MaxInstSrcRegs,        //< Max source regs
@@ -231,6 +232,10 @@ class BaseO3DynInst : public BaseDynInst<Impl>
                 this->setVecElemOperand(this->staticInst.get(), idx,
                                this->cpu->readVecElem(prev_phys_reg));
                 break;
+              case VecPredRegClass:
+                this->setVecPredRegOperand(this->staticInst.get(), idx,
+                               this->cpu->readVecPredReg(prev_phys_reg));
+                break;
               case CCRegClass:
                 this->setCCRegOperand(this->staticInst.get(), idx,
                                this->cpu->readCCReg(prev_phys_reg));
@@ -361,6 +366,18 @@ class BaseO3DynInst : public BaseDynInst<Impl>
         return this->cpu->readVecElem(this->_srcRegIdx[idx]);
     }
 
+    const VecPredRegContainer&
+    readVecPredRegOperand(const StaticInst *si, int idx) const override
+    {
+        return this->cpu->readVecPredReg(this->_srcRegIdx[idx]);
+    }
+
+    VecPredRegContainer&
+    getWritableVecPredRegOperand(const StaticInst *si, int idx) override
+    {
+        return this->cpu->getWritableVecPredReg(this->_destRegIdx[idx]);
+    }
+
     CCReg readCCRegOperand(const StaticInst *si, int idx)
     {
         return this->cpu->readCCReg(this->_srcRegIdx[idx]);
@@ -399,6 +416,14 @@ class BaseO3DynInst : public BaseDynInst<Impl>
         BaseDynInst<Impl>::setVecElemOperand(si, idx, val);
     }
 
+    void
+    setVecPredRegOperand(const StaticInst *si, int idx,
+                         const VecPredRegContainer& val) override
+    {
+        this->cpu->setVecPredReg(this->_destRegIdx[idx], val);
+        BaseDynInst<Impl>::setVecPredRegOperand(si, idx, val);
+    }
+
     void setCCRegOperand(const StaticInst *si, int idx, CCReg val)
     {
         this->cpu->setCCReg(this->_destRegIdx[idx], val);
diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh
index e7a899cdf..46bebf30d 100644
--- a/src/cpu/o3/free_list.hh
+++ b/src/cpu/o3/free_list.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016 ARM Limited
+ * Copyright (c) 2016-2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -138,6 +138,9 @@ class UnifiedFreeList
     SimpleFreeList vecElemList;
     /** @} */
 
+    /** The list of free predicate registers. */
+    SimpleFreeList predList;
+
     /** The list of free condition-code registers. */
     SimpleFreeList ccList;
 
@@ -183,6 +186,9 @@ class UnifiedFreeList
     /** Gets a free vector elemenet register. */
     PhysRegIdPtr getVecElem() { return vecElemList.getReg(); }
 
+    /** Gets a free predicate register. */
+    PhysRegIdPtr getVecPredReg() { return predList.getReg(); }
+
     /** Gets a free cc register. */
     PhysRegIdPtr getCCReg() { return ccList.getReg(); }
 
@@ -207,6 +213,9 @@ class UnifiedFreeList
         vecElemList.addReg(freed_reg);
     }
 
+    /** Adds a predicate register back to the free list. */
+    void addVecPredReg(PhysRegIdPtr freed_reg) { predList.addReg(freed_reg); }
+
     /** Adds a cc register back to the free list. */
     void addCCReg(PhysRegIdPtr freed_reg) { ccList.addReg(freed_reg); }
 
@@ -222,6 +231,9 @@ class UnifiedFreeList
     /** Checks if there are any free vector registers. */
     bool hasFreeVecElems() const { return vecElemList.hasFreeRegs(); }
 
+    /** Checks if there are any free predicate registers. */
+    bool hasFreeVecPredRegs() const { return predList.hasFreeRegs(); }
+
     /** Checks if there are any free cc registers. */
     bool hasFreeCCRegs() const { return ccList.hasFreeRegs(); }
 
@@ -237,6 +249,9 @@ class UnifiedFreeList
     /** Returns the number of free vector registers. */
     unsigned numFreeVecElems() const { return vecElemList.numFreeRegs(); }
 
+    /** Returns the number of free predicate registers. */
+    unsigned numFreeVecPredRegs() const { return predList.numFreeRegs(); }
+
     /** Returns the number of free cc registers. */
     unsigned numFreeCCRegs() const { return ccList.numFreeRegs(); }
 };
@@ -267,6 +282,9 @@ UnifiedFreeList::addRegs(InputIt first, InputIt last)
         case VecElemClass:
             vecElemList.addRegs(first, last);
             break;
+        case VecPredRegClass:
+            predList.addRegs(first, last);
+            break;
         case CCRegClass:
             ccList.addRegs(first, last);
             break;
@@ -297,6 +315,9 @@ UnifiedFreeList::addReg(PhysRegIdPtr freed_reg)
         case VecElemClass:
             vecElemList.addReg(freed_reg);
             break;
+        case VecPredRegClass:
+            predList.addReg(freed_reg);
+            break;
         case CCRegClass:
             ccList.addReg(freed_reg);
             break;
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index 4a55a91ea..ddd7b6d5f 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -104,6 +104,7 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
     numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs +
                     params->numPhysVecRegs +
                     params->numPhysVecRegs * TheISA::NumVecElemPerVecReg +
+                    params->numPhysVecPredRegs +
                     params->numPhysCCRegs;
 
     //Create an entry for each physical register within the
diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc
index 2f41e2ac2..cc4bba6b0 100644
--- a/src/cpu/o3/regfile.cc
+++ b/src/cpu/o3/regfile.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016 ARM Limited
+ * Copyright (c) 2016-2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -52,22 +52,26 @@
 PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
                          unsigned _numPhysicalFloatRegs,
                          unsigned _numPhysicalVecRegs,
+                         unsigned _numPhysicalVecPredRegs,
                          unsigned _numPhysicalCCRegs,
                          VecMode vmode)
     : intRegFile(_numPhysicalIntRegs),
       floatRegFile(_numPhysicalFloatRegs),
       vectorRegFile(_numPhysicalVecRegs),
+      vecPredRegFile(_numPhysicalVecPredRegs),
       ccRegFile(_numPhysicalCCRegs),
       numPhysicalIntRegs(_numPhysicalIntRegs),
       numPhysicalFloatRegs(_numPhysicalFloatRegs),
       numPhysicalVecRegs(_numPhysicalVecRegs),
       numPhysicalVecElemRegs(_numPhysicalVecRegs *
                              NumVecElemPerVecReg),
+      numPhysicalVecPredRegs(_numPhysicalVecPredRegs),
       numPhysicalCCRegs(_numPhysicalCCRegs),
       totalNumRegs(_numPhysicalIntRegs
                    + _numPhysicalFloatRegs
                    + _numPhysicalVecRegs
                    + _numPhysicalVecRegs * NumVecElemPerVecReg
+                   + _numPhysicalVecPredRegs
                    + _numPhysicalCCRegs),
       vecMode(vmode)
 {
@@ -108,6 +112,12 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
         }
     }
 
+    // The next batch of the registers are the predicate physical
+    // registers; record an id for each one (see initFreeList()).
+    for (phys_reg = 0; phys_reg < numPhysicalVecPredRegs; phys_reg++) {
+        vecPredRegIds.emplace_back(VecPredRegClass, phys_reg, flat_reg_idx++);
+    }
+
     // The rest of the registers are the condition-code physical
     // registers; put them onto the condition-code free list.
     for (phys_reg = 0; phys_reg < numPhysicalCCRegs; phys_reg++) {
@@ -159,6 +169,13 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList)
     else
         freeList->addRegs(vecElemIds.begin(), vecElemIds.end());
 
+    // Next come the predicate physical registers: check that each id's
+    // index() matches its position, then put them onto the free list.
+    for (reg_idx = 0; reg_idx < numPhysicalVecPredRegs; reg_idx++) {
+        assert(vecPredRegIds[reg_idx].index() == reg_idx);
+    }
+    freeList->addRegs(vecPredRegIds.begin(), vecPredRegIds.end());
+
     // The rest of the registers are the condition-code physical
     // registers; put them onto the condition-code free list.
     for (reg_idx = 0; reg_idx < numPhysicalCCRegs; reg_idx++) {
@@ -191,6 +208,8 @@ PhysRegFile::getRegIds(RegClass cls) -> IdRange
         return std::make_pair(vecRegIds.begin(), vecRegIds.end());
       case VecElemClass:
         return std::make_pair(vecElemIds.begin(), vecElemIds.end());
+      case VecPredRegClass:
+        return std::make_pair(vecPredRegIds.begin(), vecPredRegIds.end());
       case CCRegClass:
         return std::make_pair(ccRegIds.begin(), ccRegIds.end());
       case MiscRegClass:
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index 9d9113240..4077c99a4 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016 ARM Limited
+ * Copyright (c) 2016-2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -70,6 +70,7 @@ class PhysRegFile
     using VecRegContainer = TheISA::VecRegContainer;
     using PhysIds = std::vector<PhysRegId>;
     using VecMode = Enums::VecRegRenameMode;
+    using VecPredRegContainer = TheISA::VecPredRegContainer;
   public:
     using IdRange = std::pair<PhysIds::const_iterator,
                               PhysIds::const_iterator>;
@@ -89,6 +90,10 @@ class PhysRegFile
     std::vector<PhysRegId> vecRegIds;
     std::vector<PhysRegId> vecElemIds;
 
+    /** Predicate register file. */
+    std::vector<VecPredRegContainer> vecPredRegFile;
+    std::vector<PhysRegId> vecPredRegIds;
+
     /** Condition-code register file. */
     std::vector<CCReg> ccRegFile;
     std::vector<PhysRegId> ccRegIds;
@@ -117,6 +122,11 @@ class PhysRegFile
     unsigned numPhysicalVecElemRegs;
 
     /**
+     * Number of physical predicate registers
+     */
+    unsigned numPhysicalVecPredRegs;
+
+    /**
      * Number of physical CC registers
      */
     unsigned numPhysicalCCRegs;
@@ -135,6 +145,7 @@ class PhysRegFile
     PhysRegFile(unsigned _numPhysicalIntRegs,
                 unsigned _numPhysicalFloatRegs,
                 unsigned _numPhysicalVecRegs,
+                unsigned _numPhysicalVecPredRegs,
                 unsigned _numPhysicalCCRegs,
                 VecMode vmode
                 );
@@ -154,6 +165,8 @@ class PhysRegFile
     unsigned numFloatPhysRegs() const { return numPhysicalFloatRegs; }
     /** @return the number of vector physical registers. */
     unsigned numVecPhysRegs() const { return numPhysicalVecRegs; }
+    /** @return the number of predicate physical registers. */
+    unsigned numPredPhysRegs() const { return numPhysicalVecPredRegs; }
 
     /** @return the number of vector physical registers. */
     unsigned numVecElemPhysRegs() const { return numPhysicalVecElemRegs; }
@@ -201,7 +214,7 @@ class PhysRegFile
 
         DPRINTF(IEW, "RegFile: Access to vector register %i, has "
                 "data %s\n", int(phys_reg->index()),
-                vectorRegFile[phys_reg->index()].as<VecElem>().print());
+                vectorRegFile[phys_reg->index()].print());
 
         return vectorRegFile[phys_reg->index()];
     }
@@ -258,6 +271,24 @@ class PhysRegFile
         return val;
     }
 
+    /** Reads a predicate register. */
+    const VecPredRegContainer& readVecPredReg(PhysRegIdPtr phys_reg) const
+    {
+        assert(phys_reg->isVecPredPhysReg());
+
+        DPRINTF(IEW, "RegFile: Access to predicate register %i, has "
+                "data %s\n", int(phys_reg->index()),
+                vecPredRegFile[phys_reg->index()].print());
+
+        return vecPredRegFile[phys_reg->index()];
+    }
+
+    VecPredRegContainer& getWritableVecPredReg(PhysRegIdPtr phys_reg)
+    {
+        /* Reuse the const reader (assert + DPRINTF), then drop the const. */
+        return const_cast<VecPredRegContainer&>(readVecPredReg(phys_reg));
+    }
+
     /** Reads a condition-code register. */
     CCReg
     readCCReg(PhysRegIdPtr phys_reg)
@@ -321,6 +352,17 @@ class PhysRegFile
                 val;
     }
 
+    /** Sets a predicate register to the given value. */
+    void setVecPredReg(PhysRegIdPtr phys_reg, const VecPredRegContainer& val)
+    {
+        assert(phys_reg->isVecPredPhysReg());
+
+        DPRINTF(IEW, "RegFile: Setting predicate register %i to %s\n",
+                int(phys_reg->index()), val.print());
+
+        vecPredRegFile[phys_reg->index()] = val;
+    }
+
     /** Sets a condition-code register to the given value. */
     void
     setCCReg(PhysRegIdPtr phys_reg, CCReg val)
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index bd5e72dec..a091c0908 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 ARM Limited
+ * Copyright (c) 2012, 2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -514,6 +514,7 @@ class DefaultRename
     Stats::Scalar intRenameLookups;
     Stats::Scalar fpRenameLookups;
     Stats::Scalar vecRenameLookups;
+    Stats::Scalar vecPredRenameLookups;
     /** Stat for total number of committed renaming mappings. */
     Stats::Scalar renameCommittedMaps;
     /** Stat for total number of mappings that were undone due to a squash. */
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index ed5dfb6eb..b63163f04 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -196,6 +196,10 @@ DefaultRename<Impl>::regStats()
         .name(name() + ".vec_rename_lookups")
         .desc("Number of vector rename lookups")
         .prereq(vecRenameLookups);
+    vecPredRenameLookups
+        .name(name() + ".vec_pred_rename_lookups")
+        .desc("Number of vector predicate rename lookups")
+        .prereq(vecPredRenameLookups);
 }
 
 template <class Impl>
@@ -659,6 +663,7 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
                                        inst->numFPDestRegs(),
                                        inst->numVecDestRegs(),
                                        inst->numVecElemDestRegs(),
+                                       inst->numVecPredDestRegs(),
                                        inst->numCCDestRegs())) {
             DPRINTF(Rename, "Blocking due to lack of free "
                     "physical registers to rename to.\n");
@@ -1041,6 +1046,9 @@ DefaultRename<Impl>::renameSrcRegs(const DynInstPtr &inst, ThreadID tid)
           case VecElemClass:
             vecRenameLookups++;
             break;
+          case VecPredRegClass:
+            vecPredRenameLookups++;
+            break;
           case CCRegClass:
           case MiscRegClass:
             break;
@@ -1256,7 +1264,7 @@ DefaultRename<Impl>::readFreeEntries(ThreadID tid)
     }
 
     DPRINTF(Rename, "[tid:%i]: Free IQ: %i, Free ROB: %i, "
-                    "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i)\n",
+                    "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i %i)\n",
             tid,
             freeEntries[tid].iqEntries,
             freeEntries[tid].robEntries,
@@ -1266,6 +1274,7 @@ DefaultRename<Impl>::readFreeEntries(ThreadID tid)
             renameMap[tid]->numFreeIntEntries(),
             renameMap[tid]->numFreeFloatEntries(),
             renameMap[tid]->numFreeVecEntries(),
+            renameMap[tid]->numFreePredEntries(),
             renameMap[tid]->numFreeCCEntries());
 
     DPRINTF(Rename, "[tid:%i]: %i instructions not yet in ROB\n",
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index 86c43932c..603f1ff36 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016,2019 ARM Limited
+ * Copyright (c) 2016-2017,2019 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -120,6 +120,8 @@ UnifiedRenameMap::init(PhysRegFile *_regFile,
     vecElemMap.init(TheISA::NumVecRegs * NVecElems,
             &(freeList->vecElemList), (RegIndex)-1);
 
+    predMap.init(TheISA::NumVecPredRegs, &(freeList->predList), (RegIndex)-1);
+
     ccMap.init(TheISA::NumCCRegs, &(freeList->ccList), (RegIndex)-1);
 
 }
diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh
index d30668027..5424633e5 100644
--- a/src/cpu/o3/rename_map.hh
+++ b/src/cpu/o3/rename_map.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016 ARM Limited
+ * Copyright (c) 2015-2017 ARM Limited
  * All rights reserved.
  *
  * The license below extends only to copyright in the software and shall
@@ -172,6 +172,7 @@ class UnifiedRenameMap
   private:
     static constexpr uint32_t NVecElems = TheISA::NumVecElemPerVecReg;
     using VecReg = TheISA::VecReg;
+    using VecPredReg = TheISA::VecPredReg;
 
     /** The integer register rename map */
     SimpleRenameMap intMap;
@@ -188,6 +189,9 @@ class UnifiedRenameMap
     /** The vector element register rename map */
     SimpleRenameMap vecElemMap;
 
+    /** The predicate register rename map */
+    SimpleRenameMap predMap;
+
     using VecMode = Enums::VecRegRenameMode;
     VecMode vecMode;
 
@@ -235,6 +239,8 @@ class UnifiedRenameMap
           case VecElemClass:
             assert(vecMode == Enums::Elem);
             return vecElemMap.rename(arch_reg);
+          case VecPredRegClass:
+            return predMap.rename(arch_reg);
           case CCRegClass:
             return ccMap.rename(arch_reg);
           case MiscRegClass:
@@ -276,6 +282,9 @@ class UnifiedRenameMap
             assert(vecMode == Enums::Elem);
             return  vecElemMap.lookup(arch_reg);
 
+          case VecPredRegClass:
+            return predMap.lookup(arch_reg);
+
           case CCRegClass:
             return ccMap.lookup(arch_reg);
 
@@ -319,6 +328,10 @@ class UnifiedRenameMap
             assert(vecMode == Enums::Elem);
             return vecElemMap.setEntry(arch_reg, phys_reg);
 
+          case VecPredRegClass:
+            assert(phys_reg->isVecPredPhysReg());
+            return predMap.setEntry(arch_reg, phys_reg);
+
           case CCRegClass:
             assert(phys_reg->isCCPhysReg());
             return ccMap.setEntry(arch_reg, phys_reg);
@@ -345,10 +358,11 @@ class UnifiedRenameMap
      */
     unsigned numFreeEntries() const
     {
-        return std::min(
+        return std::min(std::min(
                 std::min(intMap.numFreeEntries(), floatMap.numFreeEntries()),
                 vecMode == Enums::Full ? vecMap.numFreeEntries()
-                                    : vecElemMap.numFreeEntries());
+                                    : vecElemMap.numFreeEntries()),
+                predMap.numFreeEntries());
     }
 
     unsigned numFreeIntEntries() const { return intMap.numFreeEntries(); }
@@ -359,18 +373,21 @@ class UnifiedRenameMap
                 ? vecMap.numFreeEntries()
                 : vecElemMap.numFreeEntries();
     }
+    unsigned numFreePredEntries() const { return predMap.numFreeEntries(); }
     unsigned numFreeCCEntries() const { return ccMap.numFreeEntries(); }
 
     /**
      * Return whether there are enough registers to serve the request.
      */
     bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t vectorRegs,
-                    uint32_t vecElemRegs, uint32_t ccRegs) const
+                   uint32_t vecElemRegs, uint32_t vecPredRegs,
+                   uint32_t ccRegs) const
     {
         return intRegs <= intMap.numFreeEntries() &&
             floatRegs <= floatMap.numFreeEntries() &&
             vectorRegs <= vecMap.numFreeEntries() &&
             vecElemRegs <= vecElemMap.numFreeEntries() &&
+            vecPredRegs <= predMap.numFreeEntries() &&
             ccRegs <= ccMap.numFreeEntries();
     }
     /**
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
index c74936469..7858f5a0a 100644
--- a/src/cpu/o3/thread_context.hh
+++ b/src/cpu/o3/thread_context.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012, 2016 ARM Limited
+ * Copyright (c) 2011-2012, 2016-2018 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -263,6 +263,14 @@ class O3ThreadContext : public ThreadContext
         return readVecElemFlat(flattenRegId(reg).index(), reg.elemIndex());
     }
 
+    virtual const VecPredRegContainer& readVecPredReg(const RegId& id) const {
+        return readVecPredRegFlat(flattenRegId(id).index());
+    }
+
+    virtual VecPredRegContainer& getWritableVecPredReg(const RegId& id) {
+        return getWritableVecPredRegFlat(flattenRegId(id).index());
+    }
+
     virtual CCReg readCCReg(int reg_idx) {
         return readCCRegFlat(flattenRegId(RegId(CCRegClass,
                                                  reg_idx)).index());
@@ -295,6 +303,13 @@ class O3ThreadContext : public ThreadContext
     }
 
     virtual void
+    setVecPredReg(const RegId& reg,
+                  const VecPredRegContainer& val)
+    {
+        setVecPredRegFlat(flattenRegId(reg).index(), val);
+    }
+
+    virtual void
     setCCReg(int reg_idx, CCReg val)
     {
         setCCRegFlat(flattenRegId(RegId(CCRegClass, reg_idx)).index(), val);
@@ -403,6 +418,12 @@ class O3ThreadContext : public ThreadContext
     virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx,
                                 const VecElem& val);
 
+    virtual const VecPredRegContainer& readVecPredRegFlat(int idx)
+        const override;
+    virtual VecPredRegContainer& getWritableVecPredRegFlat(int idx) override;
+    virtual void setVecPredRegFlat(int idx,
+                                   const VecPredRegContainer& val) override;
+
     virtual CCReg readCCRegFlat(int idx);
     virtual void setCCRegFlat(int idx, CCReg val);
 };
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index bd2bf63b0..59562ba3b 100644
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -233,6 +233,20 @@ O3ThreadContext<Impl>::readVecElemFlat(const RegIndex& idx,
 }
 
 template <class Impl>
+const TheISA::VecPredRegContainer&
+O3ThreadContext<Impl>::readVecPredRegFlat(int reg_idx) const
+{
+    return cpu->readArchVecPredReg(reg_idx, thread->threadId());
+}
+
+template <class Impl>
+TheISA::VecPredRegContainer&
+O3ThreadContext<Impl>::getWritableVecPredRegFlat(int reg_idx)
+{
+    return cpu->getWritableArchVecPredReg(reg_idx, thread->threadId());
+}
+
+template <class Impl>
 TheISA::CCReg
 O3ThreadContext<Impl>::readCCRegFlat(int reg_idx)
 {
@@ -277,6 +291,16 @@ O3ThreadContext<Impl>::setVecElemFlat(const RegIndex& idx,
 
 template <class Impl>
 void
+O3ThreadContext<Impl>::setVecPredRegFlat(int reg_idx,
+                                         const VecPredRegContainer& val)
+{
+    cpu->setArchVecPredReg(reg_idx, val, thread->threadId());
+
+    conditionalSquash();
+}
+
+template <class Impl>
+void
 O3ThreadContext<Impl>::setCCRegFlat(int reg_idx, TheISA::CCReg val)
 {
     cpu->setArchCCReg(reg_idx, val, thread->threadId());
diff --git a/src/cpu/reg_class.cc b/src/cpu/reg_class.cc
index 16c1949ee..eeb06adcc 100644
--- a/src/cpu/reg_class.cc
+++ b/src/cpu/reg_class.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016 ARM Limited
+ * Copyright (c) 2016-2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -47,6 +47,7 @@ const char *RegId::regClassStrings[] = {
     "FloatRegClass",
     "VecRegClass",
     "VecElemClass",
+    "VecPredRegClass",
     "CCRegClass",
     "MiscRegClass"
 };
diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh
index 69da9cf7e..70cfab39e 100644
--- a/src/cpu/reg_class.hh
+++ b/src/cpu/reg_class.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016 ARM Limited
+ * Copyright (c) 2016-2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -60,6 +60,7 @@ enum RegClass {
     VecRegClass,
     /** Vector Register Native Elem lane. */
     VecElemClass,
+    VecPredRegClass,    ///< Predicate register
     CCRegClass,         ///< Condition-code register
     MiscRegClass        ///< Control (misc) register
 };
@@ -151,6 +152,9 @@ class RegId {
     /** @Return true if it is a  condition-code physical register. */
     bool isVecElem() const { return regClass == VecElemClass; }
 
+    /** @Return true if it is a predicate physical register. */
+    bool isVecPredReg() const { return regClass == VecPredRegClass; }
+
     /** @Return true if it is a  condition-code physical register. */
     bool isCCReg() const { return regClass == CCRegClass; }
 
@@ -179,6 +183,7 @@ class RegId {
           case IntRegClass:
           case FloatRegClass:
           case VecRegClass:
+          case VecPredRegClass:
           case CCRegClass:
           case MiscRegClass:
             return regIdx;
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index cbca34123..d2107b89a 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016 ARM Limited
+ * Copyright (c) 2014-2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -121,6 +121,10 @@ class SimpleExecContext : public ExecContext {
     mutable Stats::Scalar numVecRegReads;
     Stats::Scalar numVecRegWrites;
 
+    // Number of predicate register file accesses
+    mutable Stats::Scalar numVecPredRegReads;
+    Stats::Scalar numVecPredRegWrites;
+
     // Number of condition code register file accesses
     Stats::Scalar numCCRegReads;
     Stats::Scalar numCCRegWrites;
@@ -333,6 +337,34 @@ class SimpleExecContext : public ExecContext {
         thread->setVecElem(reg, val);
     }
 
+    const VecPredRegContainer&
+    readVecPredRegOperand(const StaticInst *si, int idx) const override
+    {
+        numVecPredRegReads++; // stat is declared mutable for this const path
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecPredReg());
+        return thread->readVecPredReg(reg);
+    }
+
+    VecPredRegContainer&
+    getWritableVecPredRegOperand(const StaticInst *si, int idx) override
+    {
+        numVecPredRegWrites++; // handing out a writable ref counts as a write
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecPredReg());
+        return thread->getWritableVecPredReg(reg);
+    }
+
+    void
+    setVecPredRegOperand(const StaticInst *si, int idx,
+                         const VecPredRegContainer& val) override
+    {
+        numVecPredRegWrites++;
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecPredReg());
+        thread->setVecPredReg(reg, val);
+    }
+
     CCReg
     readCCRegOperand(const StaticInst *si, int idx) override
     {
diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh
index 211a4c89f..00355c602 100644
--- a/src/cpu/simple_thread.hh
+++ b/src/cpu/simple_thread.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012, 2016 ARM Limited
+ * Copyright (c) 2011-2012, 2016-2018 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -58,6 +58,7 @@
 #include "debug/CCRegs.hh"
 #include "debug/FloatRegs.hh"
 #include "debug/IntRegs.hh"
+#include "debug/VecPredRegs.hh"
 #include "debug/VecRegs.hh"
 #include "mem/page_table.hh"
 #include "mem/request.hh"
@@ -102,6 +103,7 @@ class SimpleThread : public ThreadState
     typedef TheISA::CCReg CCReg;
     using VecRegContainer = TheISA::VecRegContainer;
     using VecElem = TheISA::VecElem;
+    using VecPredRegContainer = TheISA::VecPredRegContainer;
   public:
     typedef ThreadContext::Status Status;
 
@@ -109,6 +111,7 @@ class SimpleThread : public ThreadState
     RegVal floatRegs[TheISA::NumFloatRegs];
     RegVal intRegs[TheISA::NumIntRegs];
     VecRegContainer vecRegs[TheISA::NumVecRegs];
+    VecPredRegContainer vecPredRegs[TheISA::NumVecPredRegs];
 #ifdef ISA_HAS_CC_REGS
     TheISA::CCReg ccRegs[TheISA::NumCCRegs];
 #endif
@@ -228,6 +231,9 @@ class SimpleThread : public ThreadState
         for (int i = 0; i < TheISA::NumVecRegs; i++) {
             vecRegs[i].zero();
         }
+        for (int i = 0; i < TheISA::NumVecPredRegs; i++) {
+            vecPredRegs[i].reset();
+        }
 #ifdef ISA_HAS_CC_REGS
         memset(ccRegs, 0, sizeof(ccRegs));
 #endif
@@ -266,7 +272,7 @@ class SimpleThread : public ThreadState
         assert(flatIndex < TheISA::NumVecRegs);
         const VecRegContainer& regVal = readVecRegFlat(flatIndex);
         DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s.\n",
-                reg.index(), flatIndex, regVal.as<TheISA::VecElem>().print());
+                reg.index(), flatIndex, regVal.print());
         return regVal;
     }
 
@@ -277,7 +283,7 @@ class SimpleThread : public ThreadState
         assert(flatIndex < TheISA::NumVecRegs);
         VecRegContainer& regVal = getWritableVecRegFlat(flatIndex);
         DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s for modify.\n",
-                reg.index(), flatIndex, regVal.as<TheISA::VecElem>().print());
+                reg.index(), flatIndex, regVal.print());
         return regVal;
     }
 
@@ -350,6 +356,28 @@ class SimpleThread : public ThreadState
         return regVal;
     }
 
+    const VecPredRegContainer&
+    readVecPredReg(const RegId& reg) const
+    {
+        int flatIndex = isa->flattenVecPredIndex(reg.index());
+        assert(flatIndex < TheISA::NumVecPredRegs);
+        const VecPredRegContainer& regVal = readVecPredRegFlat(flatIndex);
+        DPRINTF(VecPredRegs, "Reading predicate reg %d (%d) as %s.\n",
+                reg.index(), flatIndex, regVal.print());
+        return regVal;
+    }
+
+    VecPredRegContainer&
+    getWritableVecPredReg(const RegId& reg)
+    {
+        int flatIndex = isa->flattenVecPredIndex(reg.index());
+        assert(flatIndex < TheISA::NumVecPredRegs);
+        VecPredRegContainer& regVal = getWritableVecPredRegFlat(flatIndex);
+        DPRINTF(VecPredRegs,
+                "Reading predicate reg %d (%d) as %s for modify.\n",
+                reg.index(), flatIndex, regVal.print());
+        return regVal;
+    }
 
     CCReg readCCReg(int reg_idx)
     {
@@ -411,6 +439,16 @@ class SimpleThread : public ThreadState
     }
 
     void
+    setVecPredReg(const RegId& reg, const VecPredRegContainer& val)
+    {
+        int flatIndex = isa->flattenVecPredIndex(reg.index());
+        assert(flatIndex < TheISA::NumVecPredRegs);
+        setVecPredRegFlat(flatIndex, val);
+        DPRINTF(VecPredRegs, "Setting predicate reg %d (%d) to %s.\n",
+                reg.index(), flatIndex, val.print());
+    }
+
+    void
     setCCReg(int reg_idx, CCReg val)
     {
 #ifdef ISA_HAS_CC_REGS
@@ -568,6 +606,21 @@ class SimpleThread : public ThreadState
         vecRegs[reg].as<TheISA::VecElem>()[elemIndex] = val;
     }
 
+    const VecPredRegContainer& readVecPredRegFlat(const RegIndex& reg) const
+    {
+        return vecPredRegs[reg];
+    }
+
+    VecPredRegContainer& getWritableVecPredRegFlat(const RegIndex& reg)
+    {
+        return vecPredRegs[reg];
+    }
+
+    void setVecPredRegFlat(const RegIndex& reg, const VecPredRegContainer& val)
+    {
+        vecPredRegs[reg] = val;
+    }
+
 #ifdef ISA_HAS_CC_REGS
     CCReg readCCRegFlat(int idx) { return ccRegs[idx]; }
     void setCCRegFlat(int idx, CCReg val) { ccRegs[idx] = val; }
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh
index 70d933c31..bcb53f5ea 100644
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2017 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2003-2005 The Regents of The University of Michigan
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved.
@@ -105,16 +117,17 @@ class StaticInst : public RefCounted, public StaticInstFlags
     /** @{ */
     int8_t _numVecDestRegs;
     int8_t _numVecElemDestRegs;
+    int8_t _numVecPredDestRegs;
     /** @} */
 
   public:
 
     /// @name Register information.
-    /// The sum of numFPDestRegs(), numIntDestRegs(), numVecDestRegs() and
-    /// numVecelemDestRegs() equals numDestRegs().  The former two functions
-    /// are used to track physical register usage for machines with separate
-    /// int & FP reg files, the next two is for machines with vector register
-    /// file.
+    /// The sum of numFPDestRegs(), numIntDestRegs(), numVecDestRegs(),
+    /// numVecElemDestRegs() and numVecPredDestRegs() equals numDestRegs().
+    /// The first two functions are used to track physical register usage for
+    /// machines with separate int & FP reg files, the next three are for
+    /// machines with vector and predicate register files.
     //@{
     /// Number of source registers.
     int8_t numSrcRegs()  const { return _numSrcRegs; }
@@ -128,6 +141,8 @@ class StaticInst : public RefCounted, public StaticInstFlags
     int8_t numVecDestRegs() const { return _numVecDestRegs; }
     /// Number of vector element destination regs.
     int8_t numVecElemDestRegs() const { return _numVecElemDestRegs; }
+    /// Number of vector predicate destination regs.
+    int8_t numVecPredDestRegs() const { return _numVecPredDestRegs; }
     /// Number of coprocesor destination regs.
     int8_t numCCDestRegs() const { return _numCCDestRegs; }
     //@}
@@ -248,8 +263,8 @@ class StaticInst : public RefCounted, public StaticInstFlags
     StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass)
         : _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0),
           _numFPDestRegs(0), _numIntDestRegs(0), _numCCDestRegs(0),
-          _numVecDestRegs(0), _numVecElemDestRegs(0), machInst(_machInst),
-          mnemonic(_mnemonic), cachedDisassembly(0)
+          _numVecDestRegs(0), _numVecElemDestRegs(0), _numVecPredDestRegs(0),
+          machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0)
     { }
 
   public:
diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc
index 2d907a055..7597dbfb2 100644
--- a/src/cpu/thread_context.cc
+++ b/src/cpu/thread_context.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2016 ARM Limited
+ * Copyright (c) 2012, 2016-2017 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -43,6 +43,7 @@
 
 #include "cpu/thread_context.hh"
 
+#include "arch/generic/vec_pred_reg.hh"
 #include "arch/kernel_stats.hh"
 #include "base/logging.hh"
 #include "base/trace.hh"
@@ -86,6 +87,17 @@ ThreadContext::compare(ThreadContext *one, ThreadContext *two)
             panic("Vec reg idx %d doesn't match, one: %#x, two: %#x",
                   i, t1, t2);
     }
+
+    // Then loop through the predicate registers.
+    for (int i = 0; i < TheISA::NumVecPredRegs; ++i) {
+        RegId rid(VecPredRegClass, i);
+        const TheISA::VecPredRegContainer& t1 = one->readVecPredReg(rid);
+        const TheISA::VecPredRegContainer& t2 = two->readVecPredReg(rid);
+        if (t1 != t2)
+            panic("Pred reg idx %d doesn't match, one: %#x, two: %#x",
+                  i, t1, t2);
+    }
+
     for (int i = 0; i < TheISA::NumMiscRegs; ++i) {
         RegVal t1 = one->readMiscRegNoEffect(i);
         RegVal t2 = two->readMiscRegNoEffect(i);
@@ -168,6 +180,12 @@ serialize(ThreadContext &tc, CheckpointOut &cp)
     }
     SERIALIZE_CONTAINER(vecRegs);
 
+    std::vector<TheISA::VecPredRegContainer> vecPredRegs(NumVecPredRegs);
+    for (int i = 0; i < NumVecPredRegs; ++i) {
+        vecPredRegs[i] = tc.readVecPredRegFlat(i);
+    }
+    SERIALIZE_CONTAINER(vecPredRegs);
+
     RegVal intRegs[NumIntRegs];
     for (int i = 0; i < NumIntRegs; ++i)
         intRegs[i] = tc.readIntRegFlat(i);
@@ -203,6 +221,12 @@ unserialize(ThreadContext &tc, CheckpointIn &cp)
         tc.setVecRegFlat(i, vecRegs[i]);
     }
 
+    std::vector<TheISA::VecPredRegContainer> vecPredRegs(NumVecPredRegs);
+    UNSERIALIZE_CONTAINER(vecPredRegs);
+    for (int i = 0; i < NumVecPredRegs; ++i) {
+        tc.setVecPredRegFlat(i, vecPredRegs[i]);
+    }
+
     RegVal intRegs[NumIntRegs];
     UNSERIALIZE_ARRAY(intRegs, NumIntRegs);
     for (int i = 0; i < NumIntRegs; ++i)
diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh
index cad073b4f..6dde68650 100644
--- a/src/cpu/thread_context.hh
+++ b/src/cpu/thread_context.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012, 2016 ARM Limited
+ * Copyright (c) 2011-2012, 2016-2018 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -98,6 +98,8 @@ class ThreadContext
     typedef TheISA::CCReg CCReg;
     using VecRegContainer = TheISA::VecRegContainer;
     using VecElem = TheISA::VecElem;
+    using VecPredRegContainer = TheISA::VecPredRegContainer;
+
   public:
 
     enum Status
@@ -242,6 +244,10 @@ class ThreadContext
 
     virtual const VecElem& readVecElem(const RegId& reg) const = 0;
 
+    virtual const VecPredRegContainer& readVecPredReg(const RegId& reg)
+        const = 0;
+    virtual VecPredRegContainer& getWritableVecPredReg(const RegId& reg) = 0;
+
     virtual CCReg readCCReg(int reg_idx) = 0;
 
     virtual void setIntReg(int reg_idx, RegVal val) = 0;
@@ -252,6 +258,9 @@ class ThreadContext
 
     virtual void setVecElem(const RegId& reg, const VecElem& val) = 0;
 
+    virtual void setVecPredReg(const RegId& reg,
+                               const VecPredRegContainer& val) = 0;
+
     virtual void setCCReg(int reg_idx, CCReg val) = 0;
 
     virtual TheISA::PCState pcState() = 0;
@@ -341,6 +350,11 @@ class ThreadContext
     virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx,
                                 const VecElem& val) = 0;
 
+    virtual const VecPredRegContainer& readVecPredRegFlat(int idx) const = 0;
+    virtual VecPredRegContainer& getWritableVecPredRegFlat(int idx) = 0;
+    virtual void setVecPredRegFlat(int idx,
+                                   const VecPredRegContainer& val) = 0;
+
     virtual CCReg readCCRegFlat(int idx) = 0;
     virtual void setCCRegFlat(int idx, CCReg val) = 0;
     /** @} */
@@ -502,6 +516,12 @@ class ProxyThreadContext : public ThreadContext
     const VecElem& readVecElem(const RegId& reg) const
     { return actualTC->readVecElem(reg); }
 
+    const VecPredRegContainer& readVecPredReg(const RegId& reg) const
+    { return actualTC->readVecPredReg(reg); }
+
+    VecPredRegContainer& getWritableVecPredReg(const RegId& reg)
+    { return actualTC->getWritableVecPredReg(reg); }
+
     CCReg readCCReg(int reg_idx)
     { return actualTC->readCCReg(reg_idx); }
 
@@ -514,6 +534,9 @@ class ProxyThreadContext : public ThreadContext
     void setVecReg(const RegId& reg, const VecRegContainer& val)
     { actualTC->setVecReg(reg, val); }
 
+    void setVecPredReg(const RegId& reg, const VecPredRegContainer& val)
+    { actualTC->setVecPredReg(reg, val); }
+
     void setVecElem(const RegId& reg, const VecElem& val)
     { actualTC->setVecElem(reg, val); }
 
@@ -590,6 +613,15 @@ class ProxyThreadContext : public ThreadContext
                         const VecElem& val)
     { actualTC->setVecElemFlat(id, elemIndex, val); }
 
+    const VecPredRegContainer& readVecPredRegFlat(int id) const
+    { return actualTC->readVecPredRegFlat(id); }
+
+    VecPredRegContainer& getWritableVecPredRegFlat(int id)
+    { return actualTC->getWritableVecPredRegFlat(id); }
+
+    void setVecPredRegFlat(int idx, const VecPredRegContainer& val)
+    { actualTC->setVecPredRegFlat(idx, val); }
+
     CCReg readCCRegFlat(int idx)
     { return actualTC->readCCRegFlat(idx); }
 
diff --git a/src/sim/insttracer.hh b/src/sim/insttracer.hh
index d57f5a04d..c1efd2118 100644
--- a/src/sim/insttracer.hh
+++ b/src/sim/insttracer.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 ARM Limited
+ * Copyright (c) 2014, 2017 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -44,6 +44,8 @@
 #ifndef __INSTRECORD_HH__
 #define __INSTRECORD_HH__
 
+#include "arch/generic/vec_pred_reg.hh"
+#include "arch/generic/vec_reg.hh"
 #include "base/types.hh"
 #include "cpu/inst_seq.hh"
 #include "cpu/static_inst.hh"
@@ -95,6 +97,9 @@ class InstRecord
     union {
         uint64_t as_int;
         double as_double;
+        ::VecRegContainer<TheISA::VecRegSizeBytes>* as_vec;
+        ::VecPredRegContainer<TheISA::VecPredRegSizeBits,
+                              TheISA::VecPredRegHasPackedRepr>* as_pred;
     } data;
 
     /** @defgroup fetch_seq
@@ -118,7 +123,9 @@ class InstRecord
         DataInt16 = 2,
         DataInt32 = 4,
         DataInt64 = 8,
-        DataDouble = 3
+        DataDouble = 3,
+        DataVec = 5,
+        DataVecPred = 6
     } data_status;
 
     /** @ingroup memory
@@ -150,7 +157,16 @@ class InstRecord
         fetch_seq_valid(false), cp_seq_valid(false), predicate(true)
     { }
 
-    virtual ~InstRecord() { }
+    virtual ~InstRecord()
+    {
+        if (data_status == DataVec) {
+            assert(data.as_vec);
+            delete data.as_vec;
+        } else if (data_status == DataVecPred) {
+            assert(data.as_pred);
+            delete data.as_pred;
+        }
+    }
 
     void setWhen(Tick new_when) { when = new_when; }
     void setMem(Addr a, Addr s, unsigned f)
@@ -181,6 +197,22 @@ class InstRecord
 
     void setData(double d) { data.as_double = d; data_status = DataDouble; }
 
+    void
+    setData(const ::VecRegContainer<TheISA::VecRegSizeBytes>& d)
+    {
+        data.as_vec = new ::VecRegContainer<TheISA::VecRegSizeBytes>(d);
+        data_status = DataVec;
+    }
+
+    void
+    setData(const ::VecPredRegContainer<TheISA::VecPredRegSizeBits,
+                                        TheISA::VecPredRegHasPackedRepr>& d)
+    {
+        data.as_pred = new ::VecPredRegContainer<
+            TheISA::VecPredRegSizeBits, TheISA::VecPredRegHasPackedRepr>(d);
+        data_status = DataVecPred;
+    }
+
     void setFetchSeq(InstSeqNum seq)
     { fetch_seq = seq; fetch_seq_valid = true; }