diff options
author | Gabor Dozsa <gabor.dozsa@arm.com> | 2019-06-11 11:47:26 +0100 |
---|---|---|
committer | Giacomo Travaglini <giacomo.travaglini@arm.com> | 2019-07-18 15:09:22 +0000 |
commit | 9130f5427d7009c4f40e0097b79b4972430a27c3 (patch) | |
tree | 98f39295d24a16637c2fba641ef66b4a0741663a /src/arch/arm | |
parent | ddd3f43f8a590cd287cd3449ea6e49bc48dad06a (diff) | |
download | gem5-9130f5427d7009c4f40e0097b79b4972430a27c3.tar.xz |
arch-arm: Add first-/non-faulting load instructions
First-/non-faulting loads are part of Arm SVE.
Change-Id: I93dfd6d1d74791653927e99098ddb651150a8ef7
Signed-off-by: Gabor Dozsa <gabor.dozsa@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/19177
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Diffstat (limited to 'src/arch/arm')
-rw-r--r-- | src/arch/arm/faults.cc | 32 | ||||
-rw-r--r-- | src/arch/arm/faults.hh | 16 | ||||
-rw-r--r-- | src/arch/arm/insts/sve_macromem.hh | 45 | ||||
-rw-r--r-- | src/arch/arm/isa/formats/sve_2nd_level.isa | 75 | ||||
-rw-r--r-- | src/arch/arm/isa/insts/sve_mem.isa | 353 | ||||
-rw-r--r-- | src/arch/arm/isa/operands.isa | 2 | ||||
-rw-r--r-- | src/arch/arm/isa/templates/sve_mem.isa | 170 | ||||
-rw-r--r-- | src/arch/arm/registers.hh | 5 |
8 files changed, 514 insertions, 184 deletions
diff --git a/src/arch/arm/faults.cc b/src/arch/arm/faults.cc index 94374714b..a03c91703 100644 --- a/src/arch/arm/faults.cc +++ b/src/arch/arm/faults.cc @@ -1216,6 +1216,14 @@ AbortFault<T>::isMMUFault() const (source < ArmFault::PermissionLL + 4)); } +template<class T> +bool +AbortFault<T>::getFaultVAddr(Addr &va) const +{ + va = (stage2 ? OVAddr : faultAddr); + return true; +} + ExceptionClass PrefetchAbort::ec(ThreadContext *tc) const { @@ -1618,5 +1626,29 @@ template class AbortFault<VirtualDataAbort>; IllegalInstSetStateFault::IllegalInstSetStateFault() {} +bool +getFaultVAddr(Fault fault, Addr &va) +{ + auto arm_fault = dynamic_cast<ArmFault *>(fault.get()); + + if (arm_fault) { + return arm_fault->getFaultVAddr(va); + } else { + auto pgt_fault = dynamic_cast<GenericPageTableFault *>(fault.get()); + if (pgt_fault) { + va = pgt_fault->getFaultVAddr(); + return true; + } + + auto align_fault = dynamic_cast<GenericAlignmentFault *>(fault.get()); + if (align_fault) { + va = align_fault->getFaultVAddr(); + return true; + } + + // Return false since it's not an address triggered exception + return false; + } +} } // namespace ArmISA diff --git a/src/arch/arm/faults.hh b/src/arch/arm/faults.hh index e04a0dcc6..d14983d28 100644 --- a/src/arch/arm/faults.hh +++ b/src/arch/arm/faults.hh @@ -234,6 +234,8 @@ class ArmFault : public FaultBase virtual bool isStage2() const { return false; } virtual FSR getFsr(ThreadContext *tc) const { return 0; } virtual void setSyndrome(ThreadContext *tc, MiscRegIndex syndrome_reg); + virtual bool getFaultVAddr(Addr &va) const { return false; } + }; template<typename T> @@ -435,6 +437,8 @@ class AbortFault : public ArmFaultVals<T> stage2(_stage2), s1ptw(false), tranMethod(_tranMethod) {} + bool getFaultVAddr(Addr &va) const override; + void invoke(ThreadContext *tc, const StaticInstPtr &inst = StaticInst::nullStaticInstPtr) override; @@ -625,6 +629,18 @@ template<> ArmFault::FaultVals ArmFaultVals<SystemError>::vals; template<> 
ArmFault::FaultVals ArmFaultVals<SoftwareBreakpoint>::vals; template<> ArmFault::FaultVals ArmFaultVals<ArmSev>::vals; +/** + * Returns true if the fault passed as a first argument was triggered + * by a memory access, false otherwise. + * If true it is storing the faulting address in the va argument + * + * @param fault generated fault + * @param va function will modify this passed-by-reference parameter + * with the correct faulting virtual address + * @return true if va contains a valid value, false otherwise + */ +bool getFaultVAddr(Fault fault, Addr &va); + } // namespace ArmISA diff --git a/src/arch/arm/insts/sve_macromem.hh b/src/arch/arm/insts/sve_macromem.hh index a31af9b92..b365dcb4b 100644 --- a/src/arch/arm/insts/sve_macromem.hh +++ b/src/arch/arm/insts/sve_macromem.hh @@ -46,7 +46,8 @@ namespace ArmISA { template <typename RegElemType, typename MemElemType, - template <typename, typename> class MicroopType> + template <typename, typename> class MicroopType, + template <typename> class FirstFaultWritebackMicroopType> class SveIndexedMemVI : public PredMacroOp { protected: @@ -58,17 +59,22 @@ class SveIndexedMemVI : public PredMacroOp public: SveIndexedMemVI(const char *mnem, ExtMachInst machInst, OpClass __opClass, IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base, - uint64_t _imm) + uint64_t _imm, bool firstFault) : PredMacroOp(mnem, machInst, __opClass), dest(_dest), gp(_gp), base(_base), imm(_imm) { bool isLoad = (__opClass == MemReadOp); + assert(!firstFault || isLoad); int num_elems = ((machInst.sveLen + 1) * 16) / sizeof(RegElemType); numMicroops = num_elems; if (isLoad) { - numMicroops++; + if (firstFault) { + numMicroops += 2; + } else { + numMicroops++; + } } microOps = new StaticInstPtr[numMicroops]; @@ -90,10 +96,16 @@ class SveIndexedMemVI : public PredMacroOp *uop = new MicroopType<RegElemType, MemElemType>( mnem, machInst, __opClass, _dest, _gp, isLoad ? 
(IntRegIndex) VECREG_UREG0 : _base, _imm, i, - num_elems); + num_elems, firstFault); + } + + if (firstFault) { + *uop = new FirstFaultWritebackMicroopType<RegElemType>( + mnem, machInst, __opClass, num_elems, this); + } else { + --uop; } - --uop; (*uop)->setLastMicroop(); microOps[0]->setFirstMicroop(); @@ -130,7 +142,8 @@ class SveIndexedMemVI : public PredMacroOp }; template <typename RegElemType, typename MemElemType, - template <typename, typename> class MicroopType> + template <typename, typename> class MicroopType, + template <typename> class FirstFaultWritebackMicroopType> class SveIndexedMemSV : public PredMacroOp { protected: @@ -147,19 +160,25 @@ class SveIndexedMemSV : public PredMacroOp SveIndexedMemSV(const char *mnem, ExtMachInst machInst, OpClass __opClass, IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base, IntRegIndex _offset, bool _offsetIs32, - bool _offsetIsSigned, bool _offsetIsScaled) + bool _offsetIsSigned, bool _offsetIsScaled, + bool firstFault) : PredMacroOp(mnem, machInst, __opClass), dest(_dest), gp(_gp), base(_base), offset(_offset), offsetIs32(_offsetIs32), offsetIsSigned(_offsetIsSigned), offsetIsScaled(_offsetIsScaled) { bool isLoad = (__opClass == MemReadOp); + assert(!firstFault || isLoad); int num_elems = ((machInst.sveLen + 1) * 16) / sizeof(RegElemType); numMicroops = num_elems; if (isLoad) { - numMicroops++; + if (firstFault) { + numMicroops += 2; + } else { + numMicroops++; + } } microOps = new StaticInstPtr[numMicroops]; @@ -181,10 +200,16 @@ class SveIndexedMemSV : public PredMacroOp *uop = new MicroopType<RegElemType, MemElemType>( mnem, machInst, __opClass, _dest, _gp, _base, isLoad ? 
(IntRegIndex) VECREG_UREG0 : _offset, _offsetIs32, - _offsetIsSigned, _offsetIsScaled, i, num_elems); + _offsetIsSigned, _offsetIsScaled, i, num_elems, firstFault); + } + + if (firstFault) { + *uop = new FirstFaultWritebackMicroopType<RegElemType>( + mnem, machInst, __opClass, num_elems, this); + } else { + --uop; } - --uop; (*uop)->setLastMicroop(); microOps[0]->setFirstMicroop(); diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa index d4e75285b..69d80e294 100644 --- a/src/arch/arm/isa/formats/sve_2nd_level.isa +++ b/src/arch/arm/isa/formats/sve_2nd_level.isa @@ -2921,9 +2921,6 @@ namespace Aarch64 uint8_t dtype = (bits(machInst, 24, 23) << 1) | bits(machInst, 14); uint8_t ff = bits(machInst, 13); - if (ff) { - return new Unknown64(machInst); - } return decodeSveGatherLoadVIInsts( dtype, machInst, zt, pg, zn, imm, true, ff); } else { @@ -2952,9 +2949,6 @@ namespace Aarch64 bits(machInst, 14); uint8_t xs = bits(machInst, 22); uint8_t ff = bits(machInst, 13); - if (ff) { - return new Unknown64(machInst); - } return decodeSveGatherLoadSVInsts( dtype, machInst, zt, pg, rn, zm, true, true, xs, false, ff); @@ -2980,19 +2974,20 @@ namespace Aarch64 bits(machInst, 12, 10); uint8_t xs = bits(machInst, 22); uint8_t ff = bits(machInst, 13); - if (ff) { - return new Unknown64(machInst); - } if (bits(machInst, 14)) { - return new SveIndexedMemSV<uint32_t, uint16_t, - SveGatherLoadSVMicroop>( - "ld1", machInst, MemReadOp, zt, pg, rn, zm, - true, xs, true); + return + new SveIndexedMemSV<uint32_t, uint16_t, + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( + ff ? "ldff1" : "ld1", machInst, MemReadOp, zt, pg, + rn, zm, true, xs, true, ff); } else { - return new SveIndexedMemSV<int32_t, int16_t, - SveGatherLoadSVMicroop>( - "ld1", machInst, MemReadOp, zt, pg, rn, zm, - true, xs, true); + return + new SveIndexedMemSV<int32_t, int16_t, + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( + ff ? 
"ldff1" : "ld1", machInst, MemReadOp, zt, pg, + rn, zm, true, xs, true, ff); } } break; @@ -3010,13 +3005,11 @@ namespace Aarch64 bits(machInst, 12, 10); uint8_t xs = bits(machInst, 22); uint8_t ff = bits(machInst, 13); - if (ff) { - return new Unknown64(machInst); - } return new SveIndexedMemSV<uint32_t, uint32_t, - SveGatherLoadSVMicroop>( - "ld1", machInst, MemReadOp, zt, pg, rn, zm, - true, xs, true); + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( + ff ? "ldff1" : "ld1", machInst, MemReadOp, zt, pg, rn, + zm, true, xs, true, ff); } break; case 0x3: @@ -3083,7 +3076,18 @@ namespace Aarch64 StaticInstPtr decodeSveContigFFLoadSS(ExtMachInst machInst) { - return new Unknown64(machInst); + IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = makeSP((IntRegIndex) (uint8_t) bits(machInst, 9, 5)); + IntRegIndex rm = makeSP( + (IntRegIndex) (uint8_t) bits(machInst, 20, 16)); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + + if (rm == 0x1f) { + return new Unknown64(machInst); + } + + return decodeSveContigLoadSSInsts<SveContigFFLoadSS>( + bits(machInst, 24, 21), machInst, zt, pg, rn, rm, true); } // decodeSveContigFFLoadSS StaticInstPtr @@ -3101,7 +3105,13 @@ namespace Aarch64 StaticInstPtr decodeSveContigNFLoadSI(ExtMachInst machInst) { - return new Unknown64(machInst); + IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = makeSP((IntRegIndex) (uint8_t) bits(machInst, 9, 5)); + uint64_t imm = sext<4>(bits(machInst, 19, 16)); + IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10); + + return decodeSveContigLoadSIInsts<SveContigNFLoadSI>( + bits(machInst, 24, 21), machInst, zt, pg, rn, imm, true); } // decodeSveContigNFLoadSI StaticInstPtr @@ -3186,9 +3196,6 @@ namespace Aarch64 bits(machInst, 14); uint8_t xs = bits(machInst, 22); uint8_t ff = bits(machInst, 13); - if (ff) { - return new Unknown64(machInst); - } return decodeSveGatherLoadSVInsts( dtype, machInst, zt, 
pg, rn, zm, false, true, xs, false, ff); @@ -3205,9 +3212,6 @@ namespace Aarch64 uint8_t dtype = (bits(machInst, 24, 23) << 1) | bits(machInst, 14); uint8_t ff = bits(machInst, 13); - if (ff) { - return new Unknown64(machInst); - } return decodeSveGatherLoadSVInsts( dtype, machInst, zt, pg, rn, zm, false, false, false, false, ff); @@ -3232,9 +3236,6 @@ namespace Aarch64 bits(machInst, 14); uint8_t xs = bits(machInst, 22); uint8_t ff = bits(machInst, 13); - if (ff) { - return new Unknown64(machInst); - } return decodeSveGatherLoadSVInsts( dtype, machInst, zt, pg, rn, zm, false, true, xs, true, ff); @@ -3255,9 +3256,6 @@ namespace Aarch64 uint8_t dtype = (bits(machInst, 24, 23) << 1) | bits(machInst, 14); uint8_t ff = bits(machInst, 13); - if (ff) { - return new Unknown64(machInst); - } return decodeSveGatherLoadVIInsts( dtype, machInst, zt, pg, zn, imm, false, ff); } else { @@ -3275,9 +3273,6 @@ namespace Aarch64 uint8_t dtype = (bits(machInst, 24, 23) << 1) | bits(machInst, 14); uint8_t ff = bits(machInst, 13); - if (ff) { - return new Unknown64(machInst); - } return decodeSveGatherLoadSVInsts( dtype, machInst, zt, pg, rn, zm, false, false, false, true, ff); diff --git a/src/arch/arm/isa/insts/sve_mem.isa b/src/arch/arm/isa/insts/sve_mem.isa index 3102e800a..e776deb59 100644 --- a/src/arch/arm/isa/insts/sve_mem.isa +++ b/src/arch/arm/isa/insts/sve_mem.isa @@ -89,13 +89,11 @@ output header {{ StaticInstPtr decodeSveContigLoadSIInsts(uint8_t dtype, ExtMachInst machInst, IntRegIndex zt, IntRegIndex pg, IntRegIndex rn, - uint64_t imm, bool firstFaulting, + uint64_t imm, bool nonFaulting, bool replicate = false) { - assert(!(replicate && firstFaulting)); - - const char* mn = replicate ? "ld1r" : - (firstFaulting ? "ldff1" : "ld1"); + assert(!(nonFaulting && replicate)); + const char* mn = replicate ? "ld1r" : (nonFaulting ? 
"ldnf1" : "ld1"); switch (dtype) { case 0x0: return new Base<uint8_t, uint8_t>(mn, machInst, zt, pg, rn, imm); @@ -210,75 +208,87 @@ output decoder {{ decodeSveGatherLoadVIInsts(uint8_t dtype, ExtMachInst machInst, IntRegIndex zt, IntRegIndex pg, IntRegIndex zn, uint64_t imm, bool esizeIs32, - bool firstFaulting) + bool firstFault) { - const char* mn = firstFaulting ? "ldff1" : "ld1"; + const char* mn = firstFault ? "ldff1" : "ld1"; switch (dtype) { case 0x0: if (esizeIs32) { return new SveIndexedMemVI<int32_t, int8_t, - SveGatherLoadVIMicroop>( - mn, machInst, MemReadOp, zt, pg, zn, imm); + SveGatherLoadVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemReadOp, zt, pg, zn, imm, firstFault); } else { return new SveIndexedMemVI<int64_t, int8_t, - SveGatherLoadVIMicroop>( - mn, machInst, MemReadOp, zt, pg, zn, imm); + SveGatherLoadVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemReadOp, zt, pg, zn, imm, firstFault); } case 0x1: if (esizeIs32) { return new SveIndexedMemVI<uint32_t, uint8_t, - SveGatherLoadVIMicroop>( - mn, machInst, MemReadOp, zt, pg, zn, imm); + SveGatherLoadVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemReadOp, zt, pg, zn, imm, firstFault); } else { return new SveIndexedMemVI<uint64_t, uint8_t, - SveGatherLoadVIMicroop>( - mn, machInst, MemReadOp, zt, pg, zn, imm); + SveGatherLoadVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemReadOp, zt, pg, zn, imm, firstFault); } case 0x2: if (esizeIs32) { return new SveIndexedMemVI<int32_t, int16_t, - SveGatherLoadVIMicroop>( - mn, machInst, MemReadOp, zt, pg, zn, imm); + SveGatherLoadVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemReadOp, zt, pg, zn, imm, firstFault); } else { return new SveIndexedMemVI<int64_t, int16_t, - SveGatherLoadVIMicroop>( - mn, machInst, MemReadOp, zt, pg, zn, imm); + SveGatherLoadVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemReadOp, zt, pg, zn, imm, firstFault); } case 0x3: if (esizeIs32) { 
return new SveIndexedMemVI<uint32_t, uint16_t, - SveGatherLoadVIMicroop>( - mn, machInst, MemReadOp, zt, pg, zn, imm); + SveGatherLoadVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemReadOp, zt, pg, zn, imm, firstFault); } else { return new SveIndexedMemVI<uint64_t, uint16_t, - SveGatherLoadVIMicroop>( - mn, machInst, MemReadOp, zt, pg, zn, imm); + SveGatherLoadVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemReadOp, zt, pg, zn, imm, firstFault); } case 0x4: if (esizeIs32) { break; } else { return new SveIndexedMemVI<int64_t, int32_t, - SveGatherLoadVIMicroop>( - mn, machInst, MemReadOp, zt, pg, zn, imm); + SveGatherLoadVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemReadOp, zt, pg, zn, imm, firstFault); } case 0x5: if (esizeIs32) { return new SveIndexedMemVI<uint32_t, uint32_t, - SveGatherLoadVIMicroop>( - mn, machInst, MemReadOp, zt, pg, zn, imm); + SveGatherLoadVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemReadOp, zt, pg, zn, imm, firstFault); } else { return new SveIndexedMemVI<uint64_t, uint32_t, - SveGatherLoadVIMicroop>( - mn, machInst, MemReadOp, zt, pg, zn, imm); + SveGatherLoadVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemReadOp, zt, pg, zn, imm, firstFault); } case 0x7: if (esizeIs32) { break; } else { return new SveIndexedMemVI<uint64_t, uint64_t, - SveGatherLoadVIMicroop>( - mn, machInst, MemReadOp, zt, pg, zn, imm); + SveGatherLoadVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemReadOp, zt, pg, zn, imm, firstFault); } } return new Unknown64(machInst); @@ -289,87 +299,99 @@ output decoder {{ IntRegIndex zt, IntRegIndex pg, IntRegIndex rn, IntRegIndex zm, bool esizeIs32, bool offsetIs32, bool offsetIsSigned, bool offsetIsScaled, - bool firstFaulting) + bool firstFault) { - const char* mn = firstFaulting ? "ldff1" : "ld1"; + const char* mn = firstFault ? 
"ldff1" : "ld1"; switch (dtype) { case 0x0: if (esizeIs32) { return new SveIndexedMemSV<int32_t, int8_t, - SveGatherLoadSVMicroop>( + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemReadOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, firstFault); } else { return new SveIndexedMemSV<int64_t, int8_t, - SveGatherLoadSVMicroop>( + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemReadOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, firstFault); } case 0x1: if (esizeIs32) { return new SveIndexedMemSV<uint32_t, uint8_t, - SveGatherLoadSVMicroop>( + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemReadOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, firstFault); } else { return new SveIndexedMemSV<uint64_t, uint8_t, - SveGatherLoadSVMicroop>( + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemReadOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, firstFault); } case 0x2: if (esizeIs32) { return new SveIndexedMemSV<int32_t, int16_t, - SveGatherLoadSVMicroop>( + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemReadOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, firstFault); } else { return new SveIndexedMemSV<int64_t, int16_t, - SveGatherLoadSVMicroop>( + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemReadOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, firstFault); } case 0x3: if (esizeIs32) { return new SveIndexedMemSV<uint32_t, uint16_t, - SveGatherLoadSVMicroop>( + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, 
MemReadOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, firstFault); } else { return new SveIndexedMemSV<uint64_t, uint16_t, - SveGatherLoadSVMicroop>( + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemReadOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, firstFault); } case 0x4: if (esizeIs32) { break; } else { return new SveIndexedMemSV<int64_t, int32_t, - SveGatherLoadSVMicroop>( + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemReadOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, firstFault); } case 0x5: if (esizeIs32) { return new SveIndexedMemSV<uint32_t, uint32_t, - SveGatherLoadSVMicroop>( + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemReadOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, firstFault); } else { return new SveIndexedMemSV<uint64_t, uint32_t, - SveGatherLoadSVMicroop>( + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemReadOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, firstFault); } case 0x7: if (esizeIs32) { break; } else { return new SveIndexedMemSV<uint64_t, uint64_t, - SveGatherLoadSVMicroop>( + SveGatherLoadSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemReadOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, firstFault); } } return new Unknown64(machInst); @@ -386,40 +408,47 @@ output decoder {{ case 0x0: if (esizeIs32) { return new SveIndexedMemVI<uint32_t, uint8_t, - SveScatterStoreVIMicroop>( - mn, machInst, MemWriteOp, zt, pg, zn, imm); + SveScatterStoreVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemWriteOp, zt, pg, zn, 
imm, false); } else { return new SveIndexedMemVI<uint64_t, uint8_t, - SveScatterStoreVIMicroop>( - mn, machInst, MemWriteOp, zt, pg, zn, imm); + SveScatterStoreVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemWriteOp, zt, pg, zn, imm, false); } case 0x1: if (esizeIs32) { return new SveIndexedMemVI<uint32_t, uint16_t, - SveScatterStoreVIMicroop>( - mn, machInst, MemWriteOp, zt, pg, zn, imm); + SveScatterStoreVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemWriteOp, zt, pg, zn, imm, false); } else { return new SveIndexedMemVI<uint64_t, uint16_t, - SveScatterStoreVIMicroop>( - mn, machInst, MemWriteOp, zt, pg, zn, imm); + SveScatterStoreVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemWriteOp, zt, pg, zn, imm, false); } case 0x2: if (esizeIs32) { return new SveIndexedMemVI<uint32_t, uint32_t, - SveScatterStoreVIMicroop>( - mn, machInst, MemWriteOp, zt, pg, zn, imm); + SveScatterStoreVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemWriteOp, zt, pg, zn, imm, false); } else { return new SveIndexedMemVI<uint64_t, uint32_t, - SveScatterStoreVIMicroop>( - mn, machInst, MemWriteOp, zt, pg, zn, imm); + SveScatterStoreVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemWriteOp, zt, pg, zn, imm, false); } case 0x3: if (esizeIs32) { break; } else { return new SveIndexedMemVI<uint64_t, uint64_t, - SveScatterStoreVIMicroop>( - mn, machInst, MemWriteOp, zt, pg, zn, imm); + SveScatterStoreVIMicroop, + SveFirstFaultWritebackMicroop>( + mn, machInst, MemWriteOp, zt, pg, zn, imm, false); } } return new Unknown64(machInst); @@ -437,47 +466,54 @@ output decoder {{ case 0x0: if (esizeIs32) { return new SveIndexedMemSV<uint32_t, uint8_t, - SveScatterStoreSVMicroop>( + SveScatterStoreSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemWriteOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, false); } else { return new SveIndexedMemSV<uint64_t, 
uint8_t, - SveScatterStoreSVMicroop>( + SveScatterStoreSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemWriteOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, false); } case 0x1: if (esizeIs32) { return new SveIndexedMemSV<uint32_t, uint16_t, - SveScatterStoreSVMicroop>( + SveScatterStoreSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemWriteOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, false); } else { return new SveIndexedMemSV<uint64_t, uint16_t, - SveScatterStoreSVMicroop>( + SveScatterStoreSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemWriteOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, false); } case 0x2: if (esizeIs32) { return new SveIndexedMemSV<uint32_t, uint32_t, - SveScatterStoreSVMicroop>( + SveScatterStoreSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemWriteOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, false); } else { return new SveIndexedMemSV<uint64_t, uint32_t, - SveScatterStoreSVMicroop>( + SveScatterStoreSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemWriteOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, false); } case 0x3: if (esizeIs32) { break; } else { return new SveIndexedMemSV<uint64_t, uint64_t, - SveScatterStoreSVMicroop>( + SveScatterStoreSVMicroop, + SveFirstFaultWritebackMicroop>( mn, machInst, MemWriteOp, zt, pg, rn, zm, - offsetIs32, offsetIsSigned, offsetIsScaled); + offsetIs32, offsetIsSigned, offsetIsScaled, false); } } return new Unknown64(machInst); @@ -505,6 +541,9 @@ let {{ int memAccessSize = %(memacc_size)s; EA = XBase + ((int64_t) imm * %(memacc_size)s)''' % { 'memacc_size': 'eCount / 8' if isPred else 'eCount'} + loadRdEnableCode = 
''' + auto rdEn = std::vector<bool>(); + ''' if isPred: loadMemAccCode = ''' int index = 0; @@ -551,6 +590,8 @@ let {{ 'tpl_args': '', 'memacc_code': loadMemAccCode, 'ea_code' : sveEnabledCheckCode + eaCode, + 'rden_code' : loadRdEnableCode, + 'fault_code' : '', 'fa_code' : ''}, ['IsMemRef', 'IsLoad']) storeIop = InstObjParams('str', @@ -633,6 +674,11 @@ let {{ # Generates definitions for SVE contiguous loads def emitSveContigMemInsts(offsetIsImm): global header_output, exec_output, decoders + # First-faulting instructions only have a scalar plus scalar form, + # while non-faulting instructions only a scalar plus immediate form, so + # `offsetIsImm` is used to determine which class of instructions is + # generated + firstFaulting = not offsetIsImm tplHeader = 'template <class RegElemType, class MemElemType>' tplArgs = '<RegElemType, MemElemType>' eaCode = SPAlignmentCheckCode + ''' @@ -642,6 +688,16 @@ let {{ eaCode += '((int64_t) this->imm * eCount * sizeof(MemElemType))' else: eaCode += '(XOffset * sizeof(MemElemType));' + loadRdEnableCode = ''' + auto rdEn = std::vector<bool>(sizeof(MemElemType) * eCount, true); + for (int i = 0; i < eCount; i++) { + if (!GpOp_x[i]) { + for (int j = 0; j < sizeof(MemElemType); j++) { + rdEn[sizeof(MemElemType) * i + j] = false; + } + } + } + ''' loadMemAccCode = ''' for (int i = 0; i < eCount; i++) { if (GpOp_x[i]) { @@ -666,13 +722,60 @@ let {{ storeWrEnableCode = ''' auto wrEn = std::vector<bool>(sizeof(MemElemType) * eCount, true); ''' + ffrReadBackCode = ''' + auto& firstFaultReg = Ffr;''' + fautlingLoadmemAccCode = ''' + for (int i = 0; i < eCount; i++) { + if (GpOp_x[i] && firstFaultReg[i * sizeof(RegElemType)]) { + AA64FpDest_x[i] = memDataView[i]; + } else { + AA64FpDest_x[i] = 0; + } + } + ''' + nonFaultingCode = 'true ||' + faultCode = ''' + Addr fault_addr; + if (fault == NoFault || getFaultVAddr(fault, fault_addr)) { + unsigned fault_elem_index; + if (fault != NoFault) { + assert(fault_addr >= EA); + fault_elem_index 
= (fault_addr - EA) / sizeof(MemElemType); + } else { + fault_elem_index = eCount + 1; + } + int first_active_index; + for (first_active_index = 0; + first_active_index < eCount && !(GpOp_x[first_active_index]); + first_active_index++); + if (%s first_active_index < fault_elem_index) { + for (int i = 0; i < eCount; i++) { + for (int j = 0; j < sizeof(RegElemType); j++) { + if (i < fault_elem_index) { + Ffr_ub[i * sizeof(RegElemType) + j] = FfrAux_x[i]; + } else { + Ffr_ub[i * sizeof(RegElemType) + j] = 0; + } + } + } + fault = NoFault; + if (first_active_index >= fault_elem_index) { + // non-faulting load needs this + xc->setMemAccPredicate(false); + } + } + } + ''' % ('' if firstFaulting else nonFaultingCode) + loadIop = InstObjParams('ld1', 'SveContigLoadSI' if offsetIsImm else 'SveContigLoadSS', 'SveContigMemSI' if offsetIsImm else 'SveContigMemSS', {'tpl_header': tplHeader, 'tpl_args': tplArgs, + 'rden_code' : loadRdEnableCode, 'memacc_code': loadMemAccCode, 'ea_code' : sveEnabledCheckCode + eaCode, + 'fault_code' : '', 'fa_code' : ''}, ['IsMemRef', 'IsLoad']) storeIop = InstObjParams('st1', @@ -685,19 +788,38 @@ let {{ 'ea_code' : sveEnabledCheckCode + eaCode, 'fa_code' : ''}, ['IsMemRef', 'IsStore']) + faultIop = InstObjParams('ldff1' if firstFaulting else 'ldnf1', + 'SveContigFFLoadSS' if firstFaulting else 'SveContigNFLoadSI', + 'SveContigMemSS' if firstFaulting else 'SveContigMemSI', + {'tpl_header': tplHeader, + 'tpl_args': tplArgs, + 'rden_code' : loadRdEnableCode, + 'memacc_code': fautlingLoadmemAccCode, + 'ea_code' : sveEnabledCheckCode + eaCode, + 'fault_code' : faultCode, + 'fa_code' : ''}, + ['IsMemRef', 'IsLoad']) + faultIop.snippets['memacc_code'] = (ffrReadBackCode + + faultIop.snippets['memacc_code']) if offsetIsImm: header_output += SveContigMemSIOpDeclare.subst(loadIop) header_output += SveContigMemSIOpDeclare.subst(storeIop) + header_output += SveContigMemSIOpDeclare.subst(faultIop) else: header_output += 
SveContigMemSSOpDeclare.subst(loadIop) header_output += SveContigMemSSOpDeclare.subst(storeIop) + header_output += SveContigMemSSOpDeclare.subst(faultIop) exec_output += ( SveContigLoadExecute.subst(loadIop) + SveContigLoadInitiateAcc.subst(loadIop) + SveContigLoadCompleteAcc.subst(loadIop) + SveContigStoreExecute.subst(storeIop) + SveContigStoreInitiateAcc.subst(storeIop) + - SveContigStoreCompleteAcc.subst(storeIop)) + SveContigStoreCompleteAcc.subst(storeIop) + + SveContigLoadExecute.subst(faultIop) + + SveContigLoadInitiateAcc.subst(faultIop) + + SveContigLoadCompleteAcc.subst(faultIop)) + for args in loadTplArgs: substDict = {'tpl_args': '<%s>' % ', '.join(args), 'class_name': 'SveContigLoadSI' if offsetIsImm @@ -708,6 +830,12 @@ let {{ 'class_name': 'SveContigStoreSI' if offsetIsImm else 'SveContigStoreSS'} exec_output += SveContigMemExecDeclare.subst(substDict) + for args in loadTplArgs: + substDict = {'tpl_args': '<%s>' % ', '.join(args), + 'class_name': 'SveContigFFLoadSS' if firstFaulting + else 'SveContigNFLoadSI'} + exec_output += SveContigMemExecDeclare.subst(substDict) + # Generates definitions for SVE load-and-replicate instructions def emitSveLoadAndRepl(): @@ -773,16 +901,14 @@ let {{ } EA = XBase + offset''' loadMemAccCode = ''' - if (GpOp_x[elemIndex]) { - AA64FpDest_x[elemIndex] = memData; - } else { - AA64FpDest_x[elemIndex] = 0; - } + AA64FpDest_x[elemIndex] = memData; ''' storeMemAccCode = ''' memData = AA64FpDest_x[elemIndex]; ''' - predCheckCode = 'GpOp_x[elemIndex]' + predCheckCode = 'GpOp_x[index]' + faultStatusSetCode = 'PUreg0_x[elemIndex] = 1;' + faultStatusResetCode = 'PUreg0_x[elemIndex] = 0;' loadIop = InstObjParams('ld1', ('SveGatherLoadVIMicroop' if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM @@ -792,6 +918,8 @@ let {{ 'tpl_args': tplArgs, 'memacc_code': loadMemAccCode, 'ea_code' : sveEnabledCheckCode + eaCode, + 'fault_status_set_code' : faultStatusSetCode, + 'fault_status_reset_code' : faultStatusResetCode, 
'pred_check_code' : predCheckCode, 'fa_code' : ''}, ['IsMicroop', 'IsMemRef', 'IsLoad']) @@ -839,6 +967,39 @@ let {{ # TODO: this should become SveMemExecDeclare exec_output += SveContigMemExecDeclare.subst(substDict) + firstFaultTplArgs = ('int32_t', 'int64_t', 'uint32_t', 'uint64_t') + + def emitSveFirstFaultWritebackMicroop(): + global header_output, exec_output, decoders + tplHeader = 'template <class RegElemType>' + tplArgs = '<RegElemType>' + faultStatusCheckCode = 'PUreg0_x[index]' + firstFaultResetCode = ''' + for(int j = 0; j < sizeof(RegElemType); j++) { + Ffr_ub[index * sizeof(RegElemType) + j] = 0; + } + ''' + firstFaultForwardCode = ''' + for(int j = 0; j < sizeof(RegElemType); j++) { + Ffr_ub[index * sizeof(RegElemType) + j] = FfrAux_x[index]; + } + ''' + iop = InstObjParams('ldff1', + 'SveFirstFaultWritebackMicroop', + 'MicroOp', + {'tpl_header': tplHeader, + 'tpl_args': tplArgs, + 'fault_status_check_code' : faultStatusCheckCode, + 'first_fault_reset_code' : firstFaultResetCode, + 'first_fault_forward_code' : firstFaultForwardCode}, + ['IsMicroop']) + header_output += SveFirstFaultWritebackMicroopDeclare.subst(iop) + exec_output += SveFirstFaultWritebackMicroopExecute.subst(iop) + for args in firstFaultTplArgs: + substDict = {'targs': args, + 'class_name' : 'SveFirstFaultWritebackMicroop' } + exec_output += SveOpExecDeclare.subst(substDict) + # Generates definitions for the first microop of SVE gather loads, required # to propagate the source vector register to the transfer microops def emitSveGatherLoadCpySrcVecMicroop(): @@ -859,9 +1020,11 @@ let {{ # LD1[S]{B,H,W,D} (scalar plus immediate) # ST1[S]{B,H,W,D} (scalar plus immediate) + # LDNF1[S]{B,H,W,D} (scalar plus immediate) emitSveContigMemInsts(True) # LD1[S]{B,H,W,D} (scalar plus scalar) # ST1[S]{B,H,W,D} (scalar plus scalar) + # LDFF1[S]{B,H,W,D} (scalar plus scalar) emitSveContigMemInsts(False) # LD1R[S]{B,H,W,D} @@ -874,12 +1037,16 @@ let {{ # LD1[S]{B,H,W,D} (vector plus immediate) # 
ST1[S]{B,H,W,D} (vector plus immediate) + # LDFF1[S]{B,H,W,D} (scalar plus immediate) emitSveIndexedMemMicroops(IndexedAddrForm.VEC_PLUS_IMM) # LD1[S]{B,H,W,D} (scalar plus vector) # ST1[S]{B,H,W,D} (scalar plus vector) + # LDFF1[S]{B,H,W,D} (scalar plus vector) emitSveIndexedMemMicroops(IndexedAddrForm.SCA_PLUS_VEC) + # FFR writeback microop for gather loads + emitSveFirstFaultWritebackMicroop() + # Source vector copy microop for gather loads emitSveGatherLoadCpySrcVecMicroop() - }}; diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa index a3b385756..aaa64e7b0 100644 --- a/src/arch/arm/isa/operands.isa +++ b/src/arch/arm/isa/operands.isa @@ -581,6 +581,8 @@ def operands {{ 'PDest': vecPredReg('dest'), 'PDestMerge': vecPredReg('dest'), 'Ffr': vecPredReg('PREDREG_FFR'), + 'FfrAux': vecPredReg('PREDREG_FFR'), + 'PUreg0': vecPredReg('PREDREG_UREG0'), #Abstracted control reg operands 'MiscDest': cntrlReg('dest'), diff --git a/src/arch/arm/isa/templates/sve_mem.isa b/src/arch/arm/isa/templates/sve_mem.isa index 2cdf2ffd4..5e2e55333 100644 --- a/src/arch/arm/isa/templates/sve_mem.isa +++ b/src/arch/arm/isa/templates/sve_mem.isa @@ -151,13 +151,15 @@ def template SveContigLoadExecute {{ TheISA::VecRegContainer memData; auto memDataView = memData.as<MemElemType>(); - if (fault == NoFault) { - fault = xc->readMem(EA, memData.raw_ptr<uint8_t>(), memAccessSize, - this->memAccessFlags); - %(memacc_code)s; - } + %(rden_code)s; + + fault = xc->readMem(EA, memData.raw_ptr<uint8_t>(), memAccessSize, + this->memAccessFlags, rdEn); + + %(fault_code)s; if (fault == NoFault) { + %(memacc_code)s; %(op_wb)s; } @@ -178,13 +180,14 @@ def template SveContigLoadInitiateAcc {{ %(op_src_decl)s; %(op_rd)s; - %(ea_code)s; - if (fault == NoFault) { - fault = xc->initiateMemRead(EA, memAccessSize, - this->memAccessFlags); - } + %(rden_code)s; + + fault = xc->initiateMemRead(EA, memAccessSize, this->memAccessFlags, + rdEn); + + %(fault_code)s; return fault; } @@ -195,7 
+198,6 @@ def template SveContigLoadCompleteAcc {{ Fault %(class_name)s%(tpl_args)s::completeAcc(PacketPtr pkt, ExecContext *xc, Trace::InstRecord *traceData) const { - Fault fault = NoFault; bool aarch64 M5_VAR_USED = true; unsigned eCount = ArmStaticInst::getCurSveVecLen<RegElemType>( xc->tcBase()); @@ -206,18 +208,15 @@ def template SveContigLoadCompleteAcc {{ TheISA::VecRegContainer memData; auto memDataView = memData.as<MemElemType>(); - memcpy(memData.raw_ptr<uint8_t>(), pkt->getPtr<uint8_t>(), - pkt->getSize()); - - if (fault == NoFault) { - %(memacc_code)s; + if (xc->readMemAccPredicate()) { + memcpy(memData.raw_ptr<uint8_t>(), pkt->getPtr<uint8_t>(), + pkt->getSize()); } - if (fault == NoFault) { - %(op_wb)s; - } + %(memacc_code)s; + %(op_wb)s; - return fault; + return NoFault; } }}; @@ -398,24 +397,29 @@ def template SveIndexedMemVIMicroopDeclare {{ int elemIndex; int numElems; + bool firstFault; unsigned memAccessFlags; public: %(class_name)s(const char* mnem, ExtMachInst machInst, OpClass __opClass, IntRegIndex _dest, IntRegIndex _gp, - IntRegIndex _base, uint64_t _imm, int _elemIndex, int _numElems) + IntRegIndex _base, uint64_t _imm, int _elemIndex, int _numElems, + bool _firstFault) : %(base_class)s(mnem, machInst, %(op_class)s), dest(_dest), gp(_gp), base(_base), imm(_imm), elemIndex(_elemIndex), numElems(_numElems), + firstFault(_firstFault), memAccessFlags(ArmISA::TLB::AllowUnaligned | ArmISA::TLB::MustBeOne) { %(constructor)s; if (_opClass == MemReadOp && elemIndex == 0) { // The first micro-op is responsible for pinning the - // destination register - _destRegIdx[0].setNumPinnedWrites(numElems - 1); + // destination and the fault status registers + assert(_numDestRegs == 2); + _destRegIdx[0].setNumPinnedWrites(numElems - 1); + _destRegIdx[1].setNumPinnedWrites(numElems - 1); } } @@ -471,6 +475,7 @@ def template SveIndexedMemSVMicroopDeclare {{ int elemIndex; int numElems; + bool firstFault; unsigned memAccessFlags; @@ -479,20 +484,22 @@ def 
template SveIndexedMemSVMicroopDeclare {{ OpClass __opClass, IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base, IntRegIndex _offset, bool _offsetIs32, bool _offsetIsSigned, bool _offsetIsScaled, int _elemIndex, - int _numElems) + int _numElems, bool _firstFault) : %(base_class)s(mnem, machInst, %(op_class)s), dest(_dest), gp(_gp), base(_base), offset(_offset), offsetIs32(_offsetIs32), offsetIsSigned(_offsetIsSigned), offsetIsScaled(_offsetIsScaled), elemIndex(_elemIndex), - numElems(_numElems), + numElems(_numElems), firstFault(_firstFault), memAccessFlags(ArmISA::TLB::AllowUnaligned | ArmISA::TLB::MustBeOne) { %(constructor)s; if (_opClass == MemReadOp && elemIndex == 0) { // The first micro-op is responsible for pinning the - // destination register - _destRegIdx[0].setNumPinnedWrites(numElems - 1); + // destination and the fault status registers + assert(_numDestRegs == 2); + _destRegIdx[0].setNumPinnedWrites(numElems - 1); + _destRegIdx[1].setNumPinnedWrites(numElems - 1); } } @@ -542,18 +549,33 @@ def template SveGatherLoadMicroopExecute {{ %(op_rd)s; %(ea_code)s; - MemElemType memData; + MemElemType memData = 0; + int index = elemIndex; if (%(pred_check_code)s) { fault = readMemAtomic(xc, traceData, EA, memData, this->memAccessFlags); } if (fault == NoFault) { + %(fault_status_reset_code)s; %(memacc_code)s; %(op_wb)s; + } else { + %(fault_status_set_code)s; + if (firstFault) { + for (index = 0; + index < numElems && !(%(pred_check_code)s); + index++); + + if (index < elemIndex) { + fault = NoFault; + memData = 0; + %(memacc_code)s; + %(op_wb)s; + } + } } - return fault; } }}; @@ -573,11 +595,27 @@ def template SveGatherLoadMicroopInitiateAcc {{ MemElemType memData; + int index = elemIndex; if (%(pred_check_code)s) { fault = initiateMemRead(xc, traceData, EA, memData, this->memAccessFlags); + if (fault != NoFault) { + %(fault_status_set_code)s; + if (firstFault) { + for (index = 0; + index < numElems && !(%(pred_check_code)s); + index++); + if (index < 
elemIndex) { + fault = NoFault; + xc->setMemAccPredicate(false); + } + } + } else { + %(fault_status_reset_code)s; + } } else { xc->setMemAccPredicate(false); + %(fault_status_reset_code)s; } return fault; @@ -589,26 +627,20 @@ def template SveGatherLoadMicroopCompleteAcc {{ Fault %(class_name)s%(tpl_args)s::completeAcc(PacketPtr pkt, ExecContext *xc, Trace::InstRecord *traceData) const { - Fault fault = NoFault; bool aarch64 M5_VAR_USED = true; %(op_decl)s; %(op_rd)s; MemElemType memData = 0; - if (%(pred_check_code)s) { + if (xc->readMemAccPredicate()) { getMem(pkt, memData, traceData); } - if (fault == NoFault) { - %(memacc_code)s; - } - - if (fault == NoFault) { - %(op_wb)s; - } + %(memacc_code)s; + %(op_wb)s; - return fault; + return NoFault; } }}; @@ -628,6 +660,7 @@ def template SveScatterStoreMicroopExecute {{ MemElemType memData; %(memacc_code)s; + int index = elemIndex; if (%(pred_check_code)s) { fault = writeMemAtomic(xc, traceData, memData, EA, this->memAccessFlags, NULL); @@ -657,6 +690,7 @@ def template SveScatterStoreMicroopInitiateAcc {{ MemElemType memData; %(memacc_code)s; + int index = elemIndex; if (%(pred_check_code)s) { fault = writeMemTiming(xc, traceData, memData, EA, this->memAccessFlags, NULL); @@ -677,6 +711,64 @@ def template SveScatterStoreMicroopCompleteAcc {{ } }}; +def template SveFirstFaultWritebackMicroopDeclare {{ + %(tpl_header)s + class SveFirstFaultWritebackMicroop : public MicroOp + { + protected: + typedef RegElemType TPElem; + + int numElems; + StaticInst *macroOp; + + public: + SveFirstFaultWritebackMicroop(const char* mnem, ExtMachInst machInst, + OpClass __opClass, int _numElems, StaticInst *_macroOp) + : MicroOp(mnem, machInst, __opClass), + numElems(_numElems), macroOp(_macroOp) + { + %(constructor)s; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const; + + std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const + { + std::stringstream ss; + ccprintf(ss, "%s", macroOp->disassemble(pc, 
symtab)); + ccprintf(ss, " (uop%d)", numElems); + return ss.str(); + } + }; +}}; + +def template SveFirstFaultWritebackMicroopExecute {{ + %(tpl_header)s + Fault %(class_name)s%(tpl_args)s::execute(ExecContext *xc, + Trace::InstRecord *traceData) const + { + bool aarch64 M5_VAR_USED = true; + + %(op_decl)s; + %(op_rd)s; + + int index, firstFaultIndex; + for (index = 0; + index < numElems && !%(fault_status_check_code)s; + index++); + firstFaultIndex = index; + for (index = 0; index < numElems; index++) { + if (index < firstFaultIndex) { + %(first_fault_forward_code)s; + } else { + %(first_fault_reset_code)s; + } + } + return NoFault; + } +}}; + def template SveGatherLoadCpySrcVecMicroopDeclare {{ class SveGatherLoadCpySrcVecMicroop : public MicroOp { diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh index 8e6ce799b..3790d9d5c 100644 --- a/src/arch/arm/registers.hh +++ b/src/arch/arm/registers.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2011, 2014, 2016-2017 ARM Limited + * Copyright (c) 2010-2011, 2014, 2016-2018 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -89,8 +89,9 @@ const int NumIntRegs = NUM_INTREGS; const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs; const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs; const int VECREG_UREG0 = 32; -const int NumVecPredRegs = 17; // P0-P15, FFR +const int NumVecPredRegs = 18; // P0-P15, FFR, UREG0 const int PREDREG_FFR = 16; +const int PREDREG_UREG0 = 17; const int NumCCRegs = NUM_CCREGS; const int NumMiscRegs = NUM_MISCREGS; |