-rw-r--r--  src/arch/riscv/insts/amo.cc        |  43
-rw-r--r--  src/arch/riscv/insts/amo.hh        |  65
-rw-r--r--  src/arch/riscv/isa/decoder.isa     | 202
-rw-r--r--  src/arch/riscv/isa/formats/amo.isa | 355
-rw-r--r--  src/arch/riscv/locked_mem.cc       |   4
-rw-r--r--  src/arch/riscv/locked_mem.hh       |  27
6 files changed, 505 insertions(+), 191 deletions(-)
diff --git a/src/arch/riscv/insts/amo.cc b/src/arch/riscv/insts/amo.cc
index 7f5740f14..d12064720 100644
--- a/src/arch/riscv/insts/amo.cc
+++ b/src/arch/riscv/insts/amo.cc
@@ -43,6 +43,22 @@ using namespace std;
 namespace RiscvISA
 {
 
+// memfence micro instruction
+string MemFenceMicro::generateDisassembly(Addr pc,
+    const SymbolTable *symtab) const
+{
+    stringstream ss;
+    ss << csprintf("0x%08x", machInst) << ' ' << mnemonic;
+    return ss.str();
+}
+
+Fault MemFenceMicro::execute(ExecContext *xc,
+    Trace::InstRecord *traceData) const
+{
+    return NoFault;
+}
+
+// load-reserved
 string LoadReserved::generateDisassembly(Addr pc,
         const SymbolTable *symtab) const
 {
@@ -52,6 +68,16 @@ string LoadReserved::generateDisassembly(Addr pc,
     return ss.str();
 }
 
+string LoadReservedMicro::generateDisassembly(Addr pc,
+    const SymbolTable *symtab) const
+{
+    stringstream ss;
+    ss << mnemonic << ' ' << registerName(_destRegIdx[0]) << ", ("
+            << registerName(_srcRegIdx[0]) << ')';
+    return ss.str();
+}
+
+// store-conditional
 string StoreCond::generateDisassembly(Addr pc,
         const SymbolTable *symtab) const
 {
@@ -62,6 +88,17 @@ string StoreCond::generateDisassembly(Addr pc,
     return ss.str();
 }
 
+string StoreCondMicro::generateDisassembly(Addr pc,
+    const SymbolTable *symtab) const
+{
+    stringstream ss;
+    ss << mnemonic << ' ' << registerName(_destRegIdx[0]) << ", "
+            << registerName(_srcRegIdx[1]) << ", ("
+            << registerName(_srcRegIdx[0]) << ')';
+    return ss.str();
+}
+
+// AMOs
 string AtomicMemOp::generateDisassembly(Addr pc,
         const SymbolTable *symtab) const
 {
@@ -76,8 +113,10 @@ string AtomicMemOpMicro::generateDisassembly(Addr pc,
         const SymbolTable *symtab) const
 {
     stringstream ss;
-    ss << csprintf("0x%08x", machInst) << ' ' << mnemonic;
+    ss << mnemonic << ' ' << registerName(_destRegIdx[0]) << ", "
+            << registerName(_srcRegIdx[1]) << ", ("
+            << registerName(_srcRegIdx[0]) << ')';
     return ss.str();
 }
 
-}
\ No newline at end of file
+}
diff --git a/src/arch/riscv/insts/amo.hh b/src/arch/riscv/insts/amo.hh
index 7c07bc243..748fe14a3 100644
--- a/src/arch/riscv/insts/amo.hh
+++ b/src/arch/riscv/insts/amo.hh
@@ -41,24 +41,62 @@
 namespace RiscvISA
 {
 
-class LoadReserved : public MemInst
+// memfence micro instruction
+class MemFenceMicro : public RiscvMicroInst
 {
+  public:
+    MemFenceMicro(ExtMachInst _machInst, OpClass __opClass)
+        : RiscvMicroInst("fence", _machInst, __opClass)
+    { }
   protected:
-    using MemInst::MemInst;
+    using RiscvMicroInst::RiscvMicroInst;
 
+    Fault execute(ExecContext *, Trace::InstRecord *) const override;
     std::string generateDisassembly(
         Addr pc, const SymbolTable *symtab) const override;
 };
 
-class StoreCond : public MemInst
+// load-reserved
+class LoadReserved : public RiscvMacroInst
 {
   protected:
-    using MemInst::MemInst;
+    using RiscvMacroInst::RiscvMacroInst;
 
     std::string generateDisassembly(
         Addr pc, const SymbolTable *symtab) const override;
 };
 
+class LoadReservedMicro : public RiscvMicroInst
+{
+  protected:
+    Request::Flags memAccessFlags;
+    using RiscvMicroInst::RiscvMicroInst;
+
+    std::string generateDisassembly(
+        Addr pc, const SymbolTable *symtab) const override;
+};
+
+// store-cond
+class StoreCond : public RiscvMacroInst
+{
+  protected:
+    using RiscvMacroInst::RiscvMacroInst;
+
+    std::string generateDisassembly(
+        Addr pc, const SymbolTable *symtab) const override;
+};
+
+class StoreCondMicro : public RiscvMicroInst
+{
+  protected:
+    Request::Flags memAccessFlags;
+    using RiscvMicroInst::RiscvMicroInst;
+
+    std::string generateDisassembly(
+        Addr pc, const SymbolTable *symtab) const override;
+};
+
+// AMOs
 class AtomicMemOp : public RiscvMacroInst
 {
   protected:
@@ -78,6 +116,23 @@ class AtomicMemOpMicro : public RiscvMicroInst
         Addr pc, const SymbolTable *symtab) const override;
 };
 
+/**
+ * A generic atomic op class
+ */
+
+template<typename T>
+class AtomicGenericOp : public TypedAtomicOpFunctor<T>
+{
+  public:
+    AtomicGenericOp(T _a, std::function<void(T*,T)> _op)
+        : a(_a), op(_op) { }
+    AtomicOpFunctor* clone() { return new AtomicGenericOp<T>(*this); }
+    void execute(T *b) { op(b, a); }
+  private:
+    T a;
+    std::function<void(T*,T)> op;
+};
+
 }
 
-#endif // __ARCH_RISCV_INSTS_AMO_HH__
\ No newline at end of file
+#endif // __ARCH_RISCV_INSTS_AMO_HH__
diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index 3a04a02de..8de4829a6 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -512,44 +512,69 @@ decode QUADRANT default Unknown::unknown() {
             }}, {{
                 Rd = result;
             }}, inst_flags=IsStoreConditional, mem_flags=LLSC);
-            format AtomicMemOp {
-                0x0: amoadd_w({{Rt_sd = Mem_sw;}}, {{
-                    Mem_sw = Rs2_sw + Rt_sd;
-                    Rd_sd = Rt_sd;
-                }}, {{EA = Rs1;}});
-                0x1: amoswap_w({{Rt_sd = Mem_sw;}}, {{
-                    Mem_sw = Rs2_uw;
-                    Rd_sd = Rt_sd;
-                }}, {{EA = Rs1;}});
-                0x4: amoxor_w({{Rt_sd = Mem_sw;}}, {{
-                    Mem_sw = Rs2_uw^Rt_sd;
-                    Rd_sd = Rt_sd;
-                }}, {{EA = Rs1;}});
-                0x8: amoor_w({{Rt_sd = Mem_sw;}}, {{
-                    Mem_sw = Rs2_uw | Rt_sd;
-                    Rd_sd = Rt_sd;
-                }}, {{EA = Rs1;}});
-                0xc: amoand_w({{Rt_sd = Mem_sw;}}, {{
-                    Mem_sw = Rs2_uw&Rt_sd;
-                    Rd_sd = Rt_sd;
-                }}, {{EA = Rs1;}});
-                0x10: amomin_w({{Rt_sd = Mem_sw;}}, {{
-                    Mem_sw = min<int32_t>(Rs2_sw, Rt_sd);
-                    Rd_sd = Rt_sd;
-                }}, {{EA = Rs1;}});
-                0x14: amomax_w({{Rt_sd = Mem_sw;}}, {{
-                    Mem_sw = max<int32_t>(Rs2_sw, Rt_sd);
-                    Rd_sd = Rt_sd;
-                }}, {{EA = Rs1;}});
-                0x18: amominu_w({{Rt_sd = Mem_sw;}}, {{
-                    Mem_sw = min<uint32_t>(Rs2_uw, Rt_sd);
-                    Rd_sd = Rt_sd;
-                }}, {{EA = Rs1;}});
-                0x1c: amomaxu_w({{Rt_sd = Mem_sw;}}, {{
-                    Mem_sw = max<uint32_t>(Rs2_uw, Rt_sd);
-                    Rd_sd = Rt_sd;
-                }}, {{EA = Rs1;}});
-            }
+            0x0: AtomicMemOp::amoadd_w({{
+                Rd_sd = Mem_sw;
+            }}, {{
+                TypedAtomicOpFunctor<int32_t> *amo_op =
+                    new AtomicGenericOp<int32_t>(Rs2_sw,
+                        [](int32_t* b, int32_t a){ *b += a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x1: AtomicMemOp::amoswap_w({{
+                Rd_sd = Mem_sw;
+            }}, {{
+                TypedAtomicOpFunctor<uint32_t> *amo_op =
+                    new AtomicGenericOp<uint32_t>(Rs2_uw,
+                        [](uint32_t* b, uint32_t a){ *b = a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x4: AtomicMemOp::amoxor_w({{
+                Rd_sd = Mem_sw;
+            }}, {{
+                TypedAtomicOpFunctor<uint32_t> *amo_op =
+                    new AtomicGenericOp<uint32_t>(Rs2_uw,
+                        [](uint32_t* b, uint32_t a){ *b ^= a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x8: AtomicMemOp::amoor_w({{
+                Rd_sd = Mem_sw;
+            }}, {{
+                TypedAtomicOpFunctor<uint32_t> *amo_op =
+                    new AtomicGenericOp<uint32_t>(Rs2_uw,
+                        [](uint32_t* b, uint32_t a){ *b |= a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0xc: AtomicMemOp::amoand_w({{
+                Rd_sd = Mem_sw;
+            }}, {{
+                TypedAtomicOpFunctor<uint32_t> *amo_op =
+                    new AtomicGenericOp<uint32_t>(Rs2_uw,
+                        [](uint32_t* b, uint32_t a){ *b &= a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x10: AtomicMemOp::amomin_w({{
+                Rd_sd = Mem_sw;
+            }}, {{
+                TypedAtomicOpFunctor<int32_t> *amo_op =
+                    new AtomicGenericOp<int32_t>(Rs2_sw,
+                        [](int32_t* b, int32_t a){ if (a < *b) *b = a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x14: AtomicMemOp::amomax_w({{
+                Rd_sd = Mem_sw;
+            }}, {{
+                TypedAtomicOpFunctor<int32_t> *amo_op =
+                    new AtomicGenericOp<int32_t>(Rs2_sw,
+                        [](int32_t* b, int32_t a){ if (a > *b) *b = a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x18: AtomicMemOp::amominu_w({{
+                Rd_sd = Mem_sw;
+            }}, {{
+                TypedAtomicOpFunctor<uint32_t> *amo_op =
+                    new AtomicGenericOp<uint32_t>(Rs2_uw,
+                        [](uint32_t* b, uint32_t a){ if (a < *b) *b = a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x1c: AtomicMemOp::amomaxu_w({{
+                Rd_sd = Mem_sw;
+            }}, {{
+                TypedAtomicOpFunctor<uint32_t> *amo_op =
+                    new AtomicGenericOp<uint32_t>(Rs2_uw,
+                        [](uint32_t* b, uint32_t a){ if (a > *b) *b = a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
         }
         0x3: decode AMOFUNCT {
             0x2: LoadReserved::lr_d({{
@@ -560,44 +585,69 @@ decode QUADRANT default Unknown::unknown() {
             }}, {{
                 Rd = result;
             }}, mem_flags=LLSC, inst_flags=IsStoreConditional);
-            format AtomicMemOp {
-                0x0: amoadd_d({{Rt_sd = Mem_sd;}}, {{
-                    Mem_sd = Rs2_sd + Rt_sd;
-                    Rd_sd = Rt_sd;
-                }}, {{EA = Rs1;}});
-                0x1: amoswap_d({{Rt = Mem;}}, {{
-                    Mem = Rs2;
-                    Rd = Rt;
-                }}, {{EA = Rs1;}});
-                0x4: amoxor_d({{Rt = Mem;}}, {{
-                    Mem = Rs2^Rt;
-                    Rd = Rt;
-                }}, {{EA = Rs1;}});
-                0x8: amoor_d({{Rt = Mem;}}, {{
-                    Mem = Rs2 | Rt;
-                    Rd = Rt;
-                }}, {{EA = Rs1;}});
-                0xc: amoand_d({{Rt = Mem;}}, {{
-                    Mem = Rs2&Rt;
-                    Rd = Rt;
-                }}, {{EA = Rs1;}});
-                0x10: amomin_d({{Rt_sd = Mem_sd;}}, {{
-                    Mem_sd = min(Rs2_sd, Rt_sd);
-                    Rd_sd = Rt_sd;
-                }}, {{EA = Rs1;}});
-                0x14: amomax_d({{Rt_sd = Mem_sd;}}, {{
-                    Mem_sd = max(Rs2_sd, Rt_sd);
-                    Rd_sd = Rt_sd;
-                }}, {{EA = Rs1;}});
-                0x18: amominu_d({{Rt = Mem;}}, {{
-                    Mem = min(Rs2, Rt);
-                    Rd = Rt;
-                }}, {{EA = Rs1;}});
-                0x1c: amomaxu_d({{Rt = Mem;}}, {{
-                    Mem = max(Rs2, Rt);
-                    Rd = Rt;
-                }}, {{EA = Rs1;}});
-            }
+            0x0: AtomicMemOp::amoadd_d({{
+                Rd_sd = Mem_sd;
+            }}, {{
+                TypedAtomicOpFunctor<int64_t> *amo_op =
+                    new AtomicGenericOp<int64_t>(Rs2_sd,
+                        [](int64_t* b, int64_t a){ *b += a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x1: AtomicMemOp::amoswap_d({{
+                Rd_sd = Mem_sd;
+            }}, {{
+                TypedAtomicOpFunctor<uint64_t> *amo_op =
+                    new AtomicGenericOp<uint64_t>(Rs2_ud,
+                        [](uint64_t* b, uint64_t a){ *b = a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x4: AtomicMemOp::amoxor_d({{
+                Rd_sd = Mem_sd;
+            }}, {{
+                TypedAtomicOpFunctor<uint64_t> *amo_op =
+                    new AtomicGenericOp<uint64_t>(Rs2_ud,
+                        [](uint64_t* b, uint64_t a){ *b ^= a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x8: AtomicMemOp::amoor_d({{
+                Rd_sd = Mem_sd;
+            }}, {{
+                TypedAtomicOpFunctor<uint64_t> *amo_op =
+                    new AtomicGenericOp<uint64_t>(Rs2_ud,
+                        [](uint64_t* b, uint64_t a){ *b |= a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0xc: AtomicMemOp::amoand_d({{
+                Rd_sd = Mem_sd;
+            }}, {{
+                TypedAtomicOpFunctor<uint64_t> *amo_op =
+                    new AtomicGenericOp<uint64_t>(Rs2_ud,
+                        [](uint64_t* b, uint64_t a){ *b &= a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x10: AtomicMemOp::amomin_d({{
+                Rd_sd = Mem_sd;
+            }}, {{
+                TypedAtomicOpFunctor<int64_t> *amo_op =
+                    new AtomicGenericOp<int64_t>(Rs2_sd,
+                        [](int64_t* b, int64_t a){ if (a < *b) *b = a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x14: AtomicMemOp::amomax_d({{
+                Rd_sd = Mem_sd;
+            }}, {{
+                TypedAtomicOpFunctor<int64_t> *amo_op =
+                    new AtomicGenericOp<int64_t>(Rs2_sd,
+                        [](int64_t* b, int64_t a){ if (a > *b) *b = a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x18: AtomicMemOp::amominu_d({{
+                Rd_sd = Mem_sd;
+            }}, {{
+                TypedAtomicOpFunctor<uint64_t> *amo_op =
+                    new AtomicGenericOp<uint64_t>(Rs2_ud,
+                        [](uint64_t* b, uint64_t a){ if (a < *b) *b = a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
+            0x1c: AtomicMemOp::amomaxu_d({{
+                Rd_sd = Mem_sd;
+            }}, {{
+                TypedAtomicOpFunctor<uint64_t> *amo_op =
+                    new AtomicGenericOp<uint64_t>(Rs2_ud,
+                        [](uint64_t* b, uint64_t a){ if (a > *b) *b = a; });
+            }}, mem_flags=ATOMIC_RETURN_OP);
         }
     }
     0x0c: decode FUNCT3 {
diff --git a/src/arch/riscv/isa/formats/amo.isa b/src/arch/riscv/isa/formats/amo.isa
index 1dca57191..cc7346aa8 100644
--- a/src/arch/riscv/isa/formats/amo.isa
+++ b/src/arch/riscv/isa/formats/amo.isa
@@ -29,10 +29,7 @@
 //
 // Authors: Alec Roelke
 
-////////////////////////////////////////////////////////////////////
-//
-// Atomic memory operation instructions
-//
+// Declaration templates
 def template AtomicMemOpDeclare {{
     /**
      * Static instruction class for an AtomicMemOp operation
      */
@@ -45,11 +42,14 @@
       protected:
-        class %(class_name)sLoad : public %(base_class)sMicro
+        /*
+         * The main RMW part of an AMO
+         */
+        class %(class_name)sRMW : public %(base_class)sMicro
         {
           public:
             // Constructor
-            %(class_name)sLoad(ExtMachInst machInst, %(class_name)s *_p);
+            %(class_name)sRMW(ExtMachInst machInst, %(class_name)s *_p);
 
             Fault execute(ExecContext *, Trace::InstRecord *) const override;
             Fault initiateAcc(ExecContext *,
@@ -57,12 +57,26 @@
             Fault completeAcc(PacketPtr, ExecContext *,
                 Trace::InstRecord *) const override;
         };
+    };
+}};
+
+def template LRSCDeclare {{
+    /**
+     * Static instruction class for an AtomicMemOp operation
+     */
+    class %(class_name)s : public %(base_class)s
+    {
+      public:
+        // Constructor
+        %(class_name)s(ExtMachInst machInst);
+
+      protected:
-        class %(class_name)sStore : public %(base_class)sMicro
+        class %(class_name)sMicro : public %(base_class)sMicro
         {
           public:
             // Constructor
-            %(class_name)sStore(ExtMachInst machInst, %(class_name)s *_p);
+            %(class_name)sMicro(ExtMachInst machInst, %(class_name)s *_p);
 
             Fault execute(ExecContext *, Trace::InstRecord *) const override;
             Fault initiateAcc(ExecContext *,
@@ -73,15 +87,63 @@
     };
 }};
 
-def template LRSCConstructor {{
+// Constructor templates
+def template LRSCMacroConstructor {{
     %(class_name)s::%(class_name)s(ExtMachInst machInst):
         %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
     {
         %(constructor)s;
-        if (AQ)
-            memAccessFlags = memAccessFlags | Request::ACQUIRE;
-        if (RL)
-            memAccessFlags = memAccessFlags | Request::RELEASE;
+
+        StaticInstPtr rel_fence;
+        StaticInstPtr lrsc;
+        StaticInstPtr acq_fence;
+
+        // set up release fence
+        if (RL) {
+            rel_fence = new MemFenceMicro(machInst, No_OpClass);
+            rel_fence->setFlag(IsFirstMicroop);
+            rel_fence->setFlag(IsMemBarrier);
+            rel_fence->setFlag(IsDelayedCommit);
+        }
+
+        // set up atomic rmw op
+        lrsc = new %(class_name)sMicro(machInst, this);
+
+        if (!RL) {
+            lrsc->setFlag(IsFirstMicroop);
+        }
+
+        if (!AQ) {
+            lrsc->setFlag(IsLastMicroop);
+        } else {
+            lrsc->setFlag(IsDelayedCommit);
+        }
+
+        // set up acquire fence
+        if (AQ) {
+            acq_fence = new MemFenceMicro(machInst, No_OpClass);
+            acq_fence->setFlag(IsLastMicroop);
+            acq_fence->setFlag(IsMemBarrier);
+        }
+
+        if (RL && AQ) {
+            microops = {rel_fence, lrsc, acq_fence};
+        } else if (RL) {
+            microops = {rel_fence, lrsc};
+        } else if (AQ) {
+            microops = {lrsc, acq_fence};
+        } else {
+            microops = {lrsc};
+        }
+    }
+}};
+
+def template LRSCMicroConstructor {{
+    %(class_name)s::%(class_name)sMicro::%(class_name)sMicro(
+        ExtMachInst machInst, %(class_name)s *_p)
+            : %(base_class)sMicro("%(mnemonic)s", machInst, %(op_class)s)
+    {
+        %(constructor)s;
     }
 }};
 
@@ -90,39 +152,95 @@ def template AtomicMemOpMacroConstructor {{
         : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
     {
         %(constructor)s;
-        microops = {new %(class_name)sLoad(machInst, this),
-            new %(class_name)sStore(machInst, this)};
+
+        StaticInstPtr rel_fence;
+        StaticInstPtr rmw_op;
+        StaticInstPtr acq_fence;
+
+        // set up release fence
+        if (RL) {
+            rel_fence = new MemFenceMicro(machInst, No_OpClass);
+            rel_fence->setFlag(IsFirstMicroop);
+            rel_fence->setFlag(IsMemBarrier);
+            rel_fence->setFlag(IsDelayedCommit);
+        }
+
+        // set up atomic rmw op
+        rmw_op = new %(class_name)sRMW(machInst, this);
+
+        if (!RL) {
+            rmw_op->setFlag(IsFirstMicroop);
+        }
+
+        if (!AQ) {
+            rmw_op->setFlag(IsLastMicroop);
+        } else {
+            rmw_op->setFlag(IsDelayedCommit);
+        }
+
+        // set up acquire fence
+        if (AQ) {
+            acq_fence = new MemFenceMicro(machInst, No_OpClass);
+            acq_fence->setFlag(IsLastMicroop);
+            acq_fence->setFlag(IsMemBarrier);
+        }
+
+        if (RL && AQ) {
+            microops = {rel_fence, rmw_op, acq_fence};
+        } else if (RL) {
+            microops = {rel_fence, rmw_op};
+        } else if (AQ) {
+            microops = {rmw_op, acq_fence};
+        } else {
+            microops = {rmw_op};
+        }
     }
 }};
 
-def template AtomicMemOpLoadConstructor {{
-    %(class_name)s::%(class_name)sLoad::%(class_name)sLoad(
+def template AtomicMemOpRMWConstructor {{
+    %(class_name)s::%(class_name)sRMW::%(class_name)sRMW(
         ExtMachInst machInst, %(class_name)s *_p)
             : %(base_class)s("%(mnemonic)s[l]", machInst, %(op_class)s)
     {
         %(constructor)s;
-        flags[IsFirstMicroop] = true;
-        flags[IsDelayedCommit] = true;
-        if (AQ)
-            memAccessFlags = Request::ACQUIRE;
+
+        // overwrite default flags
+        flags[IsMemRef] = true;
+        flags[IsLoad] = false;
+        flags[IsStore] = false;
+        flags[IsAtomic] = true;
    }
 }};
 
-def template AtomicMemOpStoreConstructor {{
-    %(class_name)s::%(class_name)sStore::%(class_name)sStore(
-        ExtMachInst machInst, %(class_name)s *_p)
-            : %(base_class)s("%(mnemonic)s[s]", machInst, %(op_class)s)
+// execute() templates
+
+def template LoadReservedExecute {{
+    Fault
+    %(class_name)s::%(class_name)sMicro::execute(
+        ExecContext *xc, Trace::InstRecord *traceData) const
     {
-        %(constructor)s;
-        flags[IsLastMicroop] = true;
-        flags[IsNonSpeculative] = true;
-        if (RL)
-            memAccessFlags = Request::RELEASE;
+        Addr EA;
+        Fault fault = NoFault;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;
+
+        if (fault == NoFault) {
+            fault = readMemAtomic(xc, traceData, EA, Mem, memAccessFlags);
+            %(memacc_code)s;
+        }
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
     }
 }};
 
 def template StoreCondExecute {{
-    Fault %(class_name)s::execute(ExecContext *xc,
+    Fault %(class_name)s::%(class_name)sMicro::execute(ExecContext *xc,
         Trace::InstRecord *traceData) const
     {
         Addr EA;
@@ -157,8 +275,8 @@
     }
 }};
 
-def template AtomicMemOpLoadExecute {{
-    Fault %(class_name)s::%(class_name)sLoad::execute(ExecContext *xc,
+def template AtomicMemOpRMWExecute {{
+    Fault %(class_name)s::%(class_name)sRMW::execute(ExecContext *xc,
         Trace::InstRecord *traceData) const
     {
         Addr EA;
@@ -167,13 +285,18 @@
         %(op_decl)s;
         %(op_rd)s;
         %(ea_code)s;
+        %(amoop_code)s;
+
+        assert(amo_op);
 
         if (fault == NoFault) {
-            fault = readMemAtomic(xc, traceData, EA, Mem, memAccessFlags);
+            fault = amoMemAtomic(xc, traceData, Mem, EA, memAccessFlags,
+                                 amo_op);
+            %(memacc_code)s;
         }
 
         if (fault == NoFault) {
-            %(code)s;
+            %(postacc_code)s;
         }
 
         if (fault == NoFault) {
@@ -184,8 +307,31 @@
     }
 }};
 
-def template AtomicMemOpStoreExecute {{
-    Fault %(class_name)s::%(class_name)sStore::execute(ExecContext *xc,
+// initiateAcc() templates
+
+def template LoadReservedInitiateAcc {{
+    Fault
+    %(class_name)s::%(class_name)sMicro::initiateAcc(ExecContext *xc,
+        Trace::InstRecord *traceData) const
+    {
+        Addr EA;
+        Fault fault = NoFault;
+
+        %(op_src_decl)s;
+        %(op_rd)s;
+        %(ea_code)s;
+
+        if (fault == NoFault) {
+            fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags);
+        }
+
+        return fault;
+    }
+}};
+
+def template StoreCondInitiateAcc {{
+    Fault
+    %(class_name)s::%(class_name)sMicro::initiateAcc(ExecContext *xc,
         Trace::InstRecord *traceData) const
     {
         Addr EA;
@@ -196,12 +342,12 @@
         %(ea_code)s;
 
         if (fault == NoFault) {
-            %(code)s;
+            %(memacc_code)s;
        }
 
         if (fault == NoFault) {
-            fault = writeMemAtomic(xc, traceData, Mem, EA, memAccessFlags,
-                nullptr);
+            fault = writeMemTiming(xc, traceData, Mem, EA,
+                                   memAccessFlags, nullptr);
         }
 
         if (fault == NoFault) {
@@ -212,8 +358,9 @@
     }
 }};
 
-def template AtomicMemOpLoadInitiateAcc {{
-    Fault %(class_name)s::%(class_name)sLoad::initiateAcc(ExecContext *xc,
+def template AtomicMemOpRMWInitiateAcc {{
+    Fault
+    %(class_name)s::%(class_name)sRMW::initiateAcc(ExecContext *xc,
         Trace::InstRecord *traceData) const
    {
         Addr EA;
@@ -222,33 +369,35 @@
         %(op_src_decl)s;
         %(op_rd)s;
         %(ea_code)s;
+        %(amoop_code)s;
+
+        assert(amo_op);
 
         if (fault == NoFault) {
-            fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags);
+            fault = initiateMemAMO(xc, traceData, EA, Mem, memAccessFlags,
+                                   amo_op);
        }
 
         return fault;
     }
 }};
 
-def template AtomicMemOpStoreInitiateAcc {{
-    Fault %(class_name)s::%(class_name)sStore::initiateAcc(
+// completeAcc() templates
+
+def template LoadReservedCompleteAcc {{
+    Fault
+    %(class_name)s::%(class_name)sMicro::completeAcc(PacketPtr pkt,
         ExecContext *xc, Trace::InstRecord *traceData) const
     {
-        Addr EA;
         Fault fault = NoFault;
 
         %(op_decl)s;
         %(op_rd)s;
-        %(ea_code)s;
 
-        if (fault == NoFault) {
-            %(code)s;
-        }
+        getMem(pkt, Mem, traceData);
 
         if (fault == NoFault) {
-            fault = writeMemTiming(xc, traceData, Mem, EA, memAccessFlags,
-                nullptr);
+            %(memacc_code)s;
         }
 
         if (fault == NoFault) {
@@ -260,8 +409,8 @@
 }};
 
 def template StoreCondCompleteAcc {{
-    Fault %(class_name)s::completeAcc(Packet *pkt, ExecContext *xc,
-        Trace::InstRecord *traceData) const
+    Fault %(class_name)s::%(class_name)sMicro::completeAcc(Packet *pkt,
+        ExecContext *xc, Trace::InstRecord *traceData) const
     {
         Fault fault = NoFault;
@@ -283,8 +432,8 @@
     }
 }};
 
-def template AtomicMemOpLoadCompleteAcc {{
-    Fault %(class_name)s::%(class_name)sLoad::completeAcc(PacketPtr pkt,
+def template AtomicMemOpRMWCompleteAcc {{
+    Fault %(class_name)s::%(class_name)sRMW::completeAcc(Packet *pkt,
         ExecContext *xc, Trace::InstRecord *traceData) const
     {
         Fault fault = NoFault;
@@ -295,7 +444,7 @@
         getMem(pkt, Mem, traceData);
 
         if (fault == NoFault) {
-            %(code)s;
+            %(memacc_code)s;
         }
 
         if (fault == NoFault) {
@@ -306,16 +455,20 @@
     }
 }};
 
-def template AtomicMemOpStoreCompleteAcc {{
-    Fault %(class_name)s::%(class_name)sStore::completeAcc(PacketPtr pkt,
-        ExecContext *xc, Trace::InstRecord *traceData) const
-    {
-        return NoFault;
-    }
-}};
+// LR/SC/AMO decode formats
 
 def format LoadReserved(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
         mem_flags=[], inst_flags=[]) {{
+    macro_ea_code = ''
+    macro_inst_flags = []
+    macro_iop = InstObjParams(name, Name, 'LoadReserved', macro_ea_code,
+        macro_inst_flags)
+    header_output = LRSCDeclare.subst(macro_iop)
+    decoder_output = LRSCMacroConstructor.subst(macro_iop)
+    decode_block = BasicDecode.subst(macro_iop)
+
+    exec_output = ''
+
     mem_flags = makeList(mem_flags)
     inst_flags = makeList(inst_flags)
     iop = InstObjParams(name, Name, 'LoadReserved',
@@ -324,16 +477,25 @@
     iop.constructor += '\n\tmemAccessFlags = memAccessFlags | ' + \
         '|'.join(['Request::%s' % flag for flag in mem_flags]) + ';'
 
-    header_output = LoadStoreDeclare.subst(iop)
-    decoder_output = LRSCConstructor.subst(iop)
-    decode_block = BasicDecode.subst(iop)
-    exec_output = LoadExecute.subst(iop) \
-        + LoadInitiateAcc.subst(iop) \
-        + LoadCompleteAcc.subst(iop)
+    decoder_output += LRSCMicroConstructor.subst(iop)
+    decode_block += BasicDecode.subst(iop)
+    exec_output += LoadReservedExecute.subst(iop) \
+        + LoadReservedInitiateAcc.subst(iop) \
+        + LoadReservedCompleteAcc.subst(iop)
 }};
 
 def format StoreCond(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
         mem_flags=[], inst_flags=[]) {{
+    macro_ea_code = ''
+    macro_inst_flags = []
+    macro_iop = InstObjParams(name, Name, 'StoreCond', macro_ea_code,
+        macro_inst_flags)
+    header_output = LRSCDeclare.subst(macro_iop)
+    decoder_output = LRSCMacroConstructor.subst(macro_iop)
+    decode_block = BasicDecode.subst(macro_iop)
+
+    exec_output = ''
+
     mem_flags = makeList(mem_flags)
     inst_flags = makeList(inst_flags)
     iop = InstObjParams(name, Name, 'StoreCond',
@@ -342,37 +504,40 @@
     iop.constructor += '\n\tmemAccessFlags = memAccessFlags | ' + \
         '|'.join(['Request::%s' % flag for flag in mem_flags]) + ';'
 
-    header_output = LoadStoreDeclare.subst(iop)
-    decoder_output = LRSCConstructor.subst(iop)
-    decode_block = BasicDecode.subst(iop)
-    exec_output = StoreCondExecute.subst(iop) \
-        + StoreInitiateAcc.subst(iop) \
+    decoder_output += LRSCMicroConstructor.subst(iop)
+    decode_block += BasicDecode.subst(iop)
+    exec_output += StoreCondExecute.subst(iop) \
+        + StoreCondInitiateAcc.subst(iop) \
         + StoreCondCompleteAcc.subst(iop)
 }};
 
-def format AtomicMemOp(load_code, store_code, ea_code, load_flags=[],
-        store_flags=[], inst_flags=[]) {{
-    macro_iop = InstObjParams(name, Name, 'AtomicMemOp', ea_code, inst_flags)
+def format AtomicMemOp(memacc_code, amoop_code, postacc_code={{ }},
+        ea_code={{EA = Rs1;}}, mem_flags=[], inst_flags=[]) {{
+    macro_ea_code = ''
+    macro_inst_flags = []
+    macro_iop = InstObjParams(name, Name, 'AtomicMemOp', macro_ea_code,
+        macro_inst_flags)
     header_output = AtomicMemOpDeclare.subst(macro_iop)
     decoder_output = AtomicMemOpMacroConstructor.subst(macro_iop)
     decode_block = BasicDecode.subst(macro_iop)
+
     exec_output = ''
 
-    load_inst_flags = makeList(inst_flags) + ["IsMemRef", "IsLoad"]
-    load_iop = InstObjParams(name, Name, 'AtomicMemOpMicro',
-        {'ea_code': ea_code, 'code': load_code, 'op_name': 'Load'},
-        load_inst_flags)
-    decoder_output += AtomicMemOpLoadConstructor.subst(load_iop)
-    exec_output += AtomicMemOpLoadExecute.subst(load_iop) \
-        + AtomicMemOpLoadInitiateAcc.subst(load_iop) \
-        + AtomicMemOpLoadCompleteAcc.subst(load_iop)
-
-    store_inst_flags = makeList(inst_flags) + ["IsMemRef", "IsStore"]
-    store_iop = InstObjParams(name, Name, 'AtomicMemOpMicro',
-        {'ea_code': ea_code, 'code': store_code, 'op_name': 'Store'},
-        store_inst_flags)
-    decoder_output += AtomicMemOpStoreConstructor.subst(store_iop)
-    exec_output += AtomicMemOpStoreExecute.subst(store_iop) \
-        + AtomicMemOpStoreInitiateAcc.subst(store_iop) \
-        + AtomicMemOpStoreCompleteAcc.subst(store_iop)
+    rmw_mem_flags = makeList(mem_flags)
+    rmw_inst_flags = makeList(inst_flags)
+    rmw_iop = InstObjParams(name, Name, 'AtomicMemOpMicro',
+        {'ea_code': ea_code,
+         'memacc_code': memacc_code,
+         'postacc_code': postacc_code,
+         'amoop_code': amoop_code},
+        rmw_inst_flags)
+
+    rmw_iop.constructor += '\n\tmemAccessFlags = memAccessFlags | ' + \
+        '|'.join(['Request::%s' % flag for flag in rmw_mem_flags]) + ';'
+
+    decoder_output += AtomicMemOpRMWConstructor.subst(rmw_iop)
+    decode_block += BasicDecode.subst(rmw_iop)
+    exec_output += AtomicMemOpRMWExecute.subst(rmw_iop) \
+        + AtomicMemOpRMWInitiateAcc.subst(rmw_iop) \
+        + AtomicMemOpRMWCompleteAcc.subst(rmw_iop)
 }};
diff --git a/src/arch/riscv/locked_mem.cc b/src/arch/riscv/locked_mem.cc
index 3c8dbe948..957cffba3 100644
--- a/src/arch/riscv/locked_mem.cc
+++ b/src/arch/riscv/locked_mem.cc
@@ -6,7 +6,5 @@
 
 namespace RiscvISA
 {
-
-std::stack<Addr> locked_addrs;
-
+    std::unordered_map<int, std::stack<Addr>> locked_addrs;
 }
diff --git a/src/arch/riscv/locked_mem.hh b/src/arch/riscv/locked_mem.hh
index b1cde34c6..08d27f15c 100644
--- a/src/arch/riscv/locked_mem.hh
+++ b/src/arch/riscv/locked_mem.hh
@@ -49,6 +49,7 @@
 #define __ARCH_RISCV_LOCKED_MEM_HH__
 
 #include <stack>
+#include <unordered_map>
 
 #include "arch/registers.hh"
 #include "base/logging.hh"
@@ -67,24 +68,28 @@ const int WARN_FAILURE = 10000;
 
 // RISC-V allows multiple locks per hart, but each SC has to unlock the most
 // recent one, so we use a stack here.
-extern std::stack<Addr> locked_addrs;
+extern std::unordered_map<int, std::stack<Addr>> locked_addrs;
 
 template <class XC> inline void
 handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask)
 {
-    if (locked_addrs.empty())
+    std::stack<Addr>& locked_addr_stack = locked_addrs[xc->contextId()];
+
+    if (locked_addr_stack.empty())
         return;
 
     Addr snoop_addr = pkt->getAddr() & cacheBlockMask;
     DPRINTF(LLSC, "Locked snoop on address %x.\n", snoop_addr);
-    if ((locked_addrs.top() & cacheBlockMask) == snoop_addr)
-        locked_addrs.pop();
+    if ((locked_addr_stack.top() & cacheBlockMask) == snoop_addr)
+        locked_addr_stack.pop();
 }
 
 template <class XC> inline void
 handleLockedRead(XC *xc, const RequestPtr &req)
 {
-    locked_addrs.push(req->getPaddr() & ~0xF);
+    std::stack<Addr>& locked_addr_stack = locked_addrs[xc->contextId()];
+
+    locked_addr_stack.push(req->getPaddr() & ~0xF);
     DPRINTF(LLSC, "[cid:%d]: Reserved address %x.\n",
             req->contextId(), req->getPaddr() & ~0xF);
 }
@@ -96,21 +101,23 @@ handleLockedSnoopHit(XC *xc)
 template <class XC> inline bool
 handleLockedWrite(XC *xc, const RequestPtr &req, Addr cacheBlockMask)
 {
+    std::stack<Addr>& locked_addr_stack = locked_addrs[xc->contextId()];
+
     // Normally RISC-V uses zero to indicate success and nonzero to indicate
     // failure (right now only 1 is reserved), but in gem5 zero indicates
     // failure and one indicates success, so here we conform to that (it should
     // be switched in the instruction's implementation)
     DPRINTF(LLSC, "[cid:%d]: locked_addrs empty? %s.\n", req->contextId(),
-            locked_addrs.empty() ? "yes" : "no");
-    if (!locked_addrs.empty()) {
+            locked_addr_stack.empty() ? "yes" : "no");
+    if (!locked_addr_stack.empty()) {
         DPRINTF(LLSC, "[cid:%d]: addr = %x.\n", req->contextId(),
                 req->getPaddr() & ~0xF);
         DPRINTF(LLSC, "[cid:%d]: last locked addr = %x.\n", req->contextId(),
-                locked_addrs.top());
+                locked_addr_stack.top());
     }
-    if (locked_addrs.empty()
-        || locked_addrs.top() != ((req->getPaddr() & ~0xF))) {
+    if (locked_addr_stack.empty()
+        || locked_addr_stack.top() != ((req->getPaddr() & ~0xF))) {
         req->setExtraData(0);
         int stCondFailures = xc->readStCondFailures();
         xc->setStCondFailures(++stCondFailures);
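For reference, the stand-alone sketch below is not part of the patch; it only illustrates the behaviour the new AtomicGenericOp functor in amo.hh is built around: the decoder captures the Rs2 operand plus a lambda, and the memory system later applies that lambda to the in-memory value while the old value is returned to Rd. GenericAmoSketch and main() are hypothetical stand-ins for gem5 internals (TypedAtomicOpFunctor, amoMemAtomic); only the captured-operand / std::function / execute() shape mirrors the commit.

    // Minimal sketch, assuming an amoadd_w-style operation on an int32_t.
    #include <cstdint>
    #include <functional>
    #include <iostream>

    template <typename T>
    struct GenericAmoSketch {               // hypothetical analogue of AtomicGenericOp<T>
        T a;                                // captured source operand (Rs2)
        std::function<void(T*, T)> op;      // the read-modify-write body
        void execute(T *b) { op(b, a); }    // applied to the value in memory
    };

    int main()
    {
        int32_t mem = 5;                    // value currently held in memory
        int32_t old = mem;                  // what amoadd_w would return in Rd
        GenericAmoSketch<int32_t> amo_add{
            3, [](int32_t *b, int32_t a) { *b += a; }};
        amo_add.execute(&mem);              // memory becomes 8, old value stays 5
        std::cout << "Rd = " << old << ", Mem = " << mem << "\n";
        return 0;
    }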