| author | ARM gem5 Developers <none@none> | 2014-01-24 15:29:34 -0600 |
|---|---|---|
| committer | ARM gem5 Developers <none@none> | 2014-01-24 15:29:34 -0600 |
| commit | 612f8f074fa1099cf70faf495d46cc647762a031 (patch) | |
| tree | bd1e99c43bf15292395eadd4b7ae3f5c823545c3 /src/arch/arm/insts | |
| parent | f3585c841e964c98911784a187fc4f081a02a0a6 (diff) | |
| download | gem5-612f8f074fa1099cf70faf495d46cc647762a031.tar.xz | |
arm: Add support for ARMv8 (AArch64 & AArch32)
Note: AArch64 and AArch32 interworking is not supported. If you use an AArch64
kernel you are restricted to AArch64 user-mode binaries. This will be addressed
in a later patch.
Note: Virtualization is only supported in AArch32 mode. This will also be fixed
in a later patch.
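As a usage sketch of the AArch64-kernel restriction above — the machine type and file names here are assumptions based on the contemporary gem5 AArch64 full-system release, not part of this patch:

    build/ARM/gem5.opt configs/example/fs.py \
        --machine-type=VExpress_EMM64 \
        --kernel=vmlinux.aarch64 \
        --disk-image=linaro-minimal-aarch64.img

With an AArch64 kernel booted this way, every user-mode binary on the disk image must also be AArch64 until interworking support lands.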
Contributors:
Giacomo Gabrielli (TrustZone, LPAE, system-level AArch64, AArch64 NEON, validation)
Thomas Grocutt (AArch32 Virtualization, AArch64 FP, validation)
Mbou Eyole (AArch64 NEON, validation)
Ali Saidi (AArch64 Linux support, code integration, validation)
Edmund Grimley-Evans (AArch64 FP)
William Wang (AArch64 Linux support)
Rene De Jong (AArch64 Linux support, performance opt.)
Matt Horsnell (AArch64 MP, validation)
Matt Evans (device models, code integration, validation)
Chris Adeniyi-Jones (AArch64 syscall-emulation)
Prakash Ramrakhyani (validation)
Dam Sunwoo (validation)
Chander Sudanthi (validation)
Stephan Diestelhorst (validation)
Andreas Hansson (code integration, performance opt.)
Eric Van Hensbergen (performance opt.)
Gabe Black
Diffstat (limited to 'src/arch/arm/insts')
-rw-r--r-- | src/arch/arm/insts/branch64.cc | 146
-rw-r--r-- | src/arch/arm/insts/branch64.hh | 166
-rw-r--r-- | src/arch/arm/insts/data64.cc | 203
-rw-r--r-- | src/arch/arm/insts/data64.hh | 256
-rw-r--r-- | src/arch/arm/insts/fplib.cc | 3086
-rw-r--r-- | src/arch/arm/insts/fplib.hh | 283
-rw-r--r-- | src/arch/arm/insts/macromem.cc | 528
-rw-r--r-- | src/arch/arm/insts/macromem.hh | 207
-rw-r--r-- | src/arch/arm/insts/mem.cc | 5
-rw-r--r-- | src/arch/arm/insts/mem64.cc | 193
-rw-r--r-- | src/arch/arm/insts/mem64.hh | 253
-rw-r--r-- | src/arch/arm/insts/misc.cc | 38
-rw-r--r-- | src/arch/arm/insts/misc.hh | 55
-rw-r--r-- | src/arch/arm/insts/misc64.cc | 73
-rw-r--r-- | src/arch/arm/insts/misc64.hh | 92
-rw-r--r-- | src/arch/arm/insts/neon64_mem.hh | 128
-rw-r--r-- | src/arch/arm/insts/pred_inst.hh | 36
-rw-r--r-- | src/arch/arm/insts/static_inst.cc | 312
-rw-r--r-- | src/arch/arm/insts/static_inst.hh | 99
-rw-r--r-- | src/arch/arm/insts/vfp.cc | 484
-rw-r--r-- | src/arch/arm/insts/vfp.hh | 489
21 files changed, 6761 insertions(+), 371 deletions(-)
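The bulk of the patch is fplib.cc, a software floating-point library that manipulates IEEE-754 values as explicit sign/exponent/mantissa fields via helpers such as fp32_pack and fp32_unpack (visible in the diff below). A minimal self-contained sketch of that single-precision packing scheme — the pack32/unpack32 names here are illustrative, not the patch's API:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Pack sign/exponent/mantissa into an IEEE-754 single, as fp32_pack does.
    static uint32_t pack32(uint32_t sgn, uint32_t exp, uint32_t mnt)
    {
        return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1));
    }

    // Split a single into fields, giving normals their implicit leading 1 bit;
    // subnormals instead get their exponent bumped to 1, as fp32_unpack does.
    static void unpack32(uint32_t x, int *sgn, int *exp, uint32_t *mnt)
    {
        *sgn = x >> 31;
        *exp = x >> 23 & 255;
        *mnt = x & (((uint32_t)1 << 23) - 1);
        if (*exp)
            *mnt |= (uint32_t)1 << 23;
        else
            ++*exp;
    }

    int main()
    {
        int sgn, exp;
        uint32_t mnt;
        unpack32(0x40490fdbu, &sgn, &exp, &mnt);      // ~3.14159265f
        assert(sgn == 0 && exp == 128);               // biased exponent of pi
        assert(mnt == 0xc90fdbu);                     // implicit bit restored
        assert(pack32(sgn, exp, mnt) == 0x40490fdbu); // exact round trip
        printf("exp=%d mnt=0x%06x\n", exp, mnt);
        return 0;
    }

The half- and double-precision variants in the diff differ only in field widths: fp16 uses 5 exponent and 10 mantissa bits, fp64 uses 11 and 52.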
diff --git a/src/arch/arm/insts/branch64.cc b/src/arch/arm/insts/branch64.cc new file mode 100644 index 000000000..49ba3402a --- /dev/null +++ b/src/arch/arm/insts/branch64.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/branch64.hh" + +namespace ArmISA +{ + +ArmISA::PCState +BranchImm64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm); + pcs.advance(); + return pcs; +} + +ArmISA::PCState +BranchImmReg64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm); + pcs.advance(); + return pcs; +} + +ArmISA::PCState +BranchImmImmReg64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm2); + pcs.advance(); + return pcs; +} + +std::string +BranchImmCond64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false, true, condCode); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchImm64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + return ss.str(); +} + +std::string +BranchRet64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + if (op1 != INTREG_X30) + printReg(ss, op1); + return ss.str(); +} + +std::string +BranchEret64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + return ss.str(); +} + +std::string +BranchImmReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchImmImmReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", #%#x, ", imm1); + printTarget(ss, pc + imm2, symtab); + return ss.str(); +} + +} // namespace ArmISA diff --git a/src/arch/arm/insts/branch64.hh b/src/arch/arm/insts/branch64.hh new file mode 100644 index 000000000..48881e0c2 --- /dev/null +++ b/src/arch/arm/insts/branch64.hh @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_INSTS_BRANCH64_HH__ +#define __ARCH_ARM_INSTS_BRANCH64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +namespace ArmISA +{ +// Branch to a target computed with an immediate +class BranchImm64 : public ArmStaticInst +{ + protected: + int64_t imm; + + public: + BranchImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), imm(_imm) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Conditionally Branch to a target computed with an immediate +class BranchImmCond64 : public BranchImm64 +{ + protected: + ConditionCode condCode; + + public: + BranchImmCond64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm, ConditionCode _condCode) : + BranchImm64(mnem, _machInst, __opClass, _imm), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with a register +class BranchReg64 : public ArmStaticInst +{ + protected: + IntRegIndex op1; + + public: + BranchReg64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), op1(_op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Ret instruction +class BranchRet64 : public BranchReg64 +{ + public: + BranchRet64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1) : + BranchReg64(mnem, _machInst, __opClass, _op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Eret instruction +class BranchEret64 : public ArmStaticInst +{ + public: + BranchEret64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : + ArmStaticInst(mnem, _machInst, __opClass) + 
{} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with an immediate and a register +class BranchImmReg64 : public ArmStaticInst +{ + protected: + int64_t imm; + IntRegIndex op1; + + public: + BranchImmReg64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm, IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), imm(_imm), op1(_op1) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with two immediates +class BranchImmImmReg64 : public ArmStaticInst +{ + protected: + int64_t imm1; + int64_t imm2; + IntRegIndex op1; + + public: + BranchImmImmReg64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, int64_t _imm1, int64_t _imm2, + IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), + imm1(_imm1), imm2(_imm2), op1(_op1) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +} + +#endif //__ARCH_ARM_INSTS_BRANCH_HH__ diff --git a/src/arch/arm/insts/data64.cc b/src/arch/arm/insts/data64.cc new file mode 100644 index 000000000..f65219870 --- /dev/null +++ b/src/arch/arm/insts/data64.cc @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/data64.hh" + +namespace ArmISA +{ + +std::string +DataXImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, true, false, /*XXX not really s*/ false, dest, op1, + INTREG_ZERO, INTREG_ZERO, 0, LSL, imm); + return ss.str(); +} + +std::string +DataXImmOnlyOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataXSRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, false, true, /*XXX not really s*/ false, dest, op1, + op2, INTREG_ZERO, shiftAmt, shiftType, 0); + return ss.str(); +} + +std::string +DataXERegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, false, true, /*XXX not really s*/ false, dest, op1, + op2, INTREG_ZERO, shiftAmt, LSL, 0); + return ss.str(); +} + +std::string +DataX1RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + return ss.str(); +} + +std::string +DataX1RegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataX1Reg2ImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + +std::string +DataX2RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + return ss.str(); +} + +std::string +DataX2RegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataX3RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printReg(ss, op3); + return ss.str(); +} + +std::string +DataXCondCompImmOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm, defCc); 
+ ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +DataXCondCompRegOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", defCc); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +DataXCondSelOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +} diff --git a/src/arch/arm/insts/data64.hh b/src/arch/arm/insts/data64.hh new file mode 100644 index 000000000..8c0677b3d --- /dev/null +++ b/src/arch/arm/insts/data64.hh @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_INSTS_DATA64_HH__ +#define __ARCH_ARM_INSTS_DATA64_HH__ + +#include "arch/arm/insts/static_inst.hh" +#include "base/trace.hh" + +namespace ArmISA +{ + +class DataXImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm; + + DataXImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXImmOnlyOp : public ArmStaticInst +{ + protected: + IntRegIndex dest; + uint64_t imm; + + DataXImmOnlyOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXSRegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + int32_t shiftAmt; + ArmShiftType shiftType; + + DataXSRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + int32_t _shiftAmt, ArmShiftType _shiftType) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), + shiftAmt(_shiftAmt), shiftType(_shiftType) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXERegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + ArmExtendType extendType; + int32_t shiftAmt; + + DataXERegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ArmExtendType _extendType, int32_t _shiftAmt) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), + extendType(_extendType), shiftAmt(_shiftAmt) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + + DataX1RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1RegImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm; + + DataX1RegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1), + imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1Reg2ImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm1, imm2; + + DataX1Reg2ImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm1, + uint64_t _imm2) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1), + imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX2RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + + DataX2RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), 
op1(_op1), op2(_op2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX2RegImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + uint64_t imm; + + DataX2RegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX3RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2, op3; + + DataX3RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _op3) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), op3(_op3) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondCompImmOp : public ArmStaticInst +{ + protected: + IntRegIndex op1; + uint64_t imm; + ConditionCode condCode; + uint8_t defCc; + + DataXCondCompImmOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, uint64_t _imm, + ConditionCode _condCode, uint8_t _defCc) : + ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1), imm(_imm), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondCompRegOp : public ArmStaticInst +{ + protected: + IntRegIndex op1, op2; + ConditionCode condCode; + uint8_t defCc; + + DataXCondCompRegOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode, uint8_t _defCc) : + ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondSelOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + ConditionCode condCode; + + DataXCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +} + +#endif //__ARCH_ARM_INSTS_PREDINST_HH__ diff --git a/src/arch/arm/insts/fplib.cc b/src/arch/arm/insts/fplib.cc new file mode 100644 index 000000000..1f44eed09 --- /dev/null +++ b/src/arch/arm/insts/fplib.cc @@ -0,0 +1,3086 @@ +/* +* Copyright (c) 2012-2013 ARM Limited +* All rights reserved +* +* The license below extends only to copyright in the software and shall +* not be construed as granting a license to any other intellectual +* property including but not limited to intellectual property relating +* to a hardware implementation of the functionality of the software +* licensed hereunder. You may use the software subject to the license +* terms below provided that you ensure that this notice is replicated +* unmodified and in its entirety in all distributions of the software, +* modified or unmodified, in source code or in binary form. 
+* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer; +* redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution; +* neither the name of the copyright holders nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Authors: Edmund Grimley Evans +* Thomas Grocutt +*/ + +#include <stdint.h> + +#include <cassert> + +#include "fplib.hh" + +namespace ArmISA +{ + +#define FPLIB_RN 0 +#define FPLIB_RP 1 +#define FPLIB_RM 2 +#define FPLIB_RZ 3 +#define FPLIB_FZ 4 +#define FPLIB_DN 8 +#define FPLIB_AHP 16 + +#define FPLIB_IDC 128 // Input Denormal +#define FPLIB_IXC 16 // Inexact +#define FPLIB_UFC 8 // Underflow +#define FPLIB_OFC 4 // Overflow +#define FPLIB_DZC 2 // Division by Zero +#define FPLIB_IOC 1 // Invalid Operation + +static inline uint16_t +lsl16(uint16_t x, uint32_t shift) +{ + return shift < 16 ? x << shift : 0; +} + +static inline uint16_t +lsr16(uint16_t x, uint32_t shift) +{ + return shift < 16 ? x >> shift : 0; +} + +static inline uint32_t +lsl32(uint32_t x, uint32_t shift) +{ + return shift < 32 ? x << shift : 0; +} + +static inline uint32_t +lsr32(uint32_t x, uint32_t shift) +{ + return shift < 32 ? x >> shift : 0; +} + +static inline uint64_t +lsl64(uint64_t x, uint32_t shift) +{ + return shift < 64 ? x << shift : 0; +} + +static inline uint64_t +lsr64(uint64_t x, uint32_t shift) +{ + return shift < 64 ? 
x >> shift : 0; +} + +static inline void +lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) +{ + if (shift < 64) { + *r1 = x1 << shift | x0 >> (64 - shift); + *r0 = x0 << shift; + } else if (shift < 128) { + *r1 = x0 << (shift - 64); + *r0 = 0; + } else { + *r1 = 0; + *r0 = 0; + } +} + +static inline void +lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) +{ + if (shift < 64) { + *r0 = x0 >> shift | x1 << (64 - shift); + *r1 = x1 >> shift; + } else if (shift < 128) { + *r0 = x1 >> (shift - 64); + *r1 = 0; + } else { + *r0 = 0; + *r1 = 0; + } +} + +static inline void +mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) +{ + uint32_t mask = ((uint32_t)1 << 31) - 1; + uint64_t a0 = a & mask; + uint64_t a1 = a >> 31 & mask; + uint64_t b0 = b & mask; + uint64_t b1 = b >> 31 & mask; + uint64_t p0 = a0 * b0; + uint64_t p2 = a1 * b1; + uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2; + uint64_t s0 = p0; + uint64_t s1 = (s0 >> 31) + p1; + uint64_t s2 = (s1 >> 31) + p2; + *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62; + *x1 = s2 >> 2; +} + +static inline +void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b) +{ + uint64_t t0 = (uint64_t)(uint32_t)a * b; + uint64_t t1 = (t0 >> 32) + (a >> 32) * b; + *x0 = t1 << 32 | (uint32_t)t0; + *x1 = t1 >> 32; +} + +static inline void +mul64x64(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) +{ + uint64_t a0 = (uint32_t)a; + uint64_t a1 = a >> 32; + uint64_t b0 = (uint32_t)b; + uint64_t b1 = b >> 32; + uint64_t t1 = (a0 * b0 >> 32) + a1 * b0; + uint64_t t2 = a0 * b1; + uint64_t x = ((uint64_t)(uint32_t)t1 + (uint32_t)t2) >> 32; + x += t1 >> 32; + x += t2 >> 32; + x += a1 * b1; + *x0 = a * b; + *x1 = x; +} + +static inline void +add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, + uint64_t b1) +{ + *x0 = a0 + b0; + *x1 = a1 + b1 + (*x0 < a0); +} + +static inline void +sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, + uint64_t b1) +{ + *x0 = a0 - b0; + *x1 = a1 - b1 - (*x0 > a0); +} + +static inline int +cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) +{ + return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 
1 : 0); +} + +static inline uint16_t +fp16_normalise(uint16_t mnt, int *exp) +{ + int shift; + + if (!mnt) { + return 0; + } + + for (shift = 8; shift; shift >>= 1) { + if (!(mnt >> (16 - shift))) { + mnt <<= shift; + *exp -= shift; + } + } + return mnt; +} + +static inline uint32_t +fp32_normalise(uint32_t mnt, int *exp) +{ + int shift; + + if (!mnt) { + return 0; + } + + for (shift = 16; shift; shift >>= 1) { + if (!(mnt >> (32 - shift))) { + mnt <<= shift; + *exp -= shift; + } + } + return mnt; +} + +static inline uint64_t +fp64_normalise(uint64_t mnt, int *exp) +{ + int shift; + + if (!mnt) { + return 0; + } + + for (shift = 32; shift; shift >>= 1) { + if (!(mnt >> (64 - shift))) { + mnt <<= shift; + *exp -= shift; + } + } + return mnt; +} + +static inline void +fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp) +{ + uint64_t x0 = *mnt0; + uint64_t x1 = *mnt1; + int shift; + + if (!x0 && !x1) { + return; + } + + if (!x1) { + x1 = x0; + x0 = 0; + *exp -= 64; + } + + for (shift = 32; shift; shift >>= 1) { + if (!(x1 >> (64 - shift))) { + x1 = x1 << shift | x0 >> (64 - shift); + x0 <<= shift; + *exp -= shift; + } + } + + *mnt0 = x0; + *mnt1 = x1; +} + +static inline uint16_t +fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt) +{ + return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1)); +} + +static inline uint32_t +fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt) +{ + return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1)); +} + +static inline uint64_t +fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt) +{ + return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1)); +} + +static inline uint16_t +fp16_zero(int sgn) +{ + return fp16_pack(sgn, 0, 0); +} + +static inline uint32_t +fp32_zero(int sgn) +{ + return fp32_pack(sgn, 0, 0); +} + +static inline uint64_t +fp64_zero(int sgn) +{ + return fp64_pack(sgn, 0, 0); +} + +static inline uint16_t +fp16_max_normal(int sgn) +{ + return fp16_pack(sgn, 30, -1); +} + +static inline uint32_t +fp32_max_normal(int sgn) +{ + return fp32_pack(sgn, 254, -1); +} + +static inline uint64_t +fp64_max_normal(int sgn) +{ + return fp64_pack(sgn, 2046, -1); +} + +static inline uint16_t +fp16_infinity(int sgn) +{ + return fp16_pack(sgn, 31, 0); +} + +static inline uint32_t +fp32_infinity(int sgn) +{ + return fp32_pack(sgn, 255, 0); +} + +static inline uint64_t +fp64_infinity(int sgn) +{ + return fp64_pack(sgn, 2047, 0); +} + +static inline uint16_t +fp16_defaultNaN() +{ + return fp16_pack(0, 31, (uint16_t)1 << 9); +} + +static inline uint32_t +fp32_defaultNaN() +{ + return fp32_pack(0, 255, (uint32_t)1 << 22); +} + +static inline uint64_t +fp64_defaultNaN() +{ + return fp64_pack(0, 2047, (uint64_t)1 << 51); +} + +static inline void +fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, + int *flags) +{ + *sgn = x >> 15; + *exp = x >> 10 & 31; + *mnt = x & (((uint16_t)1 << 10) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint16_t)1 << 10; + } else { + ++*exp; + // There is no flush to zero in this case! 
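+ // Note the contrast with fp32_unpack/fp64_unpack below: half precision
+ // deliberately has no FPLIB_FZ path, so fp16 denormal inputs are kept
+ // rather than flushed, and FPLIB_IDC is never raised here.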
+ } +} + +static inline void +fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, + int *flags) +{ + *sgn = x >> 31; + *exp = x >> 23 & 255; + *mnt = x & (((uint32_t)1 << 23) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint32_t)1 << 23; + } else { + ++*exp; + if ((mode & FPLIB_FZ) && *mnt) { + *flags |= FPLIB_IDC; + *mnt = 0; + } + } +} + +static inline void +fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, + int *flags) +{ + *sgn = x >> 63; + *exp = x >> 52 & 2047; + *mnt = x & (((uint64_t)1 << 52) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint64_t)1 << 52; + } else { + ++*exp; + if ((mode & FPLIB_FZ) && *mnt) { + *flags |= FPLIB_IDC; + *mnt = 0; + } + } +} + +static inline uint32_t +fp32_process_NaN(uint32_t a, int mode, int *flags) +{ + if (!(a >> 22 & 1)) { + *flags |= FPLIB_IOC; + a |= (uint32_t)1 << 22; + } + return mode & FPLIB_DN ? fp32_defaultNaN() : a; +} + +static inline uint64_t +fp64_process_NaN(uint64_t a, int mode, int *flags) +{ + if (!(a >> 51 & 1)) { + *flags |= FPLIB_IOC; + a |= (uint64_t)1 << 51; + } + return mode & FPLIB_DN ? fp64_defaultNaN() : a; +} + +static uint32_t +fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_exp = a >> 23 & 255; + uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); + int b_exp = b >> 23 & 255; + uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); + + // Handle signalling NaNs: + if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) + return fp32_process_NaN(b, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 255 && a_mnt) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt) + return fp32_process_NaN(b, mode, flags); + + return 0; +} + +static uint64_t +fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_exp = a >> 52 & 2047; + uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); + int b_exp = b >> 52 & 2047; + uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); + + // Handle signalling NaNs: + if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) + return fp64_process_NaN(b, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 2047 && a_mnt) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt) + return fp64_process_NaN(b, mode, flags); + + return 0; +} + +static uint32_t +fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags) +{ + int a_exp = a >> 23 & 255; + uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); + int b_exp = b >> 23 & 255; + uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); + int c_exp = c >> 23 & 255; + uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1); + + // Handle signalling NaNs: + if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) + return fp32_process_NaN(b, mode, flags); + if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1)) + return fp32_process_NaN(c, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 255 && a_mnt) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt) + return fp32_process_NaN(b, mode, flags); + if (c_exp == 255 && c_mnt) + return fp32_process_NaN(c, mode, flags); + + return 0; +} + +static uint64_t +fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags) +{ + int a_exp = a >> 52 & 2047; + uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); + 
int b_exp = b >> 52 & 2047; + uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); + int c_exp = c >> 52 & 2047; + uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1); + + // Handle signalling NaNs: + if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) + return fp64_process_NaN(b, mode, flags); + if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1)) + return fp64_process_NaN(c, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 2047 && a_mnt) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt) + return fp64_process_NaN(b, mode, flags); + if (c_exp == 2047 && c_mnt) + return fp64_process_NaN(c, mode, flags); + + return 0; +} + +static uint16_t +fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint16_t int_mant; // mantissa for result, less than (1 << 11) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // There is no flush to zero in this case! + + // The bottom 5 bits of mnt are orred together: + mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0); + + if (exp > 0) { + biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr16(mnt, 3 - exp); + error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { + ++int_mant; + if (int_mant == (uint32_t)1 << 10) { + // Rounded up from denormalized to normalized + biased_exp = 1; + } + if (int_mant == (uint32_t)1 << 11) { + // Rounded up to next exponent + ++biased_exp; + int_mant >>= 1; + } + } + + // Handle rounding to odd aka Von Neumann rounding: + if (error && rm == FPRounding_ODD) + int_mant |= 1; + + // Handle overflow: + if (!(mode & FPLIB_AHP)) { + if (biased_exp >= 31) { + *flags |= FPLIB_OFC | FPLIB_IXC; + if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || + (rm == FPLIB_RM && sgn)) { + return fp16_infinity(sgn); + } else { + return fp16_max_normal(sgn); + } + } + } else { + if (biased_exp >= 32) { + *flags |= FPLIB_IOC; + return fp16_pack(sgn, 31, -1); + } + } + + if (error) { + *flags |= FPLIB_IXC; + } + + return fp16_pack(sgn, biased_exp, int_mant); +} + +static uint32_t +fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint32_t int_mant; // mantissa for result, less than (1 << 24) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // Flush to zero: + if ((mode & FPLIB_FZ) && exp < 1) { + *flags |= FPLIB_UFC; + return fp32_zero(sgn); + } + + // The bottom 8 bits of mnt are orred together: + mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0); + + if (exp > 0) { + biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr32(mnt, 3 - exp); + error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && 
sgn)) && error)) { + ++int_mant; + if (int_mant == (uint32_t)1 << 23) { + // Rounded up from denormalized to normalized + biased_exp = 1; + } + if (int_mant == (uint32_t)1 << 24) { + // Rounded up to next exponent + ++biased_exp; + int_mant >>= 1; + } + } + + // Handle rounding to odd aka Von Neumann rounding: + if (error && rm == FPRounding_ODD) + int_mant |= 1; + + // Handle overflow: + if (biased_exp >= 255) { + *flags |= FPLIB_OFC | FPLIB_IXC; + if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || + (rm == FPLIB_RM && sgn)) { + return fp32_infinity(sgn); + } else { + return fp32_max_normal(sgn); + } + } + + if (error) { + *flags |= FPLIB_IXC; + } + + return fp32_pack(sgn, biased_exp, int_mant); +} + +static uint32_t +fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags) +{ + return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags); +} + +static uint64_t +fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint64_t int_mant; // mantissa for result, less than (1 << 52) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // Flush to zero: + if ((mode & FPLIB_FZ) && exp < 1) { + *flags |= FPLIB_UFC; + return fp64_zero(sgn); + } + + // The bottom 11 bits of mnt are orred together: + mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0); + + if (exp > 0) { + biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr64(mnt, 3 - exp); + error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { + ++int_mant; + if (int_mant == (uint64_t)1 << 52) { + // Rounded up from denormalized to normalized + biased_exp = 1; + } + if (int_mant == (uint64_t)1 << 53) { + // Rounded up to next exponent + ++biased_exp; + int_mant >>= 1; + } + } + + // Handle rounding to odd aka Von Neumann rounding: + if (error && rm == FPRounding_ODD) + int_mant |= 1; + + // Handle overflow: + if (biased_exp >= 2047) { + *flags |= FPLIB_OFC | FPLIB_IXC; + if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || + (rm == FPLIB_RM && sgn)) { + return fp64_infinity(sgn); + } else { + return fp64_max_normal(sgn); + } + } + + if (error) { + *flags |= FPLIB_IXC; + } + + return fp64_pack(sgn, biased_exp, int_mant); +} + +static uint64_t +fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags) +{ + return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags); +} + +static int +fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint32_t a_mnt, b_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1))) + *flags |= FPLIB_IOC; + return 0; + } + return a == b || (!a_mnt && !b_mnt); +} + +static int +fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint32_t a_mnt, b_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, 
flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 1; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 1; +} + +static int +fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint32_t a_mnt, b_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 0; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 0; +} + +static int +fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1))) + *flags |= FPLIB_IOC; + return 0; + } + return a == b || (!a_mnt && !b_mnt); +} + +static int +fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 1; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 1; +} + +static int +fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 0; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 0; +} + +static uint32_t +fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x, x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) { + return x; + } + + b_sgn ^= neg; + + // Handle infinities and zeroes: + if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } else if (a_exp == 255) { + return fp32_infinity(a_sgn); + } else if (b_exp == 255) { + return fp32_infinity(b_sgn); + } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { + return fp32_zero(a_sgn); + } + + a_mnt <<= 3; + b_mnt <<= 3; + if (a_exp >= b_exp) { + b_mnt = (lsr32(b_mnt, a_exp - b_exp) | + !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1))); + 
b_exp = a_exp; + } else { + a_mnt = (lsr32(a_mnt, b_exp - a_exp) | + !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1))); + a_exp = b_exp; + } + x_sgn = a_sgn; + x_exp = a_exp; + if (a_sgn == b_sgn) { + x_mnt = a_mnt + b_mnt; + } else if (a_mnt >= b_mnt) { + x_mnt = a_mnt - b_mnt; + } else { + x_sgn ^= 1; + x_mnt = b_mnt - a_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp32_zero((mode & 3) == 2); + } + + x_mnt = fp32_normalise(x_mnt, &x_exp); + + return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags); +} + +static uint64_t +fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint64_t a_mnt, b_mnt, x, x_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) { + return x; + } + + b_sgn ^= neg; + + // Handle infinities and zeroes: + if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } else if (a_exp == 2047) { + return fp64_infinity(a_sgn); + } else if (b_exp == 2047) { + return fp64_infinity(b_sgn); + } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { + return fp64_zero(a_sgn); + } + + a_mnt <<= 3; + b_mnt <<= 3; + if (a_exp >= b_exp) { + b_mnt = (lsr64(b_mnt, a_exp - b_exp) | + !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1))); + b_exp = a_exp; + } else { + a_mnt = (lsr64(a_mnt, b_exp - a_exp) | + !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1))); + a_exp = b_exp; + } + x_sgn = a_sgn; + x_exp = a_exp; + if (a_sgn == b_sgn) { + x_mnt = a_mnt + b_mnt; + } else if (a_mnt >= b_mnt) { + x_mnt = a_mnt - b_mnt; + } else { + x_sgn ^= 1; + x_mnt = b_mnt - a_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp64_zero((mode & 3) == 2); + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags); +} + +static uint32_t +fp32_mul(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x; + uint64_t x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) { + return x; + } + + // Handle infinities and zeroes: + if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } else if (a_exp == 255 || b_exp == 255) { + return fp32_infinity(a_sgn ^ b_sgn); + } else if (!a_mnt || !b_mnt) { + return fp32_zero(a_sgn ^ b_sgn); + } + + // Multiply and normalise: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp + b_exp - 110; + x_mnt = (uint64_t)a_mnt * b_mnt; + x_mnt = fp64_normalise(x_mnt, &x_exp); + + // Convert to 32 bits, collapsing error into bottom bit: + x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_mul(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint64_t a_mnt, b_mnt, x; + uint64_t x0_mnt, x1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) { + return x; + } + + // Handle infinities and zeroes: + if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } else if (a_exp == 2047 || b_exp == 2047) { + return 
fp64_infinity(a_sgn ^ b_sgn); + } else if (!a_mnt || !b_mnt) { + return fp64_zero(a_sgn ^ b_sgn); + } + + // Multiply and normalise: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp + b_exp - 1000; + mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt); + fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); + + // Convert to 64 bits, collapsing error into bottom bit: + x0_mnt = x1_mnt << 1 | !!x0_mnt; + + return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags); +} + +static uint32_t +fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, + int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; + uint32_t a_mnt, b_mnt, c_mnt, x; + uint64_t x_mnt, y_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); + + x = fp32_process_NaNs3(a, b, c, mode, flags); + + // Quiet NaN added to product of zero and infinity: + if (a_exp == 255 && (a_mnt >> 22 & 1) && + ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) || + (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) { + x = fp32_defaultNaN(); + *flags |= FPLIB_IOC; + } + + if (x) { + return x; + } + + // Handle infinities and zeroes: + if ((b_exp == 255 && !c_mnt) || + (c_exp == 255 && !b_mnt) || + (a_exp == 255 && (b_exp == 255 || c_exp == 255) && + (a_sgn != (b_sgn ^ c_sgn)))) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + if (a_exp == 255) + return fp32_infinity(a_sgn); + if (b_exp == 255 || c_exp == 255) + return fp32_infinity(b_sgn ^ c_sgn); + if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) + return fp32_zero(a_sgn); + + x_sgn = a_sgn; + x_exp = a_exp + 13; + x_mnt = (uint64_t)a_mnt << 27; + + // Multiply: + y_sgn = b_sgn ^ c_sgn; + y_exp = b_exp + c_exp - 113; + y_mnt = (uint64_t)b_mnt * c_mnt << 3; + if (!y_mnt) { + y_exp = x_exp; + } + + // Add: + if (x_exp >= y_exp) { + y_mnt = (lsr64(y_mnt, x_exp - y_exp) | + !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1))); + y_exp = x_exp; + } else { + x_mnt = (lsr64(x_mnt, y_exp - x_exp) | + !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1))); + x_exp = y_exp; + } + if (x_sgn == y_sgn) { + x_mnt = x_mnt + y_mnt; + } else if (x_mnt >= y_mnt) { + x_mnt = x_mnt - y_mnt; + } else { + x_sgn ^= 1; + x_mnt = y_mnt - x_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp32_zero((mode & 3) == 2); + } + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags); +} + +static uint64_t +fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, + int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; + uint64_t a_mnt, b_mnt, c_mnt, x; + uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); + + x = fp64_process_NaNs3(a, b, c, mode, flags); + + // Quiet NaN added to product of zero and infinity: + if (a_exp == 2047 && (a_mnt >> 51 & 1) && + ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) || + (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) { + x = fp64_defaultNaN(); + *flags |= FPLIB_IOC; + } + + if (x) { + return x; + } + + // Handle infinities and zeroes: + if ((b_exp == 2047 && !c_mnt) || + (c_exp == 2047 && !b_mnt) || + (a_exp == 2047 && (b_exp == 
2047 || c_exp == 2047) && + (a_sgn != (b_sgn ^ c_sgn)))) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + if (a_exp == 2047) + return fp64_infinity(a_sgn); + if (b_exp == 2047 || c_exp == 2047) + return fp64_infinity(b_sgn ^ c_sgn); + if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) + return fp64_zero(a_sgn); + + x_sgn = a_sgn; + x_exp = a_exp + 11; + x0_mnt = 0; + x1_mnt = a_mnt; + + // Multiply: + y_sgn = b_sgn ^ c_sgn; + y_exp = b_exp + c_exp - 1003; + mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3); + if (!y0_mnt && !y1_mnt) { + y_exp = x_exp; + } + + // Add: + if (x_exp >= y_exp) { + uint64_t t0, t1; + lsl128(&t0, &t1, y0_mnt, y1_mnt, + x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0); + lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp); + y0_mnt |= !!(t0 | t1); + y_exp = x_exp; + } else { + uint64_t t0, t1; + lsl128(&t0, &t1, x0_mnt, x1_mnt, + y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp); + x0_mnt |= !!(t0 | t1); + x_exp = y_exp; + } + if (x_sgn == y_sgn) { + add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); + } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) { + sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); + } else { + x_sgn ^= 1; + sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt); + } + + if (!x0_mnt && !x1_mnt) { + // Sign of exact zero result depends on rounding mode + return fp64_zero((mode & 3) == 2); + } + + // Normalise and convert to 64 bits, collapsing error into bottom bit: + fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); + x0_mnt = x1_mnt << 1 | !!x0_mnt; + + return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags); +} + +static uint32_t +fp32_div(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x; + uint64_t x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) + return x; + + // Handle infinities and zeroes: + if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + if (a_exp == 255 || !b_mnt) { + if (a_exp != 255) + *flags |= FPLIB_DZC; + return fp32_infinity(a_sgn ^ b_sgn); + } + if (!a_mnt || b_exp == 255) + return fp32_zero(a_sgn ^ b_sgn); + + // Divide, setting bottom bit if inexact: + a_mnt = fp32_normalise(a_mnt, &a_exp); + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp - b_exp + 172; + x_mnt = ((uint64_t)a_mnt << 18) / b_mnt; + x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18); + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_div(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c; + uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) + return x; + + // Handle infinities and zeroes: + if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + if (a_exp == 2047 || !b_mnt) { + if (a_exp != 2047) + *flags |= FPLIB_DZC; + return fp64_infinity(a_sgn ^ b_sgn); + } + if (!a_mnt || b_exp == 2047) + return fp64_zero(a_sgn ^ b_sgn); + + // Find 
reciprocal of divisor with Newton-Raphson: + a_mnt = fp64_normalise(a_mnt, &a_exp); + b_mnt = fp64_normalise(b_mnt, &b_exp); + x_mnt = ~(uint64_t)0 / (b_mnt >> 31); + mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt); + sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32); + mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33); + + // Multiply by dividend: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp - b_exp + 1031; + mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // a 62x62-bit multiply suffices here + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4); + x_mnt = x1_mnt; + + // This is an underestimate, so try adding one: + mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // a 62x62-bit multiply suffices here + c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11); + if (c <= 0) { + ++x_mnt; + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); +} + +static void +set_fpscr0(FPSCR &fpscr, int flags) +{ + if (flags & FPLIB_IDC) { + fpscr.idc = 1; + } + if (flags & FPLIB_IOC) { + fpscr.ioc = 1; + } + if (flags & FPLIB_DZC) { + fpscr.dzc = 1; + } + if (flags & FPLIB_OFC) { + fpscr.ofc = 1; + } + if (flags & FPLIB_UFC) { + fpscr.ufc = 1; + } + if (flags & FPLIB_IXC) { + fpscr.ixc = 1; + } +} + +static uint32_t +fp32_sqrt(uint32_t a, int mode, int *flags) +{ + int a_sgn, a_exp, x_sgn, x_exp; + uint32_t a_mnt, x, x_mnt; + uint64_t t0, t1; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + + // Handle NaNs: + if (a_exp == 255 && (uint32_t)(a_mnt << 9)) + return fp32_process_NaN(a, mode, flags); + + // Handle infinities and zeroes: + if (!a_mnt) { + return fp32_zero(a_sgn); + } + if (a_exp == 255 && !a_sgn) { + return fp32_infinity(a_sgn); + } + if (a_sgn) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + + a_mnt = fp32_normalise(a_mnt, &a_exp); + if (!(a_exp & 1)) { + ++a_exp; + a_mnt >>= 1; + } + + // x = (a * 3 + 5) / 8 + x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28); + + // x = (a / x + x) / 2; // 16-bit accuracy + x = (a_mnt / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 16-bit accuracy + x = (a_mnt / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 32-bit accuracy + x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1); + + x_sgn = 0; + x_exp = (a_exp + 147) >> 1; + x_mnt = ((x - (1 << 5)) >> 6) + 1; + t1 = (uint64_t)x_mnt * x_mnt; + t0 = (uint64_t)a_mnt << 19; + if (t1 > t0) { + --x_mnt; + } + + x_mnt = fp32_normalise(x_mnt, &x_exp); + + return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); +} + +static uint64_t +fp64_sqrt(uint64_t a, int mode, int *flags) +{ + int a_sgn, a_exp, x_sgn, x_exp, c; + uint64_t a_mnt, x_mnt, r, x0, x1; + uint32_t x; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + + // Handle NaNs: + if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) { + return fp64_process_NaN(a, mode, flags); + } + + // Handle infinities and zeroes: + if (!a_mnt) + return fp64_zero(a_sgn); + if (a_exp == 2047 && !a_sgn) + return fp64_infinity(a_sgn); + if (a_sgn) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + + a_mnt = fp64_normalise(a_mnt, &a_exp); + if (a_exp & 1) { + ++a_exp; + a_mnt >>= 1; + } + + // x = (a * 3 + 5) / 8 + x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28); + + // x = (a / x + x) / 2; // 16-bit accuracy + x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 16-bit accuracy + x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 32-bit accuracy + x = 
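+        // Note: the updates above and below are Newton-Raphson iterations
+        // for f(x) = x*x - a, i.e. x' = (x + a/x) / 2, which converges to
+        // sqrt(a); each step roughly doubles the number of correct bits,
+        // hence the progression from 16-bit to 32-bit accuracy in these
+        // comments.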
((a_mnt / x) >> 2) + (x >> 1); + + // r = 1 / x; // 32-bit accuracy + r = ((uint64_t)1 << 62) / x; + + // r = r * (2 - x * r); // 64-bit accuracy + mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r); + lsr128(&x0, &x1, x0, x1, 31); + + // x = (x + a * r) / 2; // 64-bit accuracy + mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2); + lsl128(&x0, &x1, x0, x1, 5); + lsr128(&x0, &x1, x0, x1, 56); + + x0 = ((uint64_t)x << 31) + (x0 >> 1); + + x_sgn = 0; + x_exp = (a_exp + 1053) >> 1; + x_mnt = x0; + x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1; + mul62x62(&x0, &x1, x_mnt, x_mnt); + lsl128(&x0, &x1, x0, x1, 19); + c = cmp128(x0, x1, 0, a_mnt); + if (c > 0) + --x_mnt; + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); +} + +static int +modeConv(FPSCR fpscr) +{ + return (((int) fpscr) >> 22) & 0xF; +} + +static void +set_fpscr(FPSCR &fpscr, int flags) +{ + // translate back to FPSCR + bool underflow = false; + if (flags & FPLIB_IDC) { + fpscr.idc = 1; + } + if (flags & FPLIB_IOC) { + fpscr.ioc = 1; + } + if (flags & FPLIB_DZC) { + fpscr.dzc = 1; + } + if (flags & FPLIB_OFC) { + fpscr.ofc = 1; + } + if (flags & FPLIB_UFC) { + underflow = true; // track underflow: a result flushed to zero (FZ) sets UFC but must not also set IXC below + fpscr.ufc = 1; + } + if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) { + fpscr.ixc = 1; + } +} + +template <> +bool +fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +uint32_t +fplibAbs(uint32_t op) +{ + return op & ~((uint32_t)1 << 31); +} + +template <> +uint64_t +fplibAbs(uint64_t op) +{ + return op & ~((uint64_t)1 << 63); +} + +template <> +uint32_t +fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +int +fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2, result; + uint32_t mnt1, mnt2; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) || + (exp2 == 255 && (uint32_t)(mnt2 << 9))) { + result = 3; + if ((exp1 == 255 && 
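+        // Note: a binary32 operand is a NaN when its exponent field is all
+        // ones (exp == 255) and its fraction is non-zero; "mnt1 << 9" keeps
+        // exactly the 23 fraction bits, so a non-zero result means a NaN.
+        // Fraction bit 22 then distinguishes quiet (1) from signalling (0)
+        // NaNs, which is what this test checks before raising IOC.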
(uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) || + (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) || + signal_nans) + flags |= FPLIB_IOC; + } else { + if (op1 == op2 || (!mnt1 && !mnt2)) { + result = 6; + } else if (sgn1 != sgn2) { + result = sgn1 ? 8 : 2; + } else if (exp1 != exp2) { + result = sgn1 ^ (exp1 < exp2) ? 8 : 2; + } else { + result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +int +fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2, result; + uint64_t mnt1, mnt2; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) || + (exp2 == 2047 && (uint64_t)(mnt2 << 12))) { + result = 3; + if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) || + (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) || + signal_nans) + flags |= FPLIB_IOC; + } else { + if (op1 == op2 || (!mnt1 && !mnt2)) { + result = 6; + } else if (sgn1 != sgn2) { + result = sgn1 ? 8 : 2; + } else if (exp1 != exp2) { + result = sgn1 ^ (exp1 < exp2) ? 8 : 2; + } else { + result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +static uint16_t +fp16_FPConvertNaN_32(uint32_t op) +{ + return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13); +} + +static uint16_t +fp16_FPConvertNaN_64(uint64_t op) +{ + return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42); +} + +static uint32_t +fp32_FPConvertNaN_16(uint16_t op) +{ + return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13); +} + +static uint32_t +fp32_FPConvertNaN_64(uint64_t op) +{ + return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29); +} + +static uint64_t +fp64_FPConvertNaN_16(uint16_t op) +{ + return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42); +} + +static uint64_t +fp64_FPConvertNaN_32(uint32_t op) +{ + return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29); +} + +static uint32_t +fp32_FPOnePointFive(int sgn) +{ + return fp32_pack(sgn, 127, (uint64_t)1 << 22); +} + +static uint64_t +fp64_FPOnePointFive(int sgn) +{ + return fp64_pack(sgn, 1023, (uint64_t)1 << 51); +} + +static uint32_t +fp32_FPThree(int sgn) +{ + return fp32_pack(sgn, 128, (uint64_t)1 << 22); +} + +static uint64_t +fp64_FPThree(int sgn) +{ + return fp64_pack(sgn, 1024, (uint64_t)1 << 51); +} + +static uint32_t +fp32_FPTwo(int sgn) +{ + return fp32_pack(sgn, 128, 0); +} + +static uint64_t +fp64_FPTwo(int sgn) +{ + return fp64_pack(sgn, 1024, 0); +} + +template <> +uint16_t +fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint16_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + bool alt_hp = fpscr.ahp; + + if (exp == 255 && (uint32_t)(mnt << 9)) { + if (alt_hp) { + result = fp16_zero(sgn); + } else if (fpscr.dn) { + result = fp16_defaultNaN(); + } else { + result = fp16_FPConvertNaN_32(op); + } + if (!(mnt >> 22 & 1) || alt_hp) { + flags |= FPLIB_IOC; + } + } else if (exp == 255) { + if (alt_hp) { + result = sgn << 15 | (uint16_t)0x7fff; + flags |= FPLIB_IOC; + } else { + result = fp16_infinity(sgn); + } + } else if (!mnt) { + result = fp16_zero(sgn); + } else { + result = fp16_round_(sgn, exp - 127 + 15, + mnt >> 
7 | !!(uint32_t)(mnt << 25), + rounding, mode | alt_hp << 4, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint16_t +fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint16_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + bool alt_hp = fpscr.ahp; + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + if (alt_hp) { + result = fp16_zero(sgn); + } else if (fpscr.dn) { + result = fp16_defaultNaN(); + } else { + result = fp16_FPConvertNaN_64(op); + } + if (!(mnt >> 51 & 1) || alt_hp) { + flags |= FPLIB_IOC; + } + } else if (exp == 2047) { + if (alt_hp) { + result = sgn << 15 | (uint16_t)0x7fff; + flags |= FPLIB_IOC; + } else { + result = fp16_infinity(sgn); + } + } else if (!mnt) { + result = fp16_zero(sgn); + } else { + result = fp16_round_(sgn, exp - 1023 + 15, + mnt >> 36 | !!(uint64_t)(mnt << 28), + rounding, mode | alt_hp << 4, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint16_t mnt; + uint32_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { + if (fpscr.dn) { + result = fp32_defaultNaN(); + } else { + result = fp32_FPConvertNaN_16(op); + } + if (!(mnt >> 9 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 31 && !fpscr.ahp) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else { + mnt = fp16_normalise(mnt, &exp); + result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint32_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + if (fpscr.dn) { + result = fp32_defaultNaN(); + } else { + result = fp32_FPConvertNaN_64(op); + } + if (!(mnt >> 51 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 2047) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else { + result = fp32_round_(sgn, exp - 1023 + 127, + mnt >> 20 | !!(uint64_t)(mnt << 44), + rounding, mode, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint16_t mnt; + uint64_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { + if (fpscr.dn) { + result = fp64_defaultNaN(); + } else { + result = fp64_FPConvertNaN_16(op); + } + if (!(mnt >> 9 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 31 && !fpscr.ahp) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else { + mnt = fp16_normalise(mnt, &exp); + result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t 
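+// Note: widening conversions such as the 32-to-64-bit one below are exact,
+// since every narrower-precision value is representable at the wider
+// precision; the mantissa is simply normalised and repacked with a
+// re-biased exponent, and only the narrowing conversions go through the
+// fp16_round_/fp32_round_ helpers.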
+fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint64_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + if (fpscr.dn) { + result = fp64_defaultNaN(); + } else { + result = fp64_FPConvertNaN_32(op); + } + if (!(mnt >> 22 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 255) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else { + mnt = fp32_normalise(mnt, &exp); + result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +static uint32_t +fp32_repack(int sgn, int exp, uint32_t mnt) +{ + return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt); +} + +static uint64_t +fp64_repack(int sgn, int exp, uint64_t mnt) +{ + return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt); +} + +static void +fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn) +{ + // Treat a single quiet-NaN as +Infinity/-Infinity + if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23) + *op1 = fp32_infinity(sgn); + if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23) + *op2 = fp32_infinity(sgn); +} + +static void +fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn) +{ + // Treat a single quiet-NaN as +Infinity/-Infinity + if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52) + *op1 = fp64_infinity(sgn); + if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52) + *op2 = fp64_infinity(sgn); +} + +template <> +uint32_t +fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, x, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? + fp32_repack(sgn1, exp1, mnt1) : + fp32_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, x, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? 
sgn2 : sgn1 ^ (op1 > op2)) ? + fp64_repack(sgn1, exp1, mnt1) : + fp64_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + fp32_minmaxnum(&op1, &op2, 1); + return fplibMax<uint32_t>(op1, op2, fpscr); +} + +template <> +uint64_t +fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + fp64_minmaxnum(&op1, &op2, 1); + return fplibMax<uint64_t>(op1, op2, fpscr); +} + +template <> +uint32_t +fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, x, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? + fp32_repack(sgn1, exp1, mnt1) : + fp32_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, x, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? + fp64_repack(sgn1, exp1, mnt1) : + fp64_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + fp32_minmaxnum(&op1, &op2, 0); + return fplibMin<uint32_t>(op1, op2, fpscr); +} + +template <> +uint64_t +fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + fp64_minmaxnum(&op1, &op2, 0); + return fplibMin<uint64_t>(op1, op2, fpscr); +} + +template <> +uint32_t +fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { + result = fp32_FPTwo(sgn1 ^ sgn2); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else if (!mnt1 || !mnt2) { + result = fp32_zero(sgn1 ^ sgn2); + } else { + result = fp32_mul(op1, op2, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPTwo(sgn1 ^ 
sgn2); + } else if (exp1 == 2047 || exp2 == 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else if (!mnt1 || !mnt2) { + result = fp64_zero(sgn1 ^ sgn2); + } else { + result = fp64_mul(op1, op2, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibNeg(uint32_t op) +{ + return op ^ (uint32_t)1 << 31; +} + +template <> +uint64_t +fplibNeg(uint64_t op) +{ + return op ^ (uint64_t)1 << 63; +} + +static const uint8_t recip_sqrt_estimate[256] = { + 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228, + 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203, + 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181, + 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163, + 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146, + 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, + 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118, + 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106, + 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86, + 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68, + 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53, + 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40, + 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28, + 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18, + 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9, + 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0 +}; + +template <> +uint32_t +fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } else if (!mnt) { + result = fp32_infinity(sgn); + flags |= FPLIB_DZC; + } else if (sgn) { + result = fp32_defaultNaN(); + flags |= FPLIB_IOC; + } else if (exp == 255) { + result = fp32_zero(0); + } else { + exp += 8; + mnt = fp32_normalise(mnt, &exp); + mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)]; + result = fp32_pack(0, (380 - exp) >> 1, mnt << 15); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (!mnt) { + result = fp64_infinity(sgn); + flags |= FPLIB_DZC; + } else if (sgn) { + result = fp64_defaultNaN(); + flags |= FPLIB_IOC; + } else if (exp == 2047) { + result = fp64_zero(0); + } else { + exp += 11; + mnt = fp64_normalise(mnt, &exp); + mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)]; + result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + op1 = fplibNeg<uint32_t>(op1); + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, 
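+    // Note: FRSQRTS computes (3 - op1 * op2) / 2, one step of a
+    // Newton-Raphson iteration for the reciprocal square root; op1 was
+    // negated on entry, so the fused multiply-add below evaluates
+    // 3 + (-op1) * op2, and the scale argument of -1 presumably halves the
+    // result by lowering the exponent before rounding.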
&flags); + if (!result) { + if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { + result = fp32_FPOnePointFive(0); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else { + result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + op1 = fplibNeg<uint64_t>(op1); + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPOnePointFive(0); + } else if (exp1 == 2047 || exp2 == 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else { + result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + op1 = fplibNeg<uint32_t>(op1); + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { + result = fp32_FPTwo(0); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else { + result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecipEstimate(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } else if (exp == 255) { + result = fp32_zero(sgn); + } else if (!mnt) { + result = fp32_infinity(sgn); + flags |= FPLIB_DZC; + } else if (!((uint32_t)(op << 1) >> 22)) { + bool overflow_to_inf; + switch (FPCRRounding(fpscr)) { + case FPRounding_TIEEVEN: + overflow_to_inf = true; + break; + case FPRounding_POSINF: + overflow_to_inf = !sgn; + break; + case FPRounding_NEGINF: + overflow_to_inf = sgn; + break; + case FPRounding_ZERO: + overflow_to_inf = false; + break; + default: + assert(0); + } + result = overflow_to_inf ? 
fp32_infinity(sgn) : fp32_max_normal(sgn); + flags |= FPLIB_OFC | FPLIB_IXC; + } else if (fpscr.fz && exp >= 253) { + result = fp32_zero(sgn); + flags |= FPLIB_UFC; + } else { + exp += 8; + mnt = fp32_normalise(mnt, &exp); + int result_exp = 253 - exp; + uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1; + fraction <<= 15; + if (result_exp == 0) { + fraction >>= 1; + } else if (result_exp == -1) { + fraction >>= 2; + result_exp = 0; + } + result = fp32_pack(sgn, result_exp, fraction); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecipEstimate(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (exp == 2047) { + result = fp64_zero(sgn); + } else if (!mnt) { + result = fp64_infinity(sgn); + flags |= FPLIB_DZC; + } else if (!((uint64_t)(op << 1) >> 51)) { + bool overflow_to_inf; + switch (FPCRRounding(fpscr)) { + case FPRounding_TIEEVEN: + overflow_to_inf = true; + break; + case FPRounding_POSINF: + overflow_to_inf = !sgn; + break; + case FPRounding_NEGINF: + overflow_to_inf = sgn; + break; + case FPRounding_ZERO: + overflow_to_inf = false; + break; + default: + assert(0); + } + result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn); + flags |= FPLIB_OFC | FPLIB_IXC; + } else if (fpscr.fz && exp >= 2045) { + result = fp64_zero(sgn); + flags |= FPLIB_UFC; + } else { + exp += 11; + mnt = fp64_normalise(mnt, &exp); + int result_exp = 2045 - exp; + uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1; + fraction <<= 44; + if (result_exp == 0) { + fraction >>= 1; + } else if (result_exp == -1) { + fraction >>= 2; + result_exp = 0; + } + result = fp64_pack(sgn, result_exp, fraction); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + op1 = fplibNeg<uint64_t>(op1); + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPTwo(0); + } else if (exp1 == 2047 || exp2 == 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else { + result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecpX(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } + else { + if (!mnt) { // Zero and denormals + result = fp32_pack(sgn, 254, 0); + } else { // Infinities and normals + result = fp32_pack(sgn, exp ^ 255, 0); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecpX(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } + else { + if (!mnt) { // 
Zero and denormals + result = fp64_pack(sgn, 2046, 0); + } else { // Infinities and normals + result = fp64_pack(sgn, exp ^ 2047, 0); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + // Handle NaNs, infinities and zeroes: + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } else if (exp == 255) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else if (exp >= 150) { + // There are no fractional bits + result = op; + } else { + // Truncate towards zero: + uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp); + int err = exp < 118 ? 1 : + (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0); + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (x == 0) { + result = fp32_zero(sgn); + } else { + exp = 150; + mnt = fp32_normalise(x, &exp); + result = fp32_pack(sgn, exp + 8, mnt >> 8); + } + + if (err && exact) + flags |= FPLIB_IXC; + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + // Handle NaNs, infinities and zeroes: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (exp == 2047) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else if (exp >= 1075) { + // There are no fractional bits + result = op; + } else { + // Truncate towards zero: + uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp); + int err = exp < 1011 ? 
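+        // Note: "err" is a two-bit round/sticky code for the discarded
+        // fraction: bit 1 is the first bit below the integer part and
+        // bit 0 is set if any lower bit is set. Hence err == 0 means exact,
+        // err == 2 an exact tie and err == 3 above the tie, which is what
+        // the TIEEVEN and TIEAWAY cases below rely on.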
1 : + (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0); + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (x == 0) { + result = fp64_zero(sgn); + } else { + exp = 1075; + mnt = fp64_normalise(x, &exp); + result = fp64_pack(sgn, exp + 11, mnt >> 11); + } + + if (err && exact) + flags |= FPLIB_IXC; + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibSqrt(uint32_t op, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibSqrt(uint64_t op, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +static uint64_t +FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, + int *flags) +{ + uint64_t x; + int err; + + if (exp > 1023 + 63) { + *flags = FPLIB_IOC; + return ((uint64_t)!u << 63) - !sgn; + } + + x = lsr64(mnt << 11, 1023 + 63 - exp); + err = (exp > 1023 + 63 - 2 ? 0 : + (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) | + !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1))); + + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) { + *flags = FPLIB_IOC; + return ((uint64_t)!u << 63) - !sgn; + } + + if (err) { + *flags = FPLIB_IXC; + } + + return sgn ? -x : x; +} + +static uint32_t +FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, + int *flags) +{ + uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags); + if (u ? 
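+    // Note: the saturated overflow value "((uintN_t)!u << (N-1)) - !sgn"
+    // used here and in FPToFixed_64 above folds all four cases into one
+    // expression: unsigned yields all-ones (0 - 1) for positive overflow
+    // and 0 for negative, while signed yields INT_MAX ((1 << (N-1)) - 1)
+    // and INT_MIN (1 << (N-1)) respectively.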
x >= (uint64_t)1 << 32 : + !(x < (uint64_t)1 << 31 || + (uint64_t)-x <= (uint64_t)1 << 31)) { + *flags = FPLIB_IOC; + x = ((uint32_t)!u << 31) - !sgn; + } + return x; +} + +template <> +uint32_t +fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 255 && (uint32_t)(mnt << 9)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits, + (uint64_t)mnt << (52 - 23), u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint32_t result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint64_t result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 255 && (uint32_t)(mnt << 9)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits, + (uint64_t)mnt << (52 - 23), u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +static uint32_t +fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) +{ + int x_sgn = !u && a >> 63; + int x_exp = 190 - fbits; + uint64_t x_mnt = x_sgn ? -a : a; + + // Handle zero: + if (!x_mnt) { + return fp32_zero(0); + } + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) +{ + int x_sgn = !u && a >> 63; + int x_exp = 1024 + 62 - fbits; + uint64_t x_mnt = x_sgn ? 
-a : a; + + // Handle zero: + if (!x_mnt) { + return fp64_zero(0); + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags); +} + +template <> +uint32_t +fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + uint32_t res = fp32_cvtf(op, fbits, u, + (int)rounding | ((uint32_t)fpscr >> 22 & 12), + &flags); + set_fpscr0(fpscr, flags); + return res; +} + +template <> +uint64_t +fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + uint64_t res = fp64_cvtf(op, fbits, u, + (int)rounding | ((uint32_t)fpscr >> 22 & 12), + &flags); + set_fpscr0(fpscr, flags); + return res; +} + +} diff --git a/src/arch/arm/insts/fplib.hh b/src/arch/arm/insts/fplib.hh new file mode 100644 index 000000000..6263687fc --- /dev/null +++ b/src/arch/arm/insts/fplib.hh @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Edmund Grimley Evans + * Thomas Grocutt + */ + +/** + * @file + * Floating-point library code, which will gradually replace vfp.hh. For + * portability, this library does not use floating-point data types. Currently, + * C's standard integer types are used in the API, though this could be changed + * to something like class Fp32 { uint32_t x; }, etc. 
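+ *
+ * A minimal usage sketch (illustrative only; operands are raw IEEE 754
+ * bit patterns, so 0x3f800000 is 1.0f and 0x40000000 is 2.0f):
+ *
+ *     FPSCR fpscr = 0; // bit-union view of the 32-bit FPSCR word
+ *     uint32_t sum = fplibAdd<uint32_t>(0x3f800000, 0x40000000, fpscr);
+ *     // sum == 0x40400000 (3.0f); fpscr accumulates any exception flags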
+ */ + +#ifndef __ARCH_ARM_INSTS_FPLIB_HH__ +#define __ARCH_ARM_INSTS_FPLIB_HH__ + +#include <stdint.h> + +#include "arch/arm/miscregs.hh" + +namespace ArmISA +{ + +enum FPRounding { + FPRounding_TIEEVEN = 0, + FPRounding_POSINF = 1, + FPRounding_NEGINF = 2, + FPRounding_ZERO = 3, + FPRounding_TIEAWAY = 4, + FPRounding_ODD = 5 +}; + +static inline FPRounding +FPCRRounding(FPSCR &fpscr) +{ + return (FPRounding)((uint32_t)fpscr >> 22 & 3); +} + +/** Floating-point absolute value. */ +template <class T> +T fplibAbs(T op); +/** Floating-point add. */ +template <class T> +T fplibAdd(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare (quiet and signaling). */ +template <class T> +int fplibCompare(T op1, T op2, bool signal_nans, FPSCR &fpscr); +/** Floating-point compare equal. */ +template <class T> +bool fplibCompareEQ(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare greater than or equal. */ +template <class T> +bool fplibCompareGE(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare greater than. */ +template <class T> +bool fplibCompareGT(T op1, T op2, FPSCR &fpscr); +/** Floating-point convert precision. */ +template <class T1, class T2> +T2 fplibConvert(T1 op, FPRounding rounding, FPSCR &fpscr); +/** Floating-point division. */ +template <class T> +T fplibDiv(T op1, T op2, FPSCR &fpscr); +/** Floating-point maximum. */ +template <class T> +T fplibMax(T op1, T op2, FPSCR &fpscr); +/** Floating-point maximum number. */ +template <class T> +T fplibMaxNum(T op1, T op2, FPSCR &fpscr); +/** Floating-point minimum. */ +template <class T> +T fplibMin(T op1, T op2, FPSCR &fpscr); +/** Floating-point minimum number. */ +template <class T> +T fplibMinNum(T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply. */ +template <class T> +T fplibMul(T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply-add. */ +template <class T> +T fplibMulAdd(T addend, T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply extended. */ +template <class T> +T fplibMulX(T op1, T op2, FPSCR &fpscr); +/** Floating-point negate. */ +template <class T> +T fplibNeg(T op); +/** Floating-point reciprocal square root estimate. */ +template <class T> +T fplibRSqrtEstimate(T op, FPSCR &fpscr); +/** Floating-point reciprocal square root step. */ +template <class T> +T fplibRSqrtStepFused(T op1, T op2, FPSCR &fpscr); +/** Floating-point reciprocal estimate. */ +template <class T> +T fplibRecipEstimate(T op, FPSCR &fpscr); +/** Floating-point reciprocal step. */ +template <class T> +T fplibRecipStepFused(T op1, T op2, FPSCR &fpscr); +/** Floating-point reciprocal exponent. */ +template <class T> +T fplibRecpX(T op, FPSCR &fpscr); +/** Floating-point convert to integer. */ +template <class T> +T fplibRoundInt(T op, FPRounding rounding, bool exact, FPSCR &fpscr); +/** Floating-point square root. */ +template <class T> +T fplibSqrt(T op, FPSCR &fpscr); +/** Floating-point subtract. */ +template <class T> +T fplibSub(T op1, T op2, FPSCR &fpscr); +/** Floating-point convert to fixed-point. */ +template <class T1, class T2> +T2 fplibFPToFixed(T1 op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr); +/** Floating-point convert from fixed-point. */ +template <class T> +T fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); + +/* Function specializations... 
*/ +template <> +uint32_t fplibAbs(uint32_t op); +template <> +uint64_t fplibAbs(uint64_t op); +template <> +uint32_t fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +int fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr); +template <> +int fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr); +template <> +bool fplibCompareEQ(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareEQ(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGE(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGE(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGT(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGT(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint16_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint64_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint64_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, + FPSCR &fpscr); +template <> +uint64_t fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, + FPSCR &fpscr); +template <> +uint32_t fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibNeg(uint32_t op); +template <> +uint64_t fplibNeg(uint64_t op); +template <> +uint32_t fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr); +template<> +uint64_t fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibRecipEstimate(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibRecipEstimate(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibRecpX(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibRecpX(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRoundInt(uint32_t op, 
FPRounding rounding, bool exact, + FPSCR &fpscr); +template <> +uint64_t fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, + FPSCR &fpscr); +template <> +uint32_t fplibSqrt(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibSqrt(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint32_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint32_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +} + +#endif diff --git a/src/arch/arm/insts/macromem.cc b/src/arch/arm/insts/macromem.cc index 26a916fc7..42cb98a7c 100644 --- a/src/arch/arm/insts/macromem.cc +++ b/src/arch/arm/insts/macromem.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -43,7 +43,9 @@ #include <sstream> #include "arch/arm/insts/macromem.hh" + #include "arch/arm/generated/decoder.hh" +#include "arch/arm/insts/neon64_mem.hh" using namespace std; using namespace ArmISAInst; @@ -177,6 +179,212 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, } } +PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + uint32_t size, bool fp, bool load, bool noAlloc, + bool signExt, bool exclusive, bool acrel, + int64_t imm, AddrMode mode, + IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) : + PredMacroOp(mnem, machInst, __opClass) +{ + bool writeback = (mode != AddrMd_Offset); + numMicroops = 1 + (size / 4) + (writeback ? 1 : 0); + microOps = new StaticInstPtr[numMicroops]; + + StaticInstPtr *uop = microOps; + + bool post = (mode == AddrMd_PostIndex); + + rn = makeSP(rn); + + *uop = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, post ? 
0 : imm); + + if (fp) { + if (size == 16) { + if (load) { + *++uop = new MicroLdrQBFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQTFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQBFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQTFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrQBFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrQTFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrQBFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + *++uop = new MicroStrQTFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + } + } else if (size == 8) { + if (load) { + *++uop = new MicroLdrFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrFpXImmUop(machInst, rt2, + INTREG_UREG0, 8, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrFpXImmUop(machInst, rt2, + INTREG_UREG0, 8, noAlloc, exclusive, acrel); + } + } else if (size == 4) { + if (load) { + *++uop = new MicroLdrDFpXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrDFpXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } + } else { + if (size == 8) { + if (load) { + *++uop = new MicroLdrXImmUop(machInst, rt, INTREG_UREG0, + 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrXImmUop(machInst, rt2, INTREG_UREG0, + size, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrXImmUop(machInst, rt, INTREG_UREG0, + 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrXImmUop(machInst, rt2, INTREG_UREG0, + size, noAlloc, exclusive, acrel); + } + } else if (size == 4) { + if (load) { + if (signExt) { + *++uop = new MicroLdrDSXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroLdrDUXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } else { + *++uop = new MicroStrDXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } + } + + if (writeback) { + *++uop = new MicroAddXiUop(machInst, rn, INTREG_UREG0, + post ? 
imm : 0); + } + + (*uop)->setLastMicroop(); + + for (StaticInstPtr *curUop = microOps; + !(*curUop)->isLastMicroop(); curUop++) { + (*curUop)->setDelayedCommit(); + } +} + +BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); + } + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + +BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 3; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, 0); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, 0); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, 0); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, 0); + } + microOps[2] = new MicroAddXiUop(machInst, base, base, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setDelayedCommit(); + microOps[2]->setLastMicroop(); +} + +BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 3; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); + } + microOps[2] = new MicroAddXiUop(machInst, base, base, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setDelayedCommit(); + microOps[2]->setLastMicroop(); +} + +BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, IntRegIndex offset, + ArmExtendType type, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXRegUop(machInst, dest, base, + offset, type, imm); + microOps[1] = new MicroLdrQTFpXRegUop(machInst, dest, base, + offset, type, imm); + } else { + microOps[0] = new MicroStrQBFpXRegUop(machInst, dest, base, + offset, type, imm); + microOps[1] = new MicroStrQTFpXRegUop(machInst, dest, base, + offset, type, imm); + } + + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + +BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, IntRegIndex dest, + int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + microOps[0] = new MicroLdrQBFpXLitUop(machInst, dest, imm); + microOps[1] = new MicroLdrQTFpXLitUop(machInst, dest, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, unsigned 
elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm) : @@ -193,7 +401,7 @@ VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, if (deinterleave) numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex rMid = deinterleave ? NumFloatArchRegs : vd * 2; + RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2; uint32_t noAlign = TLB::MustBeOne; @@ -295,7 +503,7 @@ VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst, numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex ufp0 = NumFloatArchRegs; + RegIndex ufp0 = NumFloatV7ArchRegs; unsigned uopIdx = 0; switch (loadSize) { @@ -556,7 +764,7 @@ VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, uint32_t noAlign = TLB::MustBeOne; - RegIndex rMid = interleave ? NumFloatArchRegs : vd * 2; + RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2; unsigned uopIdx = 0; if (interleave) { @@ -657,7 +865,7 @@ VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex ufp0 = NumFloatArchRegs; + RegIndex ufp0 = NumFloatV7ArchRegs; unsigned uopIdx = 0; switch (elems) { @@ -834,6 +1042,285 @@ VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, microOps[numMicroops - 1]->setLastMicroop(); } +VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t numRegs, bool wb) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int totNumBytes = numRegs * dataSize / 8; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0); + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 
8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + for (int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroDeintNeon64( + machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, + numStructElems, numRegs, i /* step */); + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; ++i) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t numRegs, bool wb) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int totNumBytes = numRegs * dataSize / 8; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + for(int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroIntNeon64( + machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, + numStructElems, numRegs, i /* step */); + } + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; i++) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t index, bool wb, + bool replicate) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 
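+ // Writeback, when requested, accounts for one microop up front; the memory and marshalling microops are added to the count below.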
1 : 0; + + int eSizeBytes = 1 << eSize; + int totNumBytes = numStructElems * eSizeBytes; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0); + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for (; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + for(int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroUnpackNeon64( + machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, + numStructElems, index, i /* step */, replicate); + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; i++) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t index, bool wb, + bool replicate) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int eSizeBytes = 1 << eSize; + int totNumBytes = numStructElems * eSizeBytes; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + for(int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroPackNeon64( + machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, + numStructElems, index, i /* step */, replicate); + } + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? 
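+ // The last store transfers only the residual bytes when the total is not a multiple of 16.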
residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; i++) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, IntRegIndex rn, RegIndex vd, bool single, bool up, @@ -846,14 +1333,14 @@ MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst, // to be functionally identical except that fldmx is deprecated. For now // we'll assume they're otherwise interchangeable. int count = (single ? offset : (offset / 2)); - if (count == 0 || count > NumFloatArchRegs) + if (count == 0 || count > NumFloatV7ArchRegs) warn_once("Bad offset field for VFP load/store multiple.\n"); if (count == 0) { // Force there to be at least one microop so the macroop makes sense. writeback = true; } - if (count > NumFloatArchRegs) - count = NumFloatArchRegs; + if (count > NumFloatV7ArchRegs) + count = NumFloatV7ArchRegs; numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0); microOps = new StaticInstPtr[numMicroops]; @@ -934,6 +1421,19 @@ MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const } std::string +MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, ura); + ss << ", "; + printReg(ss, urb); + ss << ", "; + ccprintf(ss, "#%d", imm); + return ss.str(); +} + +std::string MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; @@ -943,6 +1443,18 @@ MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const } std::string +MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, ura); + ccprintf(ss, ", "); + printReg(ss, urb); + printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt); + return ss.str(); +} + +std::string MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; diff --git a/src/arch/arm/insts/macromem.hh b/src/arch/arm/insts/macromem.hh index 4933a1e7c..fc8e3e1b7 100644 --- a/src/arch/arm/insts/macromem.hh +++ b/src/arch/arm/insts/macromem.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -85,6 +85,27 @@ class MicroOp : public PredOp } }; +class MicroOpX : public ArmStaticInst +{ + protected: + MicroOpX(const char *mnem, ExtMachInst machInst, OpClass __opClass) + : ArmStaticInst(mnem, machInst, __opClass) + {} + + public: + void + advancePC(PCState &pcState) const + { + if (flags[IsLastMicroop]) { + pcState.uEnd(); + } else if (flags[IsMicroop]) { + pcState.uAdvance(); + } else { + pcState.advance(); + } + } +}; + /** * Microops for Neon loads/stores */ @@ -136,6 +157,96 @@ class MicroNeonMixLaneOp : public MicroNeonMixOp }; /** + * Microops for AArch64 NEON load/store (de)interleaving + */ +class 
MicroNeonMixOp64 : public MicroOp +{ + protected: + RegIndex dest, op1; + uint8_t eSize, dataSize, numStructElems, numRegs, step; + + MicroNeonMixOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex _dest, RegIndex _op1, uint8_t _eSize, + uint8_t _dataSize, uint8_t _numStructElems, + uint8_t _numRegs, uint8_t _step) + : MicroOp(mnem, machInst, __opClass), dest(_dest), op1(_op1), + eSize(_eSize), dataSize(_dataSize), numStructElems(_numStructElems), + numRegs(_numRegs), step(_step) + { + } +}; + +class MicroNeonMixLaneOp64 : public MicroOp +{ + protected: + RegIndex dest, op1; + uint8_t eSize, dataSize, numStructElems, lane, step; + bool replicate; + + MicroNeonMixLaneOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex _dest, RegIndex _op1, + uint8_t _eSize, uint8_t _dataSize, + uint8_t _numStructElems, uint8_t _lane, uint8_t _step, + bool _replicate = false) + : MicroOp(mnem, machInst, __opClass), dest(_dest), op1(_op1), + eSize(_eSize), dataSize(_dataSize), numStructElems(_numStructElems), + lane(_lane), step(_step), replicate(_replicate) + { + } +}; + +/** + * Base classes for microcoded AArch64 NEON memory instructions. + */ +class VldMultOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, numRegs; + bool wb; + + VldMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, + bool wb); +}; + +class VstMultOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, numRegs; + bool wb; + + VstMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, + bool wb); +}; + +class VldSingleOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, index; + bool wb, replicate; + + VldSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t index, + bool wb, bool replicate = false); +}; + +class VstSingleOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, index; + bool wb, replicate; + + VstSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t index, + bool wb, bool replicate = false); +}; + +/** * Microops of the form * PC = IntRegA * CPSR = IntRegB @@ -180,10 +291,10 @@ class MicroIntImmOp : public MicroOp { protected: RegIndex ura, urb; - uint32_t imm; + int32_t imm; MicroIntImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, - RegIndex _ura, RegIndex _urb, uint32_t _imm) + RegIndex _ura, RegIndex _urb, int32_t _imm) : MicroOp(mnem, machInst, __opClass), ura(_ura), urb(_urb), imm(_imm) { @@ -192,6 +303,22 @@ class MicroIntImmOp : public MicroOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class MicroIntImmXOp : public MicroOpX +{ + protected: + RegIndex ura, urb; + int64_t imm; + + MicroIntImmXOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex _ura, RegIndex _urb, int64_t _imm) + : MicroOpX(mnem, machInst, __opClass), + ura(_ura), urb(_urb), imm(_imm) + { + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + /** * Microops of the form IntRegA = 
IntRegB op IntRegC */ @@ -210,6 +337,25 @@ class MicroIntOp : public MicroOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class MicroIntRegXOp : public MicroOp +{ + protected: + RegIndex ura, urb, urc; + ArmExtendType type; + uint32_t shiftAmt; + + MicroIntRegXOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex _ura, RegIndex _urb, RegIndex _urc, + ArmExtendType _type, uint32_t _shiftAmt) + : MicroOp(mnem, machInst, __opClass), + ura(_ura), urb(_urb), urc(_urc), + type(_type), shiftAmt(_shiftAmt) + { + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + /** * Microops of the form IntRegA = IntRegB op shifted IntRegC */ @@ -261,6 +407,61 @@ class MacroMemOp : public PredMacroOp }; /** + * Base class for pair load/store instructions. + */ +class PairMemOp : public PredMacroOp +{ + public: + enum AddrMode { + AddrMd_Offset, + AddrMd_PreIndex, + AddrMd_PostIndex + }; + + protected: + PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + uint32_t size, bool fp, bool load, bool noAlloc, bool signExt, + bool exclusive, bool acrel, int64_t imm, AddrMode mode, + IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2); +}; + +class BigFpMemImmOp : public PredMacroOp +{ + protected: + BigFpMemImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); +}; + +class BigFpMemPostOp : public PredMacroOp +{ + protected: + BigFpMemPostOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); +}; + +class BigFpMemPreOp : public PredMacroOp +{ + protected: + BigFpMemPreOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); +}; + +class BigFpMemRegOp : public PredMacroOp +{ + protected: + BigFpMemRegOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, + IntRegIndex offset, ArmExtendType type, int64_t imm); +}; + +class BigFpMemLitOp : public PredMacroOp +{ + protected: + BigFpMemLitOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + IntRegIndex dest, int64_t imm); +}; + +/** * Base classes for microcoded integer memory instructions. */ class VldMultOp : public PredMacroOp diff --git a/src/arch/arm/insts/mem.cc b/src/arch/arm/insts/mem.cc index 552803b6a..15702ff83 100644 --- a/src/arch/arm/insts/mem.cc +++ b/src/arch/arm/insts/mem.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -157,6 +157,9 @@ SrsOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const case MODE_ABORT: ss << "abort"; break; + case MODE_HYP: + ss << "hyp"; + break; case MODE_UNDEFINED: ss << "undefined"; break; diff --git a/src/arch/arm/insts/mem64.cc b/src/arch/arm/insts/mem64.cc new file mode 100644 index 000000000..4d1fdd302 --- /dev/null +++ b/src/arch/arm/insts/mem64.cc @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/mem64.hh" +#include "arch/arm/tlb.hh" +#include "base/loader/symtab.hh" +#include "mem/request.hh" + +using namespace std; + +namespace ArmISA +{ + +std::string +SysDC64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + ccprintf(ss, ", ["); + printReg(ss, base); + ccprintf(ss, "]"); + return ss.str(); +} + + + +void +Memory64::startDisassembly(std::ostream &os) const +{ + printMnemonic(os, "", false); + printReg(os, dest); + ccprintf(os, ", ["); + printReg(os, base); +} + +void +Memory64::setExcAcRel(bool exclusive, bool acrel) +{ + if (exclusive) + memAccessFlags |= Request::LLSC; + else + memAccessFlags |= ArmISA::TLB::AllowUnaligned; + if (acrel) { + flags[IsMemBarrier] = true; + flags[IsWriteBarrier] = true; + flags[IsReadBarrier] = true; + } +} + +std::string +MemoryImm64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + if (imm) + ccprintf(ss, ", #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryDImm64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, dest2); + ccprintf(ss, ", ["); + printReg(ss, base); + if (imm) + ccprintf(ss, ", #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryDImmEx64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, result); + ccprintf(ss, ", "); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, dest2); + ccprintf(ss, ", ["); + printReg(ss, base); + if (imm) + 
ccprintf(ss, ", #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryPreIndex64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + ccprintf(ss, ", #%d]!", imm); + return ss.str(); +} + +std::string +MemoryPostIndex64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + if (imm) + ccprintf(ss, "], #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryReg64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + printExtendOperand(false, ss, offset, type, shiftAmt); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryRaw64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryEx64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, result); + ccprintf(ss, ", ["); + printReg(ss, base); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryLiteral64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", #%d", pc + imm); + return ss.str(); +} +} diff --git a/src/arch/arm/insts/mem64.hh b/src/arch/arm/insts/mem64.hh new file mode 100644 index 000000000..21c1e1ea8 --- /dev/null +++ b/src/arch/arm/insts/mem64.hh @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_MEM64_HH__ +#define __ARCH_ARM_MEM64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +namespace ArmISA +{ + +class SysDC64 : public ArmStaticInst +{ + protected: + IntRegIndex base; + IntRegIndex dest; + uint64_t imm; + + SysDC64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _base, IntRegIndex _dest, uint64_t _imm) + : ArmStaticInst(mnem, _machInst, __opClass), base(_base), dest(_dest), + imm(_imm) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MightBeMicro64 : public ArmStaticInst +{ + protected: + MightBeMicro64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) + : ArmStaticInst(mnem, _machInst, __opClass) + {} + + void + advancePC(PCState &pcState) const + { + if (flags[IsLastMicroop]) { + pcState.uEnd(); + } else if (flags[IsMicroop]) { + pcState.uAdvance(); + } else { + pcState.advance(); + } + } +}; + +class Memory64 : public MightBeMicro64 +{ + public: + enum AddrMode { + AddrMd_Offset, + AddrMd_PreIndex, + AddrMd_PostIndex + }; + + protected: + + IntRegIndex dest; + IntRegIndex base; + /// True if the base register is SP (used for SP alignment checking). + bool baseIsSP; + static const unsigned numMicroops = 3; + + StaticInstPtr *uops; + + Memory64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _base) + : MightBeMicro64(mnem, _machInst, __opClass), + dest(_dest), base(_base), uops(NULL) + { + baseIsSP = isSP(_base); + } + + virtual + ~Memory64() + { + delete [] uops; + } + + StaticInstPtr + fetchMicroop(MicroPC microPC) const + { + assert(uops != NULL && microPC < numMicroops); + return uops[microPC]; + } + + void startDisassembly(std::ostream &os) const; + + unsigned memAccessFlags; + + void setExcAcRel(bool exclusive, bool acrel); +}; + +class MemoryImm64 : public Memory64 +{ + protected: + int64_t imm; + + MemoryImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm) + : Memory64(mnem, _machInst, __opClass, _dest, _base), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryDImm64 : public MemoryImm64 +{ + protected: + IntRegIndex dest2; + + MemoryDImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _dest2, IntRegIndex _base, + int64_t _imm) + : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm), + dest2(_dest2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryDImmEx64 : public MemoryDImm64 +{ + protected: + IntRegIndex result; + + MemoryDImmEx64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _result, IntRegIndex _dest, IntRegIndex _dest2, + IntRegIndex _base, int32_t _imm) + : MemoryDImm64(mnem, _machInst, __opClass, _dest, _dest2, + _base, _imm), result(_result) + {} + + std::string 
generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryPreIndex64 : public MemoryImm64 +{ + protected: + MemoryPreIndex64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + int64_t _imm) + : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryPostIndex64 : public MemoryImm64 +{ + protected: + MemoryPostIndex64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + int64_t _imm) + : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryReg64 : public Memory64 +{ + protected: + IntRegIndex offset; + ArmExtendType type; + uint64_t shiftAmt; + + MemoryReg64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + IntRegIndex _offset, ArmExtendType _type, + uint64_t _shiftAmt) + : Memory64(mnem, _machInst, __opClass, _dest, _base), + offset(_offset), type(_type), shiftAmt(_shiftAmt) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryRaw64 : public Memory64 +{ + protected: + MemoryRaw64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base) + : Memory64(mnem, _machInst, __opClass, _dest, _base) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryEx64 : public Memory64 +{ + protected: + IntRegIndex result; + + MemoryEx64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + IntRegIndex _result) + : Memory64(mnem, _machInst, __opClass, _dest, _base), result(_result) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryLiteral64 : public Memory64 +{ + protected: + int64_t imm; + + MemoryLiteral64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, int64_t _imm) + : Memory64(mnem, _machInst, __opClass, _dest, INTREG_ZERO), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; +} + +#endif //__ARCH_ARM_MEM64_HH__ diff --git a/src/arch/arm/insts/misc.cc b/src/arch/arm/insts/misc.cc index 6320bb6da..efc334c4b 100644 --- a/src/arch/arm/insts/misc.cc +++ b/src/arch/arm/insts/misc.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -146,6 +146,32 @@ MsrRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const } std::string +MrrcOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest); + ss << ", "; + printReg(ss, dest2); + ss << ", "; + printReg(ss, op1); + return ss.str(); +} + +std::string +McrrOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest); + ss << ", "; + printReg(ss, op1); + ss << ", "; + printReg(ss, op2); + return ss.str(); +} + +std::string ImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; @@ -230,6 +256,16 @@ RegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const } std::string +RegImmImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + +std::string RegRegImmImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; diff --git a/src/arch/arm/insts/misc.hh b/src/arch/arm/insts/misc.hh index c9e114f85..3d947a272 100644 --- a/src/arch/arm/insts/misc.hh +++ b/src/arch/arm/insts/misc.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -94,6 +94,42 @@ class MsrRegOp : public MsrBase std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class MrrcOp : public PredOp +{ + protected: + IntRegIndex op1; + IntRegIndex dest; + IntRegIndex dest2; + uint32_t imm; + + MrrcOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1, IntRegIndex _dest, IntRegIndex _dest2, + uint32_t _imm) : + PredOp(mnem, _machInst, __opClass), op1(_op1), dest(_dest), + dest2(_dest2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class McrrOp : public PredOp +{ + protected: + IntRegIndex op1; + IntRegIndex op2; + IntRegIndex dest; + uint32_t imm; + + McrrOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1, IntRegIndex _op2, IntRegIndex _dest, + uint32_t _imm) : + PredOp(mnem, _machInst, __opClass), op1(_op1), op2(_op2), + dest(_dest), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + class ImmOp : public PredOp { protected: @@ -220,6 +256,23 @@ class RegRegImmOp : public PredOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class RegImmImmOp : public PredOp +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + uint64_t imm1; + uint64_t imm2; + + RegImmImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, uint64_t _imm1, uint64_t _imm2) : + PredOp(mnem, _machInst, __opClass), + dest(_dest), imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + class RegRegImmImmOp : public PredOp { protected: diff --git a/src/arch/arm/insts/misc64.cc b/src/arch/arm/insts/misc64.cc new file mode 100644 index 000000000..3553020da --- /dev/null +++ b/src/arch/arm/insts/misc64.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as 
granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/misc64.hh" + +std::string +RegRegImmImmOp64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ss << ", "; + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + +std::string +RegRegRegImmOp64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ss << ", "; + printReg(ss, op1); + ss << ", "; + printReg(ss, op2); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +UnknownOp64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + return csprintf("%-10s (inst %#08x)", "unknown", machInst); +} diff --git a/src/arch/arm/insts/misc64.hh b/src/arch/arm/insts/misc64.hh new file mode 100644 index 000000000..5a0e18224 --- /dev/null +++ b/src/arch/arm/insts/misc64.hh @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#ifndef __ARCH_ARM_INSTS_MISC64_HH__ +#define __ARCH_ARM_INSTS_MISC64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +class RegRegImmImmOp64 : public ArmStaticInst +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + uint64_t imm1; + uint64_t imm2; + + RegRegImmImmOp64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + uint64_t _imm1, uint64_t _imm2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class RegRegRegImmOp64 : public ArmStaticInst +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex op2; + uint64_t imm; + + RegRegRegImmOp64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class UnknownOp64 : public ArmStaticInst +{ + protected: + + UnknownOp64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : + ArmStaticInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +#endif diff --git a/src/arch/arm/insts/neon64_mem.hh b/src/arch/arm/insts/neon64_mem.hh new file mode 100644 index 000000000..01ce1b624 --- /dev/null +++ b/src/arch/arm/insts/neon64_mem.hh @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other 
intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Mbou Eyole + * Giacomo Gabrielli + */ + +/// @file +/// Utility functions and datatypes used by AArch64 NEON memory instructions. + +#ifndef __ARCH_ARM_INSTS_NEON64_MEM_HH__ +#define __ARCH_ARM_INSTS_NEON64_MEM_HH__ + +namespace ArmISA +{ + +typedef uint64_t XReg; + +/// 128-bit NEON vector register. +struct VReg { + XReg hi; + XReg lo; +}; + +/// Write a single NEON vector element leaving the others untouched. +inline void +writeVecElem(VReg *dest, XReg src, int index, int eSize) +{ + // eSize must be less than 4: + // 0 -> 8-bit elems, + // 1 -> 16-bit elems, + // 2 -> 32-bit elems, + // 3 -> 64-bit elems + assert(eSize <= 3); + + int eBits = 8 << eSize; + int lsbPos = index * eBits; + assert(lsbPos < 128); + int shiftAmt = lsbPos % 64; + + XReg maskBits = -1; + if (eBits == 64) { + maskBits = 0; + } else { + maskBits = maskBits << eBits; + } + maskBits = ~maskBits; + + XReg sMask = maskBits; + maskBits = sMask << shiftAmt; + + if (lsbPos < 64) { + dest->lo = (dest->lo & (~maskBits)) | ((src & sMask) << shiftAmt); + } else { + dest->hi = (dest->hi & (~maskBits)) | ((src & sMask) << shiftAmt); + } +} + +/// Read a single NEON vector element. 
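+/// For example, with eSize = 2 (32-bit elements) and index = 3, eBits is 32 and lsbPos is 96, so the result is (src.hi >> 32) & 0xffffffff, i.e. bits [127:96] of the 128-bit register.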
+inline XReg +readVecElem(VReg src, int index, int eSize) +{ + // eSize must be less than 4: + // 0 -> 8-bit elems, + // 1 -> 16-bit elems, + // 2 -> 32-bit elems, + // 3 -> 64-bit elems + assert(eSize <= 3); + + XReg data; + + int eBits = 8 << eSize; + int lsbPos = index * eBits; + assert(lsbPos < 128); + int shiftAmt = lsbPos % 64; + + XReg maskBits = -1; + if (eBits == 64) { + maskBits = 0; + } else { + maskBits = maskBits << eBits; + } + maskBits = ~maskBits; + + if (lsbPos < 64) { + data = (src.lo >> shiftAmt) & maskBits; + } else { + data = (src.hi >> shiftAmt) & maskBits; + } + return data; +} + +} // namespace ArmISA + +#endif // __ARCH_ARM_INSTS_NEON64_MEM_HH__ diff --git a/src/arch/arm/insts/pred_inst.hh b/src/arch/arm/insts/pred_inst.hh index c441d1f32..c5e2ab386 100644 --- a/src/arch/arm/insts/pred_inst.hh +++ b/src/arch/arm/insts/pred_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -78,7 +78,8 @@ modified_imm(uint8_t ctrlImm, uint8_t dataImm) } static inline uint64_t -simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid) +simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid, + bool isAarch64 = false) { uint64_t bigData = data; immValid = true; @@ -133,12 +134,20 @@ simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid) } break; case 0xf: - if (!op) { - uint64_t bVal = bits(bigData, 6) ? (0x1F) : (0x20); - bigData = (bits(bigData, 5, 0) << 19) | - (bVal << 25) | (bits(bigData, 7) << 31); - bigData |= (bigData << 32); - break; + { + uint64_t bVal = 0; + if (!op) { + bVal = bits(bigData, 6) ? (0x1F) : (0x20); + bigData = (bits(bigData, 5, 0) << 19) | + (bVal << 25) | (bits(bigData, 7) << 31); + bigData |= (bigData << 32); + break; + } else if (isAarch64) { + bVal = bits(bigData, 6) ? (0x0FF) : (0x100); + bigData = (bits(bigData, 5, 0) << 48) | + (bVal << 54) | (bits(bigData, 7) << 63); + break; + } } // Fall through, immediate encoding is invalid. default: @@ -179,11 +188,14 @@ class PredOp : public ArmStaticInst /// Constructor PredOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : - ArmStaticInst(mnem, _machInst, __opClass), - condCode(machInst.itstateMask ? - (ConditionCode)(uint8_t)machInst.itstateCond : - (ConditionCode)(unsigned)machInst.condCode) + ArmStaticInst(mnem, _machInst, __opClass) { + if (machInst.aarch64) + condCode = COND_UC; + else if (machInst.itstateMask) + condCode = (ConditionCode)(uint8_t)machInst.itstateCond; + else + condCode = (ConditionCode)(unsigned)machInst.condCode; } }; diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc index 2a8dee162..260c29a84 100644 --- a/src/arch/arm/insts/static_inst.cc +++ b/src/arch/arm/insts/static_inst.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -86,6 +86,90 @@ ArmStaticInst::shift_rm_imm(uint32_t base, uint32_t shamt, return 0; } +int64_t +ArmStaticInst::shiftReg64(uint64_t base, uint64_t shiftAmt, + ArmShiftType type, uint8_t width) const +{ + shiftAmt = shiftAmt % width; + ArmShiftType shiftType; + shiftType = (ArmShiftType)type; + + switch (shiftType) + { + case LSL: + return base << shiftAmt; + case LSR: + if (shiftAmt == 0) + return base; + else + return (base & mask(width)) >> shiftAmt; + case ASR: + if (shiftAmt == 0) { + return base; + } else { + int sign_bit = bits(base, intWidth - 1); + base >>= shiftAmt; + base = sign_bit ? (base | ~mask(intWidth - shiftAmt)) : base; + return base & mask(intWidth); + } + case ROR: + if (shiftAmt == 0) + return base; + else + return (base << (width - shiftAmt)) | (base >> shiftAmt); + default: + ccprintf(std::cerr, "Unhandled shift type\n"); + exit(1); + break; + } + return 0; +} + +int64_t +ArmStaticInst::extendReg64(uint64_t base, ArmExtendType type, + uint64_t shiftAmt, uint8_t width) const +{ + bool sign_extend = false; + int len = 0; + switch (type) { + case UXTB: + len = 8; + break; + case UXTH: + len = 16; + break; + case UXTW: + len = 32; + break; + case UXTX: + len = 64; + break; + case SXTB: + len = 8; + sign_extend = true; + break; + case SXTH: + len = 16; + sign_extend = true; + break; + case SXTW: + len = 32; + sign_extend = true; + break; + case SXTX: + len = 64; + sign_extend = true; + break; + } + len = len <= width - shiftAmt ? len : width - shiftAmt; + uint64_t tmp = (uint64_t) bits(base, len - 1, 0) << shiftAmt; + if (sign_extend) { + int sign_bit = bits(tmp, len + shiftAmt - 1); + tmp = sign_bit ? (tmp | ~mask(len + shiftAmt)) : tmp; + } + return tmp & mask(width); +} + // Shift Rm by Rs int32_t ArmStaticInst::shift_rm_rs(uint32_t base, uint32_t shamt, @@ -214,22 +298,33 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const switch (regIdxToClass(reg, &rel_reg)) { case IntRegClass: - switch (rel_reg) { - case PCReg: - ccprintf(os, "pc"); - break; - case StackPointerReg: - ccprintf(os, "sp"); - break; - case FramePointerReg: - ccprintf(os, "fp"); - break; - case ReturnAddressReg: - ccprintf(os, "lr"); - break; - default: - ccprintf(os, "r%d", reg); - break; + if (aarch64) { + if (reg == INTREG_UREG0) + ccprintf(os, "ureg0"); + else if (reg == INTREG_SPX) + ccprintf(os, "%s%s", (intWidth == 32) ? "w" : "", "sp"); + else if (reg == INTREG_X31) + ccprintf(os, "%szr", (intWidth == 32) ? "w" : "x"); + else + ccprintf(os, "%s%d", (intWidth == 32) ? 
"w" : "x", reg); + } else { + switch (rel_reg) { + case PCReg: + ccprintf(os, "pc"); + break; + case StackPointerReg: + ccprintf(os, "sp"); + break; + case FramePointerReg: + ccprintf(os, "fp"); + break; + case ReturnAddressReg: + ccprintf(os, "lr"); + break; + default: + ccprintf(os, "r%d", reg); + break; + } } break; case FloatRegClass: @@ -247,67 +342,102 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const void ArmStaticInst::printMnemonic(std::ostream &os, const std::string &suffix, - bool withPred) const + bool withPred, + bool withCond64, + ConditionCode cond64) const { os << " " << mnemonic; - if (withPred) { - unsigned condCode = machInst.condCode; - switch (condCode) { - case COND_EQ: - os << "eq"; - break; - case COND_NE: - os << "ne"; - break; - case COND_CS: - os << "cs"; - break; - case COND_CC: - os << "cc"; - break; - case COND_MI: - os << "mi"; - break; - case COND_PL: - os << "pl"; - break; - case COND_VS: - os << "vs"; - break; - case COND_VC: - os << "vc"; - break; - case COND_HI: - os << "hi"; - break; - case COND_LS: - os << "ls"; - break; - case COND_GE: - os << "ge"; - break; - case COND_LT: - os << "lt"; - break; - case COND_GT: - os << "gt"; - break; - case COND_LE: - os << "le"; - break; - case COND_AL: - // This one is implicit. - break; - case COND_UC: - // Unconditional. - break; - default: - panic("Unrecognized condition code %d.\n", condCode); - } + if (withPred && !aarch64) { + printCondition(os, machInst.condCode); + os << suffix; + } else if (withCond64) { + os << "."; + printCondition(os, cond64); os << suffix; - if (machInst.bigThumb) - os << ".w"; - os << " "; + } + if (machInst.bigThumb) + os << ".w"; + os << " "; +} + +void +ArmStaticInst::printTarget(std::ostream &os, Addr target, + const SymbolTable *symtab) const +{ + Addr symbolAddr; + std::string symbol; + + if (symtab && symtab->findNearestSymbol(target, symbol, symbolAddr)) { + ccprintf(os, "<%s", symbol); + if (symbolAddr != target) + ccprintf(os, "+%d>", target - symbolAddr); + else + ccprintf(os, ">"); + } else { + ccprintf(os, "%#x", target); + } +} + +void +ArmStaticInst::printCondition(std::ostream &os, + unsigned code, + bool noImplicit) const +{ + switch (code) { + case COND_EQ: + os << "eq"; + break; + case COND_NE: + os << "ne"; + break; + case COND_CS: + os << "cs"; + break; + case COND_CC: + os << "cc"; + break; + case COND_MI: + os << "mi"; + break; + case COND_PL: + os << "pl"; + break; + case COND_VS: + os << "vs"; + break; + case COND_VC: + os << "vc"; + break; + case COND_HI: + os << "hi"; + break; + case COND_LS: + os << "ls"; + break; + case COND_GE: + os << "ge"; + break; + case COND_LT: + os << "lt"; + break; + case COND_GT: + os << "gt"; + break; + case COND_LE: + os << "le"; + break; + case COND_AL: + // This one is implicit. + if (noImplicit) + os << "al"; + break; + case COND_UC: + // Unconditional. 
+ if (noImplicit) + os << "uc"; + break; + default: + panic("Unrecognized condition code %d.\n", code); } } @@ -393,6 +523,38 @@ ArmStaticInst::printShiftOperand(std::ostream &os, } void +ArmStaticInst::printExtendOperand(bool firstOperand, std::ostream &os, + IntRegIndex rm, ArmExtendType type, + int64_t shiftAmt) const +{ + if (!firstOperand) + ccprintf(os, ", "); + printReg(os, rm); + if (type == UXTX && shiftAmt == 0) + return; + switch (type) { + case UXTB: ccprintf(os, ", UXTB"); + break; + case UXTH: ccprintf(os, ", UXTH"); + break; + case UXTW: ccprintf(os, ", UXTW"); + break; + case UXTX: ccprintf(os, ", LSL"); + break; + case SXTB: ccprintf(os, ", SXTB"); + break; + case SXTH: ccprintf(os, ", SXTH"); + break; + case SXTW: ccprintf(os, ", SXTW"); + break; + case SXTX: ccprintf(os, ", SXTX"); + break; + } + if (type == UXTX || shiftAmt) + ccprintf(os, " #%d", shiftAmt); +} + +void ArmStaticInst::printDataInst(std::ostream &os, bool withImm, bool immShift, bool s, IntRegIndex rd, IntRegIndex rn, IntRegIndex rm, IntRegIndex rs, uint32_t shiftAmt, diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh index c36024ecd..aeec67ec2 100644 --- a/src/arch/arm/insts/static_inst.hh +++ b/src/arch/arm/insts/static_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -44,6 +44,7 @@ #include "arch/arm/faults.hh" #include "arch/arm/utility.hh" +#include "arch/arm/system.hh" #include "base/trace.hh" #include "cpu/static_inst.hh" #include "sim/byteswap.hh" @@ -55,6 +56,9 @@ namespace ArmISA class ArmStaticInst : public StaticInst { protected: + bool aarch64; + uint8_t intWidth; + int32_t shift_rm_imm(uint32_t base, uint32_t shamt, uint32_t type, uint32_t cfval) const; int32_t shift_rm_rs(uint32_t base, uint32_t shamt, @@ -65,6 +69,11 @@ class ArmStaticInst : public StaticInst bool shift_carry_rs(uint32_t base, uint32_t shamt, uint32_t type, uint32_t cfval) const; + int64_t shiftReg64(uint64_t base, uint64_t shiftAmt, + ArmShiftType type, uint8_t width) const; + int64_t extendReg64(uint64_t base, ArmExtendType type, + uint64_t shiftAmt, uint8_t width) const; + template<int width> static inline bool saturateOp(int32_t &res, int64_t op1, int64_t op2, bool sub=false) @@ -135,6 +144,11 @@ class ArmStaticInst : public StaticInst OpClass __opClass) : StaticInst(mnem, _machInst, __opClass) { + aarch64 = machInst.aarch64; + if (bits(machInst, 28, 24) == 0x10) + intWidth = 64; // Force 64-bit width for ADR/ADRP + else + intWidth = (aarch64 && bits(machInst, 31)) ? 
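// [Editor's sketch] The constructor above derives the operand width from the
// A64 'sf' bit (instruction bit 31), forcing 64 bits for the PC-relative
// ADR/ADRP group (bits 28..24 == 0x10); printReg() then turns that width into
// the w/x prefix. Both rules as standalone helpers (illustrative, not gem5
// API; register number 31 doubling as the zero register or SP is the A64
// disassembly convention):
#include <cstdint>
#include <cstdio>

static unsigned a64IntWidth(uint32_t machInst)
{
    if (((machInst >> 24) & 0x1f) == 0x10)
        return 64;                        // ADR/ADRP always use X registers
    return (machInst >> 31) ? 64 : 32;    // sf == 1 selects 64-bit operands
}

static void printA64IntReg(unsigned regNum, bool is64, bool useSpForReg31)
{
    if (regNum == 31)
        std::fputs(useSpForReg31 ? (is64 ? "sp" : "wsp")
                                 : (is64 ? "xzr" : "wzr"), stdout);
    else
        std::printf("%c%u", is64 ? 'x' : 'w', regNum);
}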
64 : 32; } /// Print a register name for disassembly given the unique @@ -142,13 +156,22 @@ class ArmStaticInst : public StaticInst void printReg(std::ostream &os, int reg) const; void printMnemonic(std::ostream &os, const std::string &suffix = "", - bool withPred = true) const; + bool withPred = true, + bool withCond64 = false, + ConditionCode cond64 = COND_UC) const; + void printTarget(std::ostream &os, Addr target, + const SymbolTable *symtab) const; + void printCondition(std::ostream &os, unsigned code, + bool noImplicit=false) const; void printMemSymbol(std::ostream &os, const SymbolTable *symtab, const std::string &prefix, const Addr addr, const std::string &suffix) const; void printShiftOperand(std::ostream &os, IntRegIndex rm, bool immShift, uint32_t shiftAmt, IntRegIndex rs, ArmShiftType type) const; + void printExtendOperand(bool firstOperand, std::ostream &os, + IntRegIndex rm, ArmExtendType type, + int64_t shiftAmt) const; void printDataInst(std::ostream &os, bool withImm) const; @@ -166,10 +189,13 @@ class ArmStaticInst : public StaticInst std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; static inline uint32_t - cpsrWriteByInstr(CPSR cpsr, uint32_t val, - uint8_t byteMask, bool affectState, bool nmfi) + cpsrWriteByInstr(CPSR cpsr, uint32_t val, SCR scr, NSACR nsacr, + uint8_t byteMask, bool affectState, bool nmfi, ThreadContext *tc) { - bool privileged = (cpsr.mode != MODE_USER); + bool privileged = (cpsr.mode != MODE_USER); + bool haveVirt = ArmSystem::haveVirtualization(tc); + bool haveSecurity = ArmSystem::haveSecurity(tc); + bool isSecure = inSecureState(scr, cpsr) || !haveSecurity; uint32_t bitMask = 0; @@ -182,14 +208,53 @@ class ArmStaticInst : public StaticInst } if (bits(byteMask, 1)) { unsigned highIdx = affectState ? 15 : 9; - unsigned lowIdx = privileged ? 8 : 9; + unsigned lowIdx = (privileged && (isSecure || scr.aw || haveVirt)) + ? 8 : 9; bitMask = bitMask | mask(highIdx, lowIdx); } if (bits(byteMask, 0)) { if (privileged) { - bitMask = bitMask | mask(7, 6); - if (!badMode((OperatingMode)(val & mask(5)))) { - bitMask = bitMask | mask(5); + bitMask |= 1 << 7; + if ( (!nmfi || !((val >> 6) & 0x1)) && + (isSecure || scr.fw || haveVirt) ) { + bitMask |= 1 << 6; + } + // Now check the new mode is allowed + OperatingMode newMode = (OperatingMode) (val & mask(5)); + OperatingMode oldMode = (OperatingMode)(uint32_t)cpsr.mode; + if (!badMode(newMode)) { + bool validModeChange = true; + // Check for attempts to enter modes only permitted in + // Secure state from Non-secure state. These are Monitor + // mode ('10110'), and FIQ mode ('10001') if the Security + // Extensions have reserved it. 
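// [Editor's sketch] The validity checks that follow boil down to a small
// predicate over (oldMode, newMode, security state). A condensed standalone
// version, with the 5-bit M-field encodings written out locally; fiqReserved
// stands in for NSACR.RFR and isExcReturn for the 'affectState' flag (the
// Hyp-in-Secure and 32-to-64-bit checks are elided for brevity):
#include <cstdint>

static bool validCpsrModeChange(uint8_t oldM, uint8_t newM, bool isSecure,
                                bool fiqReserved, bool isExcReturn)
{
    constexpr uint8_t FIQ = 0x11, MON = 0x16, HYP = 0x1a;
    if (!isSecure && newM == MON)
        return false;                     // Monitor mode is Secure-only
    if (!isSecure && newM == FIQ && fiqReserved)
        return false;                     // FIQ reserved for Secure state
    if (!isSecure && oldM != HYP && newM == HYP)
        return false;                     // cannot enter Hyp from NS PL1
    if (oldM == HYP && newM != HYP && !isExcReturn)
        return false;                     // only leave Hyp via exc. return
    return true;
}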
+ if (!isSecure && newMode == MODE_MON) + validModeChange = false; + if (!isSecure && newMode == MODE_FIQ && nsacr.rfr == 1) + validModeChange = false; + // There is no Hyp mode ('11010') in Secure state, so that + // is UNPREDICTABLE + if (scr.ns == 0 && newMode == MODE_HYP) + validModeChange = false; + // Cannot move into Hyp mode directly from a Non-secure + // PL1 mode + if (!isSecure && oldMode != MODE_HYP && newMode == MODE_HYP) + validModeChange = false; + // Cannot move out of Hyp mode with this function except + // on an exception return + if (oldMode == MODE_HYP && newMode != MODE_HYP && !affectState) + validModeChange = false; + // Must not change to 64 bit when running in 32 bit mode + if (!opModeIs64(oldMode) && opModeIs64(newMode)) + validModeChange = false; + + // If we passed all of the above then set the bit mask to + // copy the mode across + if (validModeChange) { + bitMask = bitMask | mask(5); + } else { + warn_once("Illegal change to CPSR mode attempted\n"); + } } else { warn_once("Ignoring write of bad mode to CPSR.\n"); } @@ -198,11 +263,7 @@ class ArmStaticInst : public StaticInst bitMask = bitMask | (1 << 5); } - bool cpsr_f = cpsr.f; - uint32_t new_cpsr = ((uint32_t)cpsr & ~bitMask) | (val & bitMask); - if (nmfi && !cpsr_f) - new_cpsr &= ~(1 << 6); - return new_cpsr; + return ((uint32_t)cpsr & ~bitMask) | (val & bitMask); } static inline uint32_t @@ -296,12 +357,12 @@ class ArmStaticInst : public StaticInst inline Fault disabledFault() const { - if (FullSystem) { - return new UndefinedInstruction(); - } else { - return new UndefinedInstruction(machInst, false, mnemonic, true); - } + return new UndefinedInstruction(machInst, false, mnemonic, true); } + + public: + virtual void + annotateFault(ArmFault *fault) {} }; } diff --git a/src/arch/arm/insts/vfp.cc b/src/arch/arm/insts/vfp.cc index ca0f58226..03fdc83fa 100644 --- a/src/arch/arm/insts/vfp.cc +++ b/src/arch/arm/insts/vfp.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -46,6 +46,37 @@ */ std::string +FpCondCompRegOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", defCc); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +FpCondSelOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; @@ -92,6 +123,21 @@ FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const } std::string +FpRegRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest + FP_Reg_Base); + ss << ", "; + printReg(ss, op1 + FP_Reg_Base); + ss << ", "; + printReg(ss, op2 + FP_Reg_Base); + ss << ", "; + printReg(ss, op3 + FP_Reg_Base); + return ss.str(); +} + +std::string FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; @@ -131,24 +177,25 @@ prepFpState(uint32_t 
rMode) } void -finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush) +finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask) { int exceptions = fetestexcept(FeAllExceptions); bool underflow = false; - if (exceptions & FeInvalid) { + if ((exceptions & FeInvalid) && mask.ioc) { fpscr.ioc = 1; } - if (exceptions & FeDivByZero) { + if ((exceptions & FeDivByZero) && mask.dzc) { fpscr.dzc = 1; } - if (exceptions & FeOverflow) { + if ((exceptions & FeOverflow) && mask.ofc) { fpscr.ofc = 1; } if (exceptions & FeUnderflow) { underflow = true; - fpscr.ufc = 1; + if (mask.ufc) + fpscr.ufc = 1; } - if ((exceptions & FeInexact) && !(underflow && flush)) { + if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) { fpscr.ixc = 1; } fesetround(state); @@ -329,19 +376,33 @@ fixFpSFpDDest(FPSCR fpscr, float val) return mid; } -uint16_t -vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, - uint32_t rMode, bool ahp, float op) +static inline uint16_t +vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble) { - uint32_t opBits = fpToBits(op); + uint32_t mWidth; + uint32_t eWidth; + uint32_t eHalfRange; + uint32_t sBitPos; + + if (isDouble) { + mWidth = 52; + eWidth = 11; + } else { + mWidth = 23; + eWidth = 8; + } + sBitPos = eWidth + mWidth; + eHalfRange = (1 << (eWidth-1)) - 1; + // Extract the operand. - bool neg = bits(opBits, 31); - uint32_t exponent = bits(opBits, 30, 23); - uint32_t oldMantissa = bits(opBits, 22, 0); - uint32_t mantissa = oldMantissa >> (23 - 10); + bool neg = bits(opBits, sBitPos); + uint32_t exponent = bits(opBits, sBitPos-1, mWidth); + uint64_t oldMantissa = bits(opBits, mWidth-1, 0); + uint32_t mantissa = oldMantissa >> (mWidth - 10); // Do the conversion. - uint32_t extra = oldMantissa & mask(23 - 10); - if (exponent == 0xff) { + uint64_t extra = oldMantissa & mask(mWidth - 10); + if (exponent == mask(eWidth)) { if (oldMantissa != 0) { // Nans. if (bits(mantissa, 9) == 0) { @@ -379,7 +440,6 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, if (exponent == 0) { // Denormalized. - // If flush to zero is on, this shouldn't happen. assert(!flush); @@ -407,13 +467,13 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, // We need to track the dropped bits differently since // more can be dropped by denormalizing. - bool topOne = bits(extra, 12); - bool restZeros = bits(extra, 11, 0) == 0; + bool topOne = bits(extra, mWidth - 10 - 1); + bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0; - if (exponent <= (127 - 15)) { + if (exponent <= (eHalfRange - 15)) { // The result is too small. Denormalize. mantissa |= (1 << 10); - while (mantissa && exponent <= (127 - 15)) { + while (mantissa && exponent <= (eHalfRange - 15)) { restZeros = restZeros && !topOne; topOne = bits(mantissa, 0); mantissa = mantissa >> 1; @@ -424,7 +484,7 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, exponent = 0; } else { // Change bias. 
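// [Editor's sketch] vcvtFpFpH() above folds the single- and double-precision
// sources into one routine driven by (mWidth, eWidth). The core field
// extraction and re-bias step for an in-range normal number, stripped of the
// rounding, NaN and denormal handling (assumes the IEEE 754 binary32 layout):
#include <cstdint>
#include <cstring>

static uint16_t fp32ToFp16NormalTrunc(float f)
{
    uint32_t b;
    std::memcpy(&b, &f, sizeof(b));
    const unsigned mWidth = 23, eWidth = 8;
    const unsigned eHalfRange = (1u << (eWidth - 1)) - 1;     // bias 127
    uint32_t sign = b >> (mWidth + eWidth);
    uint32_t exp  = (b >> mWidth) & ((1u << eWidth) - 1);
    uint32_t man  = b & ((1u << mWidth) - 1);
    // Re-bias from 127 to 15 and keep the top 10 mantissa bits (truncating).
    uint16_t hExp = static_cast<uint16_t>(exp - (eHalfRange - 15));
    uint16_t hMan = static_cast<uint16_t>(man >> (mWidth - 10));
    return static_cast<uint16_t>((sign << 15) | (hExp << 10) | hMan);
}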
- exponent -= (127 - 15); + exponent -= (eHalfRange - 15); } if (exponent == 0 && (inexact || fpscr.ufe)) { @@ -488,155 +548,115 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, return result; } -float -vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) +uint16_t +vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, float op) { - float junk = 0.0; + uint64_t opBits = fpToBits(op); + return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false); +} + +uint16_t +vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, double op) +{ + uint64_t opBits = fpToBits(op); + return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true); +} + +static inline uint64_t +vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble) +{ + uint32_t mWidth; + uint32_t eWidth; + uint32_t eHalfRange; + uint32_t sBitPos; + + if (isDouble) { + mWidth = 52; + eWidth = 11; + } else { + mWidth = 23; + eWidth = 8; + } + sBitPos = eWidth + mWidth; + eHalfRange = (1 << (eWidth-1)) - 1; + // Extract the bitfields. bool neg = bits(op, 15); uint32_t exponent = bits(op, 14, 10); - uint32_t mantissa = bits(op, 9, 0); + uint64_t mantissa = bits(op, 9, 0); // Do the conversion. if (exponent == 0) { if (mantissa != 0) { // Normalize the value. - exponent = exponent + (127 - 15) + 1; + exponent = exponent + (eHalfRange - 15) + 1; while (mantissa < (1 << 10)) { mantissa = mantissa << 1; exponent--; } } - mantissa = mantissa << (23 - 10); + mantissa = mantissa << (mWidth - 10); } else if (exponent == 0x1f && !ahp) { // Infinities and nans. - exponent = 0xff; + exponent = mask(eWidth); if (mantissa != 0) { // Nans. - mantissa = mantissa << (23 - 10); - if (bits(mantissa, 22) == 0) { + mantissa = mantissa << (mWidth - 10); + if (bits(mantissa, mWidth-1) == 0) { // Signalling nan. fpscr.ioc = 1; - mantissa |= (1 << 22); + mantissa |= (((uint64_t) 1) << (mWidth-1)); } if (defaultNan) { - mantissa &= ~mask(22); + mantissa &= ~mask(mWidth-1); neg = false; } } } else { - exponent = exponent + (127 - 15); - mantissa = mantissa << (23 - 10); + exponent = exponent + (eHalfRange - 15); + mantissa = mantissa << (mWidth - 10); } // Reassemble the result. - uint32_t result = bits(mantissa, 22, 0); - replaceBits(result, 30, 23, exponent); - if (neg) - result |= (1 << 31); + uint64_t result = bits(mantissa, mWidth-1, 0); + replaceBits(result, sBitPos-1, mWidth, exponent); + if (neg) { + result |= (((uint64_t) 1) << sBitPos); + } + return result; +} + +double +vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) +{ + double junk = 0.0; + uint64_t result; + + result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true); return bitsToFp(result, junk); } -uint64_t -vfpFpSToFixed(float val, bool isSigned, bool half, - uint8_t imm, bool rzero) +float +vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) { - int rmode = rzero ? 
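// [Editor's sketch] Going the other way, vcvtFpHFp() normalizes a
// half-precision denormal by shifting the mantissa up until the implicit bit
// appears, decrementing the exponent as it goes. That loop in isolation
// (values shown for the binary16-to-binary32 case):
#include <cstdint>

static void normalizeHalfDenormal(uint32_t &exponent, uint64_t &mantissa)
{
    // Entry condition: exponent == 0 and mantissa != 0.
    exponent = (127 - 15) + 1;        // change bias, plus one for the loop
    while (mantissa < (1u << 10)) {   // until bit 10 (the implicit bit) sets
        mantissa <<= 1;
        exponent--;
    }
    // The implicit bit is dropped later, when the mantissa field is
    // reassembled with bits(mantissa, mWidth-1, 0).
}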
FeRoundZero : fegetround(); - __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); - fesetround(FeRoundNearest); - val = val * powf(2.0, imm); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - fesetround(rmode); - feclearexcept(FeAllExceptions); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - float origVal = val; - val = rintf(val); - int fpType = std::fpclassify(val); - if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { - if (fpType == FP_NAN) { - feraiseexcept(FeInvalid); - } - val = 0.0; - } else if (origVal != val) { - switch (rmode) { - case FeRoundNearest: - if (origVal - val > 0.5) - val += 1.0; - else if (val - origVal > 0.5) - val -= 1.0; - break; - case FeRoundDown: - if (origVal < val) - val -= 1.0; - break; - case FeRoundUpward: - if (origVal > val) - val += 1.0; - break; - } - feraiseexcept(FeInexact); - } + float junk = 0.0; + uint64_t result; - if (isSigned) { - if (half) { - if ((double)val < (int16_t)(1 << 15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)(1 << 15); - } - if ((double)val > (int16_t)mask(15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)mask(15); - } - return (int16_t)val; - } else { - if ((double)val < (int32_t)(1 << 31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)(1 << 31); - } - if ((double)val > (int32_t)mask(31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)mask(31); - } - return (int32_t)val; - } - } else { - if (half) { - if ((double)val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if ((double)val > (mask(16))) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(16); - } - return (uint16_t)val; - } else { - if ((double)val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if ((double)val > (mask(32))) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(32); - } - return (uint32_t)val; - } - } + result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false); + return bitsToFp(result, junk); } float vfpUFixedToFpS(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm) + uint64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = (uint16_t)val; + else if (width == 32) + val = (uint32_t)val; + else if (width != 64) + panic("Unsupported width %d", width); float scale = powf(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -646,11 +666,16 @@ vfpUFixedToFpS(bool flush, bool defaultNan, float vfpSFixedToFpS(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm) + int64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = sext<16>(val & mask(16)); + else if (width == 32) + val = sext<32>(val & mask(32)); + else if (width != 64) + panic("Unsupported width %d", width); + float scale = powf(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -658,106 +683,19 @@ vfpSFixedToFpS(bool flush, bool defaultNan, return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); } -uint64_t -vfpFpDToFixed(double val, bool isSigned, bool half, - uint8_t imm, bool rzero) -{ - int rmode = rzero ? 
FeRoundZero : fegetround(); - fesetround(FeRoundNearest); - val = val * pow(2.0, imm); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - fesetround(rmode); - feclearexcept(FeAllExceptions); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - double origVal = val; - val = rint(val); - int fpType = std::fpclassify(val); - if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { - if (fpType == FP_NAN) { - feraiseexcept(FeInvalid); - } - val = 0.0; - } else if (origVal != val) { - switch (rmode) { - case FeRoundNearest: - if (origVal - val > 0.5) - val += 1.0; - else if (val - origVal > 0.5) - val -= 1.0; - break; - case FeRoundDown: - if (origVal < val) - val -= 1.0; - break; - case FeRoundUpward: - if (origVal > val) - val += 1.0; - break; - } - feraiseexcept(FeInexact); - } - if (isSigned) { - if (half) { - if (val < (int16_t)(1 << 15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)(1 << 15); - } - if (val > (int16_t)mask(15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)mask(15); - } - return (int16_t)val; - } else { - if (val < (int32_t)(1 << 31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)(1 << 31); - } - if (val > (int32_t)mask(31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)mask(31); - } - return (int32_t)val; - } - } else { - if (half) { - if (val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if (val > mask(16)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(16); - } - return (uint16_t)val; - } else { - if (val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if (val > mask(32)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(32); - } - return (uint32_t)val; - } - } -} double vfpUFixedToFpD(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm) + uint64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = (uint16_t)val; + else if (width == 32) + val = (uint32_t)val; + else if (width != 64) + panic("Unsupported width %d", width); + double scale = pow(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -767,11 +705,16 @@ vfpUFixedToFpD(bool flush, bool defaultNan, double vfpSFixedToFpD(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm) + int64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = sext<16>(val & mask(16)); + else if (width == 32) + val = sext<32>(val & mask(32)); + else if (width != 64) + panic("Unsupported width %d", width); + double scale = pow(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -976,6 +919,85 @@ template double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, double op1, double op2) const; +// @TODO remove this function when we've finished switching all FMA code to use the new FPLIB +template <class fpType> +fpType +FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, + fpType (*func)(fpType, fpType, fpType), + bool flush, bool defaultNan, uint32_t rMode) const +{ + const bool single = (sizeof(fpType) == sizeof(float)); + fpType junk = 0.0; + + if (flush && (flushToZero(op1, op2) || flushToZero(op3))) + fpscr.idc = 1; + VfpSavedState state = prepFpState(rMode); + __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state) + : "m" (op1), "m" (op2), 
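// [Editor's sketch] The fixed-point-to-float helpers above replace the old
// 'bool half' flag with an explicit operand width. The essence: keep the low
// 'width' bits (sign-extending them for the signed variants), then divide by
// 2^imm. Standalone version of the signed/double case:
#include <cstdint>
#include <cmath>

static double sFixedToDouble(int64_t val, unsigned width, unsigned imm)
{
    uint64_t u = static_cast<uint64_t>(val);
    if (width < 64) {
        const uint64_t m = (1ULL << width) - 1;
        u &= m;
        if (u & (1ULL << (width - 1)))
            u |= ~m;                      // sign-extend the field
    }
    return static_cast<double>(static_cast<int64_t>(u)) /
           std::pow(2.0, static_cast<double>(imm));
}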
"m" (op3), "m" (state)); + fpType dest = func(op1, op2, op3); + __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); + + int fpClass = std::fpclassify(dest); + // Get NAN behavior right. This varies between x86 and ARM. + if (fpClass == FP_NAN) { + const uint64_t qnan = + single ? 0x7fc00000 : ULL(0x7ff8000000000000); + const bool nan1 = std::isnan(op1); + const bool nan2 = std::isnan(op2); + const bool nan3 = std::isnan(op3); + const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); + const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); + const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan); + if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) { + dest = bitsToFp(qnan, junk); + } else if (signal1) { + dest = bitsToFp(fpToBits(op1) | qnan, junk); + } else if (signal2) { + dest = bitsToFp(fpToBits(op2) | qnan, junk); + } else if (signal3) { + dest = bitsToFp(fpToBits(op3) | qnan, junk); + } else if (nan1) { + dest = op1; + } else if (nan2) { + dest = op2; + } else if (nan3) { + dest = op3; + } + } else if (flush && flushToZero(dest)) { + feraiseexcept(FeUnderflow); + } else if (( + (single && (dest == bitsToFp(0x00800000, junk) || + dest == bitsToFp(0x80800000, junk))) || + (!single && + (dest == bitsToFp(ULL(0x0010000000000000), junk) || + dest == bitsToFp(ULL(0x8010000000000000), junk))) + ) && rMode != VfpRoundZero) { + /* + * Correct for the fact that underflow is detected -before- rounding + * in ARM and -after- rounding in x86. + */ + fesetround(FeRoundZero); + __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3) + : "m" (op1), "m" (op2), "m" (op3)); + fpType temp = func(op1, op2, op2); + __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); + if (flush && flushToZero(temp)) { + dest = temp; + } + } + finishVfp(fpscr, state, flush); + return dest; +} + +template +float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3, + float (*func)(float, float, float), + bool flush, bool defaultNan, uint32_t rMode) const; +template +double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3, + double (*func)(double, double, double), + bool flush, bool defaultNan, uint32_t rMode) const; + template <class fpType> fpType FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2, diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh index 9babaae04..f17f90973 100644 --- a/src/arch/arm/insts/vfp.hh +++ b/src/arch/arm/insts/vfp.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -104,7 +104,8 @@ enum VfpRoundingMode VfpRoundNearest = 0, VfpRoundUpward = 1, VfpRoundDown = 2, - VfpRoundZero = 3 + VfpRoundZero = 3, + VfpRoundAway = 4 }; static inline float bitsToFp(uint64_t, float); @@ -212,7 +213,7 @@ isSnan(fpType val) typedef int VfpSavedState; VfpSavedState prepFpState(uint32_t rMode); -void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush); +void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask = FpscrExcMask); template <class fpType> fpType fixDest(FPSCR fpscr, fpType val, fpType op1); @@ -228,7 +229,11 @@ double fixFpSFpDDest(FPSCR fpscr, float val); uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, float op); -float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); +uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, double op); + +float vcvtFpHFpS(FPSCR &fpscr, bool 
defaultNan, bool ahp, uint16_t op); +double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); static inline double makeDouble(uint32_t low, uint32_t high) @@ -249,19 +254,192 @@ highFromDouble(double val) return fpToBits(val) >> 32; } -uint64_t vfpFpSToFixed(float val, bool isSigned, bool half, - uint8_t imm, bool rzero = true); +static inline void +setFPExceptions(int exceptions) { + feclearexcept(FeAllExceptions); + feraiseexcept(exceptions); +} + +template <typename T> +uint64_t +vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool + useRmode = true, VfpRoundingMode roundMode = VfpRoundZero, + bool aarch64 = false) +{ + int rmode; + bool roundAwayFix = false; + + if (!useRmode) { + rmode = fegetround(); + } else { + switch (roundMode) + { + case VfpRoundNearest: + rmode = FeRoundNearest; + break; + case VfpRoundUpward: + rmode = FeRoundUpward; + break; + case VfpRoundDown: + rmode = FeRoundDown; + break; + case VfpRoundZero: + rmode = FeRoundZero; + break; + case VfpRoundAway: + // There is no equivalent rounding mode, use round down and we'll + // fix it later + rmode = FeRoundDown; + roundAwayFix = true; + break; + default: + panic("Unsupported roundMode %d\n", roundMode); + } + } + __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); + fesetround(FeRoundNearest); + val = val * pow(2.0, imm); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + fesetround(rmode); + feclearexcept(FeAllExceptions); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + T origVal = val; + val = rint(val); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + + int exceptions = fetestexcept(FeAllExceptions); + + int fpType = std::fpclassify(val); + if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { + if (fpType == FP_NAN) { + exceptions |= FeInvalid; + } + val = 0.0; + } else if (origVal != val) { + switch (rmode) { + case FeRoundNearest: + if (origVal - val > 0.5) + val += 1.0; + else if (val - origVal > 0.5) + val -= 1.0; + break; + case FeRoundDown: + if (roundAwayFix) { + // The ordering on the subtraction looks a bit odd in that we + // don't do the obvious origVal - val, instead we do + // -(val - origVal). This is required to get the correct bit + // exact behaviour when very close to the 0.5 threshold. + volatile T error = val; + error -= origVal; + error = -error; + if ( (error > 0.5) || + ((error == 0.5) && (val >= 0)) ) + val += 1.0; + } else { + if (origVal < val) + val -= 1.0; + } + break; + case FeRoundUpward: + if (origVal > val) + val += 1.0; + break; + } + exceptions |= FeInexact; + } + + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + + if (isSigned) { + bool outOfRange = false; + int64_t result = (int64_t) val; + uint64_t finalVal; + + if (!aarch64) { + if (width == 16) { + finalVal = (int16_t)val; + } else if (width == 32) { + finalVal = (int32_t)val; + } else if (width == 64) { + finalVal = result; + } else { + panic("Unsupported width %d\n", width); + } + + // check if value is in range + int64_t minVal = ~mask(width-1); + if ((double)val < minVal) { + outOfRange = true; + finalVal = minVal; + } + int64_t maxVal = mask(width-1); + if ((double)val > maxVal) { + outOfRange = true; + finalVal = maxVal; + } + } else { + bool isNeg = val < 0; + finalVal = result & mask(width); + // If the result is supposed to be less than 64 bits check that the + // upper bits that got thrown away are just sign extension bits + if (width != 64) { + outOfRange = ((uint64_t) result >> (width - 1)) != + (isNeg ? 
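// [Editor's sketch] The VfpRoundAway case above has no host equivalent, so
// vfpFpToFixed() rounds toward minus infinity and then corrects. The same
// correction as a standalone function (the volatile/asm barriers that keep
// the host from constant-folding are omitted here):
#include <cfenv>
#include <cmath>

static double rintTiesAway(double x)
{
    const int saved = std::fegetround();
    std::fesetround(FE_DOWNWARD);
    double v = std::nearbyint(x);        // round toward minus infinity
    std::fesetround(saved);
    double error = v;                    // compute -(v - x), ordered as in
    error -= x;                          // the original for bit-exact ties
    error = -error;
    if (error > 0.5 || (error == 0.5 && v >= 0))
        v += 1.0;
    return v;
}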
mask(64-width+1) : 0); + } + // If the original floating point value doesn't match the + // integer version, we are also out of range. So create a saturated + // result. + if (isNeg) { + outOfRange |= val < result; + if (outOfRange) { + finalVal = 1LL << (width-1); + } + } else { + outOfRange |= val > result; + if (outOfRange) { + finalVal = mask(width-1); + } + } + } + + // Raise an exception if the value was out of range + if (outOfRange) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + } + setFPExceptions(exceptions); + return finalVal; + } else { + if ((double)val < 0) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + setFPExceptions(exceptions); + return 0; + } + + uint64_t result = ((uint64_t) val) & mask(width); + if (val > result) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + setFPExceptions(exceptions); + return mask(width); + } + + setFPExceptions(exceptions); + return result; + } +}; + + float vfpUFixedToFpS(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm); + uint64_t val, uint8_t width, uint8_t imm); float vfpSFixedToFpS(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm); + int64_t val, uint8_t width, uint8_t imm); -uint64_t vfpFpDToFixed(double val, bool isSigned, bool half, - uint8_t imm, bool rzero = true); double vfpUFixedToFpD(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm); + uint64_t val, uint8_t width, uint8_t imm); double vfpSFixedToFpD(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm); + int64_t val, uint8_t width, uint8_t imm); float fprSqrtEstimate(FPSCR &fpscr, float op); uint32_t unsignedRSqrtEstimate(uint32_t op); @@ -292,6 +470,20 @@ class VfpMacroOp : public PredMacroOp void nextIdxs(IntRegIndex &dest); }; +template <typename T> +static inline T +fpAdd(T a, T b) +{ + return a + b; +}; + +template <typename T> +static inline T +fpSub(T a, T b) +{ + return a - b; +}; + static inline float fpAddS(float a, float b) { @@ -328,6 +520,54 @@ fpDivD(double a, double b) return a / b; } +template <typename T> +static inline T +fpDiv(T a, T b) +{ + return a / b; +}; + +template <typename T> +static inline T +fpMulX(T a, T b) +{ + uint64_t opData; + uint32_t sign1; + uint32_t sign2; + const bool single = (sizeof(T) == sizeof(float)); + if (single) { + opData = (fpToBits(a)); + sign1 = opData>>31; + opData = (fpToBits(b)); + sign2 = opData>>31; + } else { + opData = (fpToBits(a)); + sign1 = opData>>63; + opData = (fpToBits(b)); + sign2 = opData>>63; + } + bool inf1 = (std::fpclassify(a) == FP_INFINITE); + bool inf2 = (std::fpclassify(b) == FP_INFINITE); + bool zero1 = (std::fpclassify(a) == FP_ZERO); + bool zero2 = (std::fpclassify(b) == FP_ZERO); + if ((inf1 && zero2) || (zero1 && inf2)) { + if (sign1 ^ sign2) + return (T)(-2.0); + else + return (T)(2.0); + } else { + return (a * b); + } +}; + + +template <typename T> +static inline T +fpMul(T a, T b) +{ + return a * b; +}; + static inline float fpMulS(float a, float b) { @@ -340,23 +580,140 @@ fpMulD(double a, double b) return a * b; } -static inline float -fpMaxS(float a, float b) +template <typename T> +static inline T +// @todo remove this when all calls to it have been replaced with the new fplib implementation +fpMulAdd(T op1, T op2, T addend) +{ + T result; + + if (sizeof(T) == sizeof(float)) + result = fmaf(op1, op2, addend); + else + result = fma(op1, op2, addend); + + // ARM doesn't generate negatively signed NaNs from this operation, so fix up the result + if (std::isnan(result) && !std::isnan(op1) && +
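// [Editor's sketch] Two of the helpers in this hunk reduce to compact
// standalone forms. fpMulX() implements FMULX, which differs from an ordinary
// multiply only for (0 x infinity), returning +/-2.0 instead of a default
// NaN; fpMulAdd() wraps the host fma() and then clears the sign of a
// host-generated negative NaN, since ARM's default NaN is positive.
// Double-only sketches:
#include <cmath>
#include <cstdint>
#include <cstring>

static double fmulx(double a, double b)
{
    const bool zeroTimesInf =
        (std::fpclassify(a) == FP_ZERO && std::isinf(b)) ||
        (std::isinf(a) && std::fpclassify(b) == FP_ZERO);
    if (zeroTimesInf)
        return (std::signbit(a) != std::signbit(b)) ? -2.0 : 2.0;
    return a * b;
}

static double fmaPositiveNan(double a, double b, double c)
{
    double r = std::fma(a, b, c);
    // A NaN result from non-NaN inputs (e.g. inf * 0 on x86 yields a
    // negative default NaN) gets its sign bit cleared.
    if (std::isnan(r) && !std::isnan(a) && !std::isnan(b) && !std::isnan(c)) {
        uint64_t bits;
        std::memcpy(&bits, &r, sizeof(bits));
        bits &= ~(1ULL << 63);
        std::memcpy(&r, &bits, sizeof(bits));
    }
    return r;
}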
!std::isnan(op2) && !std::isnan(addend)) + { + uint64_t bitMask = ULL(0x1) << ((sizeof(T) * 8) - 1); + result = bitsToFp(fpToBits(result) & ~bitMask, op1); + } + return result; +} + +template <typename T> +static inline T +fpRIntX(T a, FPSCR &fpscr) +{ + T rVal; + + rVal = rint(a); + if (rVal != a && !std::isnan(a)) + fpscr.ixc = 1; + return (rVal); +}; + +template <typename T> +static inline T +fpMaxNum(T a, T b) { + const bool single = (sizeof(T) == sizeof(float)); + const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); + + if (std::isnan(a)) + return ((fpToBits(a) & qnan) == qnan) ? b : a; + if (std::isnan(b)) + return ((fpToBits(b) & qnan) == qnan) ? a : b; // Handle comparisons of +0 and -0. if (!std::signbit(a) && std::signbit(b)) return a; - return fmaxf(a, b); -} + return fmax(a, b); +}; -static inline float -fpMinS(float a, float b) +template <typename T> +static inline T +fpMax(T a, T b) { + if (std::isnan(a)) + return a; + if (std::isnan(b)) + return b; + return fpMaxNum<T>(a, b); +}; + +template <typename T> +static inline T +fpMinNum(T a, T b) +{ + const bool single = (sizeof(T) == sizeof(float)); + const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); + + if (std::isnan(a)) + return ((fpToBits(a) & qnan) == qnan) ? b : a; + if (std::isnan(b)) + return ((fpToBits(b) & qnan) == qnan) ? a : b; // Handle comparisons of +0 and -0. if (std::signbit(a) && !std::signbit(b)) return a; - return fminf(a, b); -} + return fmin(a, b); +}; + +template <typename T> +static inline T +fpMin(T a, T b) +{ + if (std::isnan(a)) + return a; + if (std::isnan(b)) + return b; + return fpMinNum<T>(a, b); +}; + +template <typename T> +static inline T +fpRSqrts(T a, T b) +{ + int fpClassA = std::fpclassify(a); + int fpClassB = std::fpclassify(b); + T aXb; + int fpClassAxB; + + if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) || + (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) { + return 1.5; + } + aXb = a*b; + fpClassAxB = std::fpclassify(aXb); + if(fpClassAxB == FP_SUBNORMAL) { + feraiseexcept(FeUnderflow); + return 1.5; + } + return (3.0 - (a * b)) / 2.0; +}; + +template <typename T> +static inline T +fpRecps(T a, T b) +{ + int fpClassA = std::fpclassify(a); + int fpClassB = std::fpclassify(b); + T aXb; + int fpClassAxB; + + if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) || + (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) { + return 2.0; + } + aXb = a*b; + fpClassAxB = std::fpclassify(aXb); + if(fpClassAxB == FP_SUBNORMAL) { + feraiseexcept(FeUnderflow); + return 2.0; + } + return 2.0 - (a * b); +}; + static inline float fpRSqrtsS(float a, float b) @@ -400,6 +757,23 @@ fpRecpsS(float a, float b) return 2.0 - (a * b); } +template <typename T> +static inline T +roundNEven(T a) { + T val; + + val = round(a); + if (a - val == 0.5) { + if ( (((int) a) & 1) == 0 ) val += 1.0; + } + else if (a - val == -0.5) { + if ( (((int) a) & 1) == 0 ) val -= 1.0; + } + return val; +} + + + class FpOp : public PredOp { protected: @@ -457,6 +831,12 @@ class FpOp : public PredOp template <class fpType> fpType + ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, + fpType (*func)(fpType, fpType, fpType), + bool flush, bool defaultNan, uint32_t rMode) const; + + template <class fpType> + fpType binaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType (*func)(fpType, fpType), bool flush, bool defaultNan, uint32_t rMode) const; @@ -478,6 +858,55 @@ class FpOp : public PredOp pcState.advance(); } } + + float + fpSqrt (FPSCR fpscr,float x) const + { + + return 
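// [Editor's sketch] fpMaxNum()/fpMax() above separate the IEEE 754-2008
// maxNum rule (a quiet NaN loses to a number, a signalling NaN still wins)
// from the ARM VMAX rule (any NaN propagates). Double-only sketch:
#include <cmath>
#include <cstdint>
#include <cstring>

static bool isQuietNan(double v)
{
    uint64_t b;
    std::memcpy(&b, &v, sizeof(b));
    return std::isnan(v) && (b & 0x0008000000000000ULL);
}

static double maxNum(double a, double b)      // IEEE maxNum
{
    if (std::isnan(a))
        return isQuietNan(a) ? b : a;
    if (std::isnan(b))
        return isQuietNan(b) ? a : b;
    if (!std::signbit(a) && std::signbit(b))
        return a;                             // +0 beats -0
    return std::fmax(a, b);
}

static double armMax(double a, double b)      // VMAX: NaNs win
{
    if (std::isnan(a))
        return a;
    if (std::isnan(b))
        return b;
    return maxNum(a, b);
}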
unaryOp(fpscr,x,sqrtf,fpscr.fz,fpscr.rMode); + + } + + double + fpSqrt (FPSCR fpscr,double x) const + { + + return unaryOp(fpscr,x,sqrt,fpscr.fz,fpscr.rMode); + + } +}; + +class FpCondCompRegOp : public FpOp +{ + protected: + IntRegIndex op1, op2; + ConditionCode condCode; + uint8_t defCc; + + FpCondCompRegOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode, uint8_t _defCc) : + FpOp(mnem, _machInst, __opClass), + op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class FpCondSelOp : public FpOp +{ + protected: + IntRegIndex dest, op1, op2; + ConditionCode condCode; + + FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode) : + FpOp(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; class FpRegRegOp : public FpOp @@ -550,6 +979,26 @@ class FpRegRegRegOp : public FpOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class FpRegRegRegRegOp : public FpOp +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex op2; + IntRegIndex op3; + + FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop) : + FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2), + op3(_op3) + { + setVfpMicroFlags(mode, flags); + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + class FpRegRegRegImmOp : public FpOp { protected: |
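// [Editor's note] FpCondSelOp above carries the operands for AArch64 FCSEL,
// whose execute semantics reduce to a conditional move on the NZCV flags; the
// disassembler only has to print the condition name. A sketch of the runtime
// behaviour (only a few of the 16 condition encodings decoded here):
#include <cstdint>

struct Nzcv { bool n, z, c, v; };

static bool condHolds(unsigned cond, const Nzcv &f)
{
    switch (cond) {
      case 0x0: return f.z;              // EQ
      case 0x1: return !f.z;             // NE
      case 0xa: return f.n == f.v;       // GE
      case 0xb: return f.n != f.v;       // LT
      default:  return true;             // AL/NV (full decode has 16 cases)
    }
}

static double fcsel(double a, double b, unsigned cond, const Nzcv &flags)
{
    return condHolds(cond, flags) ? a : b;
}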