summaryrefslogtreecommitdiff
path: root/src/arch/arm/insts
diff options
context:
space:
mode:
Diffstat (limited to 'src/arch/arm/insts')
-rw-r--r--src/arch/arm/insts/branch64.cc146
-rw-r--r--src/arch/arm/insts/branch64.hh166
-rw-r--r--src/arch/arm/insts/data64.cc203
-rw-r--r--src/arch/arm/insts/data64.hh256
-rw-r--r--src/arch/arm/insts/fplib.cc3086
-rw-r--r--src/arch/arm/insts/fplib.hh283
-rw-r--r--src/arch/arm/insts/macromem.cc528
-rw-r--r--src/arch/arm/insts/macromem.hh207
-rw-r--r--src/arch/arm/insts/mem.cc5
-rw-r--r--src/arch/arm/insts/mem64.cc193
-rw-r--r--src/arch/arm/insts/mem64.hh253
-rw-r--r--src/arch/arm/insts/misc.cc38
-rw-r--r--src/arch/arm/insts/misc.hh55
-rw-r--r--src/arch/arm/insts/misc64.cc73
-rw-r--r--src/arch/arm/insts/misc64.hh92
-rw-r--r--src/arch/arm/insts/neon64_mem.hh128
-rw-r--r--src/arch/arm/insts/pred_inst.hh36
-rw-r--r--src/arch/arm/insts/static_inst.cc312
-rw-r--r--src/arch/arm/insts/static_inst.hh99
-rw-r--r--src/arch/arm/insts/vfp.cc484
-rw-r--r--src/arch/arm/insts/vfp.hh489
21 files changed, 6761 insertions, 371 deletions
diff --git a/src/arch/arm/insts/branch64.cc b/src/arch/arm/insts/branch64.cc
new file mode 100644
index 000000000..49ba3402a
--- /dev/null
+++ b/src/arch/arm/insts/branch64.cc
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2011-2013 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#include "arch/arm/insts/branch64.hh"
+
+namespace ArmISA
+{
+
+ArmISA::PCState
+BranchImm64::branchTarget(const ArmISA::PCState &branchPC) const
+{
+ ArmISA::PCState pcs = branchPC;
+ pcs.instNPC(pcs.pc() + imm);
+ pcs.advance();
+ return pcs;
+}
+
+ArmISA::PCState
+BranchImmReg64::branchTarget(const ArmISA::PCState &branchPC) const
+{
+ ArmISA::PCState pcs = branchPC;
+ pcs.instNPC(pcs.pc() + imm);
+ pcs.advance();
+ return pcs;
+}
+
+ArmISA::PCState
+BranchImmImmReg64::branchTarget(const ArmISA::PCState &branchPC) const
+{
+ ArmISA::PCState pcs = branchPC;
+ pcs.instNPC(pcs.pc() + imm2);
+ pcs.advance();
+ return pcs;
+}
+
+std::string
+BranchImmCond64::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false, true, condCode);
+ printTarget(ss, pc + imm, symtab);
+ return ss.str();
+}
+
+std::string
+BranchImm64::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printTarget(ss, pc + imm, symtab);
+ return ss.str();
+}
+
+std::string
+BranchReg64::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, op1);
+ return ss.str();
+}
+
+std::string
+BranchRet64::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ if (op1 != INTREG_X30)
+ printReg(ss, op1);
+ return ss.str();
+}
+
+std::string
+BranchEret64::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ return ss.str();
+}
+
+std::string
+BranchImmReg64::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, op1);
+ ccprintf(ss, ", ");
+ printTarget(ss, pc + imm, symtab);
+ return ss.str();
+}
+
+std::string
+BranchImmImmReg64::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, op1);
+ ccprintf(ss, ", #%#x, ", imm1);
+ printTarget(ss, pc + imm2, symtab);
+ return ss.str();
+}
+
+} // namespace ArmISA
diff --git a/src/arch/arm/insts/branch64.hh b/src/arch/arm/insts/branch64.hh
new file mode 100644
index 000000000..48881e0c2
--- /dev/null
+++ b/src/arch/arm/insts/branch64.hh
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2011-2013 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+#ifndef __ARCH_ARM_INSTS_BRANCH64_HH__
+#define __ARCH_ARM_INSTS_BRANCH64_HH__
+
+#include "arch/arm/insts/static_inst.hh"
+
+namespace ArmISA
+{
+// Branch to a target computed with an immediate
+class BranchImm64 : public ArmStaticInst
+{
+ protected:
+ int64_t imm;
+
+ public:
+ BranchImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ int64_t _imm) :
+ ArmStaticInst(mnem, _machInst, __opClass), imm(_imm)
+ {}
+
+ ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const;
+
+ /// Explicitly import the otherwise hidden branchTarget
+ using StaticInst::branchTarget;
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+// Conditionally Branch to a target computed with an immediate
+class BranchImmCond64 : public BranchImm64
+{
+ protected:
+ ConditionCode condCode;
+
+ public:
+ BranchImmCond64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ int64_t _imm, ConditionCode _condCode) :
+ BranchImm64(mnem, _machInst, __opClass, _imm), condCode(_condCode)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+// Branch to a target computed with a register
+class BranchReg64 : public ArmStaticInst
+{
+ protected:
+ IntRegIndex op1;
+
+ public:
+ BranchReg64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _op1) :
+ ArmStaticInst(mnem, _machInst, __opClass), op1(_op1)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+// Ret instruction
+class BranchRet64 : public BranchReg64
+{
+ public:
+ BranchRet64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _op1) :
+ BranchReg64(mnem, _machInst, __opClass, _op1)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+// Eret instruction
+class BranchEret64 : public ArmStaticInst
+{
+ public:
+ BranchEret64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
+ ArmStaticInst(mnem, _machInst, __opClass)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+// Branch to a target computed with an immediate and a register
+class BranchImmReg64 : public ArmStaticInst
+{
+ protected:
+ int64_t imm;
+ IntRegIndex op1;
+
+ public:
+ BranchImmReg64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ int64_t _imm, IntRegIndex _op1) :
+ ArmStaticInst(mnem, _machInst, __opClass), imm(_imm), op1(_op1)
+ {}
+
+ ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const;
+
+ /// Explicitly import the otherwise hidden branchTarget
+ using StaticInst::branchTarget;
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+// Branch to a target computed with two immediates and a register
+class BranchImmImmReg64 : public ArmStaticInst
+{
+ protected:
+ int64_t imm1;
+ int64_t imm2;
+ IntRegIndex op1;
+
+ public:
+ BranchImmImmReg64(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass, int64_t _imm1, int64_t _imm2,
+ IntRegIndex _op1) :
+ ArmStaticInst(mnem, _machInst, __opClass),
+ imm1(_imm1), imm2(_imm2), op1(_op1)
+ {}
+
+ ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const;
+
+ /// Explicitly import the otherwise hidden branchTarget
+ using StaticInst::branchTarget;
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+}
+
+#endif //__ARCH_ARM_INSTS_BRANCH64_HH__
diff --git a/src/arch/arm/insts/data64.cc b/src/arch/arm/insts/data64.cc
new file mode 100644
index 000000000..f65219870
--- /dev/null
+++ b/src/arch/arm/insts/data64.cc
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2011-2013 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#include "arch/arm/insts/data64.hh"
+
+namespace ArmISA
+{
+
+std::string
+DataXImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printDataInst(ss, true, false, /*XXX not really s*/ false, dest, op1,
+ INTREG_ZERO, INTREG_ZERO, 0, LSL, imm);
+ return ss.str();
+}
+
+std::string
+DataXImmOnlyOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ccprintf(ss, ", #%d", imm);
+ return ss.str();
+}
+
+std::string
+DataXSRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printDataInst(ss, false, true, /*XXX not really s*/ false, dest, op1,
+ op2, INTREG_ZERO, shiftAmt, shiftType, 0);
+ return ss.str();
+}
+
+std::string
+DataXERegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printDataInst(ss, false, true, /*XXX not really s*/ false, dest, op1,
+ op2, INTREG_ZERO, shiftAmt, LSL, 0);
+ return ss.str();
+}
+
+std::string
+DataX1RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ccprintf(ss, ", ");
+ printReg(ss, op1);
+ return ss.str();
+}
+
+std::string
+DataX1RegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ccprintf(ss, ", ");
+ printReg(ss, op1);
+ ccprintf(ss, ", #%d", imm);
+ return ss.str();
+}
+
+std::string
+DataX1Reg2ImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ccprintf(ss, ", ");
+ printReg(ss, op1);
+ ccprintf(ss, ", #%d, #%d", imm1, imm2);
+ return ss.str();
+}
+
+std::string
+DataX2RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ccprintf(ss, ", ");
+ printReg(ss, op1);
+ ccprintf(ss, ", ");
+ printReg(ss, op2);
+ return ss.str();
+}
+
+std::string
+DataX2RegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ccprintf(ss, ", ");
+ printReg(ss, op1);
+ ccprintf(ss, ", ");
+ printReg(ss, op2);
+ ccprintf(ss, ", #%d", imm);
+ return ss.str();
+}
+
+std::string
+DataX3RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ccprintf(ss, ", ");
+ printReg(ss, op1);
+ ccprintf(ss, ", ");
+ printReg(ss, op2);
+ ccprintf(ss, ", ");
+ printReg(ss, op3);
+ return ss.str();
+}
+
+std::string
+DataXCondCompImmOp::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, op1);
+ ccprintf(ss, ", #%d, #%d", imm, defCc);
+ ccprintf(ss, ", ");
+ printCondition(ss, condCode, true);
+ return ss.str();
+}
+
+std::string
+DataXCondCompRegOp::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, op1);
+ ccprintf(ss, ", ");
+ printReg(ss, op2);
+ ccprintf(ss, ", #%d", defCc);
+ ccprintf(ss, ", ");
+ printCondition(ss, condCode, true);
+ return ss.str();
+}
+
+std::string
+DataXCondSelOp::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ccprintf(ss, ", ");
+ printReg(ss, op1);
+ ccprintf(ss, ", ");
+ printReg(ss, op2);
+ ccprintf(ss, ", ");
+ printCondition(ss, condCode, true);
+ return ss.str();
+}
+
+}
diff --git a/src/arch/arm/insts/data64.hh b/src/arch/arm/insts/data64.hh
new file mode 100644
index 000000000..8c0677b3d
--- /dev/null
+++ b/src/arch/arm/insts/data64.hh
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2011-2013 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+#ifndef __ARCH_ARM_INSTS_DATA64_HH__
+#define __ARCH_ARM_INSTS_DATA64_HH__
+
+#include "arch/arm/insts/static_inst.hh"
+#include "base/trace.hh"
+
+namespace ArmISA
+{
+
+class DataXImmOp : public ArmStaticInst
+{
+ protected:
+ IntRegIndex dest, op1;
+ uint64_t imm;
+
+ DataXImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) :
+ ArmStaticInst(mnem, _machInst, __opClass),
+ dest(_dest), op1(_op1), imm(_imm)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class DataXImmOnlyOp : public ArmStaticInst
+{
+ protected:
+ IntRegIndex dest;
+ uint64_t imm;
+
+ DataXImmOnlyOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, uint64_t _imm) :
+ ArmStaticInst(mnem, _machInst, __opClass),
+ dest(_dest), imm(_imm)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class DataXSRegOp : public ArmStaticInst
+{
+ protected:
+ IntRegIndex dest, op1, op2;
+ int32_t shiftAmt;
+ ArmShiftType shiftType;
+
+ DataXSRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
+ int32_t _shiftAmt, ArmShiftType _shiftType) :
+ ArmStaticInst(mnem, _machInst, __opClass),
+ dest(_dest), op1(_op1), op2(_op2),
+ shiftAmt(_shiftAmt), shiftType(_shiftType)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class DataXERegOp : public ArmStaticInst
+{
+ protected:
+ IntRegIndex dest, op1, op2;
+ ArmExtendType extendType;
+ int32_t shiftAmt;
+
+ DataXERegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
+ ArmExtendType _extendType, int32_t _shiftAmt) :
+ ArmStaticInst(mnem, _machInst, __opClass),
+ dest(_dest), op1(_op1), op2(_op2),
+ extendType(_extendType), shiftAmt(_shiftAmt)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class DataX1RegOp : public ArmStaticInst
+{
+ protected:
+ IntRegIndex dest, op1;
+
+ DataX1RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _op1) :
+ ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class DataX1RegImmOp : public ArmStaticInst
+{
+ protected:
+ IntRegIndex dest, op1;
+ uint64_t imm;
+
+ DataX1RegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) :
+ ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1),
+ imm(_imm)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class DataX1Reg2ImmOp : public ArmStaticInst
+{
+ protected:
+ IntRegIndex dest, op1;
+ uint64_t imm1, imm2;
+
+ DataX1Reg2ImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm1,
+ uint64_t _imm2) :
+ ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1),
+ imm1(_imm1), imm2(_imm2)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class DataX2RegOp : public ArmStaticInst
+{
+ protected:
+ IntRegIndex dest, op1, op2;
+
+ DataX2RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) :
+ ArmStaticInst(mnem, _machInst, __opClass),
+ dest(_dest), op1(_op1), op2(_op2)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class DataX2RegImmOp : public ArmStaticInst
+{
+ protected:
+ IntRegIndex dest, op1, op2;
+ uint64_t imm;
+
+ DataX2RegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
+ uint64_t _imm) :
+ ArmStaticInst(mnem, _machInst, __opClass),
+ dest(_dest), op1(_op1), op2(_op2), imm(_imm)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class DataX3RegOp : public ArmStaticInst
+{
+ protected:
+ IntRegIndex dest, op1, op2, op3;
+
+ DataX3RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
+ IntRegIndex _op3) :
+ ArmStaticInst(mnem, _machInst, __opClass),
+ dest(_dest), op1(_op1), op2(_op2), op3(_op3)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class DataXCondCompImmOp : public ArmStaticInst
+{
+ protected:
+ IntRegIndex op1;
+ uint64_t imm;
+ ConditionCode condCode;
+ uint8_t defCc;
+
+ DataXCondCompImmOp(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass, IntRegIndex _op1, uint64_t _imm,
+ ConditionCode _condCode, uint8_t _defCc) :
+ ArmStaticInst(mnem, _machInst, __opClass),
+ op1(_op1), imm(_imm), condCode(_condCode), defCc(_defCc)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class DataXCondCompRegOp : public ArmStaticInst
+{
+ protected:
+ IntRegIndex op1, op2;
+ ConditionCode condCode;
+ uint8_t defCc;
+
+ DataXCondCompRegOp(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2,
+ ConditionCode _condCode, uint8_t _defCc) :
+ ArmStaticInst(mnem, _machInst, __opClass),
+ op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class DataXCondSelOp : public ArmStaticInst
+{
+ protected:
+ IntRegIndex dest, op1, op2;
+ ConditionCode condCode;
+
+ DataXCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
+ ConditionCode _condCode) :
+ ArmStaticInst(mnem, _machInst, __opClass),
+ dest(_dest), op1(_op1), op2(_op2), condCode(_condCode)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+}
+
+#endif //__ARCH_ARM_INSTS_DATA64_HH__
diff --git a/src/arch/arm/insts/fplib.cc b/src/arch/arm/insts/fplib.cc
new file mode 100644
index 000000000..1f44eed09
--- /dev/null
+++ b/src/arch/arm/insts/fplib.cc
@@ -0,0 +1,3086 @@
+/*
+* Copyright (c) 2012-2013 ARM Limited
+* All rights reserved
+*
+* The license below extends only to copyright in the software and shall
+* not be construed as granting a license to any other intellectual
+* property including but not limited to intellectual property relating
+* to a hardware implementation of the functionality of the software
+* licensed hereunder. You may use the software subject to the license
+* terms below provided that you ensure that this notice is replicated
+* unmodified and in its entirety in all distributions of the software,
+* modified or unmodified, in source code or in binary form.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met: redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer;
+* redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the distribution;
+* neither the name of the copyright holders nor the names of its
+* contributors may be used to endorse or promote products derived from
+* this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+* Authors: Edmund Grimley Evans
+* Thomas Grocutt
+*/
+
+#include <stdint.h>
+
+#include <cassert>
+
+#include "fplib.hh"
+
+namespace ArmISA
+{
+
// Rounding modes, stored in the low two bits of the mode word.
// RN rounds to nearest (ties to even, see fp*_round_), RP towards
// +infinity, RM towards -infinity, RZ towards zero.
#define FPLIB_RN 0
#define FPLIB_RP 1
#define FPLIB_RM 2
#define FPLIB_RZ 3
// Additional mode bits: FZ = flush subnormals to zero, DN = replace any
// NaN result with the default NaN, AHP = alternative half-precision
// format (no fp16 infinities/NaNs, see fp16_round_).
#define FPLIB_FZ 4
#define FPLIB_DN 8
#define FPLIB_AHP 16

// Cumulative exception flags returned via the *flags out-parameters:
#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16 // Inexact
#define FPLIB_UFC 8 // Underflow
#define FPLIB_OFC 4 // Overflow
#define FPLIB_DZC 2 // Division by Zero
#define FPLIB_IOC 1 // Invalid Operation
+
// Logical shifts that, unlike the raw C operators, are well defined for
// any shift amount: shifting by the operand width or more yields zero.

static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    if (shift >= 16)
        return 0;
    return x << shift;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    if (shift >= 16)
        return 0;
    return x >> shift;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    if (shift >= 32)
        return 0;
    return x << shift;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    if (shift >= 32)
        return 0;
    return x >> shift;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    if (shift >= 64)
        return 0;
    return x << shift;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    if (shift >= 64)
        return 0;
    return x >> shift;
}
+
// 128-bit logical shift left of (x1:x0) by an arbitrary amount; the
// result is written through r0 (low word) and r1 (high word).  Shifts of
// 128 or more produce zero.
//
// Bug fix: the original folded shift == 0 into the shift < 64 branch,
// which evaluated x0 >> (64 - 0) — a shift by the full operand width,
// undefined behaviour in C++.  A zero shift is now handled explicitly.
static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}
+
// 128-bit logical shift right of (x1:x0) by an arbitrary amount; the
// result is written through r0 (low word) and r1 (high word).  Shifts of
// 128 or more produce zero.
//
// Bug fix: the original folded shift == 0 into the shift < 64 branch,
// which evaluated x1 << (64 - 0) — a shift by the full operand width,
// undefined behaviour in C++.  A zero shift is now handled explicitly.
static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r0 = x0;
        *r1 = x1;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}
+
// Exact multiply of two operands of up to 62 bits each, producing a
// 124-bit product in (x1:x0).  Karatsuba-style on 31-bit digits so that
// every partial product fits comfortably in 64 bits.
static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a_lo = a & mask;
    uint64_t a_hi = a >> 31 & mask;
    uint64_t b_lo = b & mask;
    uint64_t b_hi = b >> 31 & mask;

    // Three multiplications give the digit-aligned partial products:
    uint64_t lo = a_lo * b_lo;
    uint64_t hi = a_hi * b_hi;
    uint64_t mid = (a_lo + a_hi) * (b_lo + b_hi) - lo - hi;

    // Propagate carries between the 31-bit digit positions:
    uint64_t d0 = lo;
    uint64_t d1 = (d0 >> 31) + mid;
    uint64_t d2 = (d1 >> 31) + hi;

    *x0 = (d0 & mask) | (d1 & mask) << 31 | d2 << 62;
    *x1 = d2 >> 2;
}
+
// Exact 64x32 -> 96-bit multiply; the low 64 bits of the product go to
// *x0 and the high 32 bits to *x1.
static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t lo_prod = (uint64_t)(uint32_t)a * b;       // a<31:0> * b
    uint64_t hi_prod = (lo_prod >> 32) + (a >> 32) * b; // carry + a<63:32> * b
    *x0 = hi_prod << 32 | (uint32_t)lo_prod;
    *x1 = hi_prod >> 32;
}
+
// Exact 64x64 -> 128-bit multiply; the low half of the product goes to
// *x0 and the high half to *x1.
static inline void
mul64x64(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint64_t a_lo = (uint32_t)a;
    uint64_t a_hi = a >> 32;
    uint64_t b_lo = (uint32_t)b;
    uint64_t b_hi = b >> 32;

    // Cross products of the 32-bit halves, aligned at bit 32:
    uint64_t t1 = (a_lo * b_lo >> 32) + a_hi * b_lo;
    uint64_t t2 = a_lo * b_hi;

    // Accumulate every contribution to the high 64 bits, including the
    // carry out of bit 63 of the full product:
    uint64_t hi = ((uint64_t)(uint32_t)t1 + (uint32_t)t2) >> 32;
    hi += t1 >> 32;
    hi += t2 >> 32;
    hi += a_hi * b_hi;

    *x0 = a * b; // the low half comes straight from the wrapping multiply
    *x1 = hi;
}
+
// 128-bit add: (x1:x0) = (a1:a0) + (b1:b0), propagating the carry from
// the low half into the high half.
static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    uint64_t low = a0 + b0;
    *x1 = a1 + b1 + (low < a0); // unsigned wrap of the low half => carry
    *x0 = low;
}

// 128-bit subtract: (x1:x0) = (a1:a0) - (b1:b0), propagating the borrow
// from the low half into the high half.
static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    uint64_t low = a0 - b0;
    *x1 = a1 - b1 - (low > a0); // wrap-around of the low half => borrow
    *x0 = low;
}

// Unsigned 128-bit compare: returns -1, 0 or 1 as (a1:a0) is less than,
// equal to or greater than (b1:b0).
static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    if (a1 != b1)
        return a1 < b1 ? -1 : 1;
    if (a0 != b0)
        return a0 < b0 ? -1 : 1;
    return 0;
}
+
// Shift a non-zero 16-bit mantissa left until its top bit is set,
// decrementing *exp by the distance moved.  Zero is returned unchanged.
static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    if (!mnt)
        return 0;

    // Locate the leading one by binary search with steps 8, 4, 2, 1:
    for (int step = 8; step; step >>= 1) {
        if (!(mnt >> (16 - step))) {
            mnt <<= step;
            *exp -= step;
        }
    }
    return mnt;
}

// 32-bit counterpart of fp16_normalise.
static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    if (!mnt)
        return 0;

    // Binary search with steps 16, 8, 4, 2, 1:
    for (int step = 16; step; step >>= 1) {
        if (!(mnt >> (32 - step))) {
            mnt <<= step;
            *exp -= step;
        }
    }
    return mnt;
}

// 64-bit counterpart of fp16_normalise.
static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    if (!mnt)
        return 0;

    // Binary search with steps 32, 16, 8, 4, 2, 1:
    for (int step = 32; step; step >>= 1) {
        if (!(mnt >> (64 - step))) {
            mnt <<= step;
            *exp -= step;
        }
    }
    return mnt;
}
+
// Normalise the 128-bit mantissa (mnt1:mnt0) so that bit 127 is set,
// decrementing *exp by the distance moved.  (0:0) is left untouched.
static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t lo = *mnt0;
    uint64_t hi = *mnt1;

    if (!lo && !hi)
        return;

    // If the high word is empty, promote the low word wholesale:
    if (!hi) {
        hi = lo;
        lo = 0;
        *exp -= 64;
    }

    // Binary-search the leading one within the high word:
    for (int step = 32; step; step >>= 1) {
        if (!(hi >> (64 - step))) {
            hi = hi << step | lo >> (64 - step);
            lo <<= step;
            *exp -= step;
        }
    }

    *mnt0 = lo;
    *mnt1 = hi;
}
+
// Assemble an IEEE half-precision bit pattern from sign, biased exponent
// and mantissa; the mantissa is masked to its 10-bit field.
static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << 15 | exp << 10 | (mnt & 0x3ff);
}

// Single-precision counterpart: 1-bit sign, 8-bit exponent, 23-bit mantissa.
static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << 31 | exp << 23 | (mnt & 0x7fffff);
}

// Double-precision counterpart: 1-bit sign, 11-bit exponent, 52-bit mantissa.
static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return (uint64_t)sgn << 63 | exp << 52 | (mnt & 0xfffffffffffffULL);
}

// Signed zeroes:
static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

// Largest finite values: maximum normal exponent, all-ones mantissa.
static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, 30, 0x3ff);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, 254, 0x7fffff);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, 2046, 0xfffffffffffffULL);
}

// Infinities: all-ones exponent, zero mantissa.
static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, 31, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, 255, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, 2047, 0);
}

// Default (quiet) NaNs: positive sign, only the top mantissa bit set.
static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, 31, 0x200);
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, 255, 0x400000);
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, 2047, 0x8000000000000ULL);
}
+
// Split a half-precision value into sign, biased exponent and mantissa,
// giving subnormals an implicit exponent of 1.  Unlike the 32/64-bit
// variants, half precision is never flushed to zero on input.
static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> 15;
    *exp = x >> 10 & 31;
    *mnt = x & 0x3ff;

    if (*exp) {
        // Normal number: make the leading significand bit explicit.
        *mnt |= (uint16_t)1 << 10;
    } else {
        // Subnormal or zero; there is no flush to zero in this case!
        ++*exp;
    }
}
+
+static inline void
+fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
+ int *flags)
+{
+ *sgn = x >> 31;
+ *exp = x >> 23 & 255;
+ *mnt = x & (((uint32_t)1 << 23) - 1);
+
+ // Handle subnormals:
+ if (*exp) {
+ *mnt |= (uint32_t)1 << 23;
+ } else {
+ ++*exp;
+ if ((mode & FPLIB_FZ) && *mnt) {
+ *flags |= FPLIB_IDC;
+ *mnt = 0;
+ }
+ }
+}
+
+static inline void
+fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
+ int *flags)
+{
+ *sgn = x >> 63;
+ *exp = x >> 52 & 2047;
+ *mnt = x & (((uint64_t)1 << 52) - 1);
+
+ // Handle subnormals:
+ if (*exp) {
+ *mnt |= (uint64_t)1 << 52;
+ } else {
+ ++*exp;
+ if ((mode & FPLIB_FZ) && *mnt) {
+ *flags |= FPLIB_IDC;
+ *mnt = 0;
+ }
+ }
+}
+
+static inline uint32_t
+fp32_process_NaN(uint32_t a, int mode, int *flags)
+{
+ if (!(a >> 22 & 1)) {
+ *flags |= FPLIB_IOC;
+ a |= (uint32_t)1 << 22;
+ }
+ return mode & FPLIB_DN ? fp32_defaultNaN() : a;
+}
+
+static inline uint64_t
+fp64_process_NaN(uint64_t a, int mode, int *flags)
+{
+ if (!(a >> 51 & 1)) {
+ *flags |= FPLIB_IOC;
+ a |= (uint64_t)1 << 51;
+ }
+ return mode & FPLIB_DN ? fp64_defaultNaN() : a;
+}
+
+static uint32_t
+fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
+{
+ int a_exp = a >> 23 & 255;
+ uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
+ int b_exp = b >> 23 & 255;
+ uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
+
+ // Handle signalling NaNs:
+ if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
+ return fp32_process_NaN(a, mode, flags);
+ if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
+ return fp32_process_NaN(b, mode, flags);
+
+ // Handle quiet NaNs:
+ if (a_exp == 255 && a_mnt)
+ return fp32_process_NaN(a, mode, flags);
+ if (b_exp == 255 && b_mnt)
+ return fp32_process_NaN(b, mode, flags);
+
+ return 0;
+}
+
+static uint64_t
+fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
+{
+ int a_exp = a >> 52 & 2047;
+ uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
+ int b_exp = b >> 52 & 2047;
+ uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
+
+ // Handle signalling NaNs:
+ if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
+ return fp64_process_NaN(a, mode, flags);
+ if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
+ return fp64_process_NaN(b, mode, flags);
+
+ // Handle quiet NaNs:
+ if (a_exp == 2047 && a_mnt)
+ return fp64_process_NaN(a, mode, flags);
+ if (b_exp == 2047 && b_mnt)
+ return fp64_process_NaN(b, mode, flags);
+
+ return 0;
+}
+
+static uint32_t
+fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
+{
+ int a_exp = a >> 23 & 255;
+ uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
+ int b_exp = b >> 23 & 255;
+ uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
+ int c_exp = c >> 23 & 255;
+ uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1);
+
+ // Handle signalling NaNs:
+ if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
+ return fp32_process_NaN(a, mode, flags);
+ if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
+ return fp32_process_NaN(b, mode, flags);
+ if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1))
+ return fp32_process_NaN(c, mode, flags);
+
+ // Handle quiet NaNs:
+ if (a_exp == 255 && a_mnt)
+ return fp32_process_NaN(a, mode, flags);
+ if (b_exp == 255 && b_mnt)
+ return fp32_process_NaN(b, mode, flags);
+ if (c_exp == 255 && c_mnt)
+ return fp32_process_NaN(c, mode, flags);
+
+ return 0;
+}
+
+static uint64_t
+fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
+{
+ int a_exp = a >> 52 & 2047;
+ uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
+ int b_exp = b >> 52 & 2047;
+ uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
+ int c_exp = c >> 52 & 2047;
+ uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1);
+
+ // Handle signalling NaNs:
+ if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
+ return fp64_process_NaN(a, mode, flags);
+ if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
+ return fp64_process_NaN(b, mode, flags);
+ if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1))
+ return fp64_process_NaN(c, mode, flags);
+
+ // Handle quiet NaNs:
+ if (a_exp == 2047 && a_mnt)
+ return fp64_process_NaN(a, mode, flags);
+ if (b_exp == 2047 && b_mnt)
+ return fp64_process_NaN(b, mode, flags);
+ if (c_exp == 2047 && c_mnt)
+ return fp64_process_NaN(c, mode, flags);
+
+ return 0;
+}
+
// Round and pack a half-precision result from an unpacked sign, exponent
// and significand.  mnt carries guard/sticky information in its low
// bits; rm is the rounding mode (FPLIB_RN/RP/RM/RZ or FPRounding_ODD).
// In AHP mode the top exponent encodes finite values, so overflow
// saturates to maximum magnitude and raises Invalid Operation instead of
// producing an infinity.
static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (1 << 11)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // There is no flush to zero in this case!

    // The bottom 5 bits of mnt are orred together:
    mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        // Subnormal result: shift further right, folding every discarded
        // bit into the sticky part of 'error'.
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up: RN rounds to nearest with ties to the even mantissa; RP
    // and RM round away from zero only on their matching sign.
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint32_t)1 << 10) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint32_t)1 << 11) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= 31) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        // Alternative half-precision: exponent 31 is finite, so saturate
        // to all-ones and raise Invalid Operation on overflow.
        if (biased_exp >= 32) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, 31, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}
+
// Round and pack a single-precision result from an unpacked sign,
// exponent and significand.  mnt carries guard/sticky information in its
// low bits; rm is the rounding mode (FPLIB_RN/RP/RM/RZ or
// FPRounding_ODD).  In flush-to-zero mode any result that would be
// subnormal is replaced by zero with Underflow raised.
static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (1 << 24)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom 8 bits of mnt are orred together:
    mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        // Subnormal result: shift further right, folding every discarded
        // bit into the sticky part of 'error'.
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up: RN rounds to nearest with ties to the even mantissa; RP
    // and RM round away from zero only on their matching sign.
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint32_t)1 << 23) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint32_t)1 << 24) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= 255) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}
+
+static uint32_t
+fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
+{
+ return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
+}
+
// Round and pack a double-precision result from an unpacked sign,
// exponent and significand.  mnt carries guard/sticky information in its
// low bits; rm is the rounding mode (FPLIB_RN/RP/RM/RZ or
// FPRounding_ODD).  In flush-to-zero mode any result that would be
// subnormal is replaced by zero with Underflow raised.
static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (1 << 52)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom 11 bits of mnt are orred together:
    mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        // Subnormal result: shift further right, folding every discarded
        // bit into the sticky part of 'error'.
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up: RN rounds to nearest with ties to the even mantissa; RP
    // and RM round away from zero only on their matching sign.
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint64_t)1 << 52) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint64_t)1 << 53) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= 2047) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}
+
+static uint64_t
+fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
+{
+ return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
+}
+
// IEEE equality compare for fp32.  Any NaN operand makes the result
// false; only a signalling NaN raises Invalid Operation.  +0 and -0
// compare equal.
static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Unordered if either operand is a NaN (max exponent, non-zero
    // fraction); IOC only for a signalling NaN (quiet bit clear):
    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) ||
            (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1)))
            *flags |= FPLIB_IOC;
        return 0;
    }
    // Bitwise-equal values are equal; so are zeroes of either sign:
    return a == b || (!a_mnt && !b_mnt);
}
+
// IEEE greater-than-or-equal compare for fp32.  Any NaN operand (quiet
// or signalling) raises Invalid Operation and yields false.  +0 and -0
// compare equal.
static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Unordered comparisons always signal for ge/gt:
    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    // Different signs: the non-negative operand is the larger one:
    if (a_sgn != b_sgn)
        return b_sgn;
    // Same sign: compare magnitudes, inverting the result when negative:
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}
+
// IEEE greater-than compare for fp32.  Any NaN operand (quiet or
// signalling) raises Invalid Operation and yields false.  +0 and -0
// compare equal (so not greater).
static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Unordered comparisons always signal for ge/gt:
    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    // Different signs: the non-negative operand is the larger one:
    if (a_sgn != b_sgn)
        return b_sgn;
    // Same sign: compare magnitudes, inverting the result when negative:
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}
+
// IEEE equality compare for fp64.  Any NaN operand makes the result
// false; only a signalling NaN raises Invalid Operation.  +0 and -0
// compare equal.
static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Unordered if either operand is a NaN (max exponent, non-zero
    // fraction); IOC only for a signalling NaN (quiet bit clear):
    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) ||
            (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1)))
            *flags |= FPLIB_IOC;
        return 0;
    }
    // Bitwise-equal values are equal; so are zeroes of either sign:
    return a == b || (!a_mnt && !b_mnt);
}
+
// IEEE greater-than-or-equal compare for fp64.  Any NaN operand (quiet
// or signalling) raises Invalid Operation and yields false.  +0 and -0
// compare equal.
static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Unordered comparisons always signal for ge/gt:
    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    // Different signs: the non-negative operand is the larger one:
    if (a_sgn != b_sgn)
        return b_sgn;
    // Same sign: compare magnitudes, inverting the result when negative:
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}
+
// IEEE greater-than compare for fp64.  Any NaN operand (quiet or
// signalling) raises Invalid Operation and yields false.  +0 and -0
// compare equal (so not greater).
static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    // Unordered comparisons always signal for ge/gt:
    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    // Different signs: the non-negative operand is the larger one:
    if (a_sgn != b_sgn)
        return b_sgn;
    // Same sign: compare magnitudes, inverting the result when negative:
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}
+
// Add two single-precision values; when neg is set the sign of b is
// flipped first, turning the operation into subtraction.
static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) {
        // Opposite-signed infinities: invalid operation.
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == 255) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == 255) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    // Align on the larger exponent, folding any shifted-out bits of the
    // smaller operand into a sticky bit:
    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    // Add magnitudes if the signs agree; otherwise subtract the smaller
    // magnitude from the larger, flipping the sign when |b| > |a|:
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags);
}
+
// Add two double-precision values; when neg is set the sign of b is
// flipped first, turning the operation into subtraction.
static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) {
        // Opposite-signed infinities: invalid operation.
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == 2047) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == 2047) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    // Align on the larger exponent, folding any shifted-out bits of the
    // smaller operand into a sticky bit:
    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    // Add magnitudes if the signs agree; otherwise subtract the smaller
    // magnitude from the larger, flipping the sign when |b| > |a|:
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags);
}
+
// Multiply two single-precision values with correct NaN, infinity and
// zero handling; zero times infinity is invalid.
static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) {
        // infinity * zero is invalid
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == 255 || b_exp == 255) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - 110; // remove double bias, align for rounding
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to 32 bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}
+
// Multiply two double-precision values with correct NaN, infinity and
// zero handling; zero times infinity is invalid.  The 53x53-bit product
// is formed exactly in 128 bits via mul62x62.
static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) {
        // infinity * zero is invalid
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == 2047 || b_exp == 2047) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - 1000; // remove double bias, align for rounding
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to 64 bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}
+
// Fused multiply-add for single precision: computes a + b*c with a
// single rounding at the end.  scale is added to the result exponent
// before rounding (zero for a plain FMA).
static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (a_exp == 255 && (a_mnt >> 22 & 1) &&
        ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) ||
         (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == 255 && !c_mnt) ||
        (c_exp == 255 && !b_mnt) ||
        (a_exp == 255 && (b_exp == 255 || c_exp == 255) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == 255)
        return fp32_infinity(a_sgn);
    if (b_exp == 255 || c_exp == 255)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    // Widen the addend so it shares a scale with the raw product:
    x_sgn = a_sgn;
    x_exp = a_exp + 13;
    x_mnt = (uint64_t)a_mnt << 27;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 113;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add: align on the larger exponent, folding shifted-out bits of
    // the smaller term into a sticky bit:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}
+
// Fused multiply-add for double precision: computes a + b*c with a
// single rounding at the end, using 128-bit intermediate arithmetic.
// scale is added to the result exponent before rounding (zero for a
// plain FMA).
static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (a_exp == 2047 && (a_mnt >> 51 & 1) &&
        ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) ||
         (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == 2047 && !c_mnt) ||
        (c_exp == 2047 && !b_mnt) ||
        (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == 2047)
        return fp64_infinity(a_sgn);
    if (b_exp == 2047 || c_exp == 2047)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    // Widen the addend so it shares a scale with the raw product:
    x_sgn = a_sgn;
    x_exp = a_exp + 11;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 1003;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add: align on the larger exponent, folding shifted-out bits of
    // the smaller term into a sticky bit (the lsl128 captures exactly
    // the bits the lsr128 is about to discard):
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise and convert to 64 bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}
+
// Divide two single-precision values.  0/0 and inf/inf are invalid;
// division of a finite non-zero value by zero raises Division by Zero.
static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == 255 || !b_mnt) {
        // DZC only for finite/0, not for infinity/0:
        if (a_exp != 255)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == 255)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 172;
    x_mnt = ((uint64_t)a_mnt << 18) / b_mnt;
    x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18);

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}
+
static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    // Double-precision division a / b, implemented with a Newton-Raphson
    // reciprocal of the divisor followed by a multiply and a one-ulp
    // correction step.
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    // inf/inf and 0/0 are invalid operations and produce the default NaN.
    if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    // inf/x and x/0 produce an infinity; only x/0 is divide-by-zero.
    if (a_exp == 2047 || !b_mnt) {
        if (a_exp != 2047)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    // 0/x and x/inf produce an exact zero.
    if (!a_mnt || b_exp == 2047)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 1031;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // a 62x62-bit multiply suffices here
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // a 62x62-bit multiply suffices here
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    // 'c' doubles as the sticky/inexact indicator: c == 0 means the
    // quotient is exact.
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}
+
+static void
+set_fpscr0(FPSCR &fpscr, int flags)
+{
+ if (flags & FPLIB_IDC) {
+ fpscr.idc = 1;
+ }
+ if (flags & FPLIB_IOC) {
+ fpscr.ioc = 1;
+ }
+ if (flags & FPLIB_DZC) {
+ fpscr.dzc = 1;
+ }
+ if (flags & FPLIB_OFC) {
+ fpscr.ofc = 1;
+ }
+ if (flags & FPLIB_UFC) {
+ fpscr.ufc = 1;
+ }
+ if (flags & FPLIB_IXC) {
+ fpscr.ixc = 1;
+ }
+}
+
static uint32_t
fp32_sqrt(uint32_t a, int mode, int *flags)
{
    // Single-precision square root via a seeded Newton iteration,
    // followed by an exact correction so rounding is bit-precise.
    int a_sgn, a_exp, x_sgn, x_exp;
    uint32_t a_mnt, x, x_mnt;
    uint64_t t0, t1;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (a_exp == 255 && (uint32_t)(a_mnt << 9))
        return fp32_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt) {
        return fp32_zero(a_sgn);
    }
    if (a_exp == 255 && !a_sgn) {
        return fp32_infinity(a_sgn);
    }
    // Square root of a negative (non-zero) number is invalid.
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }

    // Force an odd exponent so halving it below stays exact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    if (!(a_exp & 1)) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28);

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 147) >> 1;
    x_mnt = ((x - (1 << 5)) >> 6) + 1;
    // Exact check: if x_mnt^2 overshoots the operand, step down one ulp.
    t1 = (uint64_t)x_mnt * x_mnt;
    t0 = (uint64_t)a_mnt << 19;
    if (t1 > t0) {
        --x_mnt;
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    // (t1 != t0) is the inexact/sticky bit.
    return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}
+
static uint64_t
fp64_sqrt(uint64_t a, int mode, int *flags)
{
    // Double-precision square root: 32-bit Newton iterations as in
    // fp32_sqrt, extended to 64 bits with a reciprocal refinement, then
    // an exact 128-bit correction step.
    int a_sgn, a_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, x_mnt, r, x0, x1;
    uint32_t x;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) {
        return fp64_process_NaN(a, mode, flags);
    }

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp64_zero(a_sgn);
    if (a_exp == 2047 && !a_sgn)
        return fp64_infinity(a_sgn);
    // Square root of a negative (non-zero) number is invalid.
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }

    // Fix the exponent parity so halving it below stays exact:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    if (a_exp & 1) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28);

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((a_mnt / x) >> 2) + (x >> 1);

    // r = 1 / x; // 32-bit accuracy
    r = ((uint64_t)1 << 62) / x;

    // r = r * (2 - x * r); // 64-bit accuracy
    mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
    lsr128(&x0, &x1, x0, x1, 31);

    // x = (x + a * r) / 2; // 64-bit accuracy
    mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
    lsl128(&x0, &x1, x0, x1, 5);
    lsr128(&x0, &x1, x0, x1, 56);

    x0 = ((uint64_t)x << 31) + (x0 >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 1053) >> 1;
    x_mnt = x0;
    x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
    // Exact check: if x_mnt^2 overshoots the operand, step down one ulp.
    mul62x62(&x0, &x1, x_mnt, x_mnt);
    lsl128(&x0, &x1, x0, x1, 19);
    c = cmp128(x0, x1, 0, a_mnt);
    if (c > 0)
        --x_mnt;

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // 'c' non-zero means the result is inexact (sticky bit).
    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}
+
+static int
+modeConv(FPSCR fpscr)
+{
+ return (((int) fpscr) >> 22) & 0xF;
+}
+
// Fold accumulated FPLIB_* exception flags into the FPSCR sticky bits.
// Unlike set_fpscr0, this variant suppresses the inexact (IXC) bit when
// the inexactness stems from an underflow that was flushed to zero
// (fpscr.fz set) — NOTE(review): presumably to match architectural
// flush-to-zero behaviour; confirm against the ARM ARM.
static void
set_fpscr(FPSCR &fpscr, int flags)
{
    // translate back to FPSCR
    bool underflow = false;
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        underflow = true; // remember so IXC can be masked below
        fpscr.ufc = 1;
    }
    if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
        fpscr.ixc = 1;
    }
}
+
+template <>
+bool
+fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
+{
+ int flags = 0;
+ int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
+ set_fpscr(fpscr, flags);
+ return x;
+}
+
+template <>
+bool
+fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
+{
+ int flags = 0;
+ int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
+ set_fpscr(fpscr, flags);
+ return x;
+}
+
+template <>
+bool
+fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
+{
+ int flags = 0;
+ int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
+ set_fpscr(fpscr, flags);
+ return x;
+}
+
+template <>
+bool
+fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
+{
+ int flags = 0;
+ int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
+ set_fpscr(fpscr, flags);
+ return x;
+}
+
+template <>
+bool
+fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
+{
+ int flags = 0;
+ int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
+ set_fpscr(fpscr, flags);
+ return x;
+}
+
+template <>
+bool
+fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
+{
+ int flags = 0;
+ int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
+ set_fpscr(fpscr, flags);
+ return x;
+}
+
+template <>
+uint32_t
+fplibAbs(uint32_t op)
+{
+ return op & ~((uint32_t)1 << 31);
+}
+
+template <>
+uint64_t
+fplibAbs(uint64_t op)
+{
+ return op & ~((uint64_t)1 << 63);
+}
+
+template <>
+uint32_t
+fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
+{
+ int flags = 0;
+ uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
+ set_fpscr0(fpscr, flags);
+ return result;
+}
+
+template <>
+uint64_t
+fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
+{
+ int flags = 0;
+ uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
+ set_fpscr0(fpscr, flags);
+ return result;
+}
+
template <>
int
fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
{
    // Single-precision compare returning an NZCV nibble:
    //   3 (0011, CV) unordered, 6 (0110, ZC) equal,
    //   2 (0010, C)  greater,   8 (1000, N)  less.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint32_t mnt1, mnt2;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    // Any NaN operand makes the comparison unordered. A signalling NaN
    // (quiet bit 22 clear), or any NaN when signal_nans is requested,
    // raises Invalid Operation.
    if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) ||
        (exp2 == 255 && (uint32_t)(mnt2 << 9))) {
        result = 3;
        if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) ||
            (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) ||
            signal_nans)
            flags |= FPLIB_IOC;
    } else {
        // +0 and -0 compare equal; otherwise order by sign, then
        // exponent, then mantissa (flipped for negative operands).
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
template <>
int
fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
{
    // Double-precision compare returning an NZCV nibble:
    //   3 (0011, CV) unordered, 6 (0110, ZC) equal,
    //   2 (0010, C)  greater,   8 (1000, N)  less.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint64_t mnt1, mnt2;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    // Any NaN operand makes the comparison unordered. A signalling NaN
    // (quiet bit 51 clear), or any NaN when signal_nans is requested,
    // raises Invalid Operation.
    if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) ||
        (exp2 == 2047 && (uint64_t)(mnt2 << 12))) {
        result = 3;
        if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) ||
            (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) ||
            signal_nans)
            flags |= FPLIB_IOC;
    } else {
        // +0 and -0 compare equal; otherwise order by sign, then
        // exponent, then mantissa (flipped for negative operands).
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6;
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2;
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
+static uint16_t
+fp16_FPConvertNaN_32(uint32_t op)
+{
+ return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13);
+}
+
+static uint16_t
+fp16_FPConvertNaN_64(uint64_t op)
+{
+ return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42);
+}
+
+static uint32_t
+fp32_FPConvertNaN_16(uint16_t op)
+{
+ return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13);
+}
+
+static uint32_t
+fp32_FPConvertNaN_64(uint64_t op)
+{
+ return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29);
+}
+
+static uint64_t
+fp64_FPConvertNaN_16(uint16_t op)
+{
+ return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42);
+}
+
+static uint64_t
+fp64_FPConvertNaN_32(uint32_t op)
+{
+ return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29);
+}
+
+static uint32_t
+fp32_FPOnePointFive(int sgn)
+{
+ return fp32_pack(sgn, 127, (uint64_t)1 << 22);
+}
+
+static uint64_t
+fp64_FPOnePointFive(int sgn)
+{
+ return fp64_pack(sgn, 1023, (uint64_t)1 << 51);
+}
+
+static uint32_t
+fp32_FPThree(int sgn)
+{
+ return fp32_pack(sgn, 128, (uint64_t)1 << 22);
+}
+
+static uint64_t
+fp64_FPThree(int sgn)
+{
+ return fp64_pack(sgn, 1024, (uint64_t)1 << 51);
+}
+
+static uint32_t
+fp32_FPTwo(int sgn)
+{
+ return fp32_pack(sgn, 128, 0);
+}
+
+static uint64_t
+fp64_FPTwo(int sgn)
+{
+ return fp64_pack(sgn, 1024, 0);
+}
+
template <>
uint16_t
fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
{
    // Convert single precision to half precision, honouring the
    // Alternative Half-Precision (AHP) format when fpscr.ahp is set
    // (AHP has no infinities or NaNs).
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint16_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    bool alt_hp = fpscr.ahp;

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        // NaN input: AHP yields zero; otherwise propagate or use the
        // default NaN per fpscr.dn. Signalling NaNs (quiet bit clear)
        // and any NaN under AHP raise Invalid Operation.
        if (alt_hp) {
            result = fp16_zero(sgn);
        } else if (fpscr.dn) {
            result = fp16_defaultNaN();
        } else {
            result = fp16_FPConvertNaN_32(op);
        }
        if (!(mnt >> 22 & 1) || alt_hp) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 255) {
        // Infinity: AHP has no infinity encoding, so saturate and raise
        // Invalid Operation.
        if (alt_hp) {
            result = sgn << 15 | (uint16_t)0x7fff;
            flags |= FPLIB_IOC;
        } else {
            result = fp16_infinity(sgn);
        }
    } else if (!mnt) {
        result = fp16_zero(sgn);
    } else {
        // Re-bias the exponent and round; the low 25 bits collapse into
        // a sticky bit. NOTE(review): bit 4 of the mode word presumably
        // signals AHP to fp16_round_ — confirm against its definition.
        result = fp16_round_(sgn, exp - 127 + 15,
                             mnt >> 7 | !!(uint32_t)(mnt << 25),
                             rounding, mode | alt_hp << 4, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
template <>
uint16_t
fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
{
    // Convert double precision to half precision, honouring the
    // Alternative Half-Precision (AHP) format when fpscr.ahp is set
    // (AHP has no infinities or NaNs).
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint16_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    bool alt_hp = fpscr.ahp;

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        // NaN input: AHP yields zero; otherwise propagate or use the
        // default NaN per fpscr.dn. Signalling NaNs (quiet bit clear)
        // and any NaN under AHP raise Invalid Operation.
        if (alt_hp) {
            result = fp16_zero(sgn);
        } else if (fpscr.dn) {
            result = fp16_defaultNaN();
        } else {
            result = fp16_FPConvertNaN_64(op);
        }
        if (!(mnt >> 51 & 1) || alt_hp) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 2047) {
        // Infinity: AHP has no infinity encoding, so saturate and raise
        // Invalid Operation.
        if (alt_hp) {
            result = sgn << 15 | (uint16_t)0x7fff;
            flags |= FPLIB_IOC;
        } else {
            result = fp16_infinity(sgn);
        }
    } else if (!mnt) {
        result = fp16_zero(sgn);
    } else {
        // Re-bias the exponent and round; the low bits collapse into a
        // sticky bit. NOTE(review): bit 4 of the mode word presumably
        // signals AHP to fp16_round_ — confirm against its definition.
        result = fp16_round_(sgn, exp - 1023 + 15,
                             mnt >> 36 | !!(uint64_t)(mnt << 28),
                             rounding, mode | alt_hp << 4, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
template <>
uint32_t
fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
{
    // Convert half precision to single precision. Widening is exact, so
    // no rounding is needed on the normal path. With fpscr.ahp set the
    // all-ones exponent encodes a normal value, not infinity/NaN.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint32_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
        // NaN input: propagate or use the default NaN per fpscr.dn;
        // a signalling NaN (quiet bit 9 clear) raises Invalid Operation.
        if (fpscr.dn) {
            result = fp32_defaultNaN();
        } else {
            result = fp32_FPConvertNaN_16(op);
        }
        if (!(mnt >> 9 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 31 && !fpscr.ahp) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else {
        // Normalise (handles subnormals) and re-bias; the +5 compensates
        // for the position fp16_normalise leaves the leading bit in.
        mnt = fp16_normalise(mnt, &exp);
        result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
template <>
uint32_t
fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
{
    // Convert double precision to single precision with rounding.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint32_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        // NaN input: propagate or use the default NaN per fpscr.dn;
        // a signalling NaN (quiet bit 51 clear) raises Invalid Operation.
        if (fpscr.dn) {
            result = fp32_defaultNaN();
        } else {
            result = fp32_FPConvertNaN_64(op);
        }
        if (!(mnt >> 51 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 2047) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else {
        // Re-bias and round; the discarded low 44 bits collapse into a
        // sticky bit.
        result = fp32_round_(sgn, exp - 1023 + 127,
                             mnt >> 20 | !!(uint64_t)(mnt << 44),
                             rounding, mode, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
template <>
uint64_t
fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
{
    // Convert half precision to double precision. Widening is exact, so
    // no rounding is needed on the normal path. With fpscr.ahp set the
    // all-ones exponent encodes a normal value, not infinity/NaN.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint64_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
        // NaN input: propagate or use the default NaN per fpscr.dn;
        // a signalling NaN (quiet bit 9 clear) raises Invalid Operation.
        if (fpscr.dn) {
            result = fp64_defaultNaN();
        } else {
            result = fp64_FPConvertNaN_16(op);
        }
        if (!(mnt >> 9 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 31 && !fpscr.ahp) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else {
        // Normalise (handles subnormals) and re-bias; the +5 compensates
        // for the position fp16_normalise leaves the leading bit in.
        mnt = fp16_normalise(mnt, &exp);
        result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
template <>
uint64_t
fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
{
    // Convert single precision to double precision. Widening is exact,
    // so no rounding is needed on the normal path.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint64_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        // NaN input: propagate or use the default NaN per fpscr.dn;
        // a signalling NaN (quiet bit 22 clear) raises Invalid Operation.
        if (fpscr.dn) {
            result = fp64_defaultNaN();
        } else {
            result = fp64_FPConvertNaN_32(op);
        }
        if (!(mnt >> 22 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 255) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else {
        // Normalise (handles subnormals) and re-bias; the +8 compensates
        // for the position fp32_normalise leaves the leading bit in.
        mnt = fp32_normalise(mnt, &exp);
        result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
+template <>
+uint32_t
+fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
+{
+ int flags = 0;
+ uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
+ set_fpscr0(fpscr, flags);
+ return result;
+}
+
+template <>
+uint64_t
+fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
+{
+ int flags = 0;
+ uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
+ set_fpscr0(fpscr, flags);
+ return result;
+}
+
+template <>
+uint32_t
+fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
+{
+ int flags = 0;
+ uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
+ set_fpscr0(fpscr, flags);
+ return result;
+}
+
+template <>
+uint64_t
+fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
+{
+ int flags = 0;
+ uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
+ set_fpscr0(fpscr, flags);
+ return result;
+}
+
+static uint32_t
+fp32_repack(int sgn, int exp, uint32_t mnt)
+{
+ return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt);
+}
+
+static uint64_t
+fp64_repack(int sgn, int exp, uint64_t mnt)
+{
+ return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt);
+}
+
static void
fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    // The test !((uint32_t)~(*op << 1) >> 23) is true iff the exponent
    // is all-ones AND the quiet bit (bit 22) is set, i.e. the operand
    // is a quiet NaN. The replacement infinity's sign is chosen so it
    // always loses the subsequent min/max comparison.
    if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23)
        *op1 = fp32_infinity(sgn);
    if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23)
        *op2 = fp32_infinity(sgn);
}
+
static void
fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    // The test !((uint64_t)~(*op << 1) >> 52) is true iff the exponent
    // is all-ones AND the quiet bit (bit 51) is set, i.e. the operand
    // is a quiet NaN. The replacement infinity's sign is chosen so it
    // always loses the subsequent min/max comparison.
    if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52)
        *op1 = fp64_infinity(sgn);
    if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52)
        *op2 = fp64_infinity(sgn);
}
+
template <>
uint32_t
fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    // Single-precision maximum. NaNs are handled by fp32_process_NaNs;
    // the winner is repacked so subnormals keep a zero exponent.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        // If signs differ the non-negative operand wins; otherwise the
        // raw-bit comparison (flipped for negatives) picks the larger.
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}
+
template <>
uint64_t
fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    // Double-precision maximum. NaNs are handled by fp64_process_NaNs;
    // the winner is repacked so subnormals keep a zero exponent.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        // If signs differ the non-negative operand wins; otherwise the
        // raw-bit comparison (flipped for negatives) picks the larger.
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}
+
+template <>
+uint32_t
+fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
+{
+ fp32_minmaxnum(&op1, &op2, 1);
+ return fplibMax<uint32_t>(op1, op2, fpscr);
+}
+
+template <>
+uint64_t
+fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
+{
+ fp64_minmaxnum(&op1, &op2, 1);
+ return fplibMax<uint64_t>(op1, op2, fpscr);
+}
+
template <>
uint32_t
fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    // Single-precision minimum. NaNs are handled by fp32_process_NaNs;
    // the winner is repacked so subnormals keep a zero exponent.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        // If signs differ the negative operand wins; otherwise the
        // raw-bit comparison (flipped for negatives) picks the smaller.
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}
+
template <>
uint64_t
fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    // Double-precision minimum. NaNs are handled by fp64_process_NaNs;
    // the winner is repacked so subnormals keep a zero exponent.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        // If signs differ the negative operand wins; otherwise the
        // raw-bit comparison (flipped for negatives) picks the smaller.
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }
    set_fpscr0(fpscr, flags);
    return result;
}
+
+template <>
+uint32_t
+fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
+{
+ fp32_minmaxnum(&op1, &op2, 0);
+ return fplibMin<uint32_t>(op1, op2, fpscr);
+}
+
+template <>
+uint64_t
+fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
+{
+ fp64_minmaxnum(&op1, &op2, 0);
+ return fplibMin<uint64_t>(op1, op2, fpscr);
+}
+
+template <>
+uint32_t
+fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
+{
+ int flags = 0;
+ uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
+ set_fpscr0(fpscr, flags);
+ return result;
+}
+
+template <>
+uint64_t
+fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
+{
+ int flags = 0;
+ uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
+ set_fpscr0(fpscr, flags);
+ return result;
+}
+
template <>
uint32_t
fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    // Single-precision FMULX: like multiply, except 0 * infinity (in
    // either order) returns +/-2.0 instead of raising Invalid Operation.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            // 0 * infinity: FMULX's special 2.0 result.
            result = fp32_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp32_zero(sgn1 ^ sgn2);
        } else {
            result = fp32_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
template <>
uint64_t
fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    // Double-precision FMULX: like multiply, except 0 * infinity (in
    // either order) returns +/-2.0 instead of raising Invalid Operation.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            // 0 * infinity: FMULX's special 2.0 result.
            result = fp64_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp64_zero(sgn1 ^ sgn2);
        } else {
            result = fp64_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
+template <>
+uint32_t
+fplibNeg(uint32_t op)
+{
+ return op ^ (uint32_t)1 << 31;
+}
+
+template <>
+uint64_t
+fplibNeg(uint64_t op)
+{
+ return op ^ (uint64_t)1 << 63;
+}
+
// 256-entry lookup table for the reciprocal square-root estimate.
// It is indexed by the exponent's parity bit (bit 7) concatenated with
// the top mantissa bits, and yields the 8 estimate bits placed below the
// implicit leading one of the result (see fplibRSqrtEstimate).
static const uint8_t recip_sqrt_estimate[256] = {
    255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
    226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
    201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
    180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
    162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
    145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
    131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
    118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
    105, 104, 103, 101, 100,  99,  97,  96,  95,  93,  92,  91,  90,  88,  87,  86,
     85,  84,  82,  81,  80,  79,  78,  77,  76,  75,  74,  72,  71,  70,  69,  68,
     67,  66,  65,  64,  63,  62,  61,  60,  60,  59,  58,  57,  56,  55,  54,  53,
     52,  51,  51,  50,  49,  48,  47,  46,  46,  45,  44,  43,  42,  42,  41,  40,
     39,  38,  38,  37,  36,  35,  35,  34,  33,  33,  32,  31,  30,  30,  29,  28,
     28,  27,  26,  26,  25,  24,  24,  23,  22,  22,  21,  20,  20,  19,  19,  18,
     17,  17,  16,  16,  15,  14,  14,  13,  13,  12,  11,  11,  10,  10,   9,   9,
      8,   8,   7,   6,   6,   5,   5,   4,   4,   3,   3,   2,   2,   1,   1,   0
};
+
template <>
uint32_t
fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
{
    // Single-precision FRSQRTE: table-based reciprocal square-root
    // estimate (8 result bits from recip_sqrt_estimate).
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        // 1/sqrt(0) = infinity, divide-by-zero.
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        // Negative input is invalid.
        result = fp32_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == 255) {
        result = fp32_zero(0);
    } else {
        // Index: exponent parity (bit 7) plus top 7 mantissa bits.
        exp += 8;
        mnt = fp32_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)];
        result = fp32_pack(0, (380 - exp) >> 1, mnt << 15);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
+template <>
+uint64_t
+fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
+{
+ int mode = modeConv(fpscr);
+ int flags = 0;
+ int sgn, exp;
+ uint64_t mnt, result;
+
+ fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
+
+ if (exp == 2047 && (uint64_t)(mnt << 12)) {
+ result = fp64_process_NaN(op, mode, &flags);
+ } else if (!mnt) {
+ result = fp64_infinity(sgn);
+ flags |= FPLIB_DZC;
+ } else if (sgn) {
+ result = fp64_defaultNaN();
+ flags |= FPLIB_IOC;
+ } else if (exp == 2047) {
+ result = fp32_zero(0);
+ } else {
+ exp += 11;
+ mnt = fp64_normalise(mnt, &exp);
+ mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)];
+ result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44);
+ }
+
+ set_fpscr0(fpscr, flags);
+
+ return result;
+}
+
template <>
uint32_t
fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    // Single-precision FRSQRTS: computes (3 - op1 * op2) / 2 as a fused
    // operation, negating op1 up front so the muladd does the subtract.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            // 0 * infinity: architected result is +1.5.
            result = fp32_FPOnePointFive(0);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            // (3 + (-op1) * op2) / 2; the -1 scale argument halves.
            result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
template <>
uint64_t
fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    // Double-precision FRSQRTS: computes (3 - op1 * op2) / 2 as a fused
    // operation, negating op1 up front so the muladd does the subtract.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            // 0 * infinity: architected result is +1.5.
            result = fp64_FPOnePointFive(0);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            // (3 + (-op1) * op2) / 2; the -1 scale argument halves.
            result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
template <>
uint32_t
fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    // Single-precision FRECPS: computes 2 - op1 * op2 as a fused
    // operation, negating op1 up front so the muladd does the subtract.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            // 0 * infinity: architected result is +2.0.
            result = fp32_FPTwo(0);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
template <>
uint32_t
fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
{
    // Single-precision FRECPE: reciprocal estimate via a small integer
    // division, with explicit overflow/underflow handling.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == 255) {
        // 1/inf = 0.
        result = fp32_zero(sgn);
    } else if (!mnt) {
        // 1/0 = infinity, divide-by-zero.
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint32_t)(op << 1) >> 22)) {
        // Operand magnitude is so small the reciprocal overflows: round
        // to infinity or to the largest normal per the rounding mode.
        bool overflow_to_inf;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            assert(0);
        }
        result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 253) {
        // Result would be subnormal and flush-to-zero is enabled.
        result = fp32_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        // Estimate from the top 9 mantissa bits; the |1 forces rounding
        // of the division toward an odd-based midpoint.
        exp += 8;
        mnt = fp32_normalise(mnt, &exp);
        int result_exp = 253 - exp;
        uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1;
        fraction <<= 15;
        // Denormalise for results just below the normal range.
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp32_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
template <>
uint64_t
fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
{
    // Double-precision FRECPE: reciprocal estimate via a small integer
    // division, with explicit overflow/underflow handling.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == 2047) {
        // 1/inf = 0.
        result = fp64_zero(sgn);
    } else if (!mnt) {
        // 1/0 = infinity, divide-by-zero.
        result = fp64_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint64_t)(op << 1) >> 51)) {
        // Operand magnitude is so small the reciprocal overflows: round
        // to infinity or to the largest normal per the rounding mode.
        bool overflow_to_inf;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            assert(0);
        }
        result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 2045) {
        // Result would be subnormal and flush-to-zero is enabled.
        result = fp64_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        // Estimate from the top 10 mantissa bits; the |1 forces rounding
        // of the division toward an odd-based midpoint.
        exp += 11;
        mnt = fp64_normalise(mnt, &exp);
        int result_exp = 2045 - exp;
        uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1;
        fraction <<= 44;
        // Denormalise for results just below the normal range.
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp64_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
template <>
uint64_t
fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    // Double-precision FRECPS: computes 2 - op1 * op2 as a fused
    // operation, negating op1 up front so the muladd does the subtract.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            // 0 * infinity: architected result is +2.0.
            result = fp64_FPTwo(0);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
template <>
uint32_t
fplibRecpX(uint32_t op, FPSCR &fpscr)
{
    // Single-precision FRECPX: exponent-only reciprocal — the result's
    // exponent is the bitwise complement of the operand's, mantissa zero.
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    }
    else {
        if (!mnt) { // Zero and denormals
            // Maximum exponent, as the complement of exponent 0 would
            // be the NaN/infinity encoding.
            result = fp32_pack(sgn, 254, 0);
        } else { // Infinities and normals
            result = fp32_pack(sgn, exp ^ 255, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
+
+template <>
+uint64_t
+fplibRecpX(uint64_t op, FPSCR &fpscr) // FRECPX, double precision: reciprocal exponent estimate
+{
+ int mode = modeConv(fpscr);
+ int flags = 0;
+ int sgn, exp;
+ uint64_t mnt, result;
+
+ fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
+
+ if (exp == 2047 && (uint64_t)(mnt << 12)) { // NaN input (max exponent, non-zero fraction)
+ result = fp64_process_NaN(op, mode, &flags);
+ }
+ else {
+ if (!mnt) { // Zero and denormals
+ result = fp64_pack(sgn, 2046, 0); // largest finite exponent, mantissa cleared
+ } else { // Infinities and normals
+ result = fp64_pack(sgn, exp ^ 2047, 0); // complement the exponent, mantissa cleared
+ }
+ }
+
+ set_fpscr0(fpscr, flags);
+
+ return result;
+}
+
+template <>
+uint32_t
+fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr) // FRINT*, single precision: round to integral value in FP format
+{
+ int mode = modeConv(fpscr);
+ int flags = 0;
+ int sgn, exp;
+ uint32_t mnt, result;
+
+ // Unpack using FPCR to determine if subnormals are flushed-to-zero:
+ fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
+
+ // Handle NaNs, infinities and zeroes:
+ if (exp == 255 && (uint32_t)(mnt << 9)) {
+ result = fp32_process_NaN(op, mode, &flags);
+ } else if (exp == 255) {
+ result = fp32_infinity(sgn);
+ } else if (!mnt) {
+ result = fp32_zero(sgn);
+ } else if (exp >= 150) {
+ // There are no fractional bits
+ result = op;
+ } else {
+ // Truncate towards zero:
+ uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp); // integer part of the magnitude
+ int err = exp < 118 ? 1 :
+ (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0); // (guard<<1)|sticky; fully-shifted-out values count as sticky-only
+ switch (rounding) {
+ case FPRounding_TIEEVEN:
+ x += (err == 3 || (err == 2 && (x & 1))); // round up past halfway, or at halfway when x is odd
+ break;
+ case FPRounding_POSINF:
+ x += err && !sgn;
+ break;
+ case FPRounding_NEGINF:
+ x += err && sgn;
+ break;
+ case FPRounding_ZERO:
+ break;
+ case FPRounding_TIEAWAY:
+ x += err >> 1; // round up when the fraction is >= 0.5
+ break;
+ default:
+ assert(0);
+ }
+
+ if (x == 0) {
+ result = fp32_zero(sgn); // rounding consumed the whole value; keep the sign
+ } else {
+ exp = 150;
+ mnt = fp32_normalise(x, &exp); // repack the integer as a normalised float
+ result = fp32_pack(sgn, exp + 8, mnt >> 8);
+ }
+
+ if (err && exact)
+ flags |= FPLIB_IXC; // inexact only signalled for the "exact" instruction forms
+ }
+
+ set_fpscr0(fpscr, flags);
+
+ return result;
+}
+
+template <>
+uint64_t
+fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr) // FRINT*, double precision: round to integral value in FP format
+{
+ int mode = modeConv(fpscr);
+ int flags = 0;
+ int sgn, exp;
+ uint64_t mnt, result;
+
+ // Unpack using FPCR to determine if subnormals are flushed-to-zero:
+ fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
+
+ // Handle NaNs, infinities and zeroes:
+ if (exp == 2047 && (uint64_t)(mnt << 12)) {
+ result = fp64_process_NaN(op, mode, &flags);
+ } else if (exp == 2047) {
+ result = fp64_infinity(sgn);
+ } else if (!mnt) {
+ result = fp64_zero(sgn);
+ } else if (exp >= 1075) {
+ // There are no fractional bits
+ result = op;
+ } else {
+ // Truncate towards zero:
+ uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp); // integer part of the magnitude
+ int err = exp < 1011 ? 1 :
+ (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0); // (guard<<1)|sticky; fully-shifted-out values count as sticky-only
+ switch (rounding) {
+ case FPRounding_TIEEVEN:
+ x += (err == 3 || (err == 2 && (x & 1))); // round up past halfway, or at halfway when x is odd
+ break;
+ case FPRounding_POSINF:
+ x += err && !sgn;
+ break;
+ case FPRounding_NEGINF:
+ x += err && sgn;
+ break;
+ case FPRounding_ZERO:
+ break;
+ case FPRounding_TIEAWAY:
+ x += err >> 1; // round up when the fraction is >= 0.5
+ break;
+ default:
+ assert(0);
+ }
+
+ if (x == 0) {
+ result = fp64_zero(sgn); // rounding consumed the whole value; keep the sign
+ } else {
+ exp = 1075;
+ mnt = fp64_normalise(x, &exp); // repack the integer as a normalised double
+ result = fp64_pack(sgn, exp + 11, mnt >> 11);
+ }
+
+ if (err && exact)
+ flags |= FPLIB_IXC; // inexact only signalled for the "exact" instruction forms
+ }
+
+ set_fpscr0(fpscr, flags);
+
+ return result;
+}
+
+template <>
+uint32_t
+fplibSqrt(uint32_t op, FPSCR &fpscr) // FSQRT, single precision: thin wrapper over fp32_sqrt
+{
+ int flags = 0;
+ uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
+ set_fpscr0(fpscr, flags); // merge cumulative exception flags into FPSCR
+ return result;
+}
+
+template <>
+uint64_t
+fplibSqrt(uint64_t op, FPSCR &fpscr) // FSQRT, double precision: thin wrapper over fp64_sqrt
+{
+ int flags = 0;
+ uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
+ set_fpscr0(fpscr, flags); // merge cumulative exception flags into FPSCR
+ return result;
+}
+
+template <>
+uint32_t
+fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr) // FSUB, single precision: add with the subtract flag set
+{
+ int flags = 0;
+ uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags); // third argument 1 selects subtraction
+ set_fpscr0(fpscr, flags);
+ return result;
+}
+
+template <>
+uint64_t
+fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr) // FSUB, double precision: add with the subtract flag set
+{
+ int flags = 0;
+ uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags); // third argument 1 selects subtraction
+ set_fpscr0(fpscr, flags);
+ return result;
+}
+
+static uint64_t
+FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
+ int *flags) // round an unpacked double-format value (bias 1023) to a 64-bit (un)signed integer, saturating
+{
+ uint64_t x;
+ int err;
+
+ if (exp > 1023 + 63) { // magnitude is at least 2^64: guaranteed overflow
+ *flags = FPLIB_IOC;
+ return ((uint64_t)!u << 63) - !sgn; // saturation value: INT64_MIN/MAX or 0/UINT64_MAX
+ }
+
+ x = lsr64(mnt << 11, 1023 + 63 - exp); // truncated magnitude; mnt<<11 puts the leading bit at position 63
+ err = (exp > 1023 + 63 - 2 ? 0 :
+ (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) |
+ !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1))); // (guard<<1)|sticky rounding error bits
+
+ switch (rounding) {
+ case FPRounding_TIEEVEN:
+ x += (err == 3 || (err == 2 && (x & 1))); // round up past halfway, or at halfway when x is odd
+ break;
+ case FPRounding_POSINF:
+ x += err && !sgn;
+ break;
+ case FPRounding_NEGINF:
+ x += err && sgn;
+ break;
+ case FPRounding_ZERO:
+ break;
+ case FPRounding_TIEAWAY:
+ x += err >> 1; // round up when the fraction is >= 0.5
+ break;
+ default:
+ assert(0);
+ }
+
+ if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) { // out of range after rounding (incl. negative-to-unsigned)
+ *flags = FPLIB_IOC;
+ return ((uint64_t)!u << 63) - !sgn; // saturate, invalid-operation flag set
+ }
+
+ if (err) {
+ *flags = FPLIB_IXC; // in-range but inexact
+ }
+
+ return sgn ? -x : x;
+}
+
+static uint32_t
+FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
+ int *flags) // 32-bit conversion: do the 64-bit conversion, then saturate to the 32-bit range
+{
+ uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
+ if (u ? x >= (uint64_t)1 << 32 :
+ !(x < (uint64_t)1 << 31 ||
+ (uint64_t)-x <= (uint64_t)1 << 31)) { // signed case checks both INT32_MAX and INT32_MIN bounds
+ *flags = FPLIB_IOC; // overwrites any IXC from the 64-bit step: saturation dominates
+ x = ((uint32_t)!u << 31) - !sgn; // saturation value: INT32_MIN/MAX or 0/UINT32_MAX
+ }
+ return x;
+}
+
+template <>
+uint32_t
+fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) // FCVT* single -> 32-bit fixed-point (fbits fraction bits)
+{
+ int flags = 0;
+ int sgn, exp;
+ uint32_t mnt, result;
+
+ // Unpack using FPCR to determine if subnormals are flushed-to-zero:
+ fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
+
+ // If NaN, set cumulative flag or take exception:
+ if (exp == 255 && (uint32_t)(mnt << 9)) {
+ flags = FPLIB_IOC;
+ result = 0; // NaN converts to zero with invalid-operation flagged
+ } else {
+ result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits,
+ (uint64_t)mnt << (52 - 23), u, rounding, &flags); // rebias single to double format; fbits scales by 2^fbits
+ }
+
+ set_fpscr0(fpscr, flags);
+
+ return result;
+}
+
+template <>
+uint32_t
+fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) // FCVT* double -> 32-bit fixed-point (fbits fraction bits)
+{
+ int flags = 0;
+ int sgn, exp;
+ uint64_t mnt;
+ uint32_t result;
+
+ // Unpack using FPCR to determine if subnormals are flushed-to-zero:
+ fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
+
+ // If NaN, set cumulative flag or take exception:
+ if (exp == 2047 && (uint64_t)(mnt << 12)) {
+ flags = FPLIB_IOC;
+ result = 0; // NaN converts to zero with invalid-operation flagged
+ } else {
+ result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags); // adding fbits to the exponent scales by 2^fbits
+ }
+
+ set_fpscr0(fpscr, flags);
+
+ return result;
+}
+
+template <>
+uint64_t
+fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) // FCVT* single -> 64-bit fixed-point (fbits fraction bits)
+{
+ int flags = 0;
+ int sgn, exp;
+ uint32_t mnt;
+ uint64_t result;
+
+ // Unpack using FPCR to determine if subnormals are flushed-to-zero:
+ fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
+
+ // If NaN, set cumulative flag or take exception:
+ if (exp == 255 && (uint32_t)(mnt << 9)) {
+ flags = FPLIB_IOC;
+ result = 0; // NaN converts to zero with invalid-operation flagged
+ } else {
+ result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits,
+ (uint64_t)mnt << (52 - 23), u, rounding, &flags); // rebias single to double format; fbits scales by 2^fbits
+ }
+
+ set_fpscr0(fpscr, flags);
+
+ return result;
+}
+
+template <>
+uint64_t
+fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) // FCVT* double -> 64-bit fixed-point (fbits fraction bits)
+{
+ int flags = 0;
+ int sgn, exp;
+ uint64_t mnt, result;
+
+ // Unpack using FPCR to determine if subnormals are flushed-to-zero:
+ fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
+
+ // If NaN, set cumulative flag or take exception:
+ if (exp == 2047 && (uint64_t)(mnt << 12)) {
+ flags = FPLIB_IOC;
+ result = 0; // NaN converts to zero with invalid-operation flagged
+ } else {
+ result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags); // adding fbits to the exponent scales by 2^fbits
+ }
+
+ set_fpscr0(fpscr, flags);
+
+ return result;
+}
+
+static uint32_t
+fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) // convert a 64-bit (un)signed fixed-point value to single precision
+{
+ int x_sgn = !u && a >> 63; // sign only meaningful for the signed variant
+ int x_exp = 190 - fbits; // 127 (bias) + 63 (leading-bit position) - fbits
+ uint64_t x_mnt = x_sgn ? -a : a; // work on the magnitude
+
+ // Handle zero:
+ if (!x_mnt) {
+ return fp32_zero(0);
+ }
+
+ // Normalise and convert to 32 bits, collapsing error into bottom bit:
+ x_mnt = fp64_normalise(x_mnt, &x_exp);
+ x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); // keep discarded bits as a sticky bit for rounding
+
+ return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
+}
+
+static uint64_t
+fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) // convert a 64-bit (un)signed fixed-point value to double precision
+{
+ int x_sgn = !u && a >> 63; // sign only meaningful for the signed variant
+ int x_exp = 1024 + 62 - fbits; // 1023 (bias) + 63 (leading-bit position) - fbits
+ uint64_t x_mnt = x_sgn ? -a : a; // work on the magnitude
+
+ // Handle zero:
+ if (!x_mnt) {
+ return fp64_zero(0);
+ }
+
+ x_mnt = fp64_normalise(x_mnt, &x_exp);
+
+ return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags); // <<1 leaves room for the rounding bits fp64_round expects
+}
+
+template <>
+uint32_t
+fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) // *CVTF fixed-point -> single precision
+{
+ int flags = 0;
+ uint32_t res = fp32_cvtf(op, fbits, u,
+ (int)rounding | ((uint32_t)fpscr >> 22 & 12),
+ &flags); // merges explicit rounding with FPSCR mode bits (FZ/DN region above bit 23)
+ set_fpscr0(fpscr, flags);
+ return res;
+}
+
+template <>
+uint64_t
+fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) // *CVTF fixed-point -> double precision
+{
+ int flags = 0;
+ uint64_t res = fp64_cvtf(op, fbits, u,
+ (int)rounding | ((uint32_t)fpscr >> 22 & 12),
+ &flags); // merges explicit rounding with FPSCR mode bits (FZ/DN region above bit 23)
+ set_fpscr0(fpscr, flags);
+ return res;
+}
+
+}
diff --git a/src/arch/arm/insts/fplib.hh b/src/arch/arm/insts/fplib.hh
new file mode 100644
index 000000000..6263687fc
--- /dev/null
+++ b/src/arch/arm/insts/fplib.hh
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2012-2013 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Edmund Grimley Evans
+ * Thomas Grocutt
+ */
+
+/**
+ * @file
+ * Floating-point library code, which will gradually replace vfp.hh. For
+ * portability, this library does not use floating-point data types. Currently,
+ * C's standard integer types are used in the API, though this could be changed
+ * to something like class Fp32 { uint32_t x; }, etc.
+ */
+
+#ifndef __ARCH_ARM_INSTS_FPLIB_HH__
+#define __ARCH_ARM_INSTS_FPLIB_HH__
+
+#include <stdint.h>
+
+#include "arch/arm/miscregs.hh"
+
+namespace ArmISA
+{
+
+enum FPRounding { // rounding modes; values 0-3 match the FPSCR/FPCR RMode field encoding
+ FPRounding_TIEEVEN = 0, // round to nearest, ties to even
+ FPRounding_POSINF = 1, // round towards plus infinity
+ FPRounding_NEGINF = 2, // round towards minus infinity
+ FPRounding_ZERO = 3, // round towards zero (truncate)
+ FPRounding_TIEAWAY = 4, // round to nearest, ties away from zero
+ FPRounding_ODD = 5 // round to odd (only expressible in software, not in RMode)
+};
+
+static inline FPRounding
+FPCRRounding(FPSCR &fpscr) // extract the RMode rounding field (FPSCR bits [23:22])
+{
+ return (FPRounding)((uint32_t)fpscr >> 22 & 3);
+}
+
+/** Floating-point absolute value. */
+template <class T>
+T fplibAbs(T op);
+/** Floating-point add. */
+template <class T>
+T fplibAdd(T op1, T op2, FPSCR &fpscr);
+/** Floating-point compare (quiet and signaling). */
+template <class T>
+int fplibCompare(T op1, T op2, bool signal_nans, FPSCR &fpscr);
+/** Floating-point compare equal. */
+template <class T>
+bool fplibCompareEQ(T op1, T op2, FPSCR &fpscr);
+/** Floating-point compare greater than or equal. */
+template <class T>
+bool fplibCompareGE(T op1, T op2, FPSCR &fpscr);
+/** Floating-point compare greater than. */
+template <class T>
+bool fplibCompareGT(T op1, T op2, FPSCR &fpscr);
+/** Floating-point convert precision. */
+template <class T1, class T2>
+T2 fplibConvert(T1 op, FPRounding rounding, FPSCR &fpscr);
+/** Floating-point division. */
+template <class T>
+T fplibDiv(T op1, T op2, FPSCR &fpscr);
+/** Floating-point maximum. */
+template <class T>
+T fplibMax(T op1, T op2, FPSCR &fpscr);
+/** Floating-point maximum number. */
+template <class T>
+T fplibMaxNum(T op1, T op2, FPSCR &fpscr);
+/** Floating-point minimum. */
+template <class T>
+T fplibMin(T op1, T op2, FPSCR &fpscr);
+/** Floating-point minimum number. */
+template <class T>
+T fplibMinNum(T op1, T op2, FPSCR &fpscr);
+/** Floating-point multiply. */
+template <class T>
+T fplibMul(T op1, T op2, FPSCR &fpscr);
+/** Floating-point multiply-add. */
+template <class T>
+T fplibMulAdd(T addend, T op1, T op2, FPSCR &fpscr);
+/** Floating-point multiply extended. */
+template <class T>
+T fplibMulX(T op1, T op2, FPSCR &fpscr);
+/** Floating-point negate. */
+template <class T>
+T fplibNeg(T op);
+/** Floating-point reciprocal square root estimate. */
+template <class T>
+T fplibRSqrtEstimate(T op, FPSCR &fpscr);
+/** Floating-point reciprocal square root step. */
+template <class T>
+T fplibRSqrtStepFused(T op1, T op2, FPSCR &fpscr);
+/** Floating-point reciprocal estimate. */
+template <class T>
+T fplibRecipEstimate(T op, FPSCR &fpscr);
+/** Floating-point reciprocal step. */
+template <class T>
+T fplibRecipStepFused(T op1, T op2, FPSCR &fpscr);
+/** Floating-point reciprocal exponent. */
+template <class T>
+T fplibRecpX(T op, FPSCR &fpscr);
+/** Floating-point convert to integer. */
+template <class T>
+T fplibRoundInt(T op, FPRounding rounding, bool exact, FPSCR &fpscr);
+/** Floating-point square root. */
+template <class T>
+T fplibSqrt(T op, FPSCR &fpscr);
+/** Floating-point subtract. */
+template <class T>
+T fplibSub(T op1, T op2, FPSCR &fpscr);
+/** Floating-point convert to fixed-point. */
+template <class T1, class T2>
+T2 fplibFPToFixed(T1 op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr);
+/** Floating-point convert from fixed-point. */
+template <class T>
+T fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
+ FPSCR &fpscr);
+
+/* Function specializations... */
+template <>
+uint32_t fplibAbs(uint32_t op);
+template <>
+uint64_t fplibAbs(uint64_t op);
+template <>
+uint32_t fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+uint64_t fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+int fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr);
+template <>
+int fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr);
+template <>
+bool fplibCompareEQ(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+bool fplibCompareEQ(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+bool fplibCompareGE(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+bool fplibCompareGE(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+bool fplibCompareGT(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+bool fplibCompareGT(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr);
+template <>
+uint16_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr);
+template <>
+uint32_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr);
+template <>
+uint32_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr);
+template <>
+uint64_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr);
+template <>
+uint64_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr);
+template <>
+uint32_t fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+uint64_t fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+uint32_t fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+uint64_t fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+uint32_t fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+uint64_t fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+uint32_t fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+uint64_t fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+uint32_t fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+uint64_t fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+uint32_t fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+uint64_t fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+uint32_t fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2,
+ FPSCR &fpscr);
+template <>
+uint64_t fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2,
+ FPSCR &fpscr);
+template <>
+uint32_t fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+uint64_t fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+uint32_t fplibNeg(uint32_t op);
+template <>
+uint64_t fplibNeg(uint64_t op);
+template <>
+uint32_t fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr);
+template<>
+uint64_t fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr);
+template <>
+uint32_t fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+uint64_t fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+uint32_t fplibRecipEstimate(uint32_t op, FPSCR &fpscr);
+template <>
+uint64_t fplibRecipEstimate(uint64_t op, FPSCR &fpscr);
+template <>
+uint32_t fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+uint64_t fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+uint32_t fplibRecpX(uint32_t op, FPSCR &fpscr);
+template <>
+uint64_t fplibRecpX(uint64_t op, FPSCR &fpscr);
+template <>
+uint32_t fplibRoundInt(uint32_t op, FPRounding rounding, bool exact,
+ FPSCR &fpscr);
+template <>
+uint64_t fplibRoundInt(uint64_t op, FPRounding rounding, bool exact,
+ FPSCR &fpscr);
+template <>
+uint32_t fplibSqrt(uint32_t op, FPSCR &fpscr);
+template <>
+uint64_t fplibSqrt(uint64_t op, FPSCR &fpscr);
+template <>
+uint32_t fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr);
+template <>
+uint64_t fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr);
+template <>
+uint32_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding,
+ FPSCR &fpscr);
+template <>
+uint32_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding,
+ FPSCR &fpscr);
+template <>
+uint64_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding,
+ FPSCR &fpscr);
+template <>
+uint64_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding,
+ FPSCR &fpscr);
+template <>
+uint32_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
+ FPSCR &fpscr);
+template <>
+uint64_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
+ FPSCR &fpscr);
+}
+
+#endif
diff --git a/src/arch/arm/insts/macromem.cc b/src/arch/arm/insts/macromem.cc
index 26a916fc7..42cb98a7c 100644
--- a/src/arch/arm/insts/macromem.cc
+++ b/src/arch/arm/insts/macromem.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -43,7 +43,9 @@
#include <sstream>
#include "arch/arm/insts/macromem.hh"
+
#include "arch/arm/generated/decoder.hh"
+#include "arch/arm/insts/neon64_mem.hh"
using namespace std;
using namespace ArmISAInst;
@@ -177,6 +179,212 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
}
}
+PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ uint32_t size, bool fp, bool load, bool noAlloc,
+ bool signExt, bool exclusive, bool acrel,
+ int64_t imm, AddrMode mode,
+ IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
+ PredMacroOp(mnem, machInst, __opClass) // LDP/STP pair access: address uop + memory uops + optional writeback
+{
+ bool writeback = (mode != AddrMd_Offset); // pre/post-indexed forms update the base register
+ numMicroops = 1 + (size / 4) + (writeback ? 1 : 0); // size/4 matches the per-size memory-uop counts below
+ microOps = new StaticInstPtr[numMicroops];
+
+ StaticInstPtr *uop = microOps;
+
+ bool post = (mode == AddrMd_PostIndex);
+
+ rn = makeSP(rn); // base register 31 means SP, not XZR
+
+ *uop = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, post ? 0 : imm); // UREG0 = effective address; post-index accesses at base+0
+
+ if (fp) {
+ if (size == 16) { // 128-bit registers: each transfer split into two 64-bit halves (4 uops)
+ if (load) {
+ *++uop = new MicroLdrQBFpXImmUop(machInst, rt,
+ INTREG_UREG0, 0, noAlloc, exclusive, acrel);
+ *++uop = new MicroLdrQTFpXImmUop(machInst, rt,
+ INTREG_UREG0, 0, noAlloc, exclusive, acrel);
+ *++uop = new MicroLdrQBFpXImmUop(machInst, rt2,
+ INTREG_UREG0, 16, noAlloc, exclusive, acrel);
+ *++uop = new MicroLdrQTFpXImmUop(machInst, rt2,
+ INTREG_UREG0, 16, noAlloc, exclusive, acrel);
+ } else {
+ *++uop = new MicroStrQBFpXImmUop(machInst, rt,
+ INTREG_UREG0, 0, noAlloc, exclusive, acrel);
+ *++uop = new MicroStrQTFpXImmUop(machInst, rt,
+ INTREG_UREG0, 0, noAlloc, exclusive, acrel);
+ *++uop = new MicroStrQBFpXImmUop(machInst, rt2,
+ INTREG_UREG0, 16, noAlloc, exclusive, acrel);
+ *++uop = new MicroStrQTFpXImmUop(machInst, rt2,
+ INTREG_UREG0, 16, noAlloc, exclusive, acrel);
+ }
+ } else if (size == 8) { // 64-bit FP registers: one uop per register
+ if (load) {
+ *++uop = new MicroLdrFpXImmUop(machInst, rt,
+ INTREG_UREG0, 0, noAlloc, exclusive, acrel);
+ *++uop = new MicroLdrFpXImmUop(machInst, rt2,
+ INTREG_UREG0, 8, noAlloc, exclusive, acrel);
+ } else {
+ *++uop = new MicroStrFpXImmUop(machInst, rt,
+ INTREG_UREG0, 0, noAlloc, exclusive, acrel);
+ *++uop = new MicroStrFpXImmUop(machInst, rt2,
+ INTREG_UREG0, 8, noAlloc, exclusive, acrel);
+ }
+ } else if (size == 4) { // 32-bit FP registers: one combined double-register uop
+ if (load) {
+ *++uop = new MicroLdrDFpXImmUop(machInst, rt, rt2,
+ INTREG_UREG0, 0, noAlloc, exclusive, acrel);
+ } else {
+ *++uop = new MicroStrDFpXImmUop(machInst, rt, rt2,
+ INTREG_UREG0, 0, noAlloc, exclusive, acrel);
+ }
+ }
+ } else {
+ if (size == 8) { // 64-bit integer registers: one uop per register
+ if (load) {
+ *++uop = new MicroLdrXImmUop(machInst, rt, INTREG_UREG0,
+ 0, noAlloc, exclusive, acrel);
+ *++uop = new MicroLdrXImmUop(machInst, rt2, INTREG_UREG0,
+ size, noAlloc, exclusive, acrel);
+ } else {
+ *++uop = new MicroStrXImmUop(machInst, rt, INTREG_UREG0,
+ 0, noAlloc, exclusive, acrel);
+ *++uop = new MicroStrXImmUop(machInst, rt2, INTREG_UREG0,
+ size, noAlloc, exclusive, acrel);
+ }
+ } else if (size == 4) { // 32-bit integer registers: one combined uop, sign- or zero-extending on load
+ if (load) {
+ if (signExt) {
+ *++uop = new MicroLdrDSXImmUop(machInst, rt, rt2,
+ INTREG_UREG0, 0, noAlloc, exclusive, acrel);
+ } else {
+ *++uop = new MicroLdrDUXImmUop(machInst, rt, rt2,
+ INTREG_UREG0, 0, noAlloc, exclusive, acrel);
+ }
+ } else {
+ *++uop = new MicroStrDXImmUop(machInst, rt, rt2,
+ INTREG_UREG0, 0, noAlloc, exclusive, acrel);
+ }
+ }
+ }
+
+ if (writeback) {
+ *++uop = new MicroAddXiUop(machInst, rn, INTREG_UREG0,
+ post ? imm : 0); // post-index adds imm here; pre-index already added it into UREG0
+ }
+
+ (*uop)->setLastMicroop();
+
+ for (StaticInstPtr *curUop = microOps;
+ !(*curUop)->isLastMicroop(); curUop++) {
+ (*curUop)->setDelayedCommit(); // all but the last uop commit together with the macro-op
+ }
+}
+
+BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
+ OpClass __opClass, bool load, IntRegIndex dest,
+ IntRegIndex base, int64_t imm) :
+ PredMacroOp(mnem, machInst, __opClass) // 128-bit FP load/store, immediate offset: split into two 64-bit half accesses
+{
+ numMicroops = 2;
+ microOps = new StaticInstPtr[numMicroops];
+
+ if (load) {
+ microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); // QB/QT: presumably bottom/top 64-bit halves -- TODO confirm against uop defs
+ microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm);
+ } else {
+ microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
+ microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
+ }
+ microOps[0]->setDelayedCommit();
+ microOps[1]->setLastMicroop();
+}
+
+BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
+ OpClass __opClass, bool load, IntRegIndex dest,
+ IntRegIndex base, int64_t imm) :
+ PredMacroOp(mnem, machInst, __opClass) // 128-bit FP load/store, post-indexed: access at base+0, then base += imm
+{
+ numMicroops = 3;
+ microOps = new StaticInstPtr[numMicroops];
+
+ if (load) {
+ microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, 0); // post-index: access uses the unmodified base
+ microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, 0);
+ } else {
+ microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
+ microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
+ }
+ microOps[2] = new MicroAddXiUop(machInst, base, base, imm); // writeback after the access
+
+ microOps[0]->setDelayedCommit();
+ microOps[1]->setDelayedCommit();
+ microOps[2]->setLastMicroop();
+}
+
+BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
+ OpClass __opClass, bool load, IntRegIndex dest,
+ IntRegIndex base, int64_t imm) :
+ PredMacroOp(mnem, machInst, __opClass) // 128-bit FP load/store, pre-indexed: access at base+imm, and base += imm
+{
+ numMicroops = 3;
+ microOps = new StaticInstPtr[numMicroops];
+
+ if (load) {
+ microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); // pre-index: access already uses the offset address
+ microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm);
+ } else {
+ microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
+ microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
+ }
+ microOps[2] = new MicroAddXiUop(machInst, base, base, imm); // writeback of the updated base
+
+ microOps[0]->setDelayedCommit();
+ microOps[1]->setDelayedCommit();
+ microOps[2]->setLastMicroop();
+}
+
+BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
+ OpClass __opClass, bool load, IntRegIndex dest,
+ IntRegIndex base, IntRegIndex offset,
+ ArmExtendType type, int64_t imm) :
+ PredMacroOp(mnem, machInst, __opClass) // 128-bit FP load/store with register offset (extended/shifted by type/imm)
+{
+ numMicroops = 2;
+ microOps = new StaticInstPtr[numMicroops];
+
+ if (load) {
+ microOps[0] = new MicroLdrQBFpXRegUop(machInst, dest, base,
+ offset, type, imm);
+ microOps[1] = new MicroLdrQTFpXRegUop(machInst, dest, base,
+ offset, type, imm);
+ } else {
+ microOps[0] = new MicroStrQBFpXRegUop(machInst, dest, base,
+ offset, type, imm);
+ microOps[1] = new MicroStrQTFpXRegUop(machInst, dest, base,
+ offset, type, imm);
+ }
+
+ microOps[0]->setDelayedCommit();
+ microOps[1]->setLastMicroop();
+}
+
+BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
+ OpClass __opClass, IntRegIndex dest,
+ int64_t imm) :
+ PredMacroOp(mnem, machInst, __opClass) // 128-bit FP literal (PC-relative) load, split into two 64-bit half loads
+{
+ numMicroops = 2;
+ microOps = new StaticInstPtr[numMicroops];
+
+ microOps[0] = new MicroLdrQBFpXLitUop(machInst, dest, imm);
+ microOps[1] = new MicroLdrQTFpXLitUop(machInst, dest, imm);
+
+ microOps[0]->setDelayedCommit();
+ microOps[1]->setLastMicroop();
+}
+
VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
@@ -193,7 +401,7 @@ VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
if (deinterleave) numMicroops += (regs / elems);
microOps = new StaticInstPtr[numMicroops];
- RegIndex rMid = deinterleave ? NumFloatArchRegs : vd * 2;
+ RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2;
uint32_t noAlign = TLB::MustBeOne;
@@ -295,7 +503,7 @@ VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
numMicroops += (regs / elems);
microOps = new StaticInstPtr[numMicroops];
- RegIndex ufp0 = NumFloatArchRegs;
+ RegIndex ufp0 = NumFloatV7ArchRegs;
unsigned uopIdx = 0;
switch (loadSize) {
@@ -556,7 +764,7 @@ VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
uint32_t noAlign = TLB::MustBeOne;
- RegIndex rMid = interleave ? NumFloatArchRegs : vd * 2;
+ RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2;
unsigned uopIdx = 0;
if (interleave) {
@@ -657,7 +865,7 @@ VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
numMicroops += (regs / elems);
microOps = new StaticInstPtr[numMicroops];
- RegIndex ufp0 = NumFloatArchRegs;
+ RegIndex ufp0 = NumFloatV7ArchRegs;
unsigned uopIdx = 0;
switch (elems) {
@@ -834,6 +1042,285 @@ VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
microOps[numMicroops - 1]->setLastMicroop();
}
+VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
+ OpClass __opClass, RegIndex rn, RegIndex vd,
+ RegIndex rm, uint8_t eSize, uint8_t dataSize,
+ uint8_t numStructElems, uint8_t numRegs, bool wb) :
+ PredMacroOp(mnem, machInst, __opClass)
+{
+ RegIndex vx = NumFloatV8ArchRegs / 4;
+ RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
+ bool baseIsSP = isSP((IntRegIndex) rnsp);
+
+ numMicroops = wb ? 1 : 0;
+
+ int totNumBytes = numRegs * dataSize / 8;
+ assert(totNumBytes <= 64);
+
+ // The guiding principle here is that no more than 16 bytes can be
+ // transferred at a time
+ int numMemMicroops = totNumBytes / 16;
+ int residuum = totNumBytes % 16;
+ if (residuum)
+ ++numMemMicroops;
+ numMicroops += numMemMicroops;
+
+ int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
+ numMicroops += numMarshalMicroops;
+
+ microOps = new StaticInstPtr[numMicroops];
+ unsigned uopIdx = 0;
+ uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
+ TLB::AllowUnaligned;
+
+ int i = 0;
+ for(; i < numMemMicroops - 1; ++i) {
+ microOps[uopIdx++] = new MicroNeonLoad64(
+ machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
+ baseIsSP, 16 /* accSize */, eSize);
+ }
+ microOps[uopIdx++] = new MicroNeonLoad64(
+ machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
+ residuum ? residuum : 16 /* accSize */, eSize);
+
+ // Writeback microop: the post-increment amount is encoded in "Rm": a
+ // 64-bit general register OR as '11111' for an immediate value equal to
+ // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
+ if (wb) {
+ if (rm != ((RegIndex) INTREG_X31)) {
+ microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
+ UXTX, 0);
+ } else {
+ microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
+ totNumBytes);
+ }
+ }
+
+ for (int i = 0; i < numMarshalMicroops; ++i) {
+ microOps[uopIdx++] = new MicroDeintNeon64(
+ machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
+ numStructElems, numRegs, i /* step */);
+ }
+
+ assert(uopIdx == numMicroops);
+
+ for (int i = 0; i < numMicroops - 1; ++i) {
+ microOps[i]->setDelayedCommit();
+ }
+ microOps[numMicroops - 1]->setLastMicroop();
+}
+
+VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
+ OpClass __opClass, RegIndex rn, RegIndex vd,
+ RegIndex rm, uint8_t eSize, uint8_t dataSize,
+ uint8_t numStructElems, uint8_t numRegs, bool wb) :
+ PredMacroOp(mnem, machInst, __opClass)
+{
+ RegIndex vx = NumFloatV8ArchRegs / 4;
+ RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
+ bool baseIsSP = isSP((IntRegIndex) rnsp);
+
+ numMicroops = wb ? 1 : 0;
+
+ int totNumBytes = numRegs * dataSize / 8;
+ assert(totNumBytes <= 64);
+
+ // The guiding principle here is that no more than 16 bytes can be
+ // transferred at a time
+ int numMemMicroops = totNumBytes / 16;
+ int residuum = totNumBytes % 16;
+ if (residuum)
+ ++numMemMicroops;
+ numMicroops += numMemMicroops;
+
+ int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
+ numMicroops += numMarshalMicroops;
+
+ microOps = new StaticInstPtr[numMicroops];
+ unsigned uopIdx = 0;
+
+ for(int i = 0; i < numMarshalMicroops; ++i) {
+ microOps[uopIdx++] = new MicroIntNeon64(
+ machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
+ numStructElems, numRegs, i /* step */);
+ }
+
+ uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
+ TLB::AllowUnaligned;
+
+ int i = 0;
+ for(; i < numMemMicroops - 1; ++i) {
+ microOps[uopIdx++] = new MicroNeonStore64(
+ machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
+ baseIsSP, 16 /* accSize */, eSize);
+ }
+ microOps[uopIdx++] = new MicroNeonStore64(
+ machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
+ residuum ? residuum : 16 /* accSize */, eSize);
+
+ // Writeback microop: the post-increment amount is encoded in "Rm": a
+ // 64-bit general register OR as '11111' for an immediate value equal to
+ // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
+ if (wb) {
+ if (rm != ((RegIndex) INTREG_X31)) {
+ microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
+ UXTX, 0);
+ } else {
+ microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
+ totNumBytes);
+ }
+ }
+
+ assert(uopIdx == numMicroops);
+
+ for (int i = 0; i < numMicroops - 1; i++) {
+ microOps[i]->setDelayedCommit();
+ }
+ microOps[numMicroops - 1]->setLastMicroop();
+}
+
+VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
+ OpClass __opClass, RegIndex rn, RegIndex vd,
+ RegIndex rm, uint8_t eSize, uint8_t dataSize,
+ uint8_t numStructElems, uint8_t index, bool wb,
+ bool replicate) :
+ PredMacroOp(mnem, machInst, __opClass)
+{
+ RegIndex vx = NumFloatV8ArchRegs / 4;
+ RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
+ bool baseIsSP = isSP((IntRegIndex) rnsp);
+
+ numMicroops = wb ? 1 : 0;
+
+ int eSizeBytes = 1 << eSize;
+ int totNumBytes = numStructElems * eSizeBytes;
+ assert(totNumBytes <= 64);
+
+ // The guiding principle here is that no more than 16 bytes can be
+ // transferred at a time
+ int numMemMicroops = totNumBytes / 16;
+ int residuum = totNumBytes % 16;
+ if (residuum)
+ ++numMemMicroops;
+ numMicroops += numMemMicroops;
+
+ int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
+ numMicroops += numMarshalMicroops;
+
+ microOps = new StaticInstPtr[numMicroops];
+ unsigned uopIdx = 0;
+
+ uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
+ TLB::AllowUnaligned;
+
+ int i = 0;
+ for (; i < numMemMicroops - 1; ++i) {
+ microOps[uopIdx++] = new MicroNeonLoad64(
+ machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
+ baseIsSP, 16 /* accSize */, eSize);
+ }
+ microOps[uopIdx++] = new MicroNeonLoad64(
+ machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
+ residuum ? residuum : 16 /* accSize */, eSize);
+
+ // Writeback microop: the post-increment amount is encoded in "Rm": a
+ // 64-bit general register OR as '11111' for an immediate value equal to
+ // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
+ if (wb) {
+ if (rm != ((RegIndex) INTREG_X31)) {
+ microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
+ UXTX, 0);
+ } else {
+ microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
+ totNumBytes);
+ }
+ }
+
+ for(int i = 0; i < numMarshalMicroops; ++i) {
+ microOps[uopIdx++] = new MicroUnpackNeon64(
+ machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
+ numStructElems, index, i /* step */, replicate);
+ }
+
+ assert(uopIdx == numMicroops);
+
+ for (int i = 0; i < numMicroops - 1; i++) {
+ microOps[i]->setDelayedCommit();
+ }
+ microOps[numMicroops - 1]->setLastMicroop();
+}
+
+VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
+ OpClass __opClass, RegIndex rn, RegIndex vd,
+ RegIndex rm, uint8_t eSize, uint8_t dataSize,
+ uint8_t numStructElems, uint8_t index, bool wb,
+ bool replicate) :
+ PredMacroOp(mnem, machInst, __opClass)
+{
+ RegIndex vx = NumFloatV8ArchRegs / 4;
+ RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
+ bool baseIsSP = isSP((IntRegIndex) rnsp);
+
+ numMicroops = wb ? 1 : 0;
+
+ int eSizeBytes = 1 << eSize;
+ int totNumBytes = numStructElems * eSizeBytes;
+ assert(totNumBytes <= 64);
+
+ // The guiding principle here is that no more than 16 bytes can be
+ // transferred at a time
+ int numMemMicroops = totNumBytes / 16;
+ int residuum = totNumBytes % 16;
+ if (residuum)
+ ++numMemMicroops;
+ numMicroops += numMemMicroops;
+
+ int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
+ numMicroops += numMarshalMicroops;
+
+ microOps = new StaticInstPtr[numMicroops];
+ unsigned uopIdx = 0;
+
+ for(int i = 0; i < numMarshalMicroops; ++i) {
+ microOps[uopIdx++] = new MicroPackNeon64(
+ machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
+ numStructElems, index, i /* step */, replicate);
+ }
+
+ uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
+ TLB::AllowUnaligned;
+
+ int i = 0;
+ for(; i < numMemMicroops - 1; ++i) {
+ microOps[uopIdx++] = new MicroNeonStore64(
+ machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
+ baseIsSP, 16 /* accSize */, eSize);
+ }
+ microOps[uopIdx++] = new MicroNeonStore64(
+ machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
+ residuum ? residuum : 16 /* accSize */, eSize);
+
+ // Writeback microop: the post-increment amount is encoded in "Rm": a
+ // 64-bit general register OR as '11111' for an immediate value equal to
+ // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
+ if (wb) {
+ if (rm != ((RegIndex) INTREG_X31)) {
+ microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
+ UXTX, 0);
+ } else {
+ microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
+ totNumBytes);
+ }
+ }
+
+ assert(uopIdx == numMicroops);
+
+ for (int i = 0; i < numMicroops - 1; i++) {
+ microOps[i]->setDelayedCommit();
+ }
+ microOps[numMicroops - 1]->setLastMicroop();
+}
+
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
OpClass __opClass, IntRegIndex rn,
RegIndex vd, bool single, bool up,
@@ -846,14 +1333,14 @@ MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
// to be functionally identical except that fldmx is deprecated. For now
// we'll assume they're otherwise interchangable.
int count = (single ? offset : (offset / 2));
- if (count == 0 || count > NumFloatArchRegs)
+ if (count == 0 || count > NumFloatV7ArchRegs)
warn_once("Bad offset field for VFP load/store multiple.\n");
if (count == 0) {
// Force there to be at least one microop so the macroop makes sense.
writeback = true;
}
- if (count > NumFloatArchRegs)
- count = NumFloatArchRegs;
+ if (count > NumFloatV7ArchRegs)
+ count = NumFloatV7ArchRegs;
numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
microOps = new StaticInstPtr[numMicroops];
@@ -934,6 +1421,19 @@ MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
}
std::string
+MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss);
+ printReg(ss, ura);
+ ss << ", ";
+ printReg(ss, urb);
+ ss << ", ";
+ ccprintf(ss, "#%d", imm);
+ return ss.str();
+}
+
+std::string
MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
@@ -943,6 +1443,18 @@ MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
}
std::string
+MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss);
+ printReg(ss, ura);
+ ccprintf(ss, ", ");
+ printReg(ss, urb);
+ printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
+ return ss.str();
+}
+
+std::string
MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
diff --git a/src/arch/arm/insts/macromem.hh b/src/arch/arm/insts/macromem.hh
index 4933a1e7c..fc8e3e1b7 100644
--- a/src/arch/arm/insts/macromem.hh
+++ b/src/arch/arm/insts/macromem.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -85,6 +85,27 @@ class MicroOp : public PredOp
}
};
+class MicroOpX : public ArmStaticInst
+{
+ protected:
+ MicroOpX(const char *mnem, ExtMachInst machInst, OpClass __opClass)
+ : ArmStaticInst(mnem, machInst, __opClass)
+ {}
+
+ public:
+ void
+ advancePC(PCState &pcState) const
+ {
+ if (flags[IsLastMicroop]) {
+ pcState.uEnd();
+ } else if (flags[IsMicroop]) {
+ pcState.uAdvance();
+ } else {
+ pcState.advance();
+ }
+ }
+};
+
/**
* Microops for Neon loads/stores
*/
@@ -136,6 +157,96 @@ class MicroNeonMixLaneOp : public MicroNeonMixOp
};
/**
+ * Microops for AArch64 NEON load/store (de)interleaving
+ */
+class MicroNeonMixOp64 : public MicroOp
+{
+ protected:
+ RegIndex dest, op1;
+ uint8_t eSize, dataSize, numStructElems, numRegs, step;
+
+ MicroNeonMixOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ RegIndex _dest, RegIndex _op1, uint8_t _eSize,
+ uint8_t _dataSize, uint8_t _numStructElems,
+ uint8_t _numRegs, uint8_t _step)
+ : MicroOp(mnem, machInst, __opClass), dest(_dest), op1(_op1),
+ eSize(_eSize), dataSize(_dataSize), numStructElems(_numStructElems),
+ numRegs(_numRegs), step(_step)
+ {
+ }
+};
+
+class MicroNeonMixLaneOp64 : public MicroOp
+{
+ protected:
+ RegIndex dest, op1;
+ uint8_t eSize, dataSize, numStructElems, lane, step;
+ bool replicate;
+
+ MicroNeonMixLaneOp64(const char *mnem, ExtMachInst machInst,
+ OpClass __opClass, RegIndex _dest, RegIndex _op1,
+ uint8_t _eSize, uint8_t _dataSize,
+ uint8_t _numStructElems, uint8_t _lane, uint8_t _step,
+ bool _replicate = false)
+ : MicroOp(mnem, machInst, __opClass), dest(_dest), op1(_op1),
+ eSize(_eSize), dataSize(_dataSize), numStructElems(_numStructElems),
+ lane(_lane), step(_step), replicate(_replicate)
+ {
+ }
+};
+
+/**
+ * Base classes for microcoded AArch64 NEON memory instructions.
+ */
+class VldMultOp64 : public PredMacroOp
+{
+ protected:
+ uint8_t eSize, dataSize, numStructElems, numRegs;
+ bool wb;
+
+ VldMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize,
+ uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs,
+ bool wb);
+};
+
+class VstMultOp64 : public PredMacroOp
+{
+ protected:
+ uint8_t eSize, dataSize, numStructElems, numRegs;
+ bool wb;
+
+ VstMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize,
+ uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs,
+ bool wb);
+};
+
+class VldSingleOp64 : public PredMacroOp
+{
+ protected:
+ uint8_t eSize, dataSize, numStructElems, index;
+ bool wb, replicate;
+
+ VldSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize,
+ uint8_t dataSize, uint8_t numStructElems, uint8_t index,
+ bool wb, bool replicate = false);
+};
+
+class VstSingleOp64 : public PredMacroOp
+{
+ protected:
+ uint8_t eSize, dataSize, numStructElems, index;
+ bool wb, replicate;
+
+ VstSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize,
+ uint8_t dataSize, uint8_t numStructElems, uint8_t index,
+ bool wb, bool replicate = false);
+};
+
+/**
* Microops of the form
* PC = IntRegA
* CPSR = IntRegB
@@ -180,10 +291,10 @@ class MicroIntImmOp : public MicroOp
{
protected:
RegIndex ura, urb;
- uint32_t imm;
+ int32_t imm;
MicroIntImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
- RegIndex _ura, RegIndex _urb, uint32_t _imm)
+ RegIndex _ura, RegIndex _urb, int32_t _imm)
: MicroOp(mnem, machInst, __opClass),
ura(_ura), urb(_urb), imm(_imm)
{
@@ -192,6 +303,22 @@ class MicroIntImmOp : public MicroOp
std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
+class MicroIntImmXOp : public MicroOpX
+{
+ protected:
+ RegIndex ura, urb;
+ int64_t imm;
+
+ MicroIntImmXOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ RegIndex _ura, RegIndex _urb, int64_t _imm)
+ : MicroOpX(mnem, machInst, __opClass),
+ ura(_ura), urb(_urb), imm(_imm)
+ {
+ }
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
/**
* Microops of the form IntRegA = IntRegB op IntRegC
*/
@@ -210,6 +337,25 @@ class MicroIntOp : public MicroOp
std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
+class MicroIntRegXOp : public MicroOp
+{
+ protected:
+ RegIndex ura, urb, urc;
+ ArmExtendType type;
+ uint32_t shiftAmt;
+
+ MicroIntRegXOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ RegIndex _ura, RegIndex _urb, RegIndex _urc,
+ ArmExtendType _type, uint32_t _shiftAmt)
+ : MicroOp(mnem, machInst, __opClass),
+ ura(_ura), urb(_urb), urc(_urc),
+ type(_type), shiftAmt(_shiftAmt)
+ {
+ }
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
/**
* Microops of the form IntRegA = IntRegB op shifted IntRegC
*/
@@ -261,6 +407,61 @@ class MacroMemOp : public PredMacroOp
};
/**
+ * Base class for pair load/store instructions.
+ */
+class PairMemOp : public PredMacroOp
+{
+ public:
+ enum AddrMode {
+ AddrMd_Offset,
+ AddrMd_PreIndex,
+ AddrMd_PostIndex
+ };
+
+ protected:
+ PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ uint32_t size, bool fp, bool load, bool noAlloc, bool signExt,
+ bool exclusive, bool acrel, int64_t imm, AddrMode mode,
+ IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2);
+};
+
+class BigFpMemImmOp : public PredMacroOp
+{
+ protected:
+ BigFpMemImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ bool load, IntRegIndex dest, IntRegIndex base, int64_t imm);
+};
+
+class BigFpMemPostOp : public PredMacroOp
+{
+ protected:
+ BigFpMemPostOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ bool load, IntRegIndex dest, IntRegIndex base, int64_t imm);
+};
+
+class BigFpMemPreOp : public PredMacroOp
+{
+ protected:
+ BigFpMemPreOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ bool load, IntRegIndex dest, IntRegIndex base, int64_t imm);
+};
+
+class BigFpMemRegOp : public PredMacroOp
+{
+ protected:
+ BigFpMemRegOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ bool load, IntRegIndex dest, IntRegIndex base,
+ IntRegIndex offset, ArmExtendType type, int64_t imm);
+};
+
+class BigFpMemLitOp : public PredMacroOp
+{
+ protected:
+ BigFpMemLitOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
+ IntRegIndex dest, int64_t imm);
+};
+
+/**
* Base classes for microcoded integer memory instructions.
*/
class VldMultOp : public PredMacroOp
diff --git a/src/arch/arm/insts/mem.cc b/src/arch/arm/insts/mem.cc
index 552803b6a..15702ff83 100644
--- a/src/arch/arm/insts/mem.cc
+++ b/src/arch/arm/insts/mem.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010, 2012 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -157,6 +157,9 @@ SrsOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
case MODE_ABORT:
ss << "abort";
break;
+ case MODE_HYP:
+ ss << "hyp";
+ break;
case MODE_UNDEFINED:
ss << "undefined";
break;
diff --git a/src/arch/arm/insts/mem64.cc b/src/arch/arm/insts/mem64.cc
new file mode 100644
index 000000000..4d1fdd302
--- /dev/null
+++ b/src/arch/arm/insts/mem64.cc
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2011-2013 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#include "arch/arm/insts/mem64.hh"
+#include "arch/arm/tlb.hh"
+#include "base/loader/symtab.hh"
+#include "mem/request.hh"
+
+using namespace std;
+
+namespace ArmISA
+{
+
+std::string
+SysDC64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ ccprintf(ss, ", [");
+ printReg(ss, base);
+ ccprintf(ss, "]");
+ return ss.str();
+}
+
+
+
+void
+Memory64::startDisassembly(std::ostream &os) const
+{
+ printMnemonic(os, "", false);
+ printReg(os, dest);
+ ccprintf(os, ", [");
+ printReg(os, base);
+}
+
+void
+Memory64::setExcAcRel(bool exclusive, bool acrel)
+{
+ if (exclusive)
+ memAccessFlags |= Request::LLSC;
+ else
+ memAccessFlags |= ArmISA::TLB::AllowUnaligned;
+ if (acrel) {
+ flags[IsMemBarrier] = true;
+ flags[IsWriteBarrier] = true;
+ flags[IsReadBarrier] = true;
+ }
+}
+
+std::string
+MemoryImm64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ startDisassembly(ss);
+ if (imm)
+ ccprintf(ss, ", #%d", imm);
+ ccprintf(ss, "]");
+ return ss.str();
+}
+
+std::string
+MemoryDImm64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ccprintf(ss, ", ");
+ printReg(ss, dest2);
+ ccprintf(ss, ", [");
+ printReg(ss, base);
+ if (imm)
+ ccprintf(ss, ", #%d", imm);
+ ccprintf(ss, "]");
+ return ss.str();
+}
+
+std::string
+MemoryDImmEx64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, result);
+ ccprintf(ss, ", ");
+ printReg(ss, dest);
+ ccprintf(ss, ", ");
+ printReg(ss, dest2);
+ ccprintf(ss, ", [");
+ printReg(ss, base);
+ if (imm)
+ ccprintf(ss, ", #%d", imm);
+ ccprintf(ss, "]");
+ return ss.str();
+}
+
+std::string
+MemoryPreIndex64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ startDisassembly(ss);
+ ccprintf(ss, ", #%d]!", imm);
+ return ss.str();
+}
+
+std::string
+MemoryPostIndex64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ startDisassembly(ss);
+ ccprintf(ss, "]");
+ if (imm)
+ ccprintf(ss, ", #%d", imm);
+ return ss.str();
+}
+
+std::string
+MemoryReg64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ startDisassembly(ss);
+ printExtendOperand(false, ss, offset, type, shiftAmt);
+ ccprintf(ss, "]");
+ return ss.str();
+}
+
+std::string
+MemoryRaw64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ startDisassembly(ss);
+ ccprintf(ss, "]");
+ return ss.str();
+}
+
+std::string
+MemoryEx64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ccprintf(ss, ", ");
+ printReg(ss, result);
+ ccprintf(ss, ", [");
+ printReg(ss, base);
+ ccprintf(ss, "]");
+ return ss.str();
+}
+
+std::string
+MemoryLiteral64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ccprintf(ss, ", #%d", pc + imm);
+ return ss.str();
+}
+}
diff --git a/src/arch/arm/insts/mem64.hh b/src/arch/arm/insts/mem64.hh
new file mode 100644
index 000000000..21c1e1ea8
--- /dev/null
+++ b/src/arch/arm/insts/mem64.hh
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2011-2013 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+#ifndef __ARCH_ARM_MEM64_HH__
+#define __ARCH_ARM_MEM64_HH__
+
+#include "arch/arm/insts/static_inst.hh"
+
+namespace ArmISA
+{
+
+class SysDC64 : public ArmStaticInst
+{
+ protected:
+ IntRegIndex base;
+ IntRegIndex dest;
+ uint64_t imm;
+
+ SysDC64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _base, IntRegIndex _dest, uint64_t _imm)
+ : ArmStaticInst(mnem, _machInst, __opClass), base(_base), dest(_dest),
+ imm(_imm)
+ {}
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class MightBeMicro64 : public ArmStaticInst
+{
+ protected:
+ MightBeMicro64(const char *mnem, ExtMachInst _machInst, OpClass __opClass)
+ : ArmStaticInst(mnem, _machInst, __opClass)
+ {}
+
+ void
+ advancePC(PCState &pcState) const
+ {
+ if (flags[IsLastMicroop]) {
+ pcState.uEnd();
+ } else if (flags[IsMicroop]) {
+ pcState.uAdvance();
+ } else {
+ pcState.advance();
+ }
+ }
+};
+
+class Memory64 : public MightBeMicro64
+{
+ public:
+ enum AddrMode {
+ AddrMd_Offset,
+ AddrMd_PreIndex,
+ AddrMd_PostIndex
+ };
+
+ protected:
+
+ IntRegIndex dest;
+ IntRegIndex base;
+ /// True if the base register is SP (used for SP alignment checking).
+ bool baseIsSP;
+ static const unsigned numMicroops = 3;
+
+ StaticInstPtr *uops;
+
+ Memory64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _base)
+ : MightBeMicro64(mnem, _machInst, __opClass),
+ dest(_dest), base(_base), uops(NULL)
+ {
+ baseIsSP = isSP(_base);
+ }
+
+ virtual
+ ~Memory64()
+ {
+ delete [] uops;
+ }
+
+ StaticInstPtr
+ fetchMicroop(MicroPC microPC) const
+ {
+ assert(uops != NULL && microPC < numMicroops);
+ return uops[microPC];
+ }
+
+ void startDisassembly(std::ostream &os) const;
+
+ unsigned memAccessFlags;
+
+ void setExcAcRel(bool exclusive, bool acrel);
+};
+
+class MemoryImm64 : public Memory64
+{
+ protected:
+ int64_t imm;
+
+ MemoryImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _base, int64_t _imm)
+ : Memory64(mnem, _machInst, __opClass, _dest, _base), imm(_imm)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class MemoryDImm64 : public MemoryImm64
+{
+ protected:
+ IntRegIndex dest2;
+
+ MemoryDImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _dest2, IntRegIndex _base,
+ int64_t _imm)
+ : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm),
+ dest2(_dest2)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class MemoryDImmEx64 : public MemoryDImm64
+{
+ protected:
+ IntRegIndex result;
+
+ MemoryDImmEx64(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _result, IntRegIndex _dest, IntRegIndex _dest2,
+ IntRegIndex _base, int32_t _imm)
+ : MemoryDImm64(mnem, _machInst, __opClass, _dest, _dest2,
+ _base, _imm), result(_result)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class MemoryPreIndex64 : public MemoryImm64
+{
+ protected:
+ MemoryPreIndex64(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass, IntRegIndex _dest, IntRegIndex _base,
+ int64_t _imm)
+ : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class MemoryPostIndex64 : public MemoryImm64
+{
+ protected:
+ MemoryPostIndex64(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass, IntRegIndex _dest, IntRegIndex _base,
+ int64_t _imm)
+ : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class MemoryReg64 : public Memory64
+{
+ protected:
+ IntRegIndex offset;
+ ArmExtendType type;
+ uint64_t shiftAmt;
+
+ MemoryReg64(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass, IntRegIndex _dest, IntRegIndex _base,
+ IntRegIndex _offset, ArmExtendType _type,
+ uint64_t _shiftAmt)
+ : Memory64(mnem, _machInst, __opClass, _dest, _base),
+ offset(_offset), type(_type), shiftAmt(_shiftAmt)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class MemoryRaw64 : public Memory64
+{
+ protected:
+ MemoryRaw64(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass, IntRegIndex _dest, IntRegIndex _base)
+ : Memory64(mnem, _machInst, __opClass, _dest, _base)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class MemoryEx64 : public Memory64
+{
+ protected:
+ IntRegIndex result;
+
+ MemoryEx64(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass, IntRegIndex _dest, IntRegIndex _base,
+ IntRegIndex _result)
+ : Memory64(mnem, _machInst, __opClass, _dest, _base), result(_result)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class MemoryLiteral64 : public Memory64
+{
+ protected:
+ int64_t imm;
+
+ MemoryLiteral64(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass, IntRegIndex _dest, int64_t _imm)
+ : Memory64(mnem, _machInst, __opClass, _dest, INTREG_ZERO), imm(_imm)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+}
+
+#endif //__ARCH_ARM_MEM64_HH__
diff --git a/src/arch/arm/insts/misc.cc b/src/arch/arm/insts/misc.cc
index 6320bb6da..efc334c4b 100644
--- a/src/arch/arm/insts/misc.cc
+++ b/src/arch/arm/insts/misc.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010, 2012-2013 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -146,6 +146,32 @@ MsrRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
}
std::string
+MrrcOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss);
+ printReg(ss, dest);
+ ss << ", ";
+ printReg(ss, dest2);
+ ss << ", ";
+ printReg(ss, op1);
+ return ss.str();
+}
+
+std::string
+McrrOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss);
+ printReg(ss, dest);
+ ss << ", ";
+ printReg(ss, op1);
+ ss << ", ";
+ printReg(ss, op2);
+ return ss.str();
+}
+
+std::string
ImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
@@ -230,6 +256,16 @@ RegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
}
std::string
+RegImmImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss);
+ printReg(ss, dest);
+ ccprintf(ss, ", #%d, #%d", imm1, imm2);
+ return ss.str();
+}
+
+std::string
RegRegImmImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
diff --git a/src/arch/arm/insts/misc.hh b/src/arch/arm/insts/misc.hh
index c9e114f85..3d947a272 100644
--- a/src/arch/arm/insts/misc.hh
+++ b/src/arch/arm/insts/misc.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010, 2012-2013 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -94,6 +94,42 @@ class MsrRegOp : public MsrBase
std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
+class MrrcOp : public PredOp
+{
+ protected:
+ IntRegIndex op1;
+ IntRegIndex dest;
+ IntRegIndex dest2;
+ uint32_t imm;
+
+ MrrcOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _op1, IntRegIndex _dest, IntRegIndex _dest2,
+ uint32_t _imm) :
+ PredOp(mnem, _machInst, __opClass), op1(_op1), dest(_dest),
+ dest2(_dest2), imm(_imm)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class McrrOp : public PredOp
+{
+ protected:
+ IntRegIndex op1;
+ IntRegIndex op2;
+ IntRegIndex dest;
+ uint32_t imm;
+
+ McrrOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _op1, IntRegIndex _op2, IntRegIndex _dest,
+ uint32_t _imm) :
+ PredOp(mnem, _machInst, __opClass), op1(_op1), op2(_op2),
+ dest(_dest), imm(_imm)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
class ImmOp : public PredOp
{
protected:
@@ -220,6 +256,23 @@ class RegRegImmOp : public PredOp
std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
+class RegImmImmOp : public PredOp
+{
+ protected:
+ IntRegIndex dest;
+ IntRegIndex op1;
+ uint64_t imm1;
+ uint64_t imm2;
+
+ RegImmImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, uint64_t _imm1, uint64_t _imm2) :
+ PredOp(mnem, _machInst, __opClass),
+ dest(_dest), imm1(_imm1), imm2(_imm2)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
class RegRegImmImmOp : public PredOp
{
protected:
diff --git a/src/arch/arm/insts/misc64.cc b/src/arch/arm/insts/misc64.cc
new file mode 100644
index 000000000..3553020da
--- /dev/null
+++ b/src/arch/arm/insts/misc64.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2011-2013 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#include "arch/arm/insts/misc64.hh"
+
+std::string
+RegRegImmImmOp64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ss << ", ";
+ printReg(ss, op1);
+ ccprintf(ss, ", #%d, #%d", imm1, imm2);
+ return ss.str();
+}
+
+std::string
+RegRegRegImmOp64::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ss << ", ";
+ printReg(ss, op1);
+ ss << ", ";
+ printReg(ss, op2);
+ ccprintf(ss, ", #%d", imm);
+ return ss.str();
+}
+
+std::string
+UnknownOp64::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ return csprintf("%-10s (inst %#08x)", "unknown", machInst);
+}
diff --git a/src/arch/arm/insts/misc64.hh b/src/arch/arm/insts/misc64.hh
new file mode 100644
index 000000000..5a0e18224
--- /dev/null
+++ b/src/arch/arm/insts/misc64.hh
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2011-2013 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Gabe Black
+ */
+
+#ifndef __ARCH_ARM_INSTS_MISC64_HH__
+#define __ARCH_ARM_INSTS_MISC64_HH__
+
+#include "arch/arm/insts/static_inst.hh"
+
+class RegRegImmImmOp64 : public ArmStaticInst
+{
+ protected:
+ IntRegIndex dest;
+ IntRegIndex op1;
+ uint64_t imm1;
+ uint64_t imm2;
+
+ RegRegImmImmOp64(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1,
+ uint64_t _imm1, uint64_t _imm2) :
+ ArmStaticInst(mnem, _machInst, __opClass),
+ dest(_dest), op1(_op1), imm1(_imm1), imm2(_imm2)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class RegRegRegImmOp64 : public ArmStaticInst
+{
+ protected:
+ IntRegIndex dest;
+ IntRegIndex op1;
+ IntRegIndex op2;
+ uint64_t imm;
+
+ RegRegRegImmOp64(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1,
+ IntRegIndex _op2, uint64_t _imm) :
+ ArmStaticInst(mnem, _machInst, __opClass),
+ dest(_dest), op1(_op1), op2(_op2), imm(_imm)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class UnknownOp64 : public ArmStaticInst
+{
+ protected:
+
+ UnknownOp64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
+ ArmStaticInst(mnem, _machInst, __opClass)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+#endif
diff --git a/src/arch/arm/insts/neon64_mem.hh b/src/arch/arm/insts/neon64_mem.hh
new file mode 100644
index 000000000..01ce1b624
--- /dev/null
+++ b/src/arch/arm/insts/neon64_mem.hh
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2012-2013 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Mbou Eyole
+ * Giacomo Gabrielli
+ */
+
+/// @file
+/// Utility functions and datatypes used by AArch64 NEON memory instructions.
+
+#ifndef __ARCH_ARM_INSTS_NEON64_MEM_HH__
+#define __ARCH_ARM_INSTS_NEON64_MEM_HH__
+
+namespace ArmISA
+{
+
+typedef uint64_t XReg;
+
+/// 128-bit NEON vector register.
+struct VReg {
+ XReg hi;
+ XReg lo;
+};
+
+/// Write a single NEON vector element leaving the others untouched.
+inline void
+writeVecElem(VReg *dest, XReg src, int index, int eSize)
+{
+ // eSize must be less than 4:
+ // 0 -> 8-bit elems,
+ // 1 -> 16-bit elems,
+ // 2 -> 32-bit elems,
+ // 3 -> 64-bit elems
+ assert(eSize <= 3);
+
+ int eBits = 8 << eSize;
+ int lsbPos = index * eBits;
+ assert(lsbPos < 128);
+ int shiftAmt = lsbPos % 64;
+
+ XReg maskBits = -1;
+ if (eBits == 64) {
+ maskBits = 0;
+ } else {
+ maskBits = maskBits << eBits;
+ }
+ maskBits = ~maskBits;
+
+ XReg sMask = maskBits;
+ maskBits = sMask << shiftAmt;
+
+ if (lsbPos < 64) {
+ dest->lo = (dest->lo & (~maskBits)) | ((src & sMask) << shiftAmt);
+ } else {
+ dest->hi = (dest->hi & (~maskBits)) | ((src & sMask) << shiftAmt);
+ }
+}
+
+/// Read a single NEON vector element.
+inline XReg
+readVecElem(VReg src, int index, int eSize)
+{
+ // eSize must be less than 4:
+ // 0 -> 8-bit elems,
+ // 1 -> 16-bit elems,
+ // 2 -> 32-bit elems,
+ // 3 -> 64-bit elems
+ assert(eSize <= 3);
+
+ XReg data;
+
+ int eBits = 8 << eSize;
+ int lsbPos = index * eBits;
+ assert(lsbPos < 128);
+ int shiftAmt = lsbPos % 64;
+
+ XReg maskBits = -1;
+ if (eBits == 64) {
+ maskBits = 0;
+ } else {
+ maskBits = maskBits << eBits;
+ }
+ maskBits = ~maskBits;
+
+ if (lsbPos < 64) {
+ data = (src.lo >> shiftAmt) & maskBits;
+ } else {
+ data = (src.hi >> shiftAmt) & maskBits;
+ }
+ return data;
+}
+
+} // namespace ArmISA
+
+#endif // __ARCH_ARM_INSTS_NEON64_MEM_HH__
diff --git a/src/arch/arm/insts/pred_inst.hh b/src/arch/arm/insts/pred_inst.hh
index c441d1f32..c5e2ab386 100644
--- a/src/arch/arm/insts/pred_inst.hh
+++ b/src/arch/arm/insts/pred_inst.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010, 2012-2013 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -78,7 +78,8 @@ modified_imm(uint8_t ctrlImm, uint8_t dataImm)
}
static inline uint64_t
-simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid)
+simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid,
+ bool isAarch64 = false)
{
uint64_t bigData = data;
immValid = true;
@@ -133,12 +134,20 @@ simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid)
}
break;
case 0xf:
- if (!op) {
- uint64_t bVal = bits(bigData, 6) ? (0x1F) : (0x20);
- bigData = (bits(bigData, 5, 0) << 19) |
- (bVal << 25) | (bits(bigData, 7) << 31);
- bigData |= (bigData << 32);
- break;
+ {
+ uint64_t bVal = 0;
+ if (!op) {
+ bVal = bits(bigData, 6) ? (0x1F) : (0x20);
+ bigData = (bits(bigData, 5, 0) << 19) |
+ (bVal << 25) | (bits(bigData, 7) << 31);
+ bigData |= (bigData << 32);
+ break;
+ } else if (isAarch64) {
+ bVal = bits(bigData, 6) ? (0x0FF) : (0x100);
+ bigData = (bits(bigData, 5, 0) << 48) |
+ (bVal << 54) | (bits(bigData, 7) << 63);
+ break;
+ }
}
// Fall through, immediate encoding is invalid.
default:
@@ -179,11 +188,14 @@ class PredOp : public ArmStaticInst
/// Constructor
PredOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
- ArmStaticInst(mnem, _machInst, __opClass),
- condCode(machInst.itstateMask ?
- (ConditionCode)(uint8_t)machInst.itstateCond :
- (ConditionCode)(unsigned)machInst.condCode)
+ ArmStaticInst(mnem, _machInst, __opClass)
{
+ if (machInst.aarch64)
+ condCode = COND_UC;
+ else if (machInst.itstateMask)
+ condCode = (ConditionCode)(uint8_t)machInst.itstateCond;
+ else
+ condCode = (ConditionCode)(unsigned)machInst.condCode;
}
};
diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc
index 2a8dee162..260c29a84 100644
--- a/src/arch/arm/insts/static_inst.cc
+++ b/src/arch/arm/insts/static_inst.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -86,6 +86,90 @@ ArmStaticInst::shift_rm_imm(uint32_t base, uint32_t shamt,
return 0;
}
+int64_t
+ArmStaticInst::shiftReg64(uint64_t base, uint64_t shiftAmt,
+ ArmShiftType type, uint8_t width) const
+{
+ shiftAmt = shiftAmt % width;
+ ArmShiftType shiftType;
+ shiftType = (ArmShiftType)type;
+
+ switch (shiftType)
+ {
+ case LSL:
+ return base << shiftAmt;
+ case LSR:
+ if (shiftAmt == 0)
+ return base;
+ else
+ return (base & mask(width)) >> shiftAmt;
+ case ASR:
+ if (shiftAmt == 0) {
+ return base;
+ } else {
+ int sign_bit = bits(base, intWidth - 1);
+ base >>= shiftAmt;
+ base = sign_bit ? (base | ~mask(intWidth - shiftAmt)) : base;
+ return base & mask(intWidth);
+ }
+ case ROR:
+ if (shiftAmt == 0)
+ return base;
+ else
+ return (base << (width - shiftAmt)) | (base >> shiftAmt);
+ default:
+ ccprintf(std::cerr, "Unhandled shift type\n");
+ exit(1);
+ break;
+ }
+ return 0;
+}
+
+int64_t
+ArmStaticInst::extendReg64(uint64_t base, ArmExtendType type,
+ uint64_t shiftAmt, uint8_t width) const
+{
+ bool sign_extend = false;
+ int len = 0;
+ switch (type) {
+ case UXTB:
+ len = 8;
+ break;
+ case UXTH:
+ len = 16;
+ break;
+ case UXTW:
+ len = 32;
+ break;
+ case UXTX:
+ len = 64;
+ break;
+ case SXTB:
+ len = 8;
+ sign_extend = true;
+ break;
+ case SXTH:
+ len = 16;
+ sign_extend = true;
+ break;
+ case SXTW:
+ len = 32;
+ sign_extend = true;
+ break;
+ case SXTX:
+ len = 64;
+ sign_extend = true;
+ break;
+ }
+ len = len <= width - shiftAmt ? len : width - shiftAmt;
+ uint64_t tmp = (uint64_t) bits(base, len - 1, 0) << shiftAmt;
+ if (sign_extend) {
+ int sign_bit = bits(tmp, len + shiftAmt - 1);
+ tmp = sign_bit ? (tmp | ~mask(len + shiftAmt)) : tmp;
+ }
+ return tmp & mask(width);
+}
+
// Shift Rm by Rs
int32_t
ArmStaticInst::shift_rm_rs(uint32_t base, uint32_t shamt,
@@ -214,22 +298,33 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const
switch (regIdxToClass(reg, &rel_reg)) {
case IntRegClass:
- switch (rel_reg) {
- case PCReg:
- ccprintf(os, "pc");
- break;
- case StackPointerReg:
- ccprintf(os, "sp");
- break;
- case FramePointerReg:
- ccprintf(os, "fp");
- break;
- case ReturnAddressReg:
- ccprintf(os, "lr");
- break;
- default:
- ccprintf(os, "r%d", reg);
- break;
+ if (aarch64) {
+ if (reg == INTREG_UREG0)
+ ccprintf(os, "ureg0");
+ else if (reg == INTREG_SPX)
+ ccprintf(os, "%s%s", (intWidth == 32) ? "w" : "", "sp");
+ else if (reg == INTREG_X31)
+ ccprintf(os, "%szr", (intWidth == 32) ? "w" : "x");
+ else
+ ccprintf(os, "%s%d", (intWidth == 32) ? "w" : "x", reg);
+ } else {
+ switch (rel_reg) {
+ case PCReg:
+ ccprintf(os, "pc");
+ break;
+ case StackPointerReg:
+ ccprintf(os, "sp");
+ break;
+ case FramePointerReg:
+ ccprintf(os, "fp");
+ break;
+ case ReturnAddressReg:
+ ccprintf(os, "lr");
+ break;
+ default:
+ ccprintf(os, "r%d", reg);
+ break;
+ }
}
break;
case FloatRegClass:
@@ -247,67 +342,102 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const
void
ArmStaticInst::printMnemonic(std::ostream &os,
const std::string &suffix,
- bool withPred) const
+ bool withPred,
+ bool withCond64,
+ ConditionCode cond64) const
{
os << " " << mnemonic;
- if (withPred) {
- unsigned condCode = machInst.condCode;
- switch (condCode) {
- case COND_EQ:
- os << "eq";
- break;
- case COND_NE:
- os << "ne";
- break;
- case COND_CS:
- os << "cs";
- break;
- case COND_CC:
- os << "cc";
- break;
- case COND_MI:
- os << "mi";
- break;
- case COND_PL:
- os << "pl";
- break;
- case COND_VS:
- os << "vs";
- break;
- case COND_VC:
- os << "vc";
- break;
- case COND_HI:
- os << "hi";
- break;
- case COND_LS:
- os << "ls";
- break;
- case COND_GE:
- os << "ge";
- break;
- case COND_LT:
- os << "lt";
- break;
- case COND_GT:
- os << "gt";
- break;
- case COND_LE:
- os << "le";
- break;
- case COND_AL:
- // This one is implicit.
- break;
- case COND_UC:
- // Unconditional.
- break;
- default:
- panic("Unrecognized condition code %d.\n", condCode);
- }
+ if (withPred && !aarch64) {
+ printCondition(os, machInst.condCode);
+ os << suffix;
+ } else if (withCond64) {
+ os << ".";
+ printCondition(os, cond64);
os << suffix;
- if (machInst.bigThumb)
- os << ".w";
- os << " ";
+ }
+ if (machInst.bigThumb)
+ os << ".w";
+ os << " ";
+}
+
+void
+ArmStaticInst::printTarget(std::ostream &os, Addr target,
+ const SymbolTable *symtab) const
+{
+ Addr symbolAddr;
+ std::string symbol;
+
+ if (symtab && symtab->findNearestSymbol(target, symbol, symbolAddr)) {
+ ccprintf(os, "<%s", symbol);
+ if (symbolAddr != target)
+ ccprintf(os, "+%d>", target - symbolAddr);
+ else
+ ccprintf(os, ">");
+ } else {
+ ccprintf(os, "%#x", target);
+ }
+}
+
+void
+ArmStaticInst::printCondition(std::ostream &os,
+ unsigned code,
+ bool noImplicit) const
+{
+ switch (code) {
+ case COND_EQ:
+ os << "eq";
+ break;
+ case COND_NE:
+ os << "ne";
+ break;
+ case COND_CS:
+ os << "cs";
+ break;
+ case COND_CC:
+ os << "cc";
+ break;
+ case COND_MI:
+ os << "mi";
+ break;
+ case COND_PL:
+ os << "pl";
+ break;
+ case COND_VS:
+ os << "vs";
+ break;
+ case COND_VC:
+ os << "vc";
+ break;
+ case COND_HI:
+ os << "hi";
+ break;
+ case COND_LS:
+ os << "ls";
+ break;
+ case COND_GE:
+ os << "ge";
+ break;
+ case COND_LT:
+ os << "lt";
+ break;
+ case COND_GT:
+ os << "gt";
+ break;
+ case COND_LE:
+ os << "le";
+ break;
+ case COND_AL:
+ // This one is implicit.
+ if (noImplicit)
+ os << "al";
+ break;
+ case COND_UC:
+ // Unconditional.
+ if (noImplicit)
+ os << "uc";
+ break;
+ default:
+ panic("Unrecognized condition code %d.\n", code);
}
}
@@ -393,6 +523,38 @@ ArmStaticInst::printShiftOperand(std::ostream &os,
}
void
+ArmStaticInst::printExtendOperand(bool firstOperand, std::ostream &os,
+ IntRegIndex rm, ArmExtendType type,
+ int64_t shiftAmt) const
+{
+ if (!firstOperand)
+ ccprintf(os, ", ");
+ printReg(os, rm);
+ if (type == UXTX && shiftAmt == 0)
+ return;
+ switch (type) {
+ case UXTB: ccprintf(os, ", UXTB");
+ break;
+ case UXTH: ccprintf(os, ", UXTH");
+ break;
+ case UXTW: ccprintf(os, ", UXTW");
+ break;
+ case UXTX: ccprintf(os, ", LSL");
+ break;
+ case SXTB: ccprintf(os, ", SXTB");
+ break;
+ case SXTH: ccprintf(os, ", SXTH");
+ break;
+ case SXTW: ccprintf(os, ", SXTW");
+ break;
+ case SXTX: ccprintf(os, ", SXTX");
+ break;
+ }
+ if (type == UXTX || shiftAmt)
+ ccprintf(os, " #%d", shiftAmt);
+}
+
+void
ArmStaticInst::printDataInst(std::ostream &os, bool withImm,
bool immShift, bool s, IntRegIndex rd, IntRegIndex rn,
IntRegIndex rm, IntRegIndex rs, uint32_t shiftAmt,
diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh
index c36024ecd..aeec67ec2 100644
--- a/src/arch/arm/insts/static_inst.hh
+++ b/src/arch/arm/insts/static_inst.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -44,6 +44,7 @@
#include "arch/arm/faults.hh"
#include "arch/arm/utility.hh"
+#include "arch/arm/system.hh"
#include "base/trace.hh"
#include "cpu/static_inst.hh"
#include "sim/byteswap.hh"
@@ -55,6 +56,9 @@ namespace ArmISA
class ArmStaticInst : public StaticInst
{
protected:
+ bool aarch64;
+ uint8_t intWidth;
+
int32_t shift_rm_imm(uint32_t base, uint32_t shamt,
uint32_t type, uint32_t cfval) const;
int32_t shift_rm_rs(uint32_t base, uint32_t shamt,
@@ -65,6 +69,11 @@ class ArmStaticInst : public StaticInst
bool shift_carry_rs(uint32_t base, uint32_t shamt,
uint32_t type, uint32_t cfval) const;
+ int64_t shiftReg64(uint64_t base, uint64_t shiftAmt,
+ ArmShiftType type, uint8_t width) const;
+ int64_t extendReg64(uint64_t base, ArmExtendType type,
+ uint64_t shiftAmt, uint8_t width) const;
+
template<int width>
static inline bool
saturateOp(int32_t &res, int64_t op1, int64_t op2, bool sub=false)
@@ -135,6 +144,11 @@ class ArmStaticInst : public StaticInst
OpClass __opClass)
: StaticInst(mnem, _machInst, __opClass)
{
+ aarch64 = machInst.aarch64;
+ if (bits(machInst, 28, 24) == 0x10)
+ intWidth = 64; // Force 64-bit width for ADR/ADRP
+ else
+ intWidth = (aarch64 && bits(machInst, 31)) ? 64 : 32;
}
/// Print a register name for disassembly given the unique
@@ -142,13 +156,22 @@ class ArmStaticInst : public StaticInst
void printReg(std::ostream &os, int reg) const;
void printMnemonic(std::ostream &os,
const std::string &suffix = "",
- bool withPred = true) const;
+ bool withPred = true,
+ bool withCond64 = false,
+ ConditionCode cond64 = COND_UC) const;
+ void printTarget(std::ostream &os, Addr target,
+ const SymbolTable *symtab) const;
+ void printCondition(std::ostream &os, unsigned code,
+ bool noImplicit=false) const;
void printMemSymbol(std::ostream &os, const SymbolTable *symtab,
const std::string &prefix, const Addr addr,
const std::string &suffix) const;
void printShiftOperand(std::ostream &os, IntRegIndex rm,
bool immShift, uint32_t shiftAmt,
IntRegIndex rs, ArmShiftType type) const;
+ void printExtendOperand(bool firstOperand, std::ostream &os,
+ IntRegIndex rm, ArmExtendType type,
+ int64_t shiftAmt) const;
void printDataInst(std::ostream &os, bool withImm) const;
@@ -166,10 +189,13 @@ class ArmStaticInst : public StaticInst
std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
static inline uint32_t
- cpsrWriteByInstr(CPSR cpsr, uint32_t val,
- uint8_t byteMask, bool affectState, bool nmfi)
+ cpsrWriteByInstr(CPSR cpsr, uint32_t val, SCR scr, NSACR nsacr,
+ uint8_t byteMask, bool affectState, bool nmfi, ThreadContext *tc)
{
- bool privileged = (cpsr.mode != MODE_USER);
+ bool privileged = (cpsr.mode != MODE_USER);
+ bool haveVirt = ArmSystem::haveVirtualization(tc);
+ bool haveSecurity = ArmSystem::haveSecurity(tc);
+ bool isSecure = inSecureState(scr, cpsr) || !haveSecurity;
uint32_t bitMask = 0;
@@ -182,14 +208,53 @@ class ArmStaticInst : public StaticInst
}
if (bits(byteMask, 1)) {
unsigned highIdx = affectState ? 15 : 9;
- unsigned lowIdx = privileged ? 8 : 9;
+ unsigned lowIdx = (privileged && (isSecure || scr.aw || haveVirt))
+ ? 8 : 9;
bitMask = bitMask | mask(highIdx, lowIdx);
}
if (bits(byteMask, 0)) {
if (privileged) {
- bitMask = bitMask | mask(7, 6);
- if (!badMode((OperatingMode)(val & mask(5)))) {
- bitMask = bitMask | mask(5);
+ bitMask |= 1 << 7;
+ if ( (!nmfi || !((val >> 6) & 0x1)) &&
+ (isSecure || scr.fw || haveVirt) ) {
+ bitMask |= 1 << 6;
+ }
+ // Now check the new mode is allowed
+ OperatingMode newMode = (OperatingMode) (val & mask(5));
+ OperatingMode oldMode = (OperatingMode)(uint32_t)cpsr.mode;
+ if (!badMode(newMode)) {
+ bool validModeChange = true;
+ // Check for attempts to enter modes only permitted in
+ // Secure state from Non-secure state. These are Monitor
+ // mode ('10110'), and FIQ mode ('10001') if the Security
+ // Extensions have reserved it.
+ if (!isSecure && newMode == MODE_MON)
+ validModeChange = false;
+ if (!isSecure && newMode == MODE_FIQ && nsacr.rfr == 1)
+ validModeChange = false;
+ // There is no Hyp mode ('11010') in Secure state, so that
+ // is UNPREDICTABLE
+ if (scr.ns == 0 && newMode == MODE_HYP)
+ validModeChange = false;
+ // Cannot move into Hyp mode directly from a Non-secure
+ // PL1 mode
+ if (!isSecure && oldMode != MODE_HYP && newMode == MODE_HYP)
+ validModeChange = false;
+ // Cannot move out of Hyp mode with this function except
+ // on an exception return
+ if (oldMode == MODE_HYP && newMode != MODE_HYP && !affectState)
+ validModeChange = false;
+ // Must not change to 64 bit when running in 32 bit mode
+ if (!opModeIs64(oldMode) && opModeIs64(newMode))
+ validModeChange = false;
+
+ // If we passed all of the above then set the bit mask to
+ // copy the mode accross
+ if (validModeChange) {
+ bitMask = bitMask | mask(5);
+ } else {
+ warn_once("Illegal change to CPSR mode attempted\n");
+ }
} else {
warn_once("Ignoring write of bad mode to CPSR.\n");
}
@@ -198,11 +263,7 @@ class ArmStaticInst : public StaticInst
bitMask = bitMask | (1 << 5);
}
- bool cpsr_f = cpsr.f;
- uint32_t new_cpsr = ((uint32_t)cpsr & ~bitMask) | (val & bitMask);
- if (nmfi && !cpsr_f)
- new_cpsr &= ~(1 << 6);
- return new_cpsr;
+ return ((uint32_t)cpsr & ~bitMask) | (val & bitMask);
}
static inline uint32_t
@@ -296,12 +357,12 @@ class ArmStaticInst : public StaticInst
inline Fault
disabledFault() const
{
- if (FullSystem) {
- return new UndefinedInstruction();
- } else {
- return new UndefinedInstruction(machInst, false, mnemonic, true);
- }
+ return new UndefinedInstruction(machInst, false, mnemonic, true);
}
+
+ public:
+ virtual void
+ annotateFault(ArmFault *fault) {}
};
}
diff --git a/src/arch/arm/insts/vfp.cc b/src/arch/arm/insts/vfp.cc
index ca0f58226..03fdc83fa 100644
--- a/src/arch/arm/insts/vfp.cc
+++ b/src/arch/arm/insts/vfp.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -46,6 +46,37 @@
*/
std::string
+FpCondCompRegOp::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, op1);
+ ccprintf(ss, ", ");
+ printReg(ss, op2);
+ ccprintf(ss, ", #%d", defCc);
+ ccprintf(ss, ", ");
+ printCondition(ss, condCode, true);
+ return ss.str();
+}
+
+std::string
+FpCondSelOp::generateDisassembly(
+ Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss, "", false);
+ printReg(ss, dest);
+ ccprintf(ss, ", ");
+ printReg(ss, op1);
+ ccprintf(ss, ", ");
+ printReg(ss, op2);
+ ccprintf(ss, ", ");
+ printCondition(ss, condCode, true);
+ return ss.str();
+}
+
+std::string
FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
@@ -92,6 +123,21 @@ FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
}
std::string
+FpRegRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+ std::stringstream ss;
+ printMnemonic(ss);
+ printReg(ss, dest + FP_Reg_Base);
+ ss << ", ";
+ printReg(ss, op1 + FP_Reg_Base);
+ ss << ", ";
+ printReg(ss, op2 + FP_Reg_Base);
+ ss << ", ";
+ printReg(ss, op3 + FP_Reg_Base);
+ return ss.str();
+}
+
+std::string
FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
@@ -131,24 +177,25 @@ prepFpState(uint32_t rMode)
}
void
-finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush)
+finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask)
{
int exceptions = fetestexcept(FeAllExceptions);
bool underflow = false;
- if (exceptions & FeInvalid) {
+ if ((exceptions & FeInvalid) && mask.ioc) {
fpscr.ioc = 1;
}
- if (exceptions & FeDivByZero) {
+ if ((exceptions & FeDivByZero) && mask.dzc) {
fpscr.dzc = 1;
}
- if (exceptions & FeOverflow) {
+ if ((exceptions & FeOverflow) && mask.ofc) {
fpscr.ofc = 1;
}
if (exceptions & FeUnderflow) {
underflow = true;
- fpscr.ufc = 1;
+ if (mask.ufc)
+ fpscr.ufc = 1;
}
- if ((exceptions & FeInexact) && !(underflow && flush)) {
+ if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) {
fpscr.ixc = 1;
}
fesetround(state);
@@ -329,19 +376,33 @@ fixFpSFpDDest(FPSCR fpscr, float val)
return mid;
}
-uint16_t
-vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
- uint32_t rMode, bool ahp, float op)
+static inline uint16_t
+vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan,
+ uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble)
{
- uint32_t opBits = fpToBits(op);
+ uint32_t mWidth;
+ uint32_t eWidth;
+ uint32_t eHalfRange;
+ uint32_t sBitPos;
+
+ if (isDouble) {
+ mWidth = 52;
+ eWidth = 11;
+ } else {
+ mWidth = 23;
+ eWidth = 8;
+ }
+ sBitPos = eWidth + mWidth;
+ eHalfRange = (1 << (eWidth-1)) - 1;
+
// Extract the operand.
- bool neg = bits(opBits, 31);
- uint32_t exponent = bits(opBits, 30, 23);
- uint32_t oldMantissa = bits(opBits, 22, 0);
- uint32_t mantissa = oldMantissa >> (23 - 10);
+ bool neg = bits(opBits, sBitPos);
+ uint32_t exponent = bits(opBits, sBitPos-1, mWidth);
+ uint64_t oldMantissa = bits(opBits, mWidth-1, 0);
+ uint32_t mantissa = oldMantissa >> (mWidth - 10);
// Do the conversion.
- uint32_t extra = oldMantissa & mask(23 - 10);
- if (exponent == 0xff) {
+ uint64_t extra = oldMantissa & mask(mWidth - 10);
+ if (exponent == mask(eWidth)) {
if (oldMantissa != 0) {
// Nans.
if (bits(mantissa, 9) == 0) {
@@ -379,7 +440,6 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
if (exponent == 0) {
// Denormalized.
-
// If flush to zero is on, this shouldn't happen.
assert(!flush);
@@ -407,13 +467,13 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
// We need to track the dropped bits differently since
// more can be dropped by denormalizing.
- bool topOne = bits(extra, 12);
- bool restZeros = bits(extra, 11, 0) == 0;
+ bool topOne = bits(extra, mWidth - 10 - 1);
+ bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0;
- if (exponent <= (127 - 15)) {
+ if (exponent <= (eHalfRange - 15)) {
// The result is too small. Denormalize.
mantissa |= (1 << 10);
- while (mantissa && exponent <= (127 - 15)) {
+ while (mantissa && exponent <= (eHalfRange - 15)) {
restZeros = restZeros && !topOne;
topOne = bits(mantissa, 0);
mantissa = mantissa >> 1;
@@ -424,7 +484,7 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
exponent = 0;
} else {
// Change bias.
- exponent -= (127 - 15);
+ exponent -= (eHalfRange - 15);
}
if (exponent == 0 && (inexact || fpscr.ufe)) {
@@ -488,155 +548,115 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
return result;
}
-float
-vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
+uint16_t
+vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
+ uint32_t rMode, bool ahp, float op)
{
- float junk = 0.0;
+ uint64_t opBits = fpToBits(op);
+ return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false);
+}
+
+uint16_t
+vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
+ uint32_t rMode, bool ahp, double op)
+{
+ uint64_t opBits = fpToBits(op);
+ return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true);
+}
+
+static inline uint64_t
+vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble)
+{
+ uint32_t mWidth;
+ uint32_t eWidth;
+ uint32_t eHalfRange;
+ uint32_t sBitPos;
+
+ if (isDouble) {
+ mWidth = 52;
+ eWidth = 11;
+ } else {
+ mWidth = 23;
+ eWidth = 8;
+ }
+ sBitPos = eWidth + mWidth;
+ eHalfRange = (1 << (eWidth-1)) - 1;
+
// Extract the bitfields.
bool neg = bits(op, 15);
uint32_t exponent = bits(op, 14, 10);
- uint32_t mantissa = bits(op, 9, 0);
+ uint64_t mantissa = bits(op, 9, 0);
// Do the conversion.
if (exponent == 0) {
if (mantissa != 0) {
// Normalize the value.
- exponent = exponent + (127 - 15) + 1;
+ exponent = exponent + (eHalfRange - 15) + 1;
while (mantissa < (1 << 10)) {
mantissa = mantissa << 1;
exponent--;
}
}
- mantissa = mantissa << (23 - 10);
+ mantissa = mantissa << (mWidth - 10);
} else if (exponent == 0x1f && !ahp) {
// Infinities and nans.
- exponent = 0xff;
+ exponent = mask(eWidth);
if (mantissa != 0) {
// Nans.
- mantissa = mantissa << (23 - 10);
- if (bits(mantissa, 22) == 0) {
+ mantissa = mantissa << (mWidth - 10);
+ if (bits(mantissa, mWidth-1) == 0) {
// Signalling nan.
fpscr.ioc = 1;
- mantissa |= (1 << 22);
+ mantissa |= (((uint64_t) 1) << (mWidth-1));
}
if (defaultNan) {
- mantissa &= ~mask(22);
+ mantissa &= ~mask(mWidth-1);
neg = false;
}
}
} else {
- exponent = exponent + (127 - 15);
- mantissa = mantissa << (23 - 10);
+ exponent = exponent + (eHalfRange - 15);
+ mantissa = mantissa << (mWidth - 10);
}
// Reassemble the result.
- uint32_t result = bits(mantissa, 22, 0);
- replaceBits(result, 30, 23, exponent);
- if (neg)
- result |= (1 << 31);
+ uint64_t result = bits(mantissa, mWidth-1, 0);
+ replaceBits(result, sBitPos-1, mWidth, exponent);
+ if (neg) {
+ result |= (((uint64_t) 1) << sBitPos);
+ }
+ return result;
+}
+
+double
+vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
+{
+ double junk = 0.0;
+ uint64_t result;
+
+ result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true);
return bitsToFp(result, junk);
}
-uint64_t
-vfpFpSToFixed(float val, bool isSigned, bool half,
- uint8_t imm, bool rzero)
+float
+vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
{
- int rmode = rzero ? FeRoundZero : fegetround();
- __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
- fesetround(FeRoundNearest);
- val = val * powf(2.0, imm);
- __asm__ __volatile__("" : "=m" (val) : "m" (val));
- fesetround(rmode);
- feclearexcept(FeAllExceptions);
- __asm__ __volatile__("" : "=m" (val) : "m" (val));
- float origVal = val;
- val = rintf(val);
- int fpType = std::fpclassify(val);
- if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
- if (fpType == FP_NAN) {
- feraiseexcept(FeInvalid);
- }
- val = 0.0;
- } else if (origVal != val) {
- switch (rmode) {
- case FeRoundNearest:
- if (origVal - val > 0.5)
- val += 1.0;
- else if (val - origVal > 0.5)
- val -= 1.0;
- break;
- case FeRoundDown:
- if (origVal < val)
- val -= 1.0;
- break;
- case FeRoundUpward:
- if (origVal > val)
- val += 1.0;
- break;
- }
- feraiseexcept(FeInexact);
- }
+ float junk = 0.0;
+ uint64_t result;
- if (isSigned) {
- if (half) {
- if ((double)val < (int16_t)(1 << 15)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int16_t)(1 << 15);
- }
- if ((double)val > (int16_t)mask(15)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int16_t)mask(15);
- }
- return (int16_t)val;
- } else {
- if ((double)val < (int32_t)(1 << 31)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int32_t)(1 << 31);
- }
- if ((double)val > (int32_t)mask(31)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int32_t)mask(31);
- }
- return (int32_t)val;
- }
- } else {
- if (half) {
- if ((double)val < 0) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return 0;
- }
- if ((double)val > (mask(16))) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return mask(16);
- }
- return (uint16_t)val;
- } else {
- if ((double)val < 0) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return 0;
- }
- if ((double)val > (mask(32))) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return mask(32);
- }
- return (uint32_t)val;
- }
- }
+ result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false);
+ return bitsToFp(result, junk);
}
float
vfpUFixedToFpS(bool flush, bool defaultNan,
- uint32_t val, bool half, uint8_t imm)
+ uint64_t val, uint8_t width, uint8_t imm)
{
fesetround(FeRoundNearest);
- if (half)
+ if (width == 16)
val = (uint16_t)val;
+ else if (width == 32)
+ val = (uint32_t)val;
+ else if (width != 64)
+ panic("Unsupported width %d", width);
float scale = powf(2.0, imm);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
@@ -646,11 +666,16 @@ vfpUFixedToFpS(bool flush, bool defaultNan,
float
vfpSFixedToFpS(bool flush, bool defaultNan,
- int32_t val, bool half, uint8_t imm)
+ int64_t val, uint8_t width, uint8_t imm)
{
fesetround(FeRoundNearest);
- if (half)
+ if (width == 16)
val = sext<16>(val & mask(16));
+ else if (width == 32)
+ val = sext<32>(val & mask(32));
+ else if (width != 64)
+ panic("Unsupported width %d", width);
+
float scale = powf(2.0, imm);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
@@ -658,106 +683,19 @@ vfpSFixedToFpS(bool flush, bool defaultNan,
return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
}
-uint64_t
-vfpFpDToFixed(double val, bool isSigned, bool half,
- uint8_t imm, bool rzero)
-{
- int rmode = rzero ? FeRoundZero : fegetround();
- fesetround(FeRoundNearest);
- val = val * pow(2.0, imm);
- __asm__ __volatile__("" : "=m" (val) : "m" (val));
- fesetround(rmode);
- feclearexcept(FeAllExceptions);
- __asm__ __volatile__("" : "=m" (val) : "m" (val));
- double origVal = val;
- val = rint(val);
- int fpType = std::fpclassify(val);
- if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
- if (fpType == FP_NAN) {
- feraiseexcept(FeInvalid);
- }
- val = 0.0;
- } else if (origVal != val) {
- switch (rmode) {
- case FeRoundNearest:
- if (origVal - val > 0.5)
- val += 1.0;
- else if (val - origVal > 0.5)
- val -= 1.0;
- break;
- case FeRoundDown:
- if (origVal < val)
- val -= 1.0;
- break;
- case FeRoundUpward:
- if (origVal > val)
- val += 1.0;
- break;
- }
- feraiseexcept(FeInexact);
- }
- if (isSigned) {
- if (half) {
- if (val < (int16_t)(1 << 15)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int16_t)(1 << 15);
- }
- if (val > (int16_t)mask(15)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int16_t)mask(15);
- }
- return (int16_t)val;
- } else {
- if (val < (int32_t)(1 << 31)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int32_t)(1 << 31);
- }
- if (val > (int32_t)mask(31)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return (int32_t)mask(31);
- }
- return (int32_t)val;
- }
- } else {
- if (half) {
- if (val < 0) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return 0;
- }
- if (val > mask(16)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return mask(16);
- }
- return (uint16_t)val;
- } else {
- if (val < 0) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return 0;
- }
- if (val > mask(32)) {
- feraiseexcept(FeInvalid);
- feclearexcept(FeInexact);
- return mask(32);
- }
- return (uint32_t)val;
- }
- }
-}
double
vfpUFixedToFpD(bool flush, bool defaultNan,
- uint32_t val, bool half, uint8_t imm)
+ uint64_t val, uint8_t width, uint8_t imm)
{
fesetround(FeRoundNearest);
- if (half)
+ if (width == 16)
val = (uint16_t)val;
+ else if (width == 32)
+ val = (uint32_t)val;
+ else if (width != 64)
+ panic("Unsupported width %d", width);
+
double scale = pow(2.0, imm);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
@@ -767,11 +705,16 @@ vfpUFixedToFpD(bool flush, bool defaultNan,
double
vfpSFixedToFpD(bool flush, bool defaultNan,
- int32_t val, bool half, uint8_t imm)
+ int64_t val, uint8_t width, uint8_t imm)
{
fesetround(FeRoundNearest);
- if (half)
+ if (width == 16)
val = sext<16>(val & mask(16));
+ else if (width == 32)
+ val = sext<32>(val & mask(32));
+ else if (width != 64)
+ panic("Unsupported width %d", width);
+
double scale = pow(2.0, imm);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
@@ -976,6 +919,85 @@ template
double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
double op1, double op2) const;
+// @TODO remove this function when we've finished switching all FMA code to use the new FPLIB
+template <class fpType>
+fpType
+FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
+ fpType (*func)(fpType, fpType, fpType),
+ bool flush, bool defaultNan, uint32_t rMode) const
+{
+ const bool single = (sizeof(fpType) == sizeof(float));
+ fpType junk = 0.0;
+
+ if (flush && (flushToZero(op1, op2) || flushToZero(op3)))
+ fpscr.idc = 1;
+ VfpSavedState state = prepFpState(rMode);
+ __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state)
+ : "m" (op1), "m" (op2), "m" (op3), "m" (state));
+ fpType dest = func(op1, op2, op3);
+ __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest));
+
+ int fpClass = std::fpclassify(dest);
+ // Get NAN behavior right. This varies between x86 and ARM.
+ if (fpClass == FP_NAN) {
+ const uint64_t qnan =
+ single ? 0x7fc00000 : ULL(0x7ff8000000000000);
+ const bool nan1 = std::isnan(op1);
+ const bool nan2 = std::isnan(op2);
+ const bool nan3 = std::isnan(op3);
+ const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
+ const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
+ const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan);
+ if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) {
+ dest = bitsToFp(qnan, junk);
+ } else if (signal1) {
+ dest = bitsToFp(fpToBits(op1) | qnan, junk);
+ } else if (signal2) {
+ dest = bitsToFp(fpToBits(op2) | qnan, junk);
+ } else if (signal3) {
+ dest = bitsToFp(fpToBits(op3) | qnan, junk);
+ } else if (nan1) {
+ dest = op1;
+ } else if (nan2) {
+ dest = op2;
+ } else if (nan3) {
+ dest = op3;
+ }
+ } else if (flush && flushToZero(dest)) {
+ feraiseexcept(FeUnderflow);
+ } else if ((
+ (single && (dest == bitsToFp(0x00800000, junk) ||
+ dest == bitsToFp(0x80800000, junk))) ||
+ (!single &&
+ (dest == bitsToFp(ULL(0x0010000000000000), junk) ||
+ dest == bitsToFp(ULL(0x8010000000000000), junk)))
+ ) && rMode != VfpRoundZero) {
+ /*
+ * Correct for the fact that underflow is detected -before- rounding
+ * in ARM and -after- rounding in x86.
+ */
+ fesetround(FeRoundZero);
+ __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3)
+ : "m" (op1), "m" (op2), "m" (op3));
+ fpType temp = func(op1, op2, op2);
+ __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp));
+ if (flush && flushToZero(temp)) {
+ dest = temp;
+ }
+ }
+ finishVfp(fpscr, state, flush);
+ return dest;
+}
+
+template
+float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3,
+ float (*func)(float, float, float),
+ bool flush, bool defaultNan, uint32_t rMode) const;
+template
+double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3,
+ double (*func)(double, double, double),
+ bool flush, bool defaultNan, uint32_t rMode) const;
+
template <class fpType>
fpType
FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh
index 9babaae04..f17f90973 100644
--- a/src/arch/arm/insts/vfp.hh
+++ b/src/arch/arm/insts/vfp.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -104,7 +104,8 @@ enum VfpRoundingMode
VfpRoundNearest = 0,
VfpRoundUpward = 1,
VfpRoundDown = 2,
- VfpRoundZero = 3
+ VfpRoundZero = 3,
+ VfpRoundAway = 4
};
static inline float bitsToFp(uint64_t, float);
@@ -212,7 +213,7 @@ isSnan(fpType val)
typedef int VfpSavedState;
VfpSavedState prepFpState(uint32_t rMode);
-void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush);
+void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask = FpscrExcMask);
template <class fpType>
fpType fixDest(FPSCR fpscr, fpType val, fpType op1);
@@ -228,7 +229,11 @@ double fixFpSFpDDest(FPSCR fpscr, float val);
uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
uint32_t rMode, bool ahp, float op);
-float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
+uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
+ uint32_t rMode, bool ahp, double op);
+
+float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
+double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
static inline double
makeDouble(uint32_t low, uint32_t high)
@@ -249,19 +254,192 @@ highFromDouble(double val)
return fpToBits(val) >> 32;
}
-uint64_t vfpFpSToFixed(float val, bool isSigned, bool half,
- uint8_t imm, bool rzero = true);
+static inline void
+setFPExceptions(int exceptions) {
+ feclearexcept(FeAllExceptions);
+ feraiseexcept(exceptions);
+}
+
+template <typename T>
+uint64_t
+vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool
+ useRmode = true, VfpRoundingMode roundMode = VfpRoundZero,
+ bool aarch64 = false)
+{
+ int rmode;
+ bool roundAwayFix = false;
+
+ if (!useRmode) {
+ rmode = fegetround();
+ } else {
+ switch (roundMode)
+ {
+ case VfpRoundNearest:
+ rmode = FeRoundNearest;
+ break;
+ case VfpRoundUpward:
+ rmode = FeRoundUpward;
+ break;
+ case VfpRoundDown:
+ rmode = FeRoundDown;
+ break;
+ case VfpRoundZero:
+ rmode = FeRoundZero;
+ break;
+ case VfpRoundAway:
+ // There is no equivalent rounding mode, use round down and we'll
+ // fix it later
+ rmode = FeRoundDown;
+ roundAwayFix = true;
+ break;
+ default:
+ panic("Unsupported roundMode %d\n", roundMode);
+ }
+ }
+ __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
+ fesetround(FeRoundNearest);
+ val = val * pow(2.0, imm);
+ __asm__ __volatile__("" : "=m" (val) : "m" (val));
+ fesetround(rmode);
+ feclearexcept(FeAllExceptions);
+ __asm__ __volatile__("" : "=m" (val) : "m" (val));
+ T origVal = val;
+ val = rint(val);
+ __asm__ __volatile__("" : "=m" (val) : "m" (val));
+
+ int exceptions = fetestexcept(FeAllExceptions);
+
+ int fpType = std::fpclassify(val);
+ if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
+ if (fpType == FP_NAN) {
+ exceptions |= FeInvalid;
+ }
+ val = 0.0;
+ } else if (origVal != val) {
+ switch (rmode) {
+ case FeRoundNearest:
+ if (origVal - val > 0.5)
+ val += 1.0;
+ else if (val - origVal > 0.5)
+ val -= 1.0;
+ break;
+ case FeRoundDown:
+ if (roundAwayFix) {
+ // The ordering on the subtraction looks a bit odd in that we
+ // don't do the obvious origVal - val, instead we do
+ // -(val - origVal). This is required to get the correct bit
+ // exact behaviour when very close to the 0.5 threshold.
+ volatile T error = val;
+ error -= origVal;
+ error = -error;
+ if ( (error > 0.5) ||
+ ((error == 0.5) && (val >= 0)) )
+ val += 1.0;
+ } else {
+ if (origVal < val)
+ val -= 1.0;
+ }
+ break;
+ case FeRoundUpward:
+ if (origVal > val)
+ val += 1.0;
+ break;
+ }
+ exceptions |= FeInexact;
+ }
+
+ __asm__ __volatile__("" : "=m" (val) : "m" (val));
+
+ if (isSigned) {
+ bool outOfRange = false;
+ int64_t result = (int64_t) val;
+ uint64_t finalVal;
+
+ if (!aarch64) {
+ if (width == 16) {
+ finalVal = (int16_t)val;
+ } else if (width == 32) {
+ finalVal =(int32_t)val;
+ } else if (width == 64) {
+ finalVal = result;
+ } else {
+ panic("Unsupported width %d\n", width);
+ }
+
+ // check if value is in range
+ int64_t minVal = ~mask(width-1);
+ if ((double)val < minVal) {
+ outOfRange = true;
+ finalVal = minVal;
+ }
+ int64_t maxVal = mask(width-1);
+ if ((double)val > maxVal) {
+ outOfRange = true;
+ finalVal = maxVal;
+ }
+ } else {
+ bool isNeg = val < 0;
+ finalVal = result & mask(width);
+ // If the result is supposed to be less than 64 bits check that the
+ // upper bits that got thrown away are just sign extension bits
+ if (width != 64) {
+ outOfRange = ((uint64_t) result >> (width - 1)) !=
+ (isNeg ? mask(64-width+1) : 0);
+ }
+ // If the original floating point value doesn't match the integer
+ // version, we are also out of range, so create a saturated
+ // result.
+ if (isNeg) {
+ outOfRange |= val < result;
+ if (outOfRange) {
+ finalVal = 1LL << (width-1);
+ }
+ } else {
+ outOfRange |= val > result;
+ if (outOfRange) {
+ finalVal = mask(width-1);
+ }
+ }
+ }
+
+ // Raise an exception if the value was out of range
+ if (outOfRange) {
+ exceptions |= FeInvalid;
+ exceptions &= ~FeInexact;
+ }
+ setFPExceptions(exceptions);
+ return finalVal;
+ } else {
+ if ((double)val < 0) {
+ exceptions |= FeInvalid;
+ exceptions &= ~FeInexact;
+ setFPExceptions(exceptions);
+ return 0;
+ }
+
+ uint64_t result = ((uint64_t) val) & mask(width);
+ if (val > result) {
+ exceptions |= FeInvalid;
+ exceptions &= ~FeInexact;
+ setFPExceptions(exceptions);
+ return mask(width);
+ }
+
+ setFPExceptions(exceptions);
+ return result;
+ }
+};
+
+
float vfpUFixedToFpS(bool flush, bool defaultNan,
- uint32_t val, bool half, uint8_t imm);
+ uint64_t val, uint8_t width, uint8_t imm);
float vfpSFixedToFpS(bool flush, bool defaultNan,
- int32_t val, bool half, uint8_t imm);
+ int64_t val, uint8_t width, uint8_t imm);
-uint64_t vfpFpDToFixed(double val, bool isSigned, bool half,
- uint8_t imm, bool rzero = true);
double vfpUFixedToFpD(bool flush, bool defaultNan,
- uint32_t val, bool half, uint8_t imm);
+ uint64_t val, uint8_t width, uint8_t imm);
double vfpSFixedToFpD(bool flush, bool defaultNan,
- int32_t val, bool half, uint8_t imm);
+ int64_t val, uint8_t width, uint8_t imm);
float fprSqrtEstimate(FPSCR &fpscr, float op);
uint32_t unsignedRSqrtEstimate(uint32_t op);
@@ -292,6 +470,20 @@ class VfpMacroOp : public PredMacroOp
void nextIdxs(IntRegIndex &dest);
};
+template <typename T>
+static inline T
+fpAdd(T a, T b)
+{
+ return a + b;
+};
+
+template <typename T>
+static inline T
+fpSub(T a, T b)
+{
+ return a - b;
+};
+
static inline float
fpAddS(float a, float b)
{
@@ -328,6 +520,54 @@ fpDivD(double a, double b)
return a / b;
}
+template <typename T>
+static inline T
+fpDiv(T a, T b)
+{
+ return a / b;
+};
+
+template <typename T>
+static inline T
+fpMulX(T a, T b)
+{
+ uint64_t opData;
+ uint32_t sign1;
+ uint32_t sign2;
+ const bool single = (sizeof(T) == sizeof(float));
+ if (single) {
+ opData = (fpToBits(a));
+ sign1 = opData>>31;
+ opData = (fpToBits(b));
+ sign2 = opData>>31;
+ } else {
+ opData = (fpToBits(a));
+ sign1 = opData>>63;
+ opData = (fpToBits(b));
+ sign2 = opData>>63;
+ }
+ bool inf1 = (std::fpclassify(a) == FP_INFINITE);
+ bool inf2 = (std::fpclassify(b) == FP_INFINITE);
+ bool zero1 = (std::fpclassify(a) == FP_ZERO);
+ bool zero2 = (std::fpclassify(b) == FP_ZERO);
+ if ((inf1 && zero2) || (zero1 && inf2)) {
+ if(sign1 ^ sign2)
+ return (T)(-2.0);
+ else
+ return (T)(2.0);
+ } else {
+ return (a * b);
+ }
+};
+
+
+template <typename T>
+static inline T
+fpMul(T a, T b)
+{
+ return a * b;
+};
+
static inline float
fpMulS(float a, float b)
{
@@ -340,23 +580,140 @@ fpMulD(double a, double b)
return a * b;
}
-static inline float
-fpMaxS(float a, float b)
+template <typename T>
+static inline T
+// @todo remove this when all calls to it have been replaced with the new fplib implementation
+fpMulAdd(T op1, T op2, T addend)
+{
+ T result;
+
+ if (sizeof(T) == sizeof(float))
+ result = fmaf(op1, op2, addend);
+ else
+ result = fma(op1, op2, addend);
+
+ // ARM doesn't generate signed NaNs from this operation, so fix up the result
+ if (std::isnan(result) && !std::isnan(op1) &&
+ !std::isnan(op2) && !std::isnan(addend))
+ {
+ uint64_t bitMask = ULL(0x1) << ((sizeof(T) * 8) - 1);
+ result = bitsToFp(fpToBits(result) & ~bitMask, op1);
+ }
+ return result;
+}
+
+template <typename T>
+static inline T
+fpRIntX(T a, FPSCR &fpscr)
+{
+ T rVal;
+
+ rVal = rint(a);
+ if (rVal != a && !std::isnan(a))
+ fpscr.ixc = 1;
+ return (rVal);
+};
+
+template <typename T>
+static inline T
+fpMaxNum(T a, T b)
{
+ const bool single = (sizeof(T) == sizeof(float));
+ const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
+
+ if (std::isnan(a))
+ return ((fpToBits(a) & qnan) == qnan) ? b : a;
+ if (std::isnan(b))
+ return ((fpToBits(b) & qnan) == qnan) ? a : b;
// Handle comparisons of +0 and -0.
if (!std::signbit(a) && std::signbit(b))
return a;
- return fmaxf(a, b);
-}
+ return fmax(a, b);
+};
-static inline float
-fpMinS(float a, float b)
+template <typename T>
+static inline T
+fpMax(T a, T b)
{
+ if (std::isnan(a))
+ return a;
+ if (std::isnan(b))
+ return b;
+ return fpMaxNum<T>(a, b);
+};
+
+template <typename T>
+static inline T
+fpMinNum(T a, T b)
+{
+ const bool single = (sizeof(T) == sizeof(float));
+ const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
+
+ if (std::isnan(a))
+ return ((fpToBits(a) & qnan) == qnan) ? b : a;
+ if (std::isnan(b))
+ return ((fpToBits(b) & qnan) == qnan) ? a : b;
// Handle comparisons of +0 and -0.
if (std::signbit(a) && !std::signbit(b))
return a;
- return fminf(a, b);
-}
+ return fmin(a, b);
+};
+
+template <typename T>
+static inline T
+fpMin(T a, T b)
+{
+ if (std::isnan(a))
+ return a;
+ if (std::isnan(b))
+ return b;
+ return fpMinNum<T>(a, b);
+};
+
+template <typename T>
+static inline T
+fpRSqrts(T a, T b)
+{
+ int fpClassA = std::fpclassify(a);
+ int fpClassB = std::fpclassify(b);
+ T aXb;
+ int fpClassAxB;
+
+ if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
+ (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
+ return 1.5;
+ }
+ aXb = a*b;
+ fpClassAxB = std::fpclassify(aXb);
+ if(fpClassAxB == FP_SUBNORMAL) {
+ feraiseexcept(FeUnderflow);
+ return 1.5;
+ }
+ return (3.0 - (a * b)) / 2.0;
+};
+
+template <typename T>
+static inline T
+fpRecps(T a, T b)
+{
+ int fpClassA = std::fpclassify(a);
+ int fpClassB = std::fpclassify(b);
+ T aXb;
+ int fpClassAxB;
+
+ if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
+ (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
+ return 2.0;
+ }
+ aXb = a*b;
+ fpClassAxB = std::fpclassify(aXb);
+ if(fpClassAxB == FP_SUBNORMAL) {
+ feraiseexcept(FeUnderflow);
+ return 2.0;
+ }
+ return 2.0 - (a * b);
+};
+
static inline float
fpRSqrtsS(float a, float b)
@@ -400,6 +757,23 @@ fpRecpsS(float a, float b)
return 2.0 - (a * b);
}
+template <typename T>
+static inline T
+roundNEven(T a) {
+ T val;
+
+ val = round(a);
+ if (a - val == 0.5) {
+ if ( (((int) a) & 1) == 0 ) val += 1.0;
+ }
+ else if (a - val == -0.5) {
+ if ( (((int) a) & 1) == 0 ) val -= 1.0;
+ }
+ return val;
+}
+
+
+
class FpOp : public PredOp
{
protected:
@@ -457,6 +831,12 @@ class FpOp : public PredOp
template <class fpType>
fpType
+ ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
+ fpType (*func)(fpType, fpType, fpType),
+ bool flush, bool defaultNan, uint32_t rMode) const;
+
+ template <class fpType>
+ fpType
binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
fpType (*func)(fpType, fpType),
bool flush, bool defaultNan, uint32_t rMode) const;
@@ -478,6 +858,55 @@ class FpOp : public PredOp
pcState.advance();
}
}
+
+ float
+ fpSqrt (FPSCR fpscr,float x) const
+ {
+
+ return unaryOp(fpscr,x,sqrtf,fpscr.fz,fpscr.rMode);
+
+ }
+
+ double
+ fpSqrt (FPSCR fpscr,double x) const
+ {
+
+ return unaryOp(fpscr,x,sqrt,fpscr.fz,fpscr.rMode);
+
+ }
+};
+
+class FpCondCompRegOp : public FpOp
+{
+ protected:
+ IntRegIndex op1, op2;
+ ConditionCode condCode;
+ uint8_t defCc;
+
+ FpCondCompRegOp(const char *mnem, ExtMachInst _machInst,
+ OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2,
+ ConditionCode _condCode, uint8_t _defCc) :
+ FpOp(mnem, _machInst, __opClass),
+ op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
+class FpCondSelOp : public FpOp
+{
+ protected:
+ IntRegIndex dest, op1, op2;
+ ConditionCode condCode;
+
+ FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
+ ConditionCode _condCode) :
+ FpOp(mnem, _machInst, __opClass),
+ dest(_dest), op1(_op1), op2(_op2), condCode(_condCode)
+ {}
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
class FpRegRegOp : public FpOp
@@ -550,6 +979,26 @@ class FpRegRegRegOp : public FpOp
std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
+class FpRegRegRegRegOp : public FpOp
+{
+ protected:
+ IntRegIndex dest;
+ IntRegIndex op1;
+ IntRegIndex op2;
+ IntRegIndex op3;
+
+ FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+ IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
+ IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop) :
+ FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2),
+ op3(_op3)
+ {
+ setVfpMicroFlags(mode, flags);
+ }
+
+ std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
+};
+
class FpRegRegRegImmOp : public FpOp
{
protected: