diff options
Diffstat (limited to 'src')
133 files changed, 39112 insertions, 2525 deletions
diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py index 55a589c32..eaec92f4d 100644 --- a/src/arch/arm/ArmISA.py +++ b/src/arch/arm/ArmISA.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012 ARM Limited +# Copyright (c) 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -34,8 +34,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Authors: Andreas Sandberg +# Giacomo Gabrielli from m5.params import * +from m5.proxy import * from m5.SimObject import SimObject class ArmISA(SimObject): @@ -43,12 +45,9 @@ class ArmISA(SimObject): cxx_class = 'ArmISA::ISA' cxx_header = "arch/arm/isa.hh" - # 0x35 Implementor is '5' from "M5" - # 0x0 Variant - # 0xf Architecture from CPUID scheme - # 0xc00 Primary part number ("c" or higher implies ARM v7) - # 0x0 Revision - midr = Param.UInt32(0x350fc000, "Main ID Register") + system = Param.System(Parent.any, "System this ISA object belongs to") + + midr = Param.UInt32(0x410fc0f0, "MIDR value") # See section B4.1.93 - B4.1.94 of the ARM ARM # @@ -56,19 +55,19 @@ class ArmISA(SimObject): # Note: ThumbEE is disabled for now since we don't support CP14 # config registers and jumping to ThumbEE vectors id_pfr0 = Param.UInt32(0x00000031, "Processor Feature Register 0") - # !Timer | !Virti | !M Profile | !TrustZone | ARMv4 - id_pfr1 = Param.UInt32(0x00000001, "Processor Feature Register 1") + # !Timer | Virti | !M Profile | TrustZone | ARMv4 + id_pfr1 = Param.UInt32(0x00001011, "Processor Feature Register 1") # See section B4.1.89 - B4.1.92 of the ARM ARM # VMSAv7 support - id_mmfr0 = Param.UInt32(0x00000003, "Memory Model Feature Register 0") + id_mmfr0 = Param.UInt32(0x10201103, "Memory Model Feature Register 0") id_mmfr1 = Param.UInt32(0x00000000, "Memory Model Feature Register 1") # no HW access | WFI stalling | ISB and DSB | # all TLB maintenance | no Harvard id_mmfr2 = Param.UInt32(0x01230000, "Memory Model Feature Register 2") # SuperSec | Coherent TLB | Bcast Maint | # BP Maint | Cache Maint Set/way | Cache Maint MVA - id_mmfr3 = Param.UInt32(0xF0102211, "Memory Model Feature Register 3") + id_mmfr3 = Param.UInt32(0x02102211, "Memory Model Feature Register 3") # See section B4.1.84 of ARM ARM # All values are latest for ARMv7-A profile @@ -79,5 +78,40 @@ class ArmISA(SimObject): id_isar4 = Param.UInt32(0x10010142, "Instruction Set Attribute Register 4") id_isar5 = Param.UInt32(0x00000000, "Instruction Set Attribute Register 5") + fpsid = Param.UInt32(0x410430a0, "Floating-point System ID Register") + + # [31:0] is implementation defined + id_aa64afr0_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Auxiliary Feature Register 0") + # Reserved for future expansion + id_aa64afr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Auxiliary Feature Register 1") + + # 1 CTX CMPs | 2 WRPs | 2 BRPs | !PMU | !Trace | Debug v8-A + id_aa64dfr0_el1 = Param.UInt64(0x0000000000101006, + "AArch64 Debug Feature Register 0") + # Reserved for future expansion + id_aa64dfr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Debug Feature Register 1") + + # !CRC32 | !SHA2 | !SHA1 | !AES + id_aa64isar0_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Instruction Set Attribute Register 0") + # Reserved for future expansion + id_aa64isar1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Instruction Set Attribute Register 1") + + # 4K | 64K | !16K | !BigEndEL0 | !SNSMem | !BigEnd | 8b ASID | 40b PA + id_aa64mmfr0_el1 = Param.UInt64(0x0000000000f00002, + "AArch64 Memory Model Feature Register 0") + # Reserved for future expansion + id_aa64mmfr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Memory Model Feature Register 1") - fpsid = Param.UInt32(0x410430A0, "Floating-point System ID Register") + # !GICv3 CP15 | AdvSIMD | FP | !EL3 | !EL2 | EL1 (AArch64) | EL0 (AArch64) + # (no AArch32/64 interprocessing support for now) + id_aa64pfr0_el1 = Param.UInt64(0x0000000000000011, + "AArch64 Processor Feature Register 0") + # Reserved for future expansion + id_aa64pfr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Processor Feature Register 1") diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py index b48c2a29d..39b7ec8ff 100644 --- a/src/arch/arm/ArmSystem.py +++ b/src/arch/arm/ArmSystem.py @@ -1,4 +1,4 @@ -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -44,7 +44,8 @@ class ArmMachineType(Enum): 'RealView_PBX' : 1901, 'VExpress_ELT' : 2272, 'VExpress_CA9' : 2272, - 'VExpress_EMM' : 2272} + 'VExpress_EMM' : 2272, + 'VExpress_EMM64' : 2272} class ArmSystem(System): type = 'ArmSystem' @@ -54,6 +55,23 @@ class ArmSystem(System): boot_loader = Param.String("", "File that contains the boot loader code if any") gic_cpu_addr = Param.Addr(0, "Addres of the GIC CPU interface") flags_addr = Param.Addr(0, "Address of the flags register for MP booting") + have_security = Param.Bool(False, + "True if Security Extensions are implemented") + have_virtualization = Param.Bool(False, + "True if Virtualization Extensions are implemented") + have_lpae = Param.Bool(False, "True if LPAE is implemented") + have_generic_timer = Param.Bool(False, + "True if the Generic Timer extension is implemented") + highest_el_is_64 = Param.Bool(False, + "True if the register width of the highest implemented exception level " + "is 64 bits (ARMv8)") + reset_addr_64 = Param.UInt64(0x0, + "Reset address if the highest implemented exception level is 64 bits " + "(ARMv8)") + phys_addr_range_64 = Param.UInt8(40, + "Supported physical address range in bits when using AArch64 (ARMv8)") + have_large_asid_64 = Param.Bool(False, + "True if ASID is 16 bits in AArch64 (ARMv8)") class LinuxArmSystem(ArmSystem): type = 'LinuxArmSystem' @@ -61,8 +79,10 @@ class LinuxArmSystem(ArmSystem): load_addr_mask = 0x0fffffff machine_type = Param.ArmMachineType('RealView_PBX', "Machine id from http://www.arm.linux.org.uk/developer/machines/") - atags_addr = Param.Addr(0x100, - "Address where default atags structure should be written") + atags_addr = Param.Addr("Address where default atags structure should " \ + "be written") + boot_release_addr = Param.Addr(0xfff8, "Address where secondary CPUs " \ + "spin waiting boot in the loader") dtb_filename = Param.String("", "File that contains the Device Tree Blob. Don't use DTB if empty.") early_kernel_symbols = Param.Bool(False, diff --git a/src/arch/arm/ArmTLB.py b/src/arch/arm/ArmTLB.py index c70dd80c8..01ac8016a 100644 --- a/src/arch/arm/ArmTLB.py +++ b/src/arch/arm/ArmTLB.py @@ -1,6 +1,6 @@ # -*- mode:python -*- -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -42,10 +42,12 @@ from m5.params import * from m5.proxy import * from MemObject import MemObject +# Basic stage 1 translation objects class ArmTableWalker(MemObject): type = 'ArmTableWalker' cxx_class = 'ArmISA::TableWalker' cxx_header = "arch/arm/table_walker.hh" + is_stage2 = Param.Bool(False, "Is this object for stage 2 translation?") port = MasterPort("Port for TableWalker to do walk the translation with") sys = Param.System(Parent.any, "system object parameter") num_squash_per_cycle = Param.Unsigned(2, @@ -57,3 +59,28 @@ class ArmTLB(SimObject): cxx_header = "arch/arm/tlb.hh" size = Param.Int(64, "TLB size") walker = Param.ArmTableWalker(ArmTableWalker(), "HW Table walker") + is_stage2 = Param.Bool(False, "Is this a stage 2 TLB?") + +# Stage 2 translation objects, only used when virtualisation is being used +class ArmStage2TableWalker(ArmTableWalker): + is_stage2 = True + +class ArmStage2TLB(ArmTLB): + size = 32 + walker = ArmStage2TableWalker() + is_stage2 = True + +class ArmStage2MMU(SimObject): + type = 'ArmStage2MMU' + cxx_class = 'ArmISA::Stage2MMU' + cxx_header = 'arch/arm/stage2_mmu.hh' + tlb = Param.ArmTLB("Stage 1 TLB") + stage2_tlb = Param.ArmTLB("Stage 2 TLB") + +class ArmStage2IMMU(ArmStage2MMU): + tlb = Parent.itb + stage2_tlb = ArmStage2TLB(walker = ArmStage2TableWalker()) + +class ArmStage2DMMU(ArmStage2MMU): + tlb = Parent.dtb + stage2_tlb = ArmStage2TLB(walker = ArmStage2TableWalker()) diff --git a/src/arch/arm/SConscript b/src/arch/arm/SConscript index 8d13a9b2d..aa9ce417b 100644 --- a/src/arch/arm/SConscript +++ b/src/arch/arm/SConscript @@ -1,6 +1,6 @@ # -*- mode:python -*- -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -49,12 +49,17 @@ if env['TARGET_ISA'] == 'arm': Dir('isa/formats') Source('decoder.cc') Source('faults.cc') + Source('insts/branch64.cc') + Source('insts/data64.cc') Source('insts/macromem.cc') Source('insts/mem.cc') + Source('insts/mem64.cc') Source('insts/misc.cc') + Source('insts/misc64.cc') Source('insts/pred_inst.cc') Source('insts/static_inst.cc') Source('insts/vfp.cc') + Source('insts/fplib.cc') Source('interrupts.cc') Source('isa.cc') Source('linux/linux.cc') @@ -67,6 +72,8 @@ if env['TARGET_ISA'] == 'arm': Source('stacktrace.cc') Source('system.cc') Source('table_walker.cc') + Source('stage2_mmu.cc') + Source('stage2_lookup.cc') Source('tlb.cc') Source('utility.cc') Source('vtophys.cc') diff --git a/src/arch/arm/decoder.cc b/src/arch/arm/decoder.cc index e957ce0e7..940d85b8e 100644 --- a/src/arch/arm/decoder.cc +++ b/src/arch/arm/decoder.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2012 Google * All rights reserved. * @@ -47,9 +59,11 @@ Decoder::process() if (!emi.thumb) { emi.instBits = data; - emi.sevenAndFour = bits(data, 7) && bits(data, 4); - emi.isMisc = (bits(data, 24, 23) == 0x2 && - bits(data, 20) == 0); + if (!emi.aarch64) { + emi.sevenAndFour = bits(data, 7) && bits(data, 4); + emi.isMisc = (bits(data, 24, 23) == 0x2 && + bits(data, 20) == 0); + } consumeBytes(4); DPRINTF(Decoder, "Arm inst: %#x.\n", (uint64_t)emi); } else { @@ -112,6 +126,7 @@ Decoder::moreBytes(const PCState &pc, Addr fetchPC, MachInst inst) data = inst; offset = (fetchPC >= pc.instAddr()) ? 0 : pc.instAddr() - fetchPC; emi.thumb = pc.thumb(); + emi.aarch64 = pc.aarch64(); emi.fpscrLen = fpscrLen; emi.fpscrStride = fpscrStride; diff --git a/src/arch/arm/decoder.hh b/src/arch/arm/decoder.hh index 72776bcfd..315a3b6ad 100644 --- a/src/arch/arm/decoder.hh +++ b/src/arch/arm/decoder.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2012 Google * All rights reserved. * diff --git a/src/arch/arm/faults.cc b/src/arch/arm/faults.cc index be1c7ecc2..f8313efd2 100644 --- a/src/arch/arm/faults.cc +++ b/src/arch/arm/faults.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -40,9 +40,15 @@ * * Authors: Ali Saidi * Gabe Black + * Giacomo Gabrielli + * Thomas Grocutt */ #include "arch/arm/faults.hh" +#include "arch/arm/system.hh" +#include "arch/arm/utility.hh" +#include "arch/arm/insts/static_inst.hh" +#include "base/compiler.hh" #include "base/trace.hh" #include "cpu/base.hh" #include "cpu/thread_context.hh" @@ -52,61 +58,413 @@ namespace ArmISA { -template<> ArmFault::FaultVals ArmFaultVals<Reset>::vals = -{"reset", 0x00, MODE_SVC, 0, 0, true, true, FaultStat()}; +uint8_t ArmFault::shortDescFaultSources[] = { + 0x01, // AlignmentFault + 0x04, // InstructionCacheMaintenance + 0xff, // SynchExtAbtOnTranslTableWalkL0 (INVALID) + 0x0c, // SynchExtAbtOnTranslTableWalkL1 + 0x0e, // SynchExtAbtOnTranslTableWalkL2 + 0xff, // SynchExtAbtOnTranslTableWalkL3 (INVALID) + 0xff, // SynchPtyErrOnTranslTableWalkL0 (INVALID) + 0x1c, // SynchPtyErrOnTranslTableWalkL1 + 0x1e, // SynchPtyErrOnTranslTableWalkL2 + 0xff, // SynchPtyErrOnTranslTableWalkL3 (INVALID) + 0xff, // TranslationL0 (INVALID) + 0x05, // TranslationL1 + 0x07, // TranslationL2 + 0xff, // TranslationL3 (INVALID) + 0xff, // AccessFlagL0 (INVALID) + 0x03, // AccessFlagL1 + 0x06, // AccessFlagL2 + 0xff, // AccessFlagL3 (INVALID) + 0xff, // DomainL0 (INVALID) + 0x09, // DomainL1 + 0x0b, // DomainL2 + 0xff, // DomainL3 (INVALID) + 0xff, // PermissionL0 (INVALID) + 0x0d, // PermissionL1 + 0x0f, // PermissionL2 + 0xff, // PermissionL3 (INVALID) + 0x02, // DebugEvent + 0x08, // SynchronousExternalAbort + 0x10, // TLBConflictAbort + 0x19, // SynchPtyErrOnMemoryAccess + 0x16, // AsynchronousExternalAbort + 0x18, // AsynchPtyErrOnMemoryAccess + 0xff, // AddressSizeL0 (INVALID) + 0xff, // AddressSizeL1 (INVALID) + 0xff, // AddressSizeL2 (INVALID) + 0xff, // AddressSizeL3 (INVALID) + 0x40, // PrefetchTLBMiss + 0x80 // PrefetchUncacheable +}; -template<> ArmFault::FaultVals ArmFaultVals<UndefinedInstruction>::vals = -{"Undefined Instruction", 0x04, MODE_UNDEFINED, 4 ,2, false, false, - FaultStat()} ; +static_assert(sizeof(ArmFault::shortDescFaultSources) == + ArmFault::NumFaultSources, + "Invalid size of ArmFault::shortDescFaultSources[]"); -template<> ArmFault::FaultVals ArmFaultVals<SupervisorCall>::vals = -{"Supervisor Call", 0x08, MODE_SVC, 4, 2, false, false, FaultStat()}; +uint8_t ArmFault::longDescFaultSources[] = { + 0x21, // AlignmentFault + 0xff, // InstructionCacheMaintenance (INVALID) + 0xff, // SynchExtAbtOnTranslTableWalkL0 (INVALID) + 0x15, // SynchExtAbtOnTranslTableWalkL1 + 0x16, // SynchExtAbtOnTranslTableWalkL2 + 0x17, // SynchExtAbtOnTranslTableWalkL3 + 0xff, // SynchPtyErrOnTranslTableWalkL0 (INVALID) + 0x1d, // SynchPtyErrOnTranslTableWalkL1 + 0x1e, // SynchPtyErrOnTranslTableWalkL2 + 0x1f, // SynchPtyErrOnTranslTableWalkL3 + 0xff, // TranslationL0 (INVALID) + 0x05, // TranslationL1 + 0x06, // TranslationL2 + 0x07, // TranslationL3 + 0xff, // AccessFlagL0 (INVALID) + 0x09, // AccessFlagL1 + 0x0a, // AccessFlagL2 + 0x0b, // AccessFlagL3 + 0xff, // DomainL0 (INVALID) + 0x3d, // DomainL1 + 0x3e, // DomainL2 + 0xff, // DomainL3 (RESERVED) + 0xff, // PermissionL0 (INVALID) + 0x0d, // PermissionL1 + 0x0e, // PermissionL2 + 0x0f, // PermissionL3 + 0x22, // DebugEvent + 0x10, // SynchronousExternalAbort + 0x30, // TLBConflictAbort + 0x18, // SynchPtyErrOnMemoryAccess + 0x11, // AsynchronousExternalAbort + 0x19, // AsynchPtyErrOnMemoryAccess + 0xff, // AddressSizeL0 (INVALID) + 0xff, // AddressSizeL1 (INVALID) + 0xff, // AddressSizeL2 (INVALID) + 0xff, // AddressSizeL3 (INVALID) + 0x40, // PrefetchTLBMiss + 0x80 // PrefetchUncacheable +}; -template<> ArmFault::FaultVals ArmFaultVals<PrefetchAbort>::vals = -{"Prefetch Abort", 0x0C, MODE_ABORT, 4, 4, true, false, FaultStat()}; +static_assert(sizeof(ArmFault::longDescFaultSources) == + ArmFault::NumFaultSources, + "Invalid size of ArmFault::longDescFaultSources[]"); -template<> ArmFault::FaultVals ArmFaultVals<DataAbort>::vals = -{"Data Abort", 0x10, MODE_ABORT, 8, 8, true, false, FaultStat()}; +uint8_t ArmFault::aarch64FaultSources[] = { + 0x21, // AlignmentFault + 0xff, // InstructionCacheMaintenance (INVALID) + 0x14, // SynchExtAbtOnTranslTableWalkL0 + 0x15, // SynchExtAbtOnTranslTableWalkL1 + 0x16, // SynchExtAbtOnTranslTableWalkL2 + 0x17, // SynchExtAbtOnTranslTableWalkL3 + 0x1c, // SynchPtyErrOnTranslTableWalkL0 + 0x1d, // SynchPtyErrOnTranslTableWalkL1 + 0x1e, // SynchPtyErrOnTranslTableWalkL2 + 0x1f, // SynchPtyErrOnTranslTableWalkL3 + 0x04, // TranslationL0 + 0x05, // TranslationL1 + 0x06, // TranslationL2 + 0x07, // TranslationL3 + 0x08, // AccessFlagL0 + 0x09, // AccessFlagL1 + 0x0a, // AccessFlagL2 + 0x0b, // AccessFlagL3 + // @todo: Section & Page Domain Fault in AArch64? + 0xff, // DomainL0 (INVALID) + 0xff, // DomainL1 (INVALID) + 0xff, // DomainL2 (INVALID) + 0xff, // DomainL3 (INVALID) + 0x0c, // PermissionL0 + 0x0d, // PermissionL1 + 0x0e, // PermissionL2 + 0x0f, // PermissionL3 + 0xff, // DebugEvent (INVALID) + 0x10, // SynchronousExternalAbort + 0x30, // TLBConflictAbort + 0x18, // SynchPtyErrOnMemoryAccess + 0xff, // AsynchronousExternalAbort (INVALID) + 0xff, // AsynchPtyErrOnMemoryAccess (INVALID) + 0x00, // AddressSizeL0 + 0x01, // AddressSizeL1 + 0x02, // AddressSizeL2 + 0x03, // AddressSizeL3 + 0x40, // PrefetchTLBMiss + 0x80 // PrefetchUncacheable +}; -template<> ArmFault::FaultVals ArmFaultVals<Interrupt>::vals = -{"IRQ", 0x18, MODE_IRQ, 4, 4, true, false, FaultStat()}; +static_assert(sizeof(ArmFault::aarch64FaultSources) == + ArmFault::NumFaultSources, + "Invalid size of ArmFault::aarch64FaultSources[]"); -template<> ArmFault::FaultVals ArmFaultVals<FastInterrupt>::vals = -{"FIQ", 0x1C, MODE_FIQ, 4, 4, true, true, FaultStat()}; +// Fields: name, offset, cur{ELT,ELH}Offset, lowerEL{64,32}Offset, next mode, +// {ARM, Thumb, ARM_ELR, Thumb_ELR} PC offset, hyp trap, +// {A, F} disable, class, stat +template<> ArmFault::FaultVals ArmFaultVals<Reset>::vals = { + // Some dummy values (the reset vector has an IMPLEMENTATION DEFINED + // location in AArch64) + "Reset", 0x000, 0x000, 0x000, 0x000, 0x000, MODE_SVC, + 0, 0, 0, 0, false, true, true, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<UndefinedInstruction>::vals = { + "Undefined Instruction", 0x004, 0x000, 0x200, 0x400, 0x600, MODE_UNDEFINED, + 4, 2, 0, 0, true, false, false, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<SupervisorCall>::vals = { + "Supervisor Call", 0x008, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 4, 2, 4, 2, true, false, false, EC_SVC_TO_HYP, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<SecureMonitorCall>::vals = { + "Secure Monitor Call", 0x008, 0x000, 0x200, 0x400, 0x600, MODE_MON, + 4, 4, 4, 4, false, true, true, EC_SMC_TO_HYP, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<HypervisorCall>::vals = { + "Hypervisor Call", 0x008, 0x000, 0x200, 0x400, 0x600, MODE_HYP, + 4, 4, 4, 4, true, false, false, EC_HVC, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<PrefetchAbort>::vals = { + "Prefetch Abort", 0x00C, 0x000, 0x200, 0x400, 0x600, MODE_ABORT, + 4, 4, 0, 0, true, true, false, EC_PREFETCH_ABORT_TO_HYP, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<DataAbort>::vals = { + "Data Abort", 0x010, 0x000, 0x200, 0x400, 0x600, MODE_ABORT, + 8, 8, 0, 0, true, true, false, EC_DATA_ABORT_TO_HYP, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<VirtualDataAbort>::vals = { + "Virtual Data Abort", 0x010, 0x000, 0x200, 0x400, 0x600, MODE_ABORT, + 8, 8, 0, 0, true, true, false, EC_INVALID, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<HypervisorTrap>::vals = { + // @todo: double check these values + "Hypervisor Trap", 0x014, 0x000, 0x200, 0x400, 0x600, MODE_HYP, + 0, 0, 0, 0, false, false, false, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<Interrupt>::vals = { + "IRQ", 0x018, 0x080, 0x280, 0x480, 0x680, MODE_IRQ, + 4, 4, 0, 0, false, true, false, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<VirtualInterrupt>::vals = { + "Virtual IRQ", 0x018, 0x080, 0x280, 0x480, 0x680, MODE_IRQ, + 4, 4, 0, 0, false, true, false, EC_INVALID, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<FastInterrupt>::vals = { + "FIQ", 0x01C, 0x100, 0x300, 0x500, 0x700, MODE_FIQ, + 4, 4, 0, 0, false, true, true, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<VirtualFastInterrupt>::vals = { + "Virtual FIQ", 0x01C, 0x100, 0x300, 0x500, 0x700, MODE_FIQ, + 4, 4, 0, 0, false, true, true, EC_INVALID, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<SupervisorTrap>::vals = { + // Some dummy values (SupervisorTrap is AArch64-only) + "Supervisor Trap", 0x014, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 0, 0, 0, 0, false, false, false, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<SecureMonitorTrap>::vals = { + // Some dummy values (SecureMonitorTrap is AArch64-only) + "Secure Monitor Trap", 0x014, 0x000, 0x200, 0x400, 0x600, MODE_MON, + 0, 0, 0, 0, false, false, false, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<PCAlignmentFault>::vals = { + // Some dummy values (PCAlignmentFault is AArch64-only) + "PC Alignment Fault", 0x000, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 0, 0, 0, 0, true, false, false, EC_PC_ALIGNMENT, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<SPAlignmentFault>::vals = { + // Some dummy values (SPAlignmentFault is AArch64-only) + "SP Alignment Fault", 0x000, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 0, 0, 0, 0, true, false, false, EC_STACK_PTR_ALIGNMENT, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<SystemError>::vals = { + // Some dummy values (SError is AArch64-only) + "SError", 0x000, 0x180, 0x380, 0x580, 0x780, MODE_SVC, + 0, 0, 0, 0, false, true, true, EC_SERROR, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<FlushPipe>::vals = { + // Some dummy values + "Pipe Flush", 0x000, 0x000, 0x000, 0x000, 0x000, MODE_SVC, + 0, 0, 0, 0, false, true, true, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<ArmSev>::vals = { + // Some dummy values + "ArmSev Flush", 0x000, 0x000, 0x000, 0x000, 0x000, MODE_SVC, + 0, 0, 0, 0, false, true, true, EC_UNKNOWN, FaultStat() +}; +template<> ArmFault::FaultVals ArmFaultVals<IllegalInstSetStateFault>::vals = { + // Some dummy values (SPAlignmentFault is AArch64-only) + "Illegal Inst Set State Fault", 0x000, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 0, 0, 0, 0, true, false, false, EC_ILLEGAL_INST, FaultStat() +}; -template<> ArmFault::FaultVals ArmFaultVals<FlushPipe>::vals = -{"Pipe Flush", 0x00, MODE_SVC, 0, 0, true, true, FaultStat()}; // dummy values - -template<> ArmFault::FaultVals ArmFaultVals<ArmSev>::vals = -{"ArmSev Flush", 0x00, MODE_SVC, 0, 0, true, true, FaultStat()}; // dummy values -Addr +Addr ArmFault::getVector(ThreadContext *tc) { - // ARM ARM B1-3 + Addr base; - SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR); + // ARM ARM issue C B1.8.1 + bool haveSecurity = ArmSystem::haveSecurity(tc); // panic if SCTLR.VE because I have no idea what to do with vectored // interrupts + SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR); assert(!sctlr.ve); + // Check for invalid modes + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + assert(haveSecurity || cpsr.mode != MODE_MON); + assert(ArmSystem::haveVirtualization(tc) || cpsr.mode != MODE_HYP); - if (!sctlr.v) - return offset(); - return offset() + HighVecs; + switch (cpsr.mode) + { + case MODE_MON: + base = tc->readMiscReg(MISCREG_MVBAR); + break; + case MODE_HYP: + base = tc->readMiscReg(MISCREG_HVBAR); + break; + default: + if (sctlr.v) { + base = HighVecs; + } else { + base = haveSecurity ? tc->readMiscReg(MISCREG_VBAR) : 0; + } + break; + } + return base + offset(tc); +} +Addr +ArmFault::getVector64(ThreadContext *tc) +{ + Addr vbar; + switch (toEL) { + case EL3: + assert(ArmSystem::haveSecurity(tc)); + vbar = tc->readMiscReg(MISCREG_VBAR_EL3); + break; + // @todo: uncomment this to enable Virtualization + // case EL2: + // assert(ArmSystem::haveVirtualization(tc)); + // vbar = tc->readMiscReg(MISCREG_VBAR_EL2); + // break; + case EL1: + vbar = tc->readMiscReg(MISCREG_VBAR_EL1); + break; + default: + panic("Invalid target exception level"); + break; + } + return vbar + offset64(); } -void +MiscRegIndex +ArmFault::getSyndromeReg64() const +{ + switch (toEL) { + case EL1: + return MISCREG_ESR_EL1; + case EL2: + return MISCREG_ESR_EL2; + case EL3: + return MISCREG_ESR_EL3; + default: + panic("Invalid exception level"); + break; + } +} + +MiscRegIndex +ArmFault::getFaultAddrReg64() const +{ + switch (toEL) { + case EL1: + return MISCREG_FAR_EL1; + case EL2: + return MISCREG_FAR_EL2; + case EL3: + return MISCREG_FAR_EL3; + default: + panic("Invalid exception level"); + break; + } +} + +void +ArmFault::setSyndrome(ThreadContext *tc, MiscRegIndex syndrome_reg) +{ + uint32_t value; + uint32_t exc_class = (uint32_t) ec(tc); + uint32_t issVal = iss(); + assert(!from64 || ArmSystem::highestELIs64(tc)); + + value = exc_class << 26; + + // HSR.IL not valid for Prefetch Aborts (0x20, 0x21) and Data Aborts (0x24, + // 0x25) for which the ISS information is not valid (ARMv7). + // @todo: ARMv8 revises AArch32 functionality: when HSR.IL is not + // valid it is treated as RES1. + if (to64) { + value |= 1 << 25; + } else if ((bits(exc_class, 5, 3) != 4) || + (bits(exc_class, 2) && bits(issVal, 24))) { + if (!machInst.thumb || machInst.bigThumb) + value |= 1 << 25; + } + // Condition code valid for EC[5:4] nonzero + if (!from64 && ((bits(exc_class, 5, 4) == 0) && + (bits(exc_class, 3, 0) != 0))) { + if (!machInst.thumb) { + uint32_t cond; + ConditionCode condCode = (ConditionCode) (uint32_t) machInst.condCode; + // If its on unconditional instruction report with a cond code of + // 0xE, ie the unconditional code + cond = (condCode == COND_UC) ? COND_AL : condCode; + value |= cond << 20; + value |= 1 << 24; + } + value |= bits(issVal, 19, 0); + } else { + value |= issVal; + } + tc->setMiscReg(syndrome_reg, value); +} + +void ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) { - // ARM ARM B1.6.3 + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + + if (ArmSystem::highestELIs64(tc)) { // ARMv8 + // Determine source exception level and mode + fromMode = (OperatingMode) (uint8_t) cpsr.mode; + fromEL = opModeToEL(fromMode); + if (opModeIs64(fromMode)) + from64 = true; + + // Determine target exception level + if (ArmSystem::haveSecurity(tc) && routeToMonitor(tc)) + toEL = EL3; + else + toEL = opModeToEL(nextMode()); + if (fromEL > toEL) + toEL = fromEL; + + if (toEL == ArmSystem::highestEL(tc) || ELIs64(tc, toEL)) { + // Invoke exception handler in AArch64 state + to64 = true; + invoke64(tc, inst); + return; + } + } + + // ARMv7 (ARM ARM issue C B1.9) + + bool have_security = ArmSystem::haveSecurity(tc); + bool have_virtualization = ArmSystem::haveVirtualization(tc); + FaultBase::invoke(tc); if (!FullSystem) return; countStat()++; SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR); - CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + SCR scr = tc->readMiscReg(MISCREG_SCR); CPSR saved_cpsr = tc->readMiscReg(MISCREG_CPSR); saved_cpsr.nz = tc->readIntReg(INTREG_CONDCODES_NZ); saved_cpsr.c = tc->readIntReg(INTREG_CONDCODES_C); @@ -118,22 +476,73 @@ ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) saved_cpsr.it2 = it.top6; saved_cpsr.it1 = it.bottom2; - cpsr.mode = nextMode(); + // if we have a valid instruction then use it to annotate this fault with + // extra information. This is used to generate the correct fault syndrome + // information + if (inst) { + ArmStaticInst *armInst = reinterpret_cast<ArmStaticInst *>(inst.get()); + armInst->annotateFault(this); + } + + if (have_security && routeToMonitor(tc)) + cpsr.mode = MODE_MON; + else if (have_virtualization && routeToHyp(tc)) + cpsr.mode = MODE_HYP; + else + cpsr.mode = nextMode(); + + // Ensure Secure state if initially in Monitor mode + if (have_security && saved_cpsr.mode == MODE_MON) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + if (scr.ns) { + scr.ns = 0; + tc->setMiscRegNoEffect(MISCREG_SCR, scr); + } + } + + // some bits are set differently if we have been routed to hyp mode + if (cpsr.mode == MODE_HYP) { + SCTLR hsctlr = tc->readMiscReg(MISCREG_HSCTLR); + cpsr.t = hsctlr.te; + cpsr.e = hsctlr.ee; + if (!scr.ea) {cpsr.a = 1;} + if (!scr.fiq) {cpsr.f = 1;} + if (!scr.irq) {cpsr.i = 1;} + } else if (cpsr.mode == MODE_MON) { + // Special case handling when entering monitor mode + cpsr.t = sctlr.te; + cpsr.e = sctlr.ee; + cpsr.a = 1; + cpsr.f = 1; + cpsr.i = 1; + } else { + cpsr.t = sctlr.te; + cpsr.e = sctlr.ee; + + // The *Disable functions are virtual and different per fault + cpsr.a = cpsr.a | abortDisable(tc); + cpsr.f = cpsr.f | fiqDisable(tc); + cpsr.i = 1; + } cpsr.it1 = cpsr.it2 = 0; cpsr.j = 0; - - cpsr.t = sctlr.te; - cpsr.a = cpsr.a | abortDisable(); - cpsr.f = cpsr.f | fiqDisable(); - cpsr.i = 1; - cpsr.e = sctlr.ee; tc->setMiscReg(MISCREG_CPSR, cpsr); + // Make sure mailbox sets to one always tc->setMiscReg(MISCREG_SEV_MAILBOX, 1); - tc->setIntReg(INTREG_LR, curPc + - (saved_cpsr.t ? thumbPcOffset() : armPcOffset())); - switch (nextMode()) { + // Clear the exclusive monitor + tc->setMiscReg(MISCREG_LOCKFLAG, 0); + + if (cpsr.mode == MODE_HYP) { + tc->setMiscReg(MISCREG_ELR_HYP, curPc + + (saved_cpsr.t ? thumbPcOffset(true) : armPcOffset(true))); + } else { + tc->setIntReg(INTREG_LR, curPc + + (saved_cpsr.t ? thumbPcOffset(false) : armPcOffset(false))); + } + + switch (cpsr.mode) { case MODE_FIQ: tc->setMiscReg(MISCREG_SPSR_FIQ, saved_cpsr); break; @@ -143,12 +552,23 @@ ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) case MODE_SVC: tc->setMiscReg(MISCREG_SPSR_SVC, saved_cpsr); break; - case MODE_UNDEFINED: - tc->setMiscReg(MISCREG_SPSR_UND, saved_cpsr); + case MODE_MON: + assert(have_security); + tc->setMiscReg(MISCREG_SPSR_MON, saved_cpsr); break; case MODE_ABORT: tc->setMiscReg(MISCREG_SPSR_ABT, saved_cpsr); break; + case MODE_UNDEFINED: + tc->setMiscReg(MISCREG_SPSR_UND, saved_cpsr); + if (ec(tc) != EC_UNKNOWN) + setSyndrome(tc, MISCREG_HSR); + break; + case MODE_HYP: + assert(have_virtualization); + tc->setMiscReg(MISCREG_SPSR_HYP, saved_cpsr); + setSyndrome(tc, MISCREG_HSR); + break; default: panic("unknown Mode\n"); } @@ -161,7 +581,100 @@ ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) pc.nextThumb(pc.thumb()); pc.jazelle(cpsr.j); pc.nextJazelle(pc.jazelle()); + pc.aarch64(!cpsr.width); + pc.nextAArch64(!cpsr.width); + tc->pcState(pc); +} + +void +ArmFault::invoke64(ThreadContext *tc, StaticInstPtr inst) +{ + // Determine actual misc. register indices for ELR_ELx and SPSR_ELx + MiscRegIndex elr_idx, spsr_idx; + switch (toEL) { + case EL1: + elr_idx = MISCREG_ELR_EL1; + spsr_idx = MISCREG_SPSR_EL1; + break; + // @todo: uncomment this to enable Virtualization + // case EL2: + // assert(ArmSystem::haveVirtualization()); + // elr_idx = MISCREG_ELR_EL2; + // spsr_idx = MISCREG_SPSR_EL2; + // break; + case EL3: + assert(ArmSystem::haveSecurity(tc)); + elr_idx = MISCREG_ELR_EL3; + spsr_idx = MISCREG_SPSR_EL3; + break; + default: + panic("Invalid target exception level"); + break; + } + + // Save process state into SPSR_ELx + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + CPSR spsr = cpsr; + spsr.nz = tc->readIntReg(INTREG_CONDCODES_NZ); + spsr.c = tc->readIntReg(INTREG_CONDCODES_C); + spsr.v = tc->readIntReg(INTREG_CONDCODES_V); + if (from64) { + // Force some bitfields to 0 + spsr.q = 0; + spsr.it1 = 0; + spsr.j = 0; + spsr.res0_23_22 = 0; + spsr.ge = 0; + spsr.it2 = 0; + spsr.t = 0; + } else { + spsr.ge = tc->readIntReg(INTREG_CONDCODES_GE); + ITSTATE it = tc->pcState().itstate(); + spsr.it2 = it.top6; + spsr.it1 = it.bottom2; + // Force some bitfields to 0 + spsr.res0_23_22 = 0; + spsr.ss = 0; + } + tc->setMiscReg(spsr_idx, spsr); + + // Save preferred return address into ELR_ELx + Addr curr_pc = tc->pcState().pc(); + Addr ret_addr = curr_pc; + if (from64) + ret_addr += armPcElrOffset(); + else + ret_addr += spsr.t ? thumbPcElrOffset() : armPcElrOffset(); + tc->setMiscReg(elr_idx, ret_addr); + + // Update process state + OperatingMode64 mode = 0; + mode.spX = 1; + mode.el = toEL; + mode.width = 0; + cpsr.mode = mode; + cpsr.daif = 0xf; + cpsr.il = 0; + cpsr.ss = 0; + tc->setMiscReg(MISCREG_CPSR, cpsr); + + // Set PC to start of exception handler + Addr new_pc = purifyTaggedAddr(getVector64(tc), tc, toEL); + DPRINTF(Faults, "Invoking Fault (AArch64 target EL):%s cpsr:%#x PC:%#x " + "elr:%#x newVec: %#x\n", name(), cpsr, curr_pc, ret_addr, new_pc); + PCState pc(new_pc); + pc.aarch64(!cpsr.width); + pc.nextAArch64(!cpsr.width); tc->pcState(pc); + + // If we have a valid instruction then use it to annotate this fault with + // extra information. This is used to generate the correct fault syndrome + // information + if (inst) + reinterpret_cast<ArmStaticInst *>(inst.get())->annotateFault(this); + // Save exception syndrome + if ((nextMode() != MODE_IRQ) && (nextMode() != MODE_FIQ)) + setSyndrome(tc, getSyndromeReg64()); } void @@ -171,7 +684,25 @@ Reset::invoke(ThreadContext *tc, StaticInstPtr inst) tc->getCpuPtr()->clearInterrupts(); tc->clearArchRegs(); } - ArmFault::invoke(tc, inst); + if (!ArmSystem::highestELIs64(tc)) { + ArmFault::invoke(tc, inst); + tc->setMiscReg(MISCREG_VMPIDR, + getMPIDR(dynamic_cast<ArmSystem*>(tc->getSystemPtr()), tc)); + + // Unless we have SMC code to get us there, boot in HYP! + if (ArmSystem::haveVirtualization(tc) && + !ArmSystem::haveSecurity(tc)) { + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + cpsr.mode = MODE_HYP; + tc->setMiscReg(MISCREG_CPSR, cpsr); + } + } else { + // Advance the PC to the IMPLEMENTATION DEFINED reset value + PCState pc = ArmSystem::resetAddr64(tc); + pc.aarch64(true); + pc.nextAArch64(true); + tc->pcState(pc); + } } void @@ -196,6 +727,45 @@ UndefinedInstruction::invoke(ThreadContext *tc, StaticInstPtr inst) } } +bool +UndefinedInstruction::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // if HCR.TGE is set to 1, take to Hyp mode through Hyp Trap vector + toHyp |= !inSecureState(scr, cpsr) && hcr.tge && (cpsr.mode == MODE_USER); + return toHyp; +} + +uint32_t +UndefinedInstruction::iss() const +{ + if (overrideEc == EC_INVALID) + return issRaw; + + uint32_t new_iss = 0; + uint32_t op0, op1, op2, CRn, CRm, Rt, dir; + + dir = bits(machInst, 21, 21); + op0 = bits(machInst, 20, 19); + op1 = bits(machInst, 18, 16); + CRn = bits(machInst, 15, 12); + CRm = bits(machInst, 11, 8); + op2 = bits(machInst, 7, 5); + Rt = bits(machInst, 4, 0); + + new_iss = op0 << 20 | op2 << 17 | op1 << 14 | CRn << 10 | + Rt << 5 | CRm << 1 | dir; + + return new_iss; +} + void SupervisorCall::invoke(ThreadContext *tc, StaticInstPtr inst) { @@ -207,7 +777,12 @@ SupervisorCall::invoke(ThreadContext *tc, StaticInstPtr inst) // As of now, there isn't a 32 bit thumb version of this instruction. assert(!machInst.bigThumb); uint32_t callNum; - callNum = tc->readIntReg(INTREG_R7); + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + OperatingMode mode = (OperatingMode)(uint8_t)cpsr.mode; + if (opModeIs64(mode)) + callNum = tc->readIntReg(INTREG_X8); + else + callNum = tc->readIntReg(INTREG_R7); tc->syscall(callNum); // Advance the PC since that won't happen automatically. @@ -217,21 +792,593 @@ SupervisorCall::invoke(ThreadContext *tc, StaticInstPtr inst) tc->pcState(pc); } +bool +SupervisorCall::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // if HCR.TGE is set to 1, take to Hyp mode through Hyp Trap vector + toHyp |= !inSecureState(scr, cpsr) && hcr.tge && (cpsr.mode == MODE_USER); + return toHyp; +} + +ExceptionClass +SupervisorCall::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : + (from64 ? EC_SVC_64 : vals.ec); +} + +uint32_t +SupervisorCall::iss() const +{ + // Even if we have a 24 bit imm from an arm32 instruction then we only use + // the bottom 16 bits for the ISS value (it doesn't hurt for AArch64 SVC). + return issRaw & 0xFFFF; +} + +uint32_t +SecureMonitorCall::iss() const +{ + if (from64) + return bits(machInst, 20, 5); + return 0; +} + +ExceptionClass +UndefinedInstruction::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : vals.ec; +} + + +HypervisorCall::HypervisorCall(ExtMachInst _machInst, uint32_t _imm) : + ArmFaultVals<HypervisorCall>(_machInst, _imm) +{} + +ExceptionClass +HypervisorTrap::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : vals.ec; +} + +template<class T> +FaultOffset +ArmFaultVals<T>::offset(ThreadContext *tc) +{ + bool isHypTrap = false; + + // Normally we just use the exception vector from the table at the top if + // this file, however if this exception has caused a transition to hype + // mode, and its an exception type that would only do this if it has been + // trapped then we use the hyp trap vector instead of the normal vector + if (vals.hypTrappable) { + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + if (cpsr.mode == MODE_HYP) { + CPSR spsr = tc->readMiscReg(MISCREG_SPSR_HYP); + isHypTrap = spsr.mode != MODE_HYP; + } + } + return isHypTrap ? 0x14 : vals.offset; +} + +// void +// SupervisorCall::setSyndrome64(ThreadContext *tc, MiscRegIndex esr_idx) +// { +// ESR esr = 0; +// esr.ec = machInst.aarch64 ? SvcAArch64 : SvcAArch32; +// esr.il = !machInst.thumb; +// if (machInst.aarch64) +// esr.imm16 = bits(machInst.instBits, 20, 5); +// else if (machInst.thumb) +// esr.imm16 = bits(machInst.instBits, 7, 0); +// else +// esr.imm16 = bits(machInst.instBits, 15, 0); +// tc->setMiscReg(esr_idx, esr); +// } + +void +SecureMonitorCall::invoke(ThreadContext *tc, StaticInstPtr inst) +{ + if (FullSystem) { + ArmFault::invoke(tc, inst); + return; + } +} + +ExceptionClass +SecureMonitorCall::ec(ThreadContext *tc) const +{ + return (from64 ? EC_SMC_64 : vals.ec); +} + +ExceptionClass +SupervisorTrap::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : vals.ec; +} + +ExceptionClass +SecureMonitorTrap::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : + (from64 ? EC_SMC_64 : vals.ec); +} + template<class T> void AbortFault<T>::invoke(ThreadContext *tc, StaticInstPtr inst) { + if (tranMethod == ArmFault::UnknownTran) { + tranMethod = longDescFormatInUse(tc) ? ArmFault::LpaeTran + : ArmFault::VmsaTran; + + if ((tranMethod == ArmFault::VmsaTran) && this->routeToMonitor(tc)) { + // See ARM ARM B3-1416 + bool override_LPAE = false; + TTBCR ttbcr_s = tc->readMiscReg(MISCREG_TTBCR_S); + TTBCR M5_VAR_USED ttbcr_ns = tc->readMiscReg(MISCREG_TTBCR_NS); + if (ttbcr_s.eae) { + override_LPAE = true; + } else { + // Unimplemented code option, not seen in testing. May need + // extension according to the manual exceprt above. + DPRINTF(Faults, "Warning: Incomplete translation method " + "override detected.\n"); + } + if (override_LPAE) + tranMethod = ArmFault::LpaeTran; + } + } + + if (source == ArmFault::AsynchronousExternalAbort) { + tc->getCpuPtr()->clearInterrupt(INT_ABT, 0); + } + // Get effective fault source encoding + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + FSR fsr = getFsr(tc); + + // source must be determined BEFORE invoking generic routines which will + // try to set hsr etc. and are based upon source! ArmFaultVals<T>::invoke(tc, inst); + + if (cpsr.width) { // AArch32 + if (cpsr.mode == MODE_HYP) { + tc->setMiscReg(T::HFarIndex, faultAddr); + } else if (stage2) { + tc->setMiscReg(MISCREG_HPFAR, (faultAddr >> 8) & ~0xf); + tc->setMiscReg(T::HFarIndex, OVAddr); + } else { + tc->setMiscReg(T::FsrIndex, fsr); + tc->setMiscReg(T::FarIndex, faultAddr); + } + DPRINTF(Faults, "Abort Fault source=%#x fsr=%#x faultAddr=%#x "\ + "tranMethod=%#x\n", source, fsr, faultAddr, tranMethod); + } else { // AArch64 + // Set the FAR register. Nothing else to do if we are in AArch64 state + // because the syndrome register has already been set inside invoke64() + tc->setMiscReg(AbortFault<T>::getFaultAddrReg64(), faultAddr); + } +} + +template<class T> +FSR +AbortFault<T>::getFsr(ThreadContext *tc) +{ FSR fsr = 0; - fsr.fsLow = bits(status, 3, 0); - fsr.fsHigh = bits(status, 4); - fsr.domain = domain; - fsr.wnr = (write ? 1 : 0); - fsr.ext = 0; - tc->setMiscReg(T::FsrIndex, fsr); - tc->setMiscReg(T::FarIndex, faultAddr); - DPRINTF(Faults, "Abort Fault fsr=%#x faultAddr=%#x\n", fsr, faultAddr); + if (((CPSR) tc->readMiscRegNoEffect(MISCREG_CPSR)).width) { + // AArch32 + assert(tranMethod != ArmFault::UnknownTran); + if (tranMethod == ArmFault::LpaeTran) { + srcEncoded = ArmFault::longDescFaultSources[source]; + fsr.status = srcEncoded; + fsr.lpae = 1; + } else { + srcEncoded = ArmFault::shortDescFaultSources[source]; + fsr.fsLow = bits(srcEncoded, 3, 0); + fsr.fsHigh = bits(srcEncoded, 4); + fsr.domain = static_cast<uint8_t>(domain); + } + fsr.wnr = (write ? 1 : 0); + fsr.ext = 0; + } else { + // AArch64 + srcEncoded = ArmFault::aarch64FaultSources[source]; + } + if (srcEncoded == ArmFault::FaultSourceInvalid) { + panic("Invalid fault source\n"); + } + return fsr; +} + +template<class T> +bool +AbortFault<T>::abortDisable(ThreadContext *tc) +{ + if (ArmSystem::haveSecurity(tc)) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return (!scr.ns || scr.aw); + } + return true; +} + +template<class T> +void +AbortFault<T>::annotate(ArmFault::AnnotationIDs id, uint64_t val) +{ + switch (id) + { + case ArmFault::S1PTW: + s1ptw = val; + break; + case ArmFault::OVA: + OVAddr = val; + break; + + // Just ignore unknown ID's + default: + break; + } +} + +template<class T> +uint32_t +AbortFault<T>::iss() const +{ + uint32_t val; + + val = srcEncoded & 0x3F; + val |= write << 6; + val |= s1ptw << 7; + return (val); +} + +template<class T> +bool +AbortFault<T>::isMMUFault() const +{ + // NOTE: Not relying on LL information being aligned to lowest bits here + return + (source == ArmFault::AlignmentFault) || + ((source >= ArmFault::TranslationLL) && + (source < ArmFault::TranslationLL + 4)) || + ((source >= ArmFault::AccessFlagLL) && + (source < ArmFault::AccessFlagLL + 4)) || + ((source >= ArmFault::DomainLL) && + (source < ArmFault::DomainLL + 4)) || + ((source >= ArmFault::PermissionLL) && + (source < ArmFault::PermissionLL + 4)); +} + +ExceptionClass +PrefetchAbort::ec(ThreadContext *tc) const +{ + if (to64) { + // AArch64 + if (toEL == fromEL) + return EC_PREFETCH_ABORT_CURR_EL; + else + return EC_PREFETCH_ABORT_LOWER_EL; + } else { + // AArch32 + // Abort faults have different EC codes depending on whether + // the fault originated within HYP mode, or not. So override + // the method and add the extra adjustment of the EC value. + + ExceptionClass ec = ArmFaultVals<PrefetchAbort>::vals.ec; + + CPSR spsr = tc->readMiscReg(MISCREG_SPSR_HYP); + if (spsr.mode == MODE_HYP) { + ec = ((ExceptionClass) (((uint32_t) ec) + 1)); + } + return ec; + } +} + +bool +PrefetchAbort::routeToMonitor(ThreadContext *tc) const +{ + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + + return scr.ea && !isMMUFault(); +} + +bool +PrefetchAbort::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + HDCR hdcr = tc->readMiscRegNoEffect(MISCREG_HDCR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // otherwise, check whether to take to Hyp mode through Hyp Trap vector + toHyp |= (stage2 || + ( (source == DebugEvent) && hdcr.tde && (cpsr.mode != MODE_HYP)) || + ( (source == SynchronousExternalAbort) && hcr.tge && (cpsr.mode == MODE_USER)) + ) && !inSecureState(scr, cpsr); + return toHyp; +} + +ExceptionClass +DataAbort::ec(ThreadContext *tc) const +{ + if (to64) { + // AArch64 + if (source == ArmFault::AsynchronousExternalAbort) { + panic("Asynchronous External Abort should be handled with \ + SystemErrors (SErrors)!"); + } + if (toEL == fromEL) + return EC_DATA_ABORT_CURR_EL; + else + return EC_DATA_ABORT_LOWER_EL; + } else { + // AArch32 + // Abort faults have different EC codes depending on whether + // the fault originated within HYP mode, or not. So override + // the method and add the extra adjustment of the EC value. + + ExceptionClass ec = ArmFaultVals<DataAbort>::vals.ec; + + CPSR spsr = tc->readMiscReg(MISCREG_SPSR_HYP); + if (spsr.mode == MODE_HYP) { + ec = ((ExceptionClass) (((uint32_t) ec) + 1)); + } + return ec; + } +} + +bool +DataAbort::routeToMonitor(ThreadContext *tc) const +{ + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + + return scr.ea && !isMMUFault(); +} + +bool +DataAbort::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + HDCR hdcr = tc->readMiscRegNoEffect(MISCREG_HDCR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // otherwise, check whether to take to Hyp mode through Hyp Trap vector + toHyp |= (stage2 || + ( (cpsr.mode != MODE_HYP) && ( ((source == AsynchronousExternalAbort) && hcr.amo) || + ((source == DebugEvent) && hdcr.tde) ) + ) || + ( (cpsr.mode == MODE_USER) && hcr.tge && + ((source == AlignmentFault) || + (source == SynchronousExternalAbort)) + ) + ) && !inSecureState(scr, cpsr); + return toHyp; +} + +uint32_t +DataAbort::iss() const +{ + uint32_t val; + + // Add on the data abort specific fields to the generic abort ISS value + val = AbortFault<DataAbort>::iss(); + // ISS is valid if not caused by a stage 1 page table walk, and when taken + // to AArch64 only when directed to EL2 + if (!s1ptw && (!to64 || toEL == EL2)) { + val |= isv << 24; + if (isv) { + val |= sas << 22; + val |= sse << 21; + val |= srt << 16; + // AArch64 only. These assignments are safe on AArch32 as well + // because these vars are initialized to false + val |= sf << 15; + val |= ar << 14; + } + } + return (val); +} + +void +DataAbort::annotate(AnnotationIDs id, uint64_t val) +{ + AbortFault<DataAbort>::annotate(id, val); + switch (id) + { + case SAS: + isv = true; + sas = val; + break; + case SSE: + isv = true; + sse = val; + break; + case SRT: + isv = true; + srt = val; + break; + case SF: + isv = true; + sf = val; + break; + case AR: + isv = true; + ar = val; + break; + // Just ignore unknown ID's + default: + break; + } +} + +void +VirtualDataAbort::invoke(ThreadContext *tc, StaticInstPtr inst) +{ + AbortFault<VirtualDataAbort>::invoke(tc, inst); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + hcr.va = 0; + tc->setMiscRegNoEffect(MISCREG_HCR, hcr); +} + +bool +Interrupt::routeToMonitor(ThreadContext *tc) const +{ + assert(ArmSystem::haveSecurity(tc)); + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return scr.irq; +} + +bool +Interrupt::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + // Determine whether IRQs are routed to Hyp mode. + toHyp = (!scr.irq && hcr.imo && !inSecureState(scr, cpsr)) || + (cpsr.mode == MODE_HYP); + return toHyp; +} + +bool +Interrupt::abortDisable(ThreadContext *tc) +{ + if (ArmSystem::haveSecurity(tc)) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return (!scr.ns || scr.aw); + } + return true; +} + +VirtualInterrupt::VirtualInterrupt() +{} + +bool +FastInterrupt::routeToMonitor(ThreadContext *tc) const +{ + assert(ArmSystem::haveSecurity(tc)); + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return scr.fiq; +} + +bool +FastInterrupt::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + // Determine whether IRQs are routed to Hyp mode. + toHyp = (!scr.fiq && hcr.fmo && !inSecureState(scr, cpsr)) || + (cpsr.mode == MODE_HYP); + return toHyp; +} + +bool +FastInterrupt::abortDisable(ThreadContext *tc) +{ + if (ArmSystem::haveSecurity(tc)) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return (!scr.ns || scr.aw); + } + return true; +} + +bool +FastInterrupt::fiqDisable(ThreadContext *tc) +{ + if (ArmSystem::haveVirtualization(tc)) { + return true; + } else if (ArmSystem::haveSecurity(tc)) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return (!scr.ns || scr.fw); + } + return true; +} + +VirtualFastInterrupt::VirtualFastInterrupt() +{} + +void +PCAlignmentFault::invoke(ThreadContext *tc, StaticInstPtr inst) +{ + ArmFaultVals<PCAlignmentFault>::invoke(tc, inst); + assert(from64); + // Set the FAR + tc->setMiscReg(getFaultAddrReg64(), faultPC); +} + +SPAlignmentFault::SPAlignmentFault() +{} + +SystemError::SystemError() +{} + +void +SystemError::invoke(ThreadContext *tc, StaticInstPtr inst) +{ + tc->getCpuPtr()->clearInterrupt(INT_ABT, 0); + ArmFault::invoke(tc, inst); +} + +bool +SystemError::routeToMonitor(ThreadContext *tc) const +{ + assert(ArmSystem::haveSecurity(tc)); + assert(from64); + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + return scr.ea; +} + +bool +SystemError::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + assert(from64); + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + + toHyp = (!scr.ea && hcr.amo && !inSecureState(scr, cpsr)) || + (!scr.ea && !scr.rw && !hcr.amo && !inSecureState(scr,cpsr)); + return toHyp; } void @@ -247,11 +1394,6 @@ FlushPipe::invoke(ThreadContext *tc, StaticInstPtr inst) { tc->pcState(pc); } -template void AbortFault<PrefetchAbort>::invoke(ThreadContext *tc, - StaticInstPtr inst); -template void AbortFault<DataAbort>::invoke(ThreadContext *tc, - StaticInstPtr inst); - void ArmSev::invoke(ThreadContext *tc, StaticInstPtr inst) { DPRINTF(Faults, "Invoking ArmSev Fault\n"); @@ -265,6 +1407,34 @@ ArmSev::invoke(ThreadContext *tc, StaticInstPtr inst) { tc->getCpuPtr()->clearInterrupt(INT_SEV, 0); } -// return via SUBS pc, lr, xxx; rfe, movs, ldm +// Instantiate all the templates to make the linker happy +template class ArmFaultVals<Reset>; +template class ArmFaultVals<UndefinedInstruction>; +template class ArmFaultVals<SupervisorCall>; +template class ArmFaultVals<SecureMonitorCall>; +template class ArmFaultVals<HypervisorCall>; +template class ArmFaultVals<PrefetchAbort>; +template class ArmFaultVals<DataAbort>; +template class ArmFaultVals<VirtualDataAbort>; +template class ArmFaultVals<HypervisorTrap>; +template class ArmFaultVals<Interrupt>; +template class ArmFaultVals<VirtualInterrupt>; +template class ArmFaultVals<FastInterrupt>; +template class ArmFaultVals<VirtualFastInterrupt>; +template class ArmFaultVals<SupervisorTrap>; +template class ArmFaultVals<SecureMonitorTrap>; +template class ArmFaultVals<PCAlignmentFault>; +template class ArmFaultVals<SPAlignmentFault>; +template class ArmFaultVals<SystemError>; +template class ArmFaultVals<FlushPipe>; +template class ArmFaultVals<ArmSev>; +template class AbortFault<PrefetchAbort>; +template class AbortFault<DataAbort>; +template class AbortFault<VirtualDataAbort>; + + +IllegalInstSetStateFault::IllegalInstSetStateFault() +{} + } // namespace ArmISA diff --git a/src/arch/arm/faults.hh b/src/arch/arm/faults.hh index 9858e52ef..a5720f115 100644 --- a/src/arch/arm/faults.hh +++ b/src/arch/arm/faults.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -40,12 +40,15 @@ * * Authors: Ali Saidi * Gabe Black + * Giacomo Gabrielli + * Thomas Grocutt */ #ifndef __ARM_FAULTS_HH__ #define __ARM_FAULTS_HH__ #include "arch/arm/miscregs.hh" +#include "arch/arm/pagetable.hh" #include "arch/arm/types.hh" #include "base/misc.hh" #include "sim/faults.hh" @@ -60,63 +63,146 @@ typedef const Addr FaultOffset; class ArmFault : public FaultBase { protected: + ExtMachInst machInst; + uint32_t issRaw; + + // Helper variables for ARMv8 exception handling + bool from64; // True if the exception is generated from the AArch64 state + bool to64; // True if the exception is taken in AArch64 state + ExceptionLevel fromEL; // Source exception level + ExceptionLevel toEL; // Target exception level + OperatingMode fromMode; // Source operating mode + Addr getVector(ThreadContext *tc); + Addr getVector64(ThreadContext *tc); public: - enum StatusEncoding + /// Generic fault source enums used to index into + /// {short/long/aarch64}DescFaultSources[] to get the actual encodings based + /// on the current register width state and the translation table format in + /// use + enum FaultSource { - // Fault Status register encodings - // ARM ARM B3.9.4 - AlignmentFault = 0x1, - DebugEvent = 0x2, - AccessFlag0 = 0x3, - InstructionCacheMaintenance = 0x4, - Translation0 = 0x5, - AccessFlag1 = 0x6, - Translation1 = 0x7, - SynchronousExternalAbort0 = 0x8, - Domain0 = 0x9, - SynchronousExternalAbort1 = 0x8, - Domain1 = 0xb, - TranslationTableWalkExtAbt0 = 0xc, - Permission0 = 0xd, - TranslationTableWalkExtAbt1 = 0xe, - Permission1 = 0xf, - AsynchronousExternalAbort = 0x16, - MemoryAccessAsynchronousParityError = 0x18, - MemoryAccessSynchronousParityError = 0x19, - TranslationTableWalkPrtyErr0 = 0x1c, - TranslationTableWalkPrtyErr1 = 0x1e, - - // not a real fault. This is a status code - // to allow the translation function to inform - // the memory access function not to proceed - // for a Prefetch that misses in the TLB. - PrefetchTLBMiss = 0x1f, - PrefetchUncacheable = 0x20 + AlignmentFault = 0, + InstructionCacheMaintenance, // Short-desc. format only + SynchExtAbtOnTranslTableWalkLL, + SynchPtyErrOnTranslTableWalkLL = SynchExtAbtOnTranslTableWalkLL + 4, + TranslationLL = SynchPtyErrOnTranslTableWalkLL + 4, + AccessFlagLL = TranslationLL + 4, + DomainLL = AccessFlagLL + 4, + PermissionLL = DomainLL + 4, + DebugEvent = PermissionLL + 4, + SynchronousExternalAbort, + TLBConflictAbort, // Requires LPAE + SynchPtyErrOnMemoryAccess, + AsynchronousExternalAbort, + AsynchPtyErrOnMemoryAccess, + AddressSizeLL, // AArch64 only + + // Not real faults. These are faults to allow the translation function + // to inform the memory access function not to proceed for a prefetch + // that misses in the TLB or that targets an uncacheable address + PrefetchTLBMiss = AddressSizeLL + 4, + PrefetchUncacheable, + + NumFaultSources, + FaultSourceInvalid = 0xff + }; + + /// Encodings of the fault sources when the short-desc. translation table + /// format is in use (ARM ARM Issue C B3.13.3) + static uint8_t shortDescFaultSources[NumFaultSources]; + /// Encodings of the fault sources when the long-desc. translation table + /// format is in use (ARM ARM Issue C B3.13.3) + static uint8_t longDescFaultSources[NumFaultSources]; + /// Encodings of the fault sources in AArch64 state + static uint8_t aarch64FaultSources[NumFaultSources]; + + enum AnnotationIDs + { + S1PTW, // DataAbort, PrefetchAbort: Stage 1 Page Table Walk, + OVA, // DataAbort, PrefetchAbort: stage 1 Virtual Address for stage 2 faults + SAS, // DataAbort: Syndrome Access Size + SSE, // DataAbort: Syndrome Sign Extend + SRT, // DataAbort: Syndrome Register Transfer + + // AArch64 only + SF, // DataAbort: width of the accessed register is SixtyFour + AR // DataAbort: Acquire/Release semantics + }; + + enum TranMethod + { + LpaeTran, + VmsaTran, + UnknownTran }; struct FaultVals { const FaultName name; + const FaultOffset offset; + + // Offsets used for exceptions taken in AArch64 state + const uint16_t currELTOffset; + const uint16_t currELHOffset; + const uint16_t lowerEL64Offset; + const uint16_t lowerEL32Offset; + const OperatingMode nextMode; + const uint8_t armPcOffset; const uint8_t thumbPcOffset; + // The following two values are used in place of armPcOffset and + // thumbPcOffset when the exception return address is saved into ELR + // registers (exceptions taken in HYP mode or in AArch64 state) + const uint8_t armPcElrOffset; + const uint8_t thumbPcElrOffset; + + const bool hypTrappable; const bool abortDisable; const bool fiqDisable; + + // Exception class used to appropriately set the syndrome register + // (exceptions taken in HYP mode or in AArch64 state) + const ExceptionClass ec; + FaultStat count; }; + ArmFault(ExtMachInst _machInst = 0, uint32_t _iss = 0) : + machInst(_machInst), issRaw(_iss), from64(false), to64(false) {} + + // Returns the actual syndrome register to use based on the target + // exception level + MiscRegIndex getSyndromeReg64() const; + // Returns the actual fault address register to use based on the target + // exception level + MiscRegIndex getFaultAddrReg64() const; + void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + void invoke64(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + virtual void annotate(AnnotationIDs id, uint64_t val) {} virtual FaultStat& countStat() = 0; - virtual FaultOffset offset() = 0; + virtual FaultOffset offset(ThreadContext *tc) = 0; + virtual FaultOffset offset64() = 0; virtual OperatingMode nextMode() = 0; - virtual uint8_t armPcOffset() = 0; - virtual uint8_t thumbPcOffset() = 0; - virtual bool abortDisable() = 0; - virtual bool fiqDisable() = 0; + virtual bool routeToMonitor(ThreadContext *tc) const = 0; + virtual bool routeToHyp(ThreadContext *tc) const { return false; } + virtual uint8_t armPcOffset(bool isHyp) = 0; + virtual uint8_t thumbPcOffset(bool isHyp) = 0; + virtual uint8_t armPcElrOffset() = 0; + virtual uint8_t thumbPcElrOffset() = 0; + virtual bool abortDisable(ThreadContext *tc) = 0; + virtual bool fiqDisable(ThreadContext *tc) = 0; + virtual ExceptionClass ec(ThreadContext *tc) const = 0; + virtual uint32_t iss() const = 0; + virtual bool isStage2() const { return false; } + virtual FSR getFsr(ThreadContext *tc) { return 0; } + virtual void setSyndrome(ThreadContext *tc, MiscRegIndex syndrome_reg); }; template<typename T> @@ -126,14 +212,38 @@ class ArmFaultVals : public ArmFault static FaultVals vals; public: + ArmFaultVals<T>(ExtMachInst _machInst = 0, uint32_t _iss = 0) : + ArmFault(_machInst, _iss) {} FaultName name() const { return vals.name; } - FaultStat & countStat() {return vals.count;} - FaultOffset offset() { return vals.offset; } + FaultStat & countStat() { return vals.count; } + FaultOffset offset(ThreadContext *tc); + + FaultOffset + offset64() + { + if (toEL == fromEL) { + if (opModeIsT(fromMode)) + return vals.currELTOffset; + return vals.currELHOffset; + } else { + if (from64) + return vals.lowerEL64Offset; + return vals.lowerEL32Offset; + } + } + OperatingMode nextMode() { return vals.nextMode; } - uint8_t armPcOffset() { return vals.armPcOffset; } - uint8_t thumbPcOffset() { return vals.thumbPcOffset; } - bool abortDisable() { return vals.abortDisable; } - bool fiqDisable() { return vals.fiqDisable; } + virtual bool routeToMonitor(ThreadContext *tc) const { return false; } + uint8_t armPcOffset(bool isHyp) { return isHyp ? vals.armPcElrOffset + : vals.armPcOffset; } + uint8_t thumbPcOffset(bool isHyp) { return isHyp ? vals.thumbPcElrOffset + : vals.thumbPcOffset; } + uint8_t armPcElrOffset() { return vals.armPcElrOffset; } + uint8_t thumbPcElrOffset() { return vals.thumbPcElrOffset; } + virtual bool abortDisable(ThreadContext* tc) { return vals.abortDisable; } + virtual bool fiqDisable(ThreadContext* tc) { return vals.fiqDisable; } + virtual ExceptionClass ec(ThreadContext *tc) const { return vals.ec; } + virtual uint32_t iss() const { return issRaw; } }; class Reset : public ArmFaultVals<Reset> @@ -146,87 +256,283 @@ class Reset : public ArmFaultVals<Reset> class UndefinedInstruction : public ArmFaultVals<UndefinedInstruction> { protected: - ExtMachInst machInst; bool unknown; const char *mnemonic; bool disabled; + ExceptionClass overrideEc; public: UndefinedInstruction(ExtMachInst _machInst, bool _unknown, const char *_mnemonic = NULL, bool _disabled = false) : - machInst(_machInst), unknown(_unknown), - mnemonic(_mnemonic), disabled(_disabled) - { - } - UndefinedInstruction() : - machInst(0), unknown(false), mnemonic("undefined"), disabled(false) + ArmFaultVals<UndefinedInstruction>(_machInst), + unknown(_unknown), mnemonic(_mnemonic), disabled(_disabled), + overrideEc(EC_INVALID) + {} + UndefinedInstruction(ExtMachInst _machInst, uint32_t _iss, ExceptionClass _overrideEc) : + ArmFaultVals<UndefinedInstruction>(_machInst, _iss), + overrideEc(_overrideEc) {} void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + bool routeToHyp(ThreadContext *tc) const; + ExceptionClass ec(ThreadContext *tc) const; + uint32_t iss() const; }; class SupervisorCall : public ArmFaultVals<SupervisorCall> { protected: - ExtMachInst machInst; - + ExceptionClass overrideEc; public: - SupervisorCall(ExtMachInst _machInst) : machInst(_machInst) + SupervisorCall(ExtMachInst _machInst, uint32_t _iss, + ExceptionClass _overrideEc = EC_INVALID) : + ArmFaultVals<SupervisorCall>(_machInst, _iss), + overrideEc(_overrideEc) {} - SupervisorCall() : machInst(0) + + void invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + bool routeToHyp(ThreadContext *tc) const; + ExceptionClass ec(ThreadContext *tc) const; + uint32_t iss() const; +}; + +class SecureMonitorCall : public ArmFaultVals<SecureMonitorCall> +{ + public: + SecureMonitorCall(ExtMachInst _machInst) : + ArmFaultVals<SecureMonitorCall>(_machInst) {} void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + ExceptionClass ec(ThreadContext *tc) const; + uint32_t iss() const; +}; + +class SupervisorTrap : public ArmFaultVals<SupervisorTrap> +{ + protected: + ExtMachInst machInst; + ExceptionClass overrideEc; + + public: + SupervisorTrap(ExtMachInst _machInst, uint32_t _iss, + ExceptionClass _overrideEc = EC_INVALID) : + ArmFaultVals<SupervisorTrap>(_machInst, _iss), + overrideEc(_overrideEc) + {} + + ExceptionClass ec(ThreadContext *tc) const; +}; + +class SecureMonitorTrap : public ArmFaultVals<SecureMonitorTrap> +{ + protected: + ExtMachInst machInst; + ExceptionClass overrideEc; + + public: + SecureMonitorTrap(ExtMachInst _machInst, uint32_t _iss, + ExceptionClass _overrideEc = EC_INVALID) : + ArmFaultVals<SecureMonitorTrap>(_machInst, _iss), + overrideEc(_overrideEc) + {} + + ExceptionClass ec(ThreadContext *tc) const; +}; + +class HypervisorCall : public ArmFaultVals<HypervisorCall> +{ + public: + HypervisorCall(ExtMachInst _machInst, uint32_t _imm); +}; + +class HypervisorTrap : public ArmFaultVals<HypervisorTrap> +{ + protected: + ExtMachInst machInst; + ExceptionClass overrideEc; + + public: + HypervisorTrap(ExtMachInst _machInst, uint32_t _iss, + ExceptionClass _overrideEc = EC_INVALID) : + ArmFaultVals<HypervisorTrap>(_machInst, _iss), + overrideEc(_overrideEc) + {} + + ExceptionClass ec(ThreadContext *tc) const; }; template <class T> class AbortFault : public ArmFaultVals<T> { protected: + /** + * The virtual address the fault occured at. If 2 stages of + * translation are being used then this is the intermediate + * physical address that is the starting point for the second + * stage of translation. + */ Addr faultAddr; + /** + * Original virtual address. If the fault was generated on the + * second stage of translation then this variable stores the + * virtual address used in the original stage 1 translation. + */ + Addr OVAddr; bool write; - uint8_t domain; - uint8_t status; + TlbEntry::DomainType domain; + uint8_t source; + uint8_t srcEncoded; + bool stage2; + bool s1ptw; + ArmFault::TranMethod tranMethod; public: - AbortFault(Addr _faultAddr, bool _write, - uint8_t _domain, uint8_t _status) : - faultAddr(_faultAddr), write(_write), - domain(_domain), status(_status) + AbortFault(Addr _faultAddr, bool _write, TlbEntry::DomainType _domain, uint8_t _source, + bool _stage2, ArmFault::TranMethod _tranMethod = ArmFault::UnknownTran) : + faultAddr(_faultAddr), write(_write), domain(_domain), source(_source), + stage2(_stage2), s1ptw(false), tranMethod(_tranMethod) {} void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + + FSR getFsr(ThreadContext *tc); + bool abortDisable(ThreadContext *tc); + uint32_t iss() const; + bool isStage2() const { return stage2; } + void annotate(ArmFault::AnnotationIDs id, uint64_t val); + bool isMMUFault() const; }; class PrefetchAbort : public AbortFault<PrefetchAbort> { public: - static const MiscRegIndex FsrIndex = MISCREG_IFSR; - static const MiscRegIndex FarIndex = MISCREG_IFAR; + static const MiscRegIndex FsrIndex = MISCREG_IFSR; + static const MiscRegIndex FarIndex = MISCREG_IFAR; + static const MiscRegIndex HFarIndex = MISCREG_HIFAR; - PrefetchAbort(Addr _addr, uint8_t _status) : - AbortFault<PrefetchAbort>(_addr, false, 0, _status) + PrefetchAbort(Addr _addr, uint8_t _source, bool _stage2 = false, + ArmFault::TranMethod _tranMethod = ArmFault::UnknownTran) : + AbortFault<PrefetchAbort>(_addr, false, TlbEntry::DomainType::NoAccess, + _source, _stage2, _tranMethod) {} + + ExceptionClass ec(ThreadContext *tc) const; + // @todo: external aborts should be routed if SCR.EA == 1 + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; }; class DataAbort : public AbortFault<DataAbort> { public: - static const MiscRegIndex FsrIndex = MISCREG_DFSR; - static const MiscRegIndex FarIndex = MISCREG_DFAR; + static const MiscRegIndex FsrIndex = MISCREG_DFSR; + static const MiscRegIndex FarIndex = MISCREG_DFAR; + static const MiscRegIndex HFarIndex = MISCREG_HDFAR; + bool isv; + uint8_t sas; + uint8_t sse; + uint8_t srt; + + // AArch64 only + bool sf; + bool ar; + + DataAbort(Addr _addr, TlbEntry::DomainType _domain, bool _write, uint8_t _source, + bool _stage2 = false, ArmFault::TranMethod _tranMethod = ArmFault::UnknownTran) : + AbortFault<DataAbort>(_addr, _write, _domain, _source, _stage2, + _tranMethod), + isv(false), sas (0), sse(0), srt(0), sf(false), ar(false) + {} + + ExceptionClass ec(ThreadContext *tc) const; + // @todo: external aborts should be routed if SCR.EA == 1 + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; + uint32_t iss() const; + void annotate(AnnotationIDs id, uint64_t val); +}; + +class VirtualDataAbort : public AbortFault<VirtualDataAbort> +{ + public: + static const MiscRegIndex FsrIndex = MISCREG_DFSR; + static const MiscRegIndex FarIndex = MISCREG_DFAR; + static const MiscRegIndex HFarIndex = MISCREG_HDFAR; - DataAbort(Addr _addr, uint8_t _domain, bool _write, uint8_t _status) : - AbortFault<DataAbort>(_addr, _write, _domain, _status) + VirtualDataAbort(Addr _addr, TlbEntry::DomainType _domain, bool _write, + uint8_t _source) : + AbortFault<VirtualDataAbort>(_addr, _write, _domain, _source, false) {} + + void invoke(ThreadContext *tc, StaticInstPtr inst); }; -class Interrupt : public ArmFaultVals<Interrupt> {}; -class FastInterrupt : public ArmFaultVals<FastInterrupt> {}; +class Interrupt : public ArmFaultVals<Interrupt> +{ + public: + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; + bool abortDisable(ThreadContext *tc); +}; + +class VirtualInterrupt : public ArmFaultVals<VirtualInterrupt> +{ + public: + VirtualInterrupt(); +}; + +class FastInterrupt : public ArmFaultVals<FastInterrupt> +{ + public: + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; + bool abortDisable(ThreadContext *tc); + bool fiqDisable(ThreadContext *tc); +}; + +class VirtualFastInterrupt : public ArmFaultVals<VirtualFastInterrupt> +{ + public: + VirtualFastInterrupt(); +}; + +/// PC alignment fault (AArch64 only) +class PCAlignmentFault : public ArmFaultVals<PCAlignmentFault> +{ + protected: + /// The unaligned value of the PC + Addr faultPC; + public: + PCAlignmentFault(Addr _faultPC) : faultPC(_faultPC) + {} + void invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); +}; + +/// Stack pointer alignment fault (AArch64 only) +class SPAlignmentFault : public ArmFaultVals<SPAlignmentFault> +{ + public: + SPAlignmentFault(); +}; + +/// System error (AArch64 only) +class SystemError : public ArmFaultVals<SystemError> +{ + public: + SystemError(); + void invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; +}; // A fault that flushes the pipe, excluding the faulting instructions class FlushPipe : public ArmFaultVals<FlushPipe> @@ -246,6 +552,13 @@ class ArmSev : public ArmFaultVals<ArmSev> StaticInstPtr inst = StaticInst::nullStaticInstPtr); }; +/// Illegal Instruction Set State fault (AArch64 only) +class IllegalInstSetStateFault : public ArmFaultVals<IllegalInstSetStateFault> +{ + public: + IllegalInstSetStateFault(); +}; + } // namespace ArmISA #endif // __ARM_FAULTS_HH__ diff --git a/src/arch/arm/insts/branch64.cc b/src/arch/arm/insts/branch64.cc new file mode 100644 index 000000000..49ba3402a --- /dev/null +++ b/src/arch/arm/insts/branch64.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/branch64.hh" + +namespace ArmISA +{ + +ArmISA::PCState +BranchImm64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm); + pcs.advance(); + return pcs; +} + +ArmISA::PCState +BranchImmReg64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm); + pcs.advance(); + return pcs; +} + +ArmISA::PCState +BranchImmImmReg64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm2); + pcs.advance(); + return pcs; +} + +std::string +BranchImmCond64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false, true, condCode); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchImm64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + return ss.str(); +} + +std::string +BranchRet64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + if (op1 != INTREG_X30) + printReg(ss, op1); + return ss.str(); +} + +std::string +BranchEret64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + return ss.str(); +} + +std::string +BranchImmReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchImmImmReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", #%#x, ", imm1); + printTarget(ss, pc + imm2, symtab); + return ss.str(); +} + +} // namespace ArmISA diff --git a/src/arch/arm/insts/branch64.hh b/src/arch/arm/insts/branch64.hh new file mode 100644 index 000000000..48881e0c2 --- /dev/null +++ b/src/arch/arm/insts/branch64.hh @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_INSTS_BRANCH64_HH__ +#define __ARCH_ARM_INSTS_BRANCH64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +namespace ArmISA +{ +// Branch to a target computed with an immediate +class BranchImm64 : public ArmStaticInst +{ + protected: + int64_t imm; + + public: + BranchImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), imm(_imm) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Conditionally Branch to a target computed with an immediate +class BranchImmCond64 : public BranchImm64 +{ + protected: + ConditionCode condCode; + + public: + BranchImmCond64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm, ConditionCode _condCode) : + BranchImm64(mnem, _machInst, __opClass, _imm), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with a register +class BranchReg64 : public ArmStaticInst +{ + protected: + IntRegIndex op1; + + public: + BranchReg64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), op1(_op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Ret instruction +class BranchRet64 : public BranchReg64 +{ + public: + BranchRet64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1) : + BranchReg64(mnem, _machInst, __opClass, _op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Eret instruction +class BranchEret64 : public ArmStaticInst +{ + public: + BranchEret64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : + ArmStaticInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with an immediate and a register +class BranchImmReg64 : public ArmStaticInst +{ + protected: + int64_t imm; + IntRegIndex op1; + + public: + BranchImmReg64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm, IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), imm(_imm), op1(_op1) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with two immediates +class BranchImmImmReg64 : public ArmStaticInst +{ + protected: + int64_t imm1; + int64_t imm2; + IntRegIndex op1; + + public: + BranchImmImmReg64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, int64_t _imm1, int64_t _imm2, + IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), + imm1(_imm1), imm2(_imm2), op1(_op1) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +} + +#endif //__ARCH_ARM_INSTS_BRANCH_HH__ diff --git a/src/arch/arm/insts/data64.cc b/src/arch/arm/insts/data64.cc new file mode 100644 index 000000000..f65219870 --- /dev/null +++ b/src/arch/arm/insts/data64.cc @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/data64.hh" + +namespace ArmISA +{ + +std::string +DataXImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, true, false, /*XXX not really s*/ false, dest, op1, + INTREG_ZERO, INTREG_ZERO, 0, LSL, imm); + return ss.str(); +} + +std::string +DataXImmOnlyOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataXSRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, false, true, /*XXX not really s*/ false, dest, op1, + op2, INTREG_ZERO, shiftAmt, shiftType, 0); + return ss.str(); +} + +std::string +DataXERegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, false, true, /*XXX not really s*/ false, dest, op1, + op2, INTREG_ZERO, shiftAmt, LSL, 0); + return ss.str(); +} + +std::string +DataX1RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + return ss.str(); +} + +std::string +DataX1RegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataX1Reg2ImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + +std::string +DataX2RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + return ss.str(); +} + +std::string +DataX2RegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataX3RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printReg(ss, op3); + return ss.str(); +} + +std::string +DataXCondCompImmOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm, defCc); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +DataXCondCompRegOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", defCc); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +DataXCondSelOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +} diff --git a/src/arch/arm/insts/data64.hh b/src/arch/arm/insts/data64.hh new file mode 100644 index 000000000..8c0677b3d --- /dev/null +++ b/src/arch/arm/insts/data64.hh @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_INSTS_DATA64_HH__ +#define __ARCH_ARM_INSTS_DATA64_HH__ + +#include "arch/arm/insts/static_inst.hh" +#include "base/trace.hh" + +namespace ArmISA +{ + +class DataXImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm; + + DataXImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXImmOnlyOp : public ArmStaticInst +{ + protected: + IntRegIndex dest; + uint64_t imm; + + DataXImmOnlyOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXSRegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + int32_t shiftAmt; + ArmShiftType shiftType; + + DataXSRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + int32_t _shiftAmt, ArmShiftType _shiftType) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), + shiftAmt(_shiftAmt), shiftType(_shiftType) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXERegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + ArmExtendType extendType; + int32_t shiftAmt; + + DataXERegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ArmExtendType _extendType, int32_t _shiftAmt) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), + extendType(_extendType), shiftAmt(_shiftAmt) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + + DataX1RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1RegImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm; + + DataX1RegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1), + imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1Reg2ImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm1, imm2; + + DataX1Reg2ImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm1, + uint64_t _imm2) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1), + imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX2RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + + DataX2RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX2RegImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + uint64_t imm; + + DataX2RegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX3RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2, op3; + + DataX3RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _op3) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), op3(_op3) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondCompImmOp : public ArmStaticInst +{ + protected: + IntRegIndex op1; + uint64_t imm; + ConditionCode condCode; + uint8_t defCc; + + DataXCondCompImmOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, uint64_t _imm, + ConditionCode _condCode, uint8_t _defCc) : + ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1), imm(_imm), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondCompRegOp : public ArmStaticInst +{ + protected: + IntRegIndex op1, op2; + ConditionCode condCode; + uint8_t defCc; + + DataXCondCompRegOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode, uint8_t _defCc) : + ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondSelOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + ConditionCode condCode; + + DataXCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +} + +#endif //__ARCH_ARM_INSTS_PREDINST_HH__ diff --git a/src/arch/arm/insts/fplib.cc b/src/arch/arm/insts/fplib.cc new file mode 100644 index 000000000..1f44eed09 --- /dev/null +++ b/src/arch/arm/insts/fplib.cc @@ -0,0 +1,3086 @@ +/* +* Copyright (c) 2012-2013 ARM Limited +* All rights reserved +* +* The license below extends only to copyright in the software and shall +* not be construed as granting a license to any other intellectual +* property including but not limited to intellectual property relating +* to a hardware implementation of the functionality of the software +* licensed hereunder. You may use the software subject to the license +* terms below provided that you ensure that this notice is replicated +* unmodified and in its entirety in all distributions of the software, +* modified or unmodified, in source code or in binary form. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer; +* redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution; +* neither the name of the copyright holders nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Authors: Edmund Grimley Evans +* Thomas Grocutt +*/ + +#include <stdint.h> + +#include <cassert> + +#include "fplib.hh" + +namespace ArmISA +{ + +#define FPLIB_RN 0 +#define FPLIB_RP 1 +#define FPLIB_RM 2 +#define FPLIB_RZ 3 +#define FPLIB_FZ 4 +#define FPLIB_DN 8 +#define FPLIB_AHP 16 + +#define FPLIB_IDC 128 // Input Denormal +#define FPLIB_IXC 16 // Inexact +#define FPLIB_UFC 8 // Underflow +#define FPLIB_OFC 4 // Overflow +#define FPLIB_DZC 2 // Division by Zero +#define FPLIB_IOC 1 // Invalid Operation + +static inline uint16_t +lsl16(uint16_t x, uint32_t shift) +{ + return shift < 16 ? x << shift : 0; +} + +static inline uint16_t +lsr16(uint16_t x, uint32_t shift) +{ + return shift < 16 ? x >> shift : 0; +} + +static inline uint32_t +lsl32(uint32_t x, uint32_t shift) +{ + return shift < 32 ? x << shift : 0; +} + +static inline uint32_t +lsr32(uint32_t x, uint32_t shift) +{ + return shift < 32 ? x >> shift : 0; +} + +static inline uint64_t +lsl64(uint64_t x, uint32_t shift) +{ + return shift < 64 ? x << shift : 0; +} + +static inline uint64_t +lsr64(uint64_t x, uint32_t shift) +{ + return shift < 64 ? x >> shift : 0; +} + +static inline void +lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) +{ + if (shift < 64) { + *r1 = x1 << shift | x0 >> (64 - shift); + *r0 = x0 << shift; + } else if (shift < 128) { + *r1 = x0 << (shift - 64); + *r0 = 0; + } else { + *r1 = 0; + *r0 = 0; + } +} + +static inline void +lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) +{ + if (shift < 64) { + *r0 = x0 >> shift | x1 << (64 - shift); + *r1 = x1 >> shift; + } else if (shift < 128) { + *r0 = x1 >> (shift - 64); + *r1 = 0; + } else { + *r0 = 0; + *r1 = 0; + } +} + +static inline void +mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) +{ + uint32_t mask = ((uint32_t)1 << 31) - 1; + uint64_t a0 = a & mask; + uint64_t a1 = a >> 31 & mask; + uint64_t b0 = b & mask; + uint64_t b1 = b >> 31 & mask; + uint64_t p0 = a0 * b0; + uint64_t p2 = a1 * b1; + uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2; + uint64_t s0 = p0; + uint64_t s1 = (s0 >> 31) + p1; + uint64_t s2 = (s1 >> 31) + p2; + *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62; + *x1 = s2 >> 2; +} + +static inline +void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b) +{ + uint64_t t0 = (uint64_t)(uint32_t)a * b; + uint64_t t1 = (t0 >> 32) + (a >> 32) * b; + *x0 = t1 << 32 | (uint32_t)t0; + *x1 = t1 >> 32; +} + +static inline void +mul64x64(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) +{ + uint64_t a0 = (uint32_t)a; + uint64_t a1 = a >> 32; + uint64_t b0 = (uint32_t)b; + uint64_t b1 = b >> 32; + uint64_t t1 = (a0 * b0 >> 32) + a1 * b0; + uint64_t t2 = a0 * b1; + uint64_t x = ((uint64_t)(uint32_t)t1 + (uint32_t)t2) >> 32; + x += t1 >> 32; + x += t2 >> 32; + x += a1 * b1; + *x0 = a * b; + *x1 = x; +} + +static inline void +add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, + uint64_t b1) +{ + *x0 = a0 + b0; + *x1 = a1 + b1 + (*x0 < a0); +} + +static inline void +sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, + uint64_t b1) +{ + *x0 = a0 - b0; + *x1 = a1 - b1 - (*x0 > a0); +} + +static inline int +cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) +{ + return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0); +} + +static inline uint16_t +fp16_normalise(uint16_t mnt, int *exp) +{ + int shift; + + if (!mnt) { + return 0; + } + + for (shift = 8; shift; shift >>= 1) { + if (!(mnt >> (16 - shift))) { + mnt <<= shift; + *exp -= shift; + } + } + return mnt; +} + +static inline uint32_t +fp32_normalise(uint32_t mnt, int *exp) +{ + int shift; + + if (!mnt) { + return 0; + } + + for (shift = 16; shift; shift >>= 1) { + if (!(mnt >> (32 - shift))) { + mnt <<= shift; + *exp -= shift; + } + } + return mnt; +} + +static inline uint64_t +fp64_normalise(uint64_t mnt, int *exp) +{ + int shift; + + if (!mnt) { + return 0; + } + + for (shift = 32; shift; shift >>= 1) { + if (!(mnt >> (64 - shift))) { + mnt <<= shift; + *exp -= shift; + } + } + return mnt; +} + +static inline void +fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp) +{ + uint64_t x0 = *mnt0; + uint64_t x1 = *mnt1; + int shift; + + if (!x0 && !x1) { + return; + } + + if (!x1) { + x1 = x0; + x0 = 0; + *exp -= 64; + } + + for (shift = 32; shift; shift >>= 1) { + if (!(x1 >> (64 - shift))) { + x1 = x1 << shift | x0 >> (64 - shift); + x0 <<= shift; + *exp -= shift; + } + } + + *mnt0 = x0; + *mnt1 = x1; +} + +static inline uint16_t +fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt) +{ + return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1)); +} + +static inline uint32_t +fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt) +{ + return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1)); +} + +static inline uint64_t +fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt) +{ + return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1)); +} + +static inline uint16_t +fp16_zero(int sgn) +{ + return fp16_pack(sgn, 0, 0); +} + +static inline uint32_t +fp32_zero(int sgn) +{ + return fp32_pack(sgn, 0, 0); +} + +static inline uint64_t +fp64_zero(int sgn) +{ + return fp64_pack(sgn, 0, 0); +} + +static inline uint16_t +fp16_max_normal(int sgn) +{ + return fp16_pack(sgn, 30, -1); +} + +static inline uint32_t +fp32_max_normal(int sgn) +{ + return fp32_pack(sgn, 254, -1); +} + +static inline uint64_t +fp64_max_normal(int sgn) +{ + return fp64_pack(sgn, 2046, -1); +} + +static inline uint16_t +fp16_infinity(int sgn) +{ + return fp16_pack(sgn, 31, 0); +} + +static inline uint32_t +fp32_infinity(int sgn) +{ + return fp32_pack(sgn, 255, 0); +} + +static inline uint64_t +fp64_infinity(int sgn) +{ + return fp64_pack(sgn, 2047, 0); +} + +static inline uint16_t +fp16_defaultNaN() +{ + return fp16_pack(0, 31, (uint16_t)1 << 9); +} + +static inline uint32_t +fp32_defaultNaN() +{ + return fp32_pack(0, 255, (uint32_t)1 << 22); +} + +static inline uint64_t +fp64_defaultNaN() +{ + return fp64_pack(0, 2047, (uint64_t)1 << 51); +} + +static inline void +fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, + int *flags) +{ + *sgn = x >> 15; + *exp = x >> 10 & 31; + *mnt = x & (((uint16_t)1 << 10) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint16_t)1 << 10; + } else { + ++*exp; + // There is no flush to zero in this case! + } +} + +static inline void +fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, + int *flags) +{ + *sgn = x >> 31; + *exp = x >> 23 & 255; + *mnt = x & (((uint32_t)1 << 23) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint32_t)1 << 23; + } else { + ++*exp; + if ((mode & FPLIB_FZ) && *mnt) { + *flags |= FPLIB_IDC; + *mnt = 0; + } + } +} + +static inline void +fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, + int *flags) +{ + *sgn = x >> 63; + *exp = x >> 52 & 2047; + *mnt = x & (((uint64_t)1 << 52) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint64_t)1 << 52; + } else { + ++*exp; + if ((mode & FPLIB_FZ) && *mnt) { + *flags |= FPLIB_IDC; + *mnt = 0; + } + } +} + +static inline uint32_t +fp32_process_NaN(uint32_t a, int mode, int *flags) +{ + if (!(a >> 22 & 1)) { + *flags |= FPLIB_IOC; + a |= (uint32_t)1 << 22; + } + return mode & FPLIB_DN ? fp32_defaultNaN() : a; +} + +static inline uint64_t +fp64_process_NaN(uint64_t a, int mode, int *flags) +{ + if (!(a >> 51 & 1)) { + *flags |= FPLIB_IOC; + a |= (uint64_t)1 << 51; + } + return mode & FPLIB_DN ? fp64_defaultNaN() : a; +} + +static uint32_t +fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_exp = a >> 23 & 255; + uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); + int b_exp = b >> 23 & 255; + uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); + + // Handle signalling NaNs: + if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) + return fp32_process_NaN(b, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 255 && a_mnt) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt) + return fp32_process_NaN(b, mode, flags); + + return 0; +} + +static uint64_t +fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_exp = a >> 52 & 2047; + uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); + int b_exp = b >> 52 & 2047; + uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); + + // Handle signalling NaNs: + if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) + return fp64_process_NaN(b, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 2047 && a_mnt) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt) + return fp64_process_NaN(b, mode, flags); + + return 0; +} + +static uint32_t +fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags) +{ + int a_exp = a >> 23 & 255; + uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); + int b_exp = b >> 23 & 255; + uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); + int c_exp = c >> 23 & 255; + uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1); + + // Handle signalling NaNs: + if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) + return fp32_process_NaN(b, mode, flags); + if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1)) + return fp32_process_NaN(c, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 255 && a_mnt) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt) + return fp32_process_NaN(b, mode, flags); + if (c_exp == 255 && c_mnt) + return fp32_process_NaN(c, mode, flags); + + return 0; +} + +static uint64_t +fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags) +{ + int a_exp = a >> 52 & 2047; + uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); + int b_exp = b >> 52 & 2047; + uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); + int c_exp = c >> 52 & 2047; + uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1); + + // Handle signalling NaNs: + if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) + return fp64_process_NaN(b, mode, flags); + if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1)) + return fp64_process_NaN(c, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 2047 && a_mnt) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt) + return fp64_process_NaN(b, mode, flags); + if (c_exp == 2047 && c_mnt) + return fp64_process_NaN(c, mode, flags); + + return 0; +} + +static uint16_t +fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint16_t int_mant; // mantissa for result, less than (1 << 11) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // There is no flush to zero in this case! + + // The bottom 5 bits of mnt are orred together: + mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0); + + if (exp > 0) { + biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr16(mnt, 3 - exp); + error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { + ++int_mant; + if (int_mant == (uint32_t)1 << 10) { + // Rounded up from denormalized to normalized + biased_exp = 1; + } + if (int_mant == (uint32_t)1 << 11) { + // Rounded up to next exponent + ++biased_exp; + int_mant >>= 1; + } + } + + // Handle rounding to odd aka Von Neumann rounding: + if (error && rm == FPRounding_ODD) + int_mant |= 1; + + // Handle overflow: + if (!(mode & FPLIB_AHP)) { + if (biased_exp >= 31) { + *flags |= FPLIB_OFC | FPLIB_IXC; + if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || + (rm == FPLIB_RM && sgn)) { + return fp16_infinity(sgn); + } else { + return fp16_max_normal(sgn); + } + } + } else { + if (biased_exp >= 32) { + *flags |= FPLIB_IOC; + return fp16_pack(sgn, 31, -1); + } + } + + if (error) { + *flags |= FPLIB_IXC; + } + + return fp16_pack(sgn, biased_exp, int_mant); +} + +static uint32_t +fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint32_t int_mant; // mantissa for result, less than (1 << 24) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // Flush to zero: + if ((mode & FPLIB_FZ) && exp < 1) { + *flags |= FPLIB_UFC; + return fp32_zero(sgn); + } + + // The bottom 8 bits of mnt are orred together: + mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0); + + if (exp > 0) { + biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr32(mnt, 3 - exp); + error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { + ++int_mant; + if (int_mant == (uint32_t)1 << 23) { + // Rounded up from denormalized to normalized + biased_exp = 1; + } + if (int_mant == (uint32_t)1 << 24) { + // Rounded up to next exponent + ++biased_exp; + int_mant >>= 1; + } + } + + // Handle rounding to odd aka Von Neumann rounding: + if (error && rm == FPRounding_ODD) + int_mant |= 1; + + // Handle overflow: + if (biased_exp >= 255) { + *flags |= FPLIB_OFC | FPLIB_IXC; + if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || + (rm == FPLIB_RM && sgn)) { + return fp32_infinity(sgn); + } else { + return fp32_max_normal(sgn); + } + } + + if (error) { + *flags |= FPLIB_IXC; + } + + return fp32_pack(sgn, biased_exp, int_mant); +} + +static uint32_t +fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags) +{ + return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags); +} + +static uint64_t +fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint64_t int_mant; // mantissa for result, less than (1 << 52) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // Flush to zero: + if ((mode & FPLIB_FZ) && exp < 1) { + *flags |= FPLIB_UFC; + return fp64_zero(sgn); + } + + // The bottom 11 bits of mnt are orred together: + mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0); + + if (exp > 0) { + biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr64(mnt, 3 - exp); + error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { + ++int_mant; + if (int_mant == (uint64_t)1 << 52) { + // Rounded up from denormalized to normalized + biased_exp = 1; + } + if (int_mant == (uint64_t)1 << 53) { + // Rounded up to next exponent + ++biased_exp; + int_mant >>= 1; + } + } + + // Handle rounding to odd aka Von Neumann rounding: + if (error && rm == FPRounding_ODD) + int_mant |= 1; + + // Handle overflow: + if (biased_exp >= 2047) { + *flags |= FPLIB_OFC | FPLIB_IXC; + if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || + (rm == FPLIB_RM && sgn)) { + return fp64_infinity(sgn); + } else { + return fp64_max_normal(sgn); + } + } + + if (error) { + *flags |= FPLIB_IXC; + } + + return fp64_pack(sgn, biased_exp, int_mant); +} + +static uint64_t +fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags) +{ + return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags); +} + +static int +fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint32_t a_mnt, b_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1))) + *flags |= FPLIB_IOC; + return 0; + } + return a == b || (!a_mnt && !b_mnt); +} + +static int +fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint32_t a_mnt, b_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 1; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 1; +} + +static int +fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint32_t a_mnt, b_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 0; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 0; +} + +static int +fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1))) + *flags |= FPLIB_IOC; + return 0; + } + return a == b || (!a_mnt && !b_mnt); +} + +static int +fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 1; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 1; +} + +static int +fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 0; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 0; +} + +static uint32_t +fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x, x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) { + return x; + } + + b_sgn ^= neg; + + // Handle infinities and zeroes: + if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } else if (a_exp == 255) { + return fp32_infinity(a_sgn); + } else if (b_exp == 255) { + return fp32_infinity(b_sgn); + } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { + return fp32_zero(a_sgn); + } + + a_mnt <<= 3; + b_mnt <<= 3; + if (a_exp >= b_exp) { + b_mnt = (lsr32(b_mnt, a_exp - b_exp) | + !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1))); + b_exp = a_exp; + } else { + a_mnt = (lsr32(a_mnt, b_exp - a_exp) | + !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1))); + a_exp = b_exp; + } + x_sgn = a_sgn; + x_exp = a_exp; + if (a_sgn == b_sgn) { + x_mnt = a_mnt + b_mnt; + } else if (a_mnt >= b_mnt) { + x_mnt = a_mnt - b_mnt; + } else { + x_sgn ^= 1; + x_mnt = b_mnt - a_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp32_zero((mode & 3) == 2); + } + + x_mnt = fp32_normalise(x_mnt, &x_exp); + + return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags); +} + +static uint64_t +fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint64_t a_mnt, b_mnt, x, x_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) { + return x; + } + + b_sgn ^= neg; + + // Handle infinities and zeroes: + if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } else if (a_exp == 2047) { + return fp64_infinity(a_sgn); + } else if (b_exp == 2047) { + return fp64_infinity(b_sgn); + } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { + return fp64_zero(a_sgn); + } + + a_mnt <<= 3; + b_mnt <<= 3; + if (a_exp >= b_exp) { + b_mnt = (lsr64(b_mnt, a_exp - b_exp) | + !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1))); + b_exp = a_exp; + } else { + a_mnt = (lsr64(a_mnt, b_exp - a_exp) | + !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1))); + a_exp = b_exp; + } + x_sgn = a_sgn; + x_exp = a_exp; + if (a_sgn == b_sgn) { + x_mnt = a_mnt + b_mnt; + } else if (a_mnt >= b_mnt) { + x_mnt = a_mnt - b_mnt; + } else { + x_sgn ^= 1; + x_mnt = b_mnt - a_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp64_zero((mode & 3) == 2); + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags); +} + +static uint32_t +fp32_mul(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x; + uint64_t x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) { + return x; + } + + // Handle infinities and zeroes: + if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } else if (a_exp == 255 || b_exp == 255) { + return fp32_infinity(a_sgn ^ b_sgn); + } else if (!a_mnt || !b_mnt) { + return fp32_zero(a_sgn ^ b_sgn); + } + + // Multiply and normalise: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp + b_exp - 110; + x_mnt = (uint64_t)a_mnt * b_mnt; + x_mnt = fp64_normalise(x_mnt, &x_exp); + + // Convert to 32 bits, collapsing error into bottom bit: + x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_mul(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint64_t a_mnt, b_mnt, x; + uint64_t x0_mnt, x1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) { + return x; + } + + // Handle infinities and zeroes: + if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } else if (a_exp == 2047 || b_exp == 2047) { + return fp64_infinity(a_sgn ^ b_sgn); + } else if (!a_mnt || !b_mnt) { + return fp64_zero(a_sgn ^ b_sgn); + } + + // Multiply and normalise: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp + b_exp - 1000; + mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt); + fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); + + // Convert to 64 bits, collapsing error into bottom bit: + x0_mnt = x1_mnt << 1 | !!x0_mnt; + + return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags); +} + +static uint32_t +fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, + int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; + uint32_t a_mnt, b_mnt, c_mnt, x; + uint64_t x_mnt, y_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); + + x = fp32_process_NaNs3(a, b, c, mode, flags); + + // Quiet NaN added to product of zero and infinity: + if (a_exp == 255 && (a_mnt >> 22 & 1) && + ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) || + (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) { + x = fp32_defaultNaN(); + *flags |= FPLIB_IOC; + } + + if (x) { + return x; + } + + // Handle infinities and zeroes: + if ((b_exp == 255 && !c_mnt) || + (c_exp == 255 && !b_mnt) || + (a_exp == 255 && (b_exp == 255 || c_exp == 255) && + (a_sgn != (b_sgn ^ c_sgn)))) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + if (a_exp == 255) + return fp32_infinity(a_sgn); + if (b_exp == 255 || c_exp == 255) + return fp32_infinity(b_sgn ^ c_sgn); + if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) + return fp32_zero(a_sgn); + + x_sgn = a_sgn; + x_exp = a_exp + 13; + x_mnt = (uint64_t)a_mnt << 27; + + // Multiply: + y_sgn = b_sgn ^ c_sgn; + y_exp = b_exp + c_exp - 113; + y_mnt = (uint64_t)b_mnt * c_mnt << 3; + if (!y_mnt) { + y_exp = x_exp; + } + + // Add: + if (x_exp >= y_exp) { + y_mnt = (lsr64(y_mnt, x_exp - y_exp) | + !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1))); + y_exp = x_exp; + } else { + x_mnt = (lsr64(x_mnt, y_exp - x_exp) | + !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1))); + x_exp = y_exp; + } + if (x_sgn == y_sgn) { + x_mnt = x_mnt + y_mnt; + } else if (x_mnt >= y_mnt) { + x_mnt = x_mnt - y_mnt; + } else { + x_sgn ^= 1; + x_mnt = y_mnt - x_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp32_zero((mode & 3) == 2); + } + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags); +} + +static uint64_t +fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, + int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; + uint64_t a_mnt, b_mnt, c_mnt, x; + uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); + + x = fp64_process_NaNs3(a, b, c, mode, flags); + + // Quiet NaN added to product of zero and infinity: + if (a_exp == 2047 && (a_mnt >> 51 & 1) && + ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) || + (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) { + x = fp64_defaultNaN(); + *flags |= FPLIB_IOC; + } + + if (x) { + return x; + } + + // Handle infinities and zeroes: + if ((b_exp == 2047 && !c_mnt) || + (c_exp == 2047 && !b_mnt) || + (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) && + (a_sgn != (b_sgn ^ c_sgn)))) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + if (a_exp == 2047) + return fp64_infinity(a_sgn); + if (b_exp == 2047 || c_exp == 2047) + return fp64_infinity(b_sgn ^ c_sgn); + if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) + return fp64_zero(a_sgn); + + x_sgn = a_sgn; + x_exp = a_exp + 11; + x0_mnt = 0; + x1_mnt = a_mnt; + + // Multiply: + y_sgn = b_sgn ^ c_sgn; + y_exp = b_exp + c_exp - 1003; + mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3); + if (!y0_mnt && !y1_mnt) { + y_exp = x_exp; + } + + // Add: + if (x_exp >= y_exp) { + uint64_t t0, t1; + lsl128(&t0, &t1, y0_mnt, y1_mnt, + x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0); + lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp); + y0_mnt |= !!(t0 | t1); + y_exp = x_exp; + } else { + uint64_t t0, t1; + lsl128(&t0, &t1, x0_mnt, x1_mnt, + y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp); + x0_mnt |= !!(t0 | t1); + x_exp = y_exp; + } + if (x_sgn == y_sgn) { + add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); + } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) { + sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); + } else { + x_sgn ^= 1; + sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt); + } + + if (!x0_mnt && !x1_mnt) { + // Sign of exact zero result depends on rounding mode + return fp64_zero((mode & 3) == 2); + } + + // Normalise and convert to 64 bits, collapsing error into bottom bit: + fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); + x0_mnt = x1_mnt << 1 | !!x0_mnt; + + return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags); +} + +static uint32_t +fp32_div(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x; + uint64_t x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) + return x; + + // Handle infinities and zeroes: + if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + if (a_exp == 255 || !b_mnt) { + if (a_exp != 255) + *flags |= FPLIB_DZC; + return fp32_infinity(a_sgn ^ b_sgn); + } + if (!a_mnt || b_exp == 255) + return fp32_zero(a_sgn ^ b_sgn); + + // Divide, setting bottom bit if inexact: + a_mnt = fp32_normalise(a_mnt, &a_exp); + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp - b_exp + 172; + x_mnt = ((uint64_t)a_mnt << 18) / b_mnt; + x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18); + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_div(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c; + uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) + return x; + + // Handle infinities and zeroes: + if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + if (a_exp == 2047 || !b_mnt) { + if (a_exp != 2047) + *flags |= FPLIB_DZC; + return fp64_infinity(a_sgn ^ b_sgn); + } + if (!a_mnt || b_exp == 2047) + return fp64_zero(a_sgn ^ b_sgn); + + // Find reciprocal of divisor with Newton-Raphson: + a_mnt = fp64_normalise(a_mnt, &a_exp); + b_mnt = fp64_normalise(b_mnt, &b_exp); + x_mnt = ~(uint64_t)0 / (b_mnt >> 31); + mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt); + sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32); + mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33); + + // Multiply by dividend: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp - b_exp + 1031; + mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4); + x_mnt = x1_mnt; + + // This is an underestimate, so try adding one: + mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough + c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11); + if (c <= 0) { + ++x_mnt; + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); +} + +static void +set_fpscr0(FPSCR &fpscr, int flags) +{ + if (flags & FPLIB_IDC) { + fpscr.idc = 1; + } + if (flags & FPLIB_IOC) { + fpscr.ioc = 1; + } + if (flags & FPLIB_DZC) { + fpscr.dzc = 1; + } + if (flags & FPLIB_OFC) { + fpscr.ofc = 1; + } + if (flags & FPLIB_UFC) { + fpscr.ufc = 1; + } + if (flags & FPLIB_IXC) { + fpscr.ixc = 1; + } +} + +static uint32_t +fp32_sqrt(uint32_t a, int mode, int *flags) +{ + int a_sgn, a_exp, x_sgn, x_exp; + uint32_t a_mnt, x, x_mnt; + uint64_t t0, t1; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + + // Handle NaNs: + if (a_exp == 255 && (uint32_t)(a_mnt << 9)) + return fp32_process_NaN(a, mode, flags); + + // Handle infinities and zeroes: + if (!a_mnt) { + return fp32_zero(a_sgn); + } + if (a_exp == 255 && !a_sgn) { + return fp32_infinity(a_sgn); + } + if (a_sgn) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + + a_mnt = fp32_normalise(a_mnt, &a_exp); + if (!(a_exp & 1)) { + ++a_exp; + a_mnt >>= 1; + } + + // x = (a * 3 + 5) / 8 + x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28); + + // x = (a / x + x) / 2; // 16-bit accuracy + x = (a_mnt / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 16-bit accuracy + x = (a_mnt / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 32-bit accuracy + x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1); + + x_sgn = 0; + x_exp = (a_exp + 147) >> 1; + x_mnt = ((x - (1 << 5)) >> 6) + 1; + t1 = (uint64_t)x_mnt * x_mnt; + t0 = (uint64_t)a_mnt << 19; + if (t1 > t0) { + --x_mnt; + } + + x_mnt = fp32_normalise(x_mnt, &x_exp); + + return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); +} + +static uint64_t +fp64_sqrt(uint64_t a, int mode, int *flags) +{ + int a_sgn, a_exp, x_sgn, x_exp, c; + uint64_t a_mnt, x_mnt, r, x0, x1; + uint32_t x; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + + // Handle NaNs: + if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) { + return fp64_process_NaN(a, mode, flags); + } + + // Handle infinities and zeroes: + if (!a_mnt) + return fp64_zero(a_sgn); + if (a_exp == 2047 && !a_sgn) + return fp64_infinity(a_sgn); + if (a_sgn) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + + a_mnt = fp64_normalise(a_mnt, &a_exp); + if (a_exp & 1) { + ++a_exp; + a_mnt >>= 1; + } + + // x = (a * 3 + 5) / 8 + x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28); + + // x = (a / x + x) / 2; // 16-bit accuracy + x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 16-bit accuracy + x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 32-bit accuracy + x = ((a_mnt / x) >> 2) + (x >> 1); + + // r = 1 / x; // 32-bit accuracy + r = ((uint64_t)1 << 62) / x; + + // r = r * (2 - x * r); // 64-bit accuracy + mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r); + lsr128(&x0, &x1, x0, x1, 31); + + // x = (x + a * r) / 2; // 64-bit accuracy + mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2); + lsl128(&x0, &x1, x0, x1, 5); + lsr128(&x0, &x1, x0, x1, 56); + + x0 = ((uint64_t)x << 31) + (x0 >> 1); + + x_sgn = 0; + x_exp = (a_exp + 1053) >> 1; + x_mnt = x0; + x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1; + mul62x62(&x0, &x1, x_mnt, x_mnt); + lsl128(&x0, &x1, x0, x1, 19); + c = cmp128(x0, x1, 0, a_mnt); + if (c > 0) + --x_mnt; + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); +} + +static int +modeConv(FPSCR fpscr) +{ + return (((int) fpscr) >> 22) & 0xF; +} + +static void +set_fpscr(FPSCR &fpscr, int flags) +{ + // translate back to FPSCR + bool underflow = false; + if (flags & FPLIB_IDC) { + fpscr.idc = 1; + } + if (flags & FPLIB_IOC) { + fpscr.ioc = 1; + } + if (flags & FPLIB_DZC) { + fpscr.dzc = 1; + } + if (flags & FPLIB_OFC) { + fpscr.ofc = 1; + } + if (flags & FPLIB_UFC) { + underflow = true; //xx Why is this required? + fpscr.ufc = 1; + } + if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) { + fpscr.ixc = 1; + } +} + +template <> +bool +fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +uint32_t +fplibAbs(uint32_t op) +{ + return op & ~((uint32_t)1 << 31); +} + +template <> +uint64_t +fplibAbs(uint64_t op) +{ + return op & ~((uint64_t)1 << 63); +} + +template <> +uint32_t +fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +int +fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2, result; + uint32_t mnt1, mnt2; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) || + (exp2 == 255 && (uint32_t)(mnt2 << 9))) { + result = 3; + if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) || + (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) || + signal_nans) + flags |= FPLIB_IOC; + } else { + if (op1 == op2 || (!mnt1 && !mnt2)) { + result = 6; + } else if (sgn1 != sgn2) { + result = sgn1 ? 8 : 2; + } else if (exp1 != exp2) { + result = sgn1 ^ (exp1 < exp2) ? 8 : 2; + } else { + result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +int +fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2, result; + uint64_t mnt1, mnt2; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) || + (exp2 == 2047 && (uint64_t)(mnt2 << 12))) { + result = 3; + if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) || + (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) || + signal_nans) + flags |= FPLIB_IOC; + } else { + if (op1 == op2 || (!mnt1 && !mnt2)) { + result = 6; + } else if (sgn1 != sgn2) { + result = sgn1 ? 8 : 2; + } else if (exp1 != exp2) { + result = sgn1 ^ (exp1 < exp2) ? 8 : 2; + } else { + result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +static uint16_t +fp16_FPConvertNaN_32(uint32_t op) +{ + return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13); +} + +static uint16_t +fp16_FPConvertNaN_64(uint64_t op) +{ + return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42); +} + +static uint32_t +fp32_FPConvertNaN_16(uint16_t op) +{ + return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13); +} + +static uint32_t +fp32_FPConvertNaN_64(uint64_t op) +{ + return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29); +} + +static uint64_t +fp64_FPConvertNaN_16(uint16_t op) +{ + return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42); +} + +static uint64_t +fp64_FPConvertNaN_32(uint32_t op) +{ + return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29); +} + +static uint32_t +fp32_FPOnePointFive(int sgn) +{ + return fp32_pack(sgn, 127, (uint64_t)1 << 22); +} + +static uint64_t +fp64_FPOnePointFive(int sgn) +{ + return fp64_pack(sgn, 1023, (uint64_t)1 << 51); +} + +static uint32_t +fp32_FPThree(int sgn) +{ + return fp32_pack(sgn, 128, (uint64_t)1 << 22); +} + +static uint64_t +fp64_FPThree(int sgn) +{ + return fp64_pack(sgn, 1024, (uint64_t)1 << 51); +} + +static uint32_t +fp32_FPTwo(int sgn) +{ + return fp32_pack(sgn, 128, 0); +} + +static uint64_t +fp64_FPTwo(int sgn) +{ + return fp64_pack(sgn, 1024, 0); +} + +template <> +uint16_t +fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint16_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + bool alt_hp = fpscr.ahp; + + if (exp == 255 && (uint32_t)(mnt << 9)) { + if (alt_hp) { + result = fp16_zero(sgn); + } else if (fpscr.dn) { + result = fp16_defaultNaN(); + } else { + result = fp16_FPConvertNaN_32(op); + } + if (!(mnt >> 22 & 1) || alt_hp) { + flags |= FPLIB_IOC; + } + } else if (exp == 255) { + if (alt_hp) { + result = sgn << 15 | (uint16_t)0x7fff; + flags |= FPLIB_IOC; + } else { + result = fp16_infinity(sgn); + } + } else if (!mnt) { + result = fp16_zero(sgn); + } else { + result = fp16_round_(sgn, exp - 127 + 15, + mnt >> 7 | !!(uint32_t)(mnt << 25), + rounding, mode | alt_hp << 4, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint16_t +fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint16_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + bool alt_hp = fpscr.ahp; + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + if (alt_hp) { + result = fp16_zero(sgn); + } else if (fpscr.dn) { + result = fp16_defaultNaN(); + } else { + result = fp16_FPConvertNaN_64(op); + } + if (!(mnt >> 51 & 1) || alt_hp) { + flags |= FPLIB_IOC; + } + } else if (exp == 2047) { + if (alt_hp) { + result = sgn << 15 | (uint16_t)0x7fff; + flags |= FPLIB_IOC; + } else { + result = fp16_infinity(sgn); + } + } else if (!mnt) { + result = fp16_zero(sgn); + } else { + result = fp16_round_(sgn, exp - 1023 + 15, + mnt >> 36 | !!(uint64_t)(mnt << 28), + rounding, mode | alt_hp << 4, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint16_t mnt; + uint32_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { + if (fpscr.dn) { + result = fp32_defaultNaN(); + } else { + result = fp32_FPConvertNaN_16(op); + } + if (!(mnt >> 9 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 31 && !fpscr.ahp) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else { + mnt = fp16_normalise(mnt, &exp); + result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint32_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + if (fpscr.dn) { + result = fp32_defaultNaN(); + } else { + result = fp32_FPConvertNaN_64(op); + } + if (!(mnt >> 51 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 2047) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else { + result = fp32_round_(sgn, exp - 1023 + 127, + mnt >> 20 | !!(uint64_t)(mnt << 44), + rounding, mode, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint16_t mnt; + uint64_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { + if (fpscr.dn) { + result = fp64_defaultNaN(); + } else { + result = fp64_FPConvertNaN_16(op); + } + if (!(mnt >> 9 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 31 && !fpscr.ahp) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else { + mnt = fp16_normalise(mnt, &exp); + result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint64_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + if (fpscr.dn) { + result = fp64_defaultNaN(); + } else { + result = fp64_FPConvertNaN_32(op); + } + if (!(mnt >> 22 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 255) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else { + mnt = fp32_normalise(mnt, &exp); + result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +static uint32_t +fp32_repack(int sgn, int exp, uint32_t mnt) +{ + return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt); +} + +static uint64_t +fp64_repack(int sgn, int exp, uint64_t mnt) +{ + return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt); +} + +static void +fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn) +{ + // Treat a single quiet-NaN as +Infinity/-Infinity + if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23) + *op1 = fp32_infinity(sgn); + if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23) + *op2 = fp32_infinity(sgn); +} + +static void +fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn) +{ + // Treat a single quiet-NaN as +Infinity/-Infinity + if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52) + *op1 = fp64_infinity(sgn); + if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52) + *op2 = fp64_infinity(sgn); +} + +template <> +uint32_t +fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, x, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? + fp32_repack(sgn1, exp1, mnt1) : + fp32_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, x, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? + fp64_repack(sgn1, exp1, mnt1) : + fp64_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + fp32_minmaxnum(&op1, &op2, 1); + return fplibMax<uint32_t>(op1, op2, fpscr); +} + +template <> +uint64_t +fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + fp64_minmaxnum(&op1, &op2, 1); + return fplibMax<uint64_t>(op1, op2, fpscr); +} + +template <> +uint32_t +fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, x, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? + fp32_repack(sgn1, exp1, mnt1) : + fp32_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, x, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? + fp64_repack(sgn1, exp1, mnt1) : + fp64_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + fp32_minmaxnum(&op1, &op2, 0); + return fplibMin<uint32_t>(op1, op2, fpscr); +} + +template <> +uint64_t +fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + fp64_minmaxnum(&op1, &op2, 0); + return fplibMin<uint64_t>(op1, op2, fpscr); +} + +template <> +uint32_t +fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { + result = fp32_FPTwo(sgn1 ^ sgn2); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else if (!mnt1 || !mnt2) { + result = fp32_zero(sgn1 ^ sgn2); + } else { + result = fp32_mul(op1, op2, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPTwo(sgn1 ^ sgn2); + } else if (exp1 == 2047 || exp2 == 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else if (!mnt1 || !mnt2) { + result = fp64_zero(sgn1 ^ sgn2); + } else { + result = fp64_mul(op1, op2, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibNeg(uint32_t op) +{ + return op ^ (uint32_t)1 << 31; +} + +template <> +uint64_t +fplibNeg(uint64_t op) +{ + return op ^ (uint64_t)1 << 63; +} + +static const uint8_t recip_sqrt_estimate[256] = { + 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228, + 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203, + 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181, + 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163, + 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146, + 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, + 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118, + 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106, + 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86, + 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68, + 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53, + 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40, + 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28, + 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18, + 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9, + 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0 +}; + +template <> +uint32_t +fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } else if (!mnt) { + result = fp32_infinity(sgn); + flags |= FPLIB_DZC; + } else if (sgn) { + result = fp32_defaultNaN(); + flags |= FPLIB_IOC; + } else if (exp == 255) { + result = fp32_zero(0); + } else { + exp += 8; + mnt = fp32_normalise(mnt, &exp); + mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)]; + result = fp32_pack(0, (380 - exp) >> 1, mnt << 15); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (!mnt) { + result = fp64_infinity(sgn); + flags |= FPLIB_DZC; + } else if (sgn) { + result = fp64_defaultNaN(); + flags |= FPLIB_IOC; + } else if (exp == 2047) { + result = fp32_zero(0); + } else { + exp += 11; + mnt = fp64_normalise(mnt, &exp); + mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)]; + result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + op1 = fplibNeg<uint32_t>(op1); + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { + result = fp32_FPOnePointFive(0); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else { + result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + op1 = fplibNeg<uint64_t>(op1); + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPOnePointFive(0); + } else if (exp1 == 2047 || exp2 == 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else { + result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + op1 = fplibNeg<uint32_t>(op1); + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { + result = fp32_FPTwo(0); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else { + result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecipEstimate(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } else if (exp == 255) { + result = fp32_zero(sgn); + } else if (!mnt) { + result = fp32_infinity(sgn); + flags |= FPLIB_DZC; + } else if (!((uint32_t)(op << 1) >> 22)) { + bool overflow_to_inf; + switch (FPCRRounding(fpscr)) { + case FPRounding_TIEEVEN: + overflow_to_inf = true; + break; + case FPRounding_POSINF: + overflow_to_inf = !sgn; + break; + case FPRounding_NEGINF: + overflow_to_inf = sgn; + break; + case FPRounding_ZERO: + overflow_to_inf = false; + break; + default: + assert(0); + } + result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn); + flags |= FPLIB_OFC | FPLIB_IXC; + } else if (fpscr.fz && exp >= 253) { + result = fp32_zero(sgn); + flags |= FPLIB_UFC; + } else { + exp += 8; + mnt = fp32_normalise(mnt, &exp); + int result_exp = 253 - exp; + uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1; + fraction <<= 15; + if (result_exp == 0) { + fraction >>= 1; + } else if (result_exp == -1) { + fraction >>= 2; + result_exp = 0; + } + result = fp32_pack(sgn, result_exp, fraction); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecipEstimate(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (exp == 2047) { + result = fp64_zero(sgn); + } else if (!mnt) { + result = fp64_infinity(sgn); + flags |= FPLIB_DZC; + } else if (!((uint64_t)(op << 1) >> 51)) { + bool overflow_to_inf; + switch (FPCRRounding(fpscr)) { + case FPRounding_TIEEVEN: + overflow_to_inf = true; + break; + case FPRounding_POSINF: + overflow_to_inf = !sgn; + break; + case FPRounding_NEGINF: + overflow_to_inf = sgn; + break; + case FPRounding_ZERO: + overflow_to_inf = false; + break; + default: + assert(0); + } + result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn); + flags |= FPLIB_OFC | FPLIB_IXC; + } else if (fpscr.fz && exp >= 2045) { + result = fp64_zero(sgn); + flags |= FPLIB_UFC; + } else { + exp += 11; + mnt = fp64_normalise(mnt, &exp); + int result_exp = 2045 - exp; + uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1; + fraction <<= 44; + if (result_exp == 0) { + fraction >>= 1; + } else if (result_exp == -1) { + fraction >>= 2; + result_exp = 0; + } + result = fp64_pack(sgn, result_exp, fraction); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + op1 = fplibNeg<uint64_t>(op1); + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPTwo(0); + } else if (exp1 == 2047 || exp2 == 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else { + result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecpX(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } + else { + if (!mnt) { // Zero and denormals + result = fp32_pack(sgn, 254, 0); + } else { // Infinities and normals + result = fp32_pack(sgn, exp ^ 255, 0); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecpX(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } + else { + if (!mnt) { // Zero and denormals + result = fp64_pack(sgn, 2046, 0); + } else { // Infinities and normals + result = fp64_pack(sgn, exp ^ 2047, 0); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + // Handle NaNs, infinities and zeroes: + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } else if (exp == 255) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else if (exp >= 150) { + // There are no fractional bits + result = op; + } else { + // Truncate towards zero: + uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp); + int err = exp < 118 ? 1 : + (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0); + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (x == 0) { + result = fp32_zero(sgn); + } else { + exp = 150; + mnt = fp32_normalise(x, &exp); + result = fp32_pack(sgn, exp + 8, mnt >> 8); + } + + if (err && exact) + flags |= FPLIB_IXC; + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + // Handle NaNs, infinities and zeroes: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (exp == 2047) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else if (exp >= 1075) { + // There are no fractional bits + result = op; + } else { + // Truncate towards zero: + uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp); + int err = exp < 1011 ? 1 : + (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0); + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (x == 0) { + result = fp64_zero(sgn); + } else { + exp = 1075; + mnt = fp64_normalise(x, &exp); + result = fp64_pack(sgn, exp + 11, mnt >> 11); + } + + if (err && exact) + flags |= FPLIB_IXC; + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibSqrt(uint32_t op, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibSqrt(uint64_t op, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +static uint64_t +FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, + int *flags) +{ + uint64_t x; + int err; + + if (exp > 1023 + 63) { + *flags = FPLIB_IOC; + return ((uint64_t)!u << 63) - !sgn; + } + + x = lsr64(mnt << 11, 1023 + 63 - exp); + err = (exp > 1023 + 63 - 2 ? 0 : + (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) | + !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1))); + + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) { + *flags = FPLIB_IOC; + return ((uint64_t)!u << 63) - !sgn; + } + + if (err) { + *flags = FPLIB_IXC; + } + + return sgn ? -x : x; +} + +static uint32_t +FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, + int *flags) +{ + uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags); + if (u ? x >= (uint64_t)1 << 32 : + !(x < (uint64_t)1 << 31 || + (uint64_t)-x <= (uint64_t)1 << 31)) { + *flags = FPLIB_IOC; + x = ((uint32_t)!u << 31) - !sgn; + } + return x; +} + +template <> +uint32_t +fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 255 && (uint32_t)(mnt << 9)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits, + (uint64_t)mnt << (52 - 23), u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint32_t result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint64_t result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 255 && (uint32_t)(mnt << 9)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits, + (uint64_t)mnt << (52 - 23), u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +static uint32_t +fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) +{ + int x_sgn = !u && a >> 63; + int x_exp = 190 - fbits; + uint64_t x_mnt = x_sgn ? -a : a; + + // Handle zero: + if (!x_mnt) { + return fp32_zero(0); + } + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) +{ + int x_sgn = !u && a >> 63; + int x_exp = 1024 + 62 - fbits; + uint64_t x_mnt = x_sgn ? -a : a; + + // Handle zero: + if (!x_mnt) { + return fp64_zero(0); + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags); +} + +template <> +uint32_t +fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + uint32_t res = fp32_cvtf(op, fbits, u, + (int)rounding | ((uint32_t)fpscr >> 22 & 12), + &flags); + set_fpscr0(fpscr, flags); + return res; +} + +template <> +uint64_t +fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + uint64_t res = fp64_cvtf(op, fbits, u, + (int)rounding | ((uint32_t)fpscr >> 22 & 12), + &flags); + set_fpscr0(fpscr, flags); + return res; +} + +} diff --git a/src/arch/arm/insts/fplib.hh b/src/arch/arm/insts/fplib.hh new file mode 100644 index 000000000..6263687fc --- /dev/null +++ b/src/arch/arm/insts/fplib.hh @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Edmund Grimley Evans + * Thomas Grocutt + */ + +/** + * @file + * Floating-point library code, which will gradually replace vfp.hh. For + * portability, this library does not use floating-point data types. Currently, + * C's standard integer types are used in the API, though this could be changed + * to something like class Fp32 { uint32_t x; }, etc. + */ + +#ifndef __ARCH_ARM_INSTS_FPLIB_HH__ +#define __ARCH_ARM_INSTS_FPLIB_HH__ + +#include <stdint.h> + +#include "arch/arm/miscregs.hh" + +namespace ArmISA +{ + +enum FPRounding { + FPRounding_TIEEVEN = 0, + FPRounding_POSINF = 1, + FPRounding_NEGINF = 2, + FPRounding_ZERO = 3, + FPRounding_TIEAWAY = 4, + FPRounding_ODD = 5 +}; + +static inline FPRounding +FPCRRounding(FPSCR &fpscr) +{ + return (FPRounding)((uint32_t)fpscr >> 22 & 3); +} + +/** Floating-point absolute value. */ +template <class T> +T fplibAbs(T op); +/** Floating-point add. */ +template <class T> +T fplibAdd(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare (quiet and signaling). */ +template <class T> +int fplibCompare(T op1, T op2, bool signal_nans, FPSCR &fpscr); +/** Floating-point compare equal. */ +template <class T> +bool fplibCompareEQ(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare greater than or equal. */ +template <class T> +bool fplibCompareGE(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare greater than. */ +template <class T> +bool fplibCompareGT(T op1, T op2, FPSCR &fpscr); +/** Floating-point convert precision. */ +template <class T1, class T2> +T2 fplibConvert(T1 op, FPRounding rounding, FPSCR &fpscr); +/** Floating-point division. */ +template <class T> +T fplibDiv(T op1, T op2, FPSCR &fpscr); +/** Floating-point maximum. */ +template <class T> +T fplibMax(T op1, T op2, FPSCR &fpscr); +/** Floating-point maximum number. */ +template <class T> +T fplibMaxNum(T op1, T op2, FPSCR &fpscr); +/** Floating-point minimum. */ +template <class T> +T fplibMin(T op1, T op2, FPSCR &fpscr); +/** Floating-point minimum number. */ +template <class T> +T fplibMinNum(T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply. */ +template <class T> +T fplibMul(T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply-add. */ +template <class T> +T fplibMulAdd(T addend, T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply extended. */ +template <class T> +T fplibMulX(T op1, T op2, FPSCR &fpscr); +/** Floating-point negate. */ +template <class T> +T fplibNeg(T op); +/** Floating-point reciprocal square root estimate. */ +template <class T> +T fplibRSqrtEstimate(T op, FPSCR &fpscr); +/** Floating-point reciprocal square root step. */ +template <class T> +T fplibRSqrtStepFused(T op1, T op2, FPSCR &fpscr); +/** Floating-point reciprocal estimate. */ +template <class T> +T fplibRecipEstimate(T op, FPSCR &fpscr); +/** Floating-point reciprocal step. */ +template <class T> +T fplibRecipStepFused(T op1, T op2, FPSCR &fpscr); +/** Floating-point reciprocal exponent. */ +template <class T> +T fplibRecpX(T op, FPSCR &fpscr); +/** Floating-point convert to integer. */ +template <class T> +T fplibRoundInt(T op, FPRounding rounding, bool exact, FPSCR &fpscr); +/** Floating-point square root. */ +template <class T> +T fplibSqrt(T op, FPSCR &fpscr); +/** Floating-point subtract. */ +template <class T> +T fplibSub(T op1, T op2, FPSCR &fpscr); +/** Floating-point convert to fixed-point. */ +template <class T1, class T2> +T2 fplibFPToFixed(T1 op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr); +/** Floating-point convert from fixed-point. */ +template <class T> +T fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); + +/* Function specializations... */ +template <> +uint32_t fplibAbs(uint32_t op); +template <> +uint64_t fplibAbs(uint64_t op); +template <> +uint32_t fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +int fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr); +template <> +int fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr); +template <> +bool fplibCompareEQ(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareEQ(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGE(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGE(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGT(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGT(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint16_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint64_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint64_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, + FPSCR &fpscr); +template <> +uint64_t fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, + FPSCR &fpscr); +template <> +uint32_t fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibNeg(uint32_t op); +template <> +uint64_t fplibNeg(uint64_t op); +template <> +uint32_t fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr); +template<> +uint64_t fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibRecipEstimate(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibRecipEstimate(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibRecpX(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibRecpX(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, + FPSCR &fpscr); +template <> +uint64_t fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, + FPSCR &fpscr); +template <> +uint32_t fplibSqrt(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibSqrt(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint32_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint32_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +} + +#endif diff --git a/src/arch/arm/insts/macromem.cc b/src/arch/arm/insts/macromem.cc index 26a916fc7..42cb98a7c 100644 --- a/src/arch/arm/insts/macromem.cc +++ b/src/arch/arm/insts/macromem.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -43,7 +43,9 @@ #include <sstream> #include "arch/arm/insts/macromem.hh" + #include "arch/arm/generated/decoder.hh" +#include "arch/arm/insts/neon64_mem.hh" using namespace std; using namespace ArmISAInst; @@ -177,6 +179,212 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, } } +PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + uint32_t size, bool fp, bool load, bool noAlloc, + bool signExt, bool exclusive, bool acrel, + int64_t imm, AddrMode mode, + IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) : + PredMacroOp(mnem, machInst, __opClass) +{ + bool writeback = (mode != AddrMd_Offset); + numMicroops = 1 + (size / 4) + (writeback ? 1 : 0); + microOps = new StaticInstPtr[numMicroops]; + + StaticInstPtr *uop = microOps; + + bool post = (mode == AddrMd_PostIndex); + + rn = makeSP(rn); + + *uop = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, post ? 0 : imm); + + if (fp) { + if (size == 16) { + if (load) { + *++uop = new MicroLdrQBFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQTFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQBFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQTFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrQBFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrQTFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrQBFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + *++uop = new MicroStrQTFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + } + } else if (size == 8) { + if (load) { + *++uop = new MicroLdrFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrFpXImmUop(machInst, rt2, + INTREG_UREG0, 8, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrFpXImmUop(machInst, rt2, + INTREG_UREG0, 8, noAlloc, exclusive, acrel); + } + } else if (size == 4) { + if (load) { + *++uop = new MicroLdrDFpXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrDFpXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } + } else { + if (size == 8) { + if (load) { + *++uop = new MicroLdrXImmUop(machInst, rt, INTREG_UREG0, + 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrXImmUop(machInst, rt2, INTREG_UREG0, + size, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrXImmUop(machInst, rt, INTREG_UREG0, + 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrXImmUop(machInst, rt2, INTREG_UREG0, + size, noAlloc, exclusive, acrel); + } + } else if (size == 4) { + if (load) { + if (signExt) { + *++uop = new MicroLdrDSXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroLdrDUXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } else { + *++uop = new MicroStrDXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } + } + + if (writeback) { + *++uop = new MicroAddXiUop(machInst, rn, INTREG_UREG0, + post ? imm : 0); + } + + (*uop)->setLastMicroop(); + + for (StaticInstPtr *curUop = microOps; + !(*curUop)->isLastMicroop(); curUop++) { + (*curUop)->setDelayedCommit(); + } +} + +BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); + } + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + +BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 3; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, 0); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, 0); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, 0); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, 0); + } + microOps[2] = new MicroAddXiUop(machInst, base, base, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setDelayedCommit(); + microOps[2]->setLastMicroop(); +} + +BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 3; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); + } + microOps[2] = new MicroAddXiUop(machInst, base, base, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setDelayedCommit(); + microOps[2]->setLastMicroop(); +} + +BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, IntRegIndex offset, + ArmExtendType type, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXRegUop(machInst, dest, base, + offset, type, imm); + microOps[1] = new MicroLdrQTFpXRegUop(machInst, dest, base, + offset, type, imm); + } else { + microOps[0] = new MicroStrQBFpXRegUop(machInst, dest, base, + offset, type, imm); + microOps[1] = new MicroStrQTFpXRegUop(machInst, dest, base, + offset, type, imm); + } + + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + +BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, IntRegIndex dest, + int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + microOps[0] = new MicroLdrQBFpXLitUop(machInst, dest, imm); + microOps[1] = new MicroLdrQTFpXLitUop(machInst, dest, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm) : @@ -193,7 +401,7 @@ VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, if (deinterleave) numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex rMid = deinterleave ? NumFloatArchRegs : vd * 2; + RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2; uint32_t noAlign = TLB::MustBeOne; @@ -295,7 +503,7 @@ VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst, numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex ufp0 = NumFloatArchRegs; + RegIndex ufp0 = NumFloatV7ArchRegs; unsigned uopIdx = 0; switch (loadSize) { @@ -556,7 +764,7 @@ VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, uint32_t noAlign = TLB::MustBeOne; - RegIndex rMid = interleave ? NumFloatArchRegs : vd * 2; + RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2; unsigned uopIdx = 0; if (interleave) { @@ -657,7 +865,7 @@ VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex ufp0 = NumFloatArchRegs; + RegIndex ufp0 = NumFloatV7ArchRegs; unsigned uopIdx = 0; switch (elems) { @@ -834,6 +1042,285 @@ VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, microOps[numMicroops - 1]->setLastMicroop(); } +VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t numRegs, bool wb) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int totNumBytes = numRegs * dataSize / 8; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0); + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + for (int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroDeintNeon64( + machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, + numStructElems, numRegs, i /* step */); + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; ++i) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t numRegs, bool wb) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int totNumBytes = numRegs * dataSize / 8; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + for(int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroIntNeon64( + machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, + numStructElems, numRegs, i /* step */); + } + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; i++) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t index, bool wb, + bool replicate) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int eSizeBytes = 1 << eSize; + int totNumBytes = numStructElems * eSizeBytes; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0); + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for (; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + for(int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroUnpackNeon64( + machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, + numStructElems, index, i /* step */, replicate); + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; i++) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t index, bool wb, + bool replicate) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int eSizeBytes = 1 << eSize; + int totNumBytes = numStructElems * eSizeBytes; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + for(int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroPackNeon64( + machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, + numStructElems, index, i /* step */, replicate); + } + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accsize */, eSize); + } + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; i++) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, IntRegIndex rn, RegIndex vd, bool single, bool up, @@ -846,14 +1333,14 @@ MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst, // to be functionally identical except that fldmx is deprecated. For now // we'll assume they're otherwise interchangable. int count = (single ? offset : (offset / 2)); - if (count == 0 || count > NumFloatArchRegs) + if (count == 0 || count > NumFloatV7ArchRegs) warn_once("Bad offset field for VFP load/store multiple.\n"); if (count == 0) { // Force there to be at least one microop so the macroop makes sense. writeback = true; } - if (count > NumFloatArchRegs) - count = NumFloatArchRegs; + if (count > NumFloatV7ArchRegs) + count = NumFloatV7ArchRegs; numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0); microOps = new StaticInstPtr[numMicroops]; @@ -934,6 +1421,19 @@ MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const } std::string +MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, ura); + ss << ", "; + printReg(ss, urb); + ss << ", "; + ccprintf(ss, "#%d", imm); + return ss.str(); +} + +std::string MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; @@ -943,6 +1443,18 @@ MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const } std::string +MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, ura); + ccprintf(ss, ", "); + printReg(ss, urb); + printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt); + return ss.str(); +} + +std::string MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; diff --git a/src/arch/arm/insts/macromem.hh b/src/arch/arm/insts/macromem.hh index 4933a1e7c..fc8e3e1b7 100644 --- a/src/arch/arm/insts/macromem.hh +++ b/src/arch/arm/insts/macromem.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -85,6 +85,27 @@ class MicroOp : public PredOp } }; +class MicroOpX : public ArmStaticInst +{ + protected: + MicroOpX(const char *mnem, ExtMachInst machInst, OpClass __opClass) + : ArmStaticInst(mnem, machInst, __opClass) + {} + + public: + void + advancePC(PCState &pcState) const + { + if (flags[IsLastMicroop]) { + pcState.uEnd(); + } else if (flags[IsMicroop]) { + pcState.uAdvance(); + } else { + pcState.advance(); + } + } +}; + /** * Microops for Neon loads/stores */ @@ -136,6 +157,96 @@ class MicroNeonMixLaneOp : public MicroNeonMixOp }; /** + * Microops for AArch64 NEON load/store (de)interleaving + */ +class MicroNeonMixOp64 : public MicroOp +{ + protected: + RegIndex dest, op1; + uint8_t eSize, dataSize, numStructElems, numRegs, step; + + MicroNeonMixOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex _dest, RegIndex _op1, uint8_t _eSize, + uint8_t _dataSize, uint8_t _numStructElems, + uint8_t _numRegs, uint8_t _step) + : MicroOp(mnem, machInst, __opClass), dest(_dest), op1(_op1), + eSize(_eSize), dataSize(_dataSize), numStructElems(_numStructElems), + numRegs(_numRegs), step(_step) + { + } +}; + +class MicroNeonMixLaneOp64 : public MicroOp +{ + protected: + RegIndex dest, op1; + uint8_t eSize, dataSize, numStructElems, lane, step; + bool replicate; + + MicroNeonMixLaneOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex _dest, RegIndex _op1, + uint8_t _eSize, uint8_t _dataSize, + uint8_t _numStructElems, uint8_t _lane, uint8_t _step, + bool _replicate = false) + : MicroOp(mnem, machInst, __opClass), dest(_dest), op1(_op1), + eSize(_eSize), dataSize(_dataSize), numStructElems(_numStructElems), + lane(_lane), step(_step), replicate(_replicate) + { + } +}; + +/** + * Base classes for microcoded AArch64 NEON memory instructions. + */ +class VldMultOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, numRegs; + bool wb; + + VldMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, + bool wb); +}; + +class VstMultOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, numRegs; + bool wb; + + VstMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, + bool wb); +}; + +class VldSingleOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, index; + bool wb, replicate; + + VldSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t index, + bool wb, bool replicate = false); +}; + +class VstSingleOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, index; + bool wb, replicate; + + VstSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t index, + bool wb, bool replicate = false); +}; + +/** * Microops of the form * PC = IntRegA * CPSR = IntRegB @@ -180,10 +291,10 @@ class MicroIntImmOp : public MicroOp { protected: RegIndex ura, urb; - uint32_t imm; + int32_t imm; MicroIntImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, - RegIndex _ura, RegIndex _urb, uint32_t _imm) + RegIndex _ura, RegIndex _urb, int32_t _imm) : MicroOp(mnem, machInst, __opClass), ura(_ura), urb(_urb), imm(_imm) { @@ -192,6 +303,22 @@ class MicroIntImmOp : public MicroOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class MicroIntImmXOp : public MicroOpX +{ + protected: + RegIndex ura, urb; + int64_t imm; + + MicroIntImmXOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex _ura, RegIndex _urb, int64_t _imm) + : MicroOpX(mnem, machInst, __opClass), + ura(_ura), urb(_urb), imm(_imm) + { + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + /** * Microops of the form IntRegA = IntRegB op IntRegC */ @@ -210,6 +337,25 @@ class MicroIntOp : public MicroOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class MicroIntRegXOp : public MicroOp +{ + protected: + RegIndex ura, urb, urc; + ArmExtendType type; + uint32_t shiftAmt; + + MicroIntRegXOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex _ura, RegIndex _urb, RegIndex _urc, + ArmExtendType _type, uint32_t _shiftAmt) + : MicroOp(mnem, machInst, __opClass), + ura(_ura), urb(_urb), urc(_urc), + type(_type), shiftAmt(_shiftAmt) + { + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + /** * Microops of the form IntRegA = IntRegB op shifted IntRegC */ @@ -261,6 +407,61 @@ class MacroMemOp : public PredMacroOp }; /** + * Base class for pair load/store instructions. + */ +class PairMemOp : public PredMacroOp +{ + public: + enum AddrMode { + AddrMd_Offset, + AddrMd_PreIndex, + AddrMd_PostIndex + }; + + protected: + PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + uint32_t size, bool fp, bool load, bool noAlloc, bool signExt, + bool exclusive, bool acrel, int64_t imm, AddrMode mode, + IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2); +}; + +class BigFpMemImmOp : public PredMacroOp +{ + protected: + BigFpMemImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); +}; + +class BigFpMemPostOp : public PredMacroOp +{ + protected: + BigFpMemPostOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); +}; + +class BigFpMemPreOp : public PredMacroOp +{ + protected: + BigFpMemPreOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); +}; + +class BigFpMemRegOp : public PredMacroOp +{ + protected: + BigFpMemRegOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, + IntRegIndex offset, ArmExtendType type, int64_t imm); +}; + +class BigFpMemLitOp : public PredMacroOp +{ + protected: + BigFpMemLitOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + IntRegIndex dest, int64_t imm); +}; + +/** * Base classes for microcoded integer memory instructions. */ class VldMultOp : public PredMacroOp diff --git a/src/arch/arm/insts/mem.cc b/src/arch/arm/insts/mem.cc index 552803b6a..15702ff83 100644 --- a/src/arch/arm/insts/mem.cc +++ b/src/arch/arm/insts/mem.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -157,6 +157,9 @@ SrsOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const case MODE_ABORT: ss << "abort"; break; + case MODE_HYP: + ss << "hyp"; + break; case MODE_UNDEFINED: ss << "undefined"; break; diff --git a/src/arch/arm/insts/mem64.cc b/src/arch/arm/insts/mem64.cc new file mode 100644 index 000000000..4d1fdd302 --- /dev/null +++ b/src/arch/arm/insts/mem64.cc @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/mem64.hh" +#include "arch/arm/tlb.hh" +#include "base/loader/symtab.hh" +#include "mem/request.hh" + +using namespace std; + +namespace ArmISA +{ + +std::string +SysDC64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + ccprintf(ss, ", ["); + printReg(ss, base); + ccprintf(ss, "]"); + return ss.str(); +} + + + +void +Memory64::startDisassembly(std::ostream &os) const +{ + printMnemonic(os, "", false); + printReg(os, dest); + ccprintf(os, ", ["); + printReg(os, base); +} + +void +Memory64::setExcAcRel(bool exclusive, bool acrel) +{ + if (exclusive) + memAccessFlags |= Request::LLSC; + else + memAccessFlags |= ArmISA::TLB::AllowUnaligned; + if (acrel) { + flags[IsMemBarrier] = true; + flags[IsWriteBarrier] = true; + flags[IsReadBarrier] = true; + } +} + +std::string +MemoryImm64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + if (imm) + ccprintf(ss, ", #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryDImm64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, dest2); + ccprintf(ss, ", ["); + printReg(ss, base); + if (imm) + ccprintf(ss, ", #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryDImmEx64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, result); + ccprintf(ss, ", "); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, dest2); + ccprintf(ss, ", ["); + printReg(ss, base); + if (imm) + ccprintf(ss, ", #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryPreIndex64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + ccprintf(ss, ", #%d]!", imm); + return ss.str(); +} + +std::string +MemoryPostIndex64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + if (imm) + ccprintf(ss, "], #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryReg64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + printExtendOperand(false, ss, offset, type, shiftAmt); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryRaw64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryEx64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, result); + ccprintf(ss, ", ["); + printReg(ss, base); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryLiteral64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", #%d", pc + imm); + return ss.str(); +} +} diff --git a/src/arch/arm/insts/mem64.hh b/src/arch/arm/insts/mem64.hh new file mode 100644 index 000000000..21c1e1ea8 --- /dev/null +++ b/src/arch/arm/insts/mem64.hh @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_MEM64_HH__ +#define __ARCH_ARM_MEM64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +namespace ArmISA +{ + +class SysDC64 : public ArmStaticInst +{ + protected: + IntRegIndex base; + IntRegIndex dest; + uint64_t imm; + + SysDC64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _base, IntRegIndex _dest, uint64_t _imm) + : ArmStaticInst(mnem, _machInst, __opClass), base(_base), dest(_dest), + imm(_imm) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MightBeMicro64 : public ArmStaticInst +{ + protected: + MightBeMicro64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) + : ArmStaticInst(mnem, _machInst, __opClass) + {} + + void + advancePC(PCState &pcState) const + { + if (flags[IsLastMicroop]) { + pcState.uEnd(); + } else if (flags[IsMicroop]) { + pcState.uAdvance(); + } else { + pcState.advance(); + } + } +}; + +class Memory64 : public MightBeMicro64 +{ + public: + enum AddrMode { + AddrMd_Offset, + AddrMd_PreIndex, + AddrMd_PostIndex + }; + + protected: + + IntRegIndex dest; + IntRegIndex base; + /// True if the base register is SP (used for SP alignment checking). + bool baseIsSP; + static const unsigned numMicroops = 3; + + StaticInstPtr *uops; + + Memory64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _base) + : MightBeMicro64(mnem, _machInst, __opClass), + dest(_dest), base(_base), uops(NULL) + { + baseIsSP = isSP(_base); + } + + virtual + ~Memory64() + { + delete [] uops; + } + + StaticInstPtr + fetchMicroop(MicroPC microPC) const + { + assert(uops != NULL && microPC < numMicroops); + return uops[microPC]; + } + + void startDisassembly(std::ostream &os) const; + + unsigned memAccessFlags; + + void setExcAcRel(bool exclusive, bool acrel); +}; + +class MemoryImm64 : public Memory64 +{ + protected: + int64_t imm; + + MemoryImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm) + : Memory64(mnem, _machInst, __opClass, _dest, _base), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryDImm64 : public MemoryImm64 +{ + protected: + IntRegIndex dest2; + + MemoryDImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _dest2, IntRegIndex _base, + int64_t _imm) + : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm), + dest2(_dest2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryDImmEx64 : public MemoryDImm64 +{ + protected: + IntRegIndex result; + + MemoryDImmEx64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _result, IntRegIndex _dest, IntRegIndex _dest2, + IntRegIndex _base, int32_t _imm) + : MemoryDImm64(mnem, _machInst, __opClass, _dest, _dest2, + _base, _imm), result(_result) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryPreIndex64 : public MemoryImm64 +{ + protected: + MemoryPreIndex64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + int64_t _imm) + : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryPostIndex64 : public MemoryImm64 +{ + protected: + MemoryPostIndex64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + int64_t _imm) + : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryReg64 : public Memory64 +{ + protected: + IntRegIndex offset; + ArmExtendType type; + uint64_t shiftAmt; + + MemoryReg64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + IntRegIndex _offset, ArmExtendType _type, + uint64_t _shiftAmt) + : Memory64(mnem, _machInst, __opClass, _dest, _base), + offset(_offset), type(_type), shiftAmt(_shiftAmt) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryRaw64 : public Memory64 +{ + protected: + MemoryRaw64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base) + : Memory64(mnem, _machInst, __opClass, _dest, _base) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryEx64 : public Memory64 +{ + protected: + IntRegIndex result; + + MemoryEx64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + IntRegIndex _result) + : Memory64(mnem, _machInst, __opClass, _dest, _base), result(_result) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryLiteral64 : public Memory64 +{ + protected: + int64_t imm; + + MemoryLiteral64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, int64_t _imm) + : Memory64(mnem, _machInst, __opClass, _dest, INTREG_ZERO), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; +} + +#endif //__ARCH_ARM_INSTS_MEM_HH__ diff --git a/src/arch/arm/insts/misc.cc b/src/arch/arm/insts/misc.cc index 6320bb6da..efc334c4b 100644 --- a/src/arch/arm/insts/misc.cc +++ b/src/arch/arm/insts/misc.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -146,6 +146,32 @@ MsrRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const } std::string +MrrcOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest); + ss << ", "; + printReg(ss, dest2); + ss << ", "; + printReg(ss, op1); + return ss.str(); +} + +std::string +McrrOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest); + ss << ", "; + printReg(ss, op1); + ss << ", "; + printReg(ss, op2); + return ss.str(); +} + +std::string ImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; @@ -230,6 +256,16 @@ RegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const } std::string +RegImmImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + +std::string RegRegImmImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; diff --git a/src/arch/arm/insts/misc.hh b/src/arch/arm/insts/misc.hh index c9e114f85..3d947a272 100644 --- a/src/arch/arm/insts/misc.hh +++ b/src/arch/arm/insts/misc.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -94,6 +94,42 @@ class MsrRegOp : public MsrBase std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class MrrcOp : public PredOp +{ + protected: + IntRegIndex op1; + IntRegIndex dest; + IntRegIndex dest2; + uint32_t imm; + + MrrcOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1, IntRegIndex _dest, IntRegIndex _dest2, + uint32_t _imm) : + PredOp(mnem, _machInst, __opClass), op1(_op1), dest(_dest), + dest2(_dest2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class McrrOp : public PredOp +{ + protected: + IntRegIndex op1; + IntRegIndex op2; + IntRegIndex dest; + uint32_t imm; + + McrrOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1, IntRegIndex _op2, IntRegIndex _dest, + uint32_t _imm) : + PredOp(mnem, _machInst, __opClass), op1(_op1), op2(_op2), + dest(_dest), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + class ImmOp : public PredOp { protected: @@ -220,6 +256,23 @@ class RegRegImmOp : public PredOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class RegImmImmOp : public PredOp +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + uint64_t imm1; + uint64_t imm2; + + RegImmImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, uint64_t _imm1, uint64_t _imm2) : + PredOp(mnem, _machInst, __opClass), + dest(_dest), imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + class RegRegImmImmOp : public PredOp { protected: diff --git a/src/arch/arm/insts/misc64.cc b/src/arch/arm/insts/misc64.cc new file mode 100644 index 000000000..3553020da --- /dev/null +++ b/src/arch/arm/insts/misc64.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/misc64.hh" + +std::string +RegRegImmImmOp64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ss << ", "; + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + +std::string +RegRegRegImmOp64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ss << ", "; + printReg(ss, op1); + ss << ", "; + printReg(ss, op2); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +UnknownOp64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + return csprintf("%-10s (inst %#08x)", "unknown", machInst); +} diff --git a/src/arch/arm/insts/misc64.hh b/src/arch/arm/insts/misc64.hh new file mode 100644 index 000000000..5a0e18224 --- /dev/null +++ b/src/arch/arm/insts/misc64.hh @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#ifndef __ARCH_ARM_INSTS_MISC64_HH__ +#define __ARCH_ARM_INSTS_MISC64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +class RegRegImmImmOp64 : public ArmStaticInst +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + uint64_t imm1; + uint64_t imm2; + + RegRegImmImmOp64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + uint64_t _imm1, uint64_t _imm2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class RegRegRegImmOp64 : public ArmStaticInst +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex op2; + uint64_t imm; + + RegRegRegImmOp64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class UnknownOp64 : public ArmStaticInst +{ + protected: + + UnknownOp64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : + ArmStaticInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +#endif diff --git a/src/arch/arm/insts/neon64_mem.hh b/src/arch/arm/insts/neon64_mem.hh new file mode 100644 index 000000000..01ce1b624 --- /dev/null +++ b/src/arch/arm/insts/neon64_mem.hh @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Mbou Eyole + * Giacomo Gabrielli + */ + +/// @file +/// Utility functions and datatypes used by AArch64 NEON memory instructions. + +#ifndef __ARCH_ARM_INSTS_NEON64_MEM_HH__ +#define __ARCH_ARM_INSTS_NEON64_MEM_HH__ + +namespace ArmISA +{ + +typedef uint64_t XReg; + +/// 128-bit NEON vector register. +struct VReg { + XReg hi; + XReg lo; +}; + +/// Write a single NEON vector element leaving the others untouched. +inline void +writeVecElem(VReg *dest, XReg src, int index, int eSize) +{ + // eSize must be less than 4: + // 0 -> 8-bit elems, + // 1 -> 16-bit elems, + // 2 -> 32-bit elems, + // 3 -> 64-bit elems + assert(eSize <= 3); + + int eBits = 8 << eSize; + int lsbPos = index * eBits; + assert(lsbPos < 128); + int shiftAmt = lsbPos % 64; + + XReg maskBits = -1; + if (eBits == 64) { + maskBits = 0; + } else { + maskBits = maskBits << eBits; + } + maskBits = ~maskBits; + + XReg sMask = maskBits; + maskBits = sMask << shiftAmt; + + if (lsbPos < 64) { + dest->lo = (dest->lo & (~maskBits)) | ((src & sMask) << shiftAmt); + } else { + dest->hi = (dest->hi & (~maskBits)) | ((src & sMask) << shiftAmt); + } +} + +/// Read a single NEON vector element. +inline XReg +readVecElem(VReg src, int index, int eSize) +{ + // eSize must be less than 4: + // 0 -> 8-bit elems, + // 1 -> 16-bit elems, + // 2 -> 32-bit elems, + // 3 -> 64-bit elems + assert(eSize <= 3); + + XReg data; + + int eBits = 8 << eSize; + int lsbPos = index * eBits; + assert(lsbPos < 128); + int shiftAmt = lsbPos % 64; + + XReg maskBits = -1; + if (eBits == 64) { + maskBits = 0; + } else { + maskBits = maskBits << eBits; + } + maskBits = ~maskBits; + + if (lsbPos < 64) { + data = (src.lo >> shiftAmt) & maskBits; + } else { + data = (src.hi >> shiftAmt) & maskBits; + } + return data; +} + +} // namespace ArmISA + +#endif // __ARCH_ARM_INSTS_NEON64_MEM_HH__ diff --git a/src/arch/arm/insts/pred_inst.hh b/src/arch/arm/insts/pred_inst.hh index c441d1f32..c5e2ab386 100644 --- a/src/arch/arm/insts/pred_inst.hh +++ b/src/arch/arm/insts/pred_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -78,7 +78,8 @@ modified_imm(uint8_t ctrlImm, uint8_t dataImm) } static inline uint64_t -simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid) +simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid, + bool isAarch64 = false) { uint64_t bigData = data; immValid = true; @@ -133,12 +134,20 @@ simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid) } break; case 0xf: - if (!op) { - uint64_t bVal = bits(bigData, 6) ? (0x1F) : (0x20); - bigData = (bits(bigData, 5, 0) << 19) | - (bVal << 25) | (bits(bigData, 7) << 31); - bigData |= (bigData << 32); - break; + { + uint64_t bVal = 0; + if (!op) { + bVal = bits(bigData, 6) ? (0x1F) : (0x20); + bigData = (bits(bigData, 5, 0) << 19) | + (bVal << 25) | (bits(bigData, 7) << 31); + bigData |= (bigData << 32); + break; + } else if (isAarch64) { + bVal = bits(bigData, 6) ? (0x0FF) : (0x100); + bigData = (bits(bigData, 5, 0) << 48) | + (bVal << 54) | (bits(bigData, 7) << 63); + break; + } } // Fall through, immediate encoding is invalid. default: @@ -179,11 +188,14 @@ class PredOp : public ArmStaticInst /// Constructor PredOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : - ArmStaticInst(mnem, _machInst, __opClass), - condCode(machInst.itstateMask ? - (ConditionCode)(uint8_t)machInst.itstateCond : - (ConditionCode)(unsigned)machInst.condCode) + ArmStaticInst(mnem, _machInst, __opClass) { + if (machInst.aarch64) + condCode = COND_UC; + else if (machInst.itstateMask) + condCode = (ConditionCode)(uint8_t)machInst.itstateCond; + else + condCode = (ConditionCode)(unsigned)machInst.condCode; } }; diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc index 2a8dee162..260c29a84 100644 --- a/src/arch/arm/insts/static_inst.cc +++ b/src/arch/arm/insts/static_inst.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -86,6 +86,90 @@ ArmStaticInst::shift_rm_imm(uint32_t base, uint32_t shamt, return 0; } +int64_t +ArmStaticInst::shiftReg64(uint64_t base, uint64_t shiftAmt, + ArmShiftType type, uint8_t width) const +{ + shiftAmt = shiftAmt % width; + ArmShiftType shiftType; + shiftType = (ArmShiftType)type; + + switch (shiftType) + { + case LSL: + return base << shiftAmt; + case LSR: + if (shiftAmt == 0) + return base; + else + return (base & mask(width)) >> shiftAmt; + case ASR: + if (shiftAmt == 0) { + return base; + } else { + int sign_bit = bits(base, intWidth - 1); + base >>= shiftAmt; + base = sign_bit ? (base | ~mask(intWidth - shiftAmt)) : base; + return base & mask(intWidth); + } + case ROR: + if (shiftAmt == 0) + return base; + else + return (base << (width - shiftAmt)) | (base >> shiftAmt); + default: + ccprintf(std::cerr, "Unhandled shift type\n"); + exit(1); + break; + } + return 0; +} + +int64_t +ArmStaticInst::extendReg64(uint64_t base, ArmExtendType type, + uint64_t shiftAmt, uint8_t width) const +{ + bool sign_extend = false; + int len = 0; + switch (type) { + case UXTB: + len = 8; + break; + case UXTH: + len = 16; + break; + case UXTW: + len = 32; + break; + case UXTX: + len = 64; + break; + case SXTB: + len = 8; + sign_extend = true; + break; + case SXTH: + len = 16; + sign_extend = true; + break; + case SXTW: + len = 32; + sign_extend = true; + break; + case SXTX: + len = 64; + sign_extend = true; + break; + } + len = len <= width - shiftAmt ? len : width - shiftAmt; + uint64_t tmp = (uint64_t) bits(base, len - 1, 0) << shiftAmt; + if (sign_extend) { + int sign_bit = bits(tmp, len + shiftAmt - 1); + tmp = sign_bit ? (tmp | ~mask(len + shiftAmt)) : tmp; + } + return tmp & mask(width); +} + // Shift Rm by Rs int32_t ArmStaticInst::shift_rm_rs(uint32_t base, uint32_t shamt, @@ -214,22 +298,33 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const switch (regIdxToClass(reg, &rel_reg)) { case IntRegClass: - switch (rel_reg) { - case PCReg: - ccprintf(os, "pc"); - break; - case StackPointerReg: - ccprintf(os, "sp"); - break; - case FramePointerReg: - ccprintf(os, "fp"); - break; - case ReturnAddressReg: - ccprintf(os, "lr"); - break; - default: - ccprintf(os, "r%d", reg); - break; + if (aarch64) { + if (reg == INTREG_UREG0) + ccprintf(os, "ureg0"); + else if (reg == INTREG_SPX) + ccprintf(os, "%s%s", (intWidth == 32) ? "w" : "", "sp"); + else if (reg == INTREG_X31) + ccprintf(os, "%szr", (intWidth == 32) ? "w" : "x"); + else + ccprintf(os, "%s%d", (intWidth == 32) ? "w" : "x", reg); + } else { + switch (rel_reg) { + case PCReg: + ccprintf(os, "pc"); + break; + case StackPointerReg: + ccprintf(os, "sp"); + break; + case FramePointerReg: + ccprintf(os, "fp"); + break; + case ReturnAddressReg: + ccprintf(os, "lr"); + break; + default: + ccprintf(os, "r%d", reg); + break; + } } break; case FloatRegClass: @@ -247,67 +342,102 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const void ArmStaticInst::printMnemonic(std::ostream &os, const std::string &suffix, - bool withPred) const + bool withPred, + bool withCond64, + ConditionCode cond64) const { os << " " << mnemonic; - if (withPred) { - unsigned condCode = machInst.condCode; - switch (condCode) { - case COND_EQ: - os << "eq"; - break; - case COND_NE: - os << "ne"; - break; - case COND_CS: - os << "cs"; - break; - case COND_CC: - os << "cc"; - break; - case COND_MI: - os << "mi"; - break; - case COND_PL: - os << "pl"; - break; - case COND_VS: - os << "vs"; - break; - case COND_VC: - os << "vc"; - break; - case COND_HI: - os << "hi"; - break; - case COND_LS: - os << "ls"; - break; - case COND_GE: - os << "ge"; - break; - case COND_LT: - os << "lt"; - break; - case COND_GT: - os << "gt"; - break; - case COND_LE: - os << "le"; - break; - case COND_AL: - // This one is implicit. - break; - case COND_UC: - // Unconditional. - break; - default: - panic("Unrecognized condition code %d.\n", condCode); - } + if (withPred && !aarch64) { + printCondition(os, machInst.condCode); + os << suffix; + } else if (withCond64) { + os << "."; + printCondition(os, cond64); os << suffix; - if (machInst.bigThumb) - os << ".w"; - os << " "; + } + if (machInst.bigThumb) + os << ".w"; + os << " "; +} + +void +ArmStaticInst::printTarget(std::ostream &os, Addr target, + const SymbolTable *symtab) const +{ + Addr symbolAddr; + std::string symbol; + + if (symtab && symtab->findNearestSymbol(target, symbol, symbolAddr)) { + ccprintf(os, "<%s", symbol); + if (symbolAddr != target) + ccprintf(os, "+%d>", target - symbolAddr); + else + ccprintf(os, ">"); + } else { + ccprintf(os, "%#x", target); + } +} + +void +ArmStaticInst::printCondition(std::ostream &os, + unsigned code, + bool noImplicit) const +{ + switch (code) { + case COND_EQ: + os << "eq"; + break; + case COND_NE: + os << "ne"; + break; + case COND_CS: + os << "cs"; + break; + case COND_CC: + os << "cc"; + break; + case COND_MI: + os << "mi"; + break; + case COND_PL: + os << "pl"; + break; + case COND_VS: + os << "vs"; + break; + case COND_VC: + os << "vc"; + break; + case COND_HI: + os << "hi"; + break; + case COND_LS: + os << "ls"; + break; + case COND_GE: + os << "ge"; + break; + case COND_LT: + os << "lt"; + break; + case COND_GT: + os << "gt"; + break; + case COND_LE: + os << "le"; + break; + case COND_AL: + // This one is implicit. + if (noImplicit) + os << "al"; + break; + case COND_UC: + // Unconditional. + if (noImplicit) + os << "uc"; + break; + default: + panic("Unrecognized condition code %d.\n", code); } } @@ -393,6 +523,38 @@ ArmStaticInst::printShiftOperand(std::ostream &os, } void +ArmStaticInst::printExtendOperand(bool firstOperand, std::ostream &os, + IntRegIndex rm, ArmExtendType type, + int64_t shiftAmt) const +{ + if (!firstOperand) + ccprintf(os, ", "); + printReg(os, rm); + if (type == UXTX && shiftAmt == 0) + return; + switch (type) { + case UXTB: ccprintf(os, ", UXTB"); + break; + case UXTH: ccprintf(os, ", UXTH"); + break; + case UXTW: ccprintf(os, ", UXTW"); + break; + case UXTX: ccprintf(os, ", LSL"); + break; + case SXTB: ccprintf(os, ", SXTB"); + break; + case SXTH: ccprintf(os, ", SXTH"); + break; + case SXTW: ccprintf(os, ", SXTW"); + break; + case SXTX: ccprintf(os, ", SXTW"); + break; + } + if (type == UXTX || shiftAmt) + ccprintf(os, " #%d", shiftAmt); +} + +void ArmStaticInst::printDataInst(std::ostream &os, bool withImm, bool immShift, bool s, IntRegIndex rd, IntRegIndex rn, IntRegIndex rm, IntRegIndex rs, uint32_t shiftAmt, diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh index c36024ecd..aeec67ec2 100644 --- a/src/arch/arm/insts/static_inst.hh +++ b/src/arch/arm/insts/static_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -44,6 +44,7 @@ #include "arch/arm/faults.hh" #include "arch/arm/utility.hh" +#include "arch/arm/system.hh" #include "base/trace.hh" #include "cpu/static_inst.hh" #include "sim/byteswap.hh" @@ -55,6 +56,9 @@ namespace ArmISA class ArmStaticInst : public StaticInst { protected: + bool aarch64; + uint8_t intWidth; + int32_t shift_rm_imm(uint32_t base, uint32_t shamt, uint32_t type, uint32_t cfval) const; int32_t shift_rm_rs(uint32_t base, uint32_t shamt, @@ -65,6 +69,11 @@ class ArmStaticInst : public StaticInst bool shift_carry_rs(uint32_t base, uint32_t shamt, uint32_t type, uint32_t cfval) const; + int64_t shiftReg64(uint64_t base, uint64_t shiftAmt, + ArmShiftType type, uint8_t width) const; + int64_t extendReg64(uint64_t base, ArmExtendType type, + uint64_t shiftAmt, uint8_t width) const; + template<int width> static inline bool saturateOp(int32_t &res, int64_t op1, int64_t op2, bool sub=false) @@ -135,6 +144,11 @@ class ArmStaticInst : public StaticInst OpClass __opClass) : StaticInst(mnem, _machInst, __opClass) { + aarch64 = machInst.aarch64; + if (bits(machInst, 28, 24) == 0x10) + intWidth = 64; // Force 64-bit width for ADR/ADRP + else + intWidth = (aarch64 && bits(machInst, 31)) ? 64 : 32; } /// Print a register name for disassembly given the unique @@ -142,13 +156,22 @@ class ArmStaticInst : public StaticInst void printReg(std::ostream &os, int reg) const; void printMnemonic(std::ostream &os, const std::string &suffix = "", - bool withPred = true) const; + bool withPred = true, + bool withCond64 = false, + ConditionCode cond64 = COND_UC) const; + void printTarget(std::ostream &os, Addr target, + const SymbolTable *symtab) const; + void printCondition(std::ostream &os, unsigned code, + bool noImplicit=false) const; void printMemSymbol(std::ostream &os, const SymbolTable *symtab, const std::string &prefix, const Addr addr, const std::string &suffix) const; void printShiftOperand(std::ostream &os, IntRegIndex rm, bool immShift, uint32_t shiftAmt, IntRegIndex rs, ArmShiftType type) const; + void printExtendOperand(bool firstOperand, std::ostream &os, + IntRegIndex rm, ArmExtendType type, + int64_t shiftAmt) const; void printDataInst(std::ostream &os, bool withImm) const; @@ -166,10 +189,13 @@ class ArmStaticInst : public StaticInst std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; static inline uint32_t - cpsrWriteByInstr(CPSR cpsr, uint32_t val, - uint8_t byteMask, bool affectState, bool nmfi) + cpsrWriteByInstr(CPSR cpsr, uint32_t val, SCR scr, NSACR nsacr, + uint8_t byteMask, bool affectState, bool nmfi, ThreadContext *tc) { - bool privileged = (cpsr.mode != MODE_USER); + bool privileged = (cpsr.mode != MODE_USER); + bool haveVirt = ArmSystem::haveVirtualization(tc); + bool haveSecurity = ArmSystem::haveSecurity(tc); + bool isSecure = inSecureState(scr, cpsr) || !haveSecurity; uint32_t bitMask = 0; @@ -182,14 +208,53 @@ class ArmStaticInst : public StaticInst } if (bits(byteMask, 1)) { unsigned highIdx = affectState ? 15 : 9; - unsigned lowIdx = privileged ? 8 : 9; + unsigned lowIdx = (privileged && (isSecure || scr.aw || haveVirt)) + ? 8 : 9; bitMask = bitMask | mask(highIdx, lowIdx); } if (bits(byteMask, 0)) { if (privileged) { - bitMask = bitMask | mask(7, 6); - if (!badMode((OperatingMode)(val & mask(5)))) { - bitMask = bitMask | mask(5); + bitMask |= 1 << 7; + if ( (!nmfi || !((val >> 6) & 0x1)) && + (isSecure || scr.fw || haveVirt) ) { + bitMask |= 1 << 6; + } + // Now check the new mode is allowed + OperatingMode newMode = (OperatingMode) (val & mask(5)); + OperatingMode oldMode = (OperatingMode)(uint32_t)cpsr.mode; + if (!badMode(newMode)) { + bool validModeChange = true; + // Check for attempts to enter modes only permitted in + // Secure state from Non-secure state. These are Monitor + // mode ('10110'), and FIQ mode ('10001') if the Security + // Extensions have reserved it. + if (!isSecure && newMode == MODE_MON) + validModeChange = false; + if (!isSecure && newMode == MODE_FIQ && nsacr.rfr == '1') + validModeChange = false; + // There is no Hyp mode ('11010') in Secure state, so that + // is UNPREDICTABLE + if (scr.ns == '0' && newMode == MODE_HYP) + validModeChange = false; + // Cannot move into Hyp mode directly from a Non-secure + // PL1 mode + if (!isSecure && oldMode != MODE_HYP && newMode == MODE_HYP) + validModeChange = false; + // Cannot move out of Hyp mode with this function except + // on an exception return + if (oldMode == MODE_HYP && newMode != MODE_HYP && !affectState) + validModeChange = false; + // Must not change to 64 bit when running in 32 bit mode + if (!opModeIs64(oldMode) && opModeIs64(newMode)) + validModeChange = false; + + // If we passed all of the above then set the bit mask to + // copy the mode accross + if (validModeChange) { + bitMask = bitMask | mask(5); + } else { + warn_once("Illegal change to CPSR mode attempted\n"); + } } else { warn_once("Ignoring write of bad mode to CPSR.\n"); } @@ -198,11 +263,7 @@ class ArmStaticInst : public StaticInst bitMask = bitMask | (1 << 5); } - bool cpsr_f = cpsr.f; - uint32_t new_cpsr = ((uint32_t)cpsr & ~bitMask) | (val & bitMask); - if (nmfi && !cpsr_f) - new_cpsr &= ~(1 << 6); - return new_cpsr; + return ((uint32_t)cpsr & ~bitMask) | (val & bitMask); } static inline uint32_t @@ -296,12 +357,12 @@ class ArmStaticInst : public StaticInst inline Fault disabledFault() const { - if (FullSystem) { - return new UndefinedInstruction(); - } else { - return new UndefinedInstruction(machInst, false, mnemonic, true); - } + return new UndefinedInstruction(machInst, false, mnemonic, true); } + + public: + virtual void + annotateFault(ArmFault *fault) {} }; } diff --git a/src/arch/arm/insts/vfp.cc b/src/arch/arm/insts/vfp.cc index ca0f58226..03fdc83fa 100644 --- a/src/arch/arm/insts/vfp.cc +++ b/src/arch/arm/insts/vfp.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -46,6 +46,37 @@ */ std::string +FpCondCompRegOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", defCc); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +FpCondSelOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; @@ -92,6 +123,21 @@ FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const } std::string +FpRegRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest + FP_Reg_Base); + ss << ", "; + printReg(ss, op1 + FP_Reg_Base); + ss << ", "; + printReg(ss, op2 + FP_Reg_Base); + ss << ", "; + printReg(ss, op3 + FP_Reg_Base); + return ss.str(); +} + +std::string FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { std::stringstream ss; @@ -131,24 +177,25 @@ prepFpState(uint32_t rMode) } void -finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush) +finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask) { int exceptions = fetestexcept(FeAllExceptions); bool underflow = false; - if (exceptions & FeInvalid) { + if ((exceptions & FeInvalid) && mask.ioc) { fpscr.ioc = 1; } - if (exceptions & FeDivByZero) { + if ((exceptions & FeDivByZero) && mask.dzc) { fpscr.dzc = 1; } - if (exceptions & FeOverflow) { + if ((exceptions & FeOverflow) && mask.ofc) { fpscr.ofc = 1; } if (exceptions & FeUnderflow) { underflow = true; - fpscr.ufc = 1; + if (mask.ufc) + fpscr.ufc = 1; } - if ((exceptions & FeInexact) && !(underflow && flush)) { + if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) { fpscr.ixc = 1; } fesetround(state); @@ -329,19 +376,33 @@ fixFpSFpDDest(FPSCR fpscr, float val) return mid; } -uint16_t -vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, - uint32_t rMode, bool ahp, float op) +static inline uint16_t +vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble) { - uint32_t opBits = fpToBits(op); + uint32_t mWidth; + uint32_t eWidth; + uint32_t eHalfRange; + uint32_t sBitPos; + + if (isDouble) { + mWidth = 52; + eWidth = 11; + } else { + mWidth = 23; + eWidth = 8; + } + sBitPos = eWidth + mWidth; + eHalfRange = (1 << (eWidth-1)) - 1; + // Extract the operand. - bool neg = bits(opBits, 31); - uint32_t exponent = bits(opBits, 30, 23); - uint32_t oldMantissa = bits(opBits, 22, 0); - uint32_t mantissa = oldMantissa >> (23 - 10); + bool neg = bits(opBits, sBitPos); + uint32_t exponent = bits(opBits, sBitPos-1, mWidth); + uint64_t oldMantissa = bits(opBits, mWidth-1, 0); + uint32_t mantissa = oldMantissa >> (mWidth - 10); // Do the conversion. - uint32_t extra = oldMantissa & mask(23 - 10); - if (exponent == 0xff) { + uint64_t extra = oldMantissa & mask(mWidth - 10); + if (exponent == mask(eWidth)) { if (oldMantissa != 0) { // Nans. if (bits(mantissa, 9) == 0) { @@ -379,7 +440,6 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, if (exponent == 0) { // Denormalized. - // If flush to zero is on, this shouldn't happen. assert(!flush); @@ -407,13 +467,13 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, // We need to track the dropped bits differently since // more can be dropped by denormalizing. - bool topOne = bits(extra, 12); - bool restZeros = bits(extra, 11, 0) == 0; + bool topOne = bits(extra, mWidth - 10 - 1); + bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0; - if (exponent <= (127 - 15)) { + if (exponent <= (eHalfRange - 15)) { // The result is too small. Denormalize. mantissa |= (1 << 10); - while (mantissa && exponent <= (127 - 15)) { + while (mantissa && exponent <= (eHalfRange - 15)) { restZeros = restZeros && !topOne; topOne = bits(mantissa, 0); mantissa = mantissa >> 1; @@ -424,7 +484,7 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, exponent = 0; } else { // Change bias. - exponent -= (127 - 15); + exponent -= (eHalfRange - 15); } if (exponent == 0 && (inexact || fpscr.ufe)) { @@ -488,155 +548,115 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, return result; } -float -vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) +uint16_t +vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, float op) { - float junk = 0.0; + uint64_t opBits = fpToBits(op); + return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false); +} + +uint16_t +vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, double op) +{ + uint64_t opBits = fpToBits(op); + return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true); +} + +static inline uint64_t +vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble) +{ + uint32_t mWidth; + uint32_t eWidth; + uint32_t eHalfRange; + uint32_t sBitPos; + + if (isDouble) { + mWidth = 52; + eWidth = 11; + } else { + mWidth = 23; + eWidth = 8; + } + sBitPos = eWidth + mWidth; + eHalfRange = (1 << (eWidth-1)) - 1; + // Extract the bitfields. bool neg = bits(op, 15); uint32_t exponent = bits(op, 14, 10); - uint32_t mantissa = bits(op, 9, 0); + uint64_t mantissa = bits(op, 9, 0); // Do the conversion. if (exponent == 0) { if (mantissa != 0) { // Normalize the value. - exponent = exponent + (127 - 15) + 1; + exponent = exponent + (eHalfRange - 15) + 1; while (mantissa < (1 << 10)) { mantissa = mantissa << 1; exponent--; } } - mantissa = mantissa << (23 - 10); + mantissa = mantissa << (mWidth - 10); } else if (exponent == 0x1f && !ahp) { // Infinities and nans. - exponent = 0xff; + exponent = mask(eWidth); if (mantissa != 0) { // Nans. - mantissa = mantissa << (23 - 10); - if (bits(mantissa, 22) == 0) { + mantissa = mantissa << (mWidth - 10); + if (bits(mantissa, mWidth-1) == 0) { // Signalling nan. fpscr.ioc = 1; - mantissa |= (1 << 22); + mantissa |= (((uint64_t) 1) << (mWidth-1)); } if (defaultNan) { - mantissa &= ~mask(22); + mantissa &= ~mask(mWidth-1); neg = false; } } } else { - exponent = exponent + (127 - 15); - mantissa = mantissa << (23 - 10); + exponent = exponent + (eHalfRange - 15); + mantissa = mantissa << (mWidth - 10); } // Reassemble the result. - uint32_t result = bits(mantissa, 22, 0); - replaceBits(result, 30, 23, exponent); - if (neg) - result |= (1 << 31); + uint64_t result = bits(mantissa, mWidth-1, 0); + replaceBits(result, sBitPos-1, mWidth, exponent); + if (neg) { + result |= (((uint64_t) 1) << sBitPos); + } + return result; +} + +double +vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) +{ + double junk = 0.0; + uint64_t result; + + result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true); return bitsToFp(result, junk); } -uint64_t -vfpFpSToFixed(float val, bool isSigned, bool half, - uint8_t imm, bool rzero) +float +vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) { - int rmode = rzero ? FeRoundZero : fegetround(); - __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); - fesetround(FeRoundNearest); - val = val * powf(2.0, imm); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - fesetround(rmode); - feclearexcept(FeAllExceptions); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - float origVal = val; - val = rintf(val); - int fpType = std::fpclassify(val); - if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { - if (fpType == FP_NAN) { - feraiseexcept(FeInvalid); - } - val = 0.0; - } else if (origVal != val) { - switch (rmode) { - case FeRoundNearest: - if (origVal - val > 0.5) - val += 1.0; - else if (val - origVal > 0.5) - val -= 1.0; - break; - case FeRoundDown: - if (origVal < val) - val -= 1.0; - break; - case FeRoundUpward: - if (origVal > val) - val += 1.0; - break; - } - feraiseexcept(FeInexact); - } + float junk = 0.0; + uint64_t result; - if (isSigned) { - if (half) { - if ((double)val < (int16_t)(1 << 15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)(1 << 15); - } - if ((double)val > (int16_t)mask(15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)mask(15); - } - return (int16_t)val; - } else { - if ((double)val < (int32_t)(1 << 31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)(1 << 31); - } - if ((double)val > (int32_t)mask(31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)mask(31); - } - return (int32_t)val; - } - } else { - if (half) { - if ((double)val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if ((double)val > (mask(16))) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(16); - } - return (uint16_t)val; - } else { - if ((double)val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if ((double)val > (mask(32))) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(32); - } - return (uint32_t)val; - } - } + result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false); + return bitsToFp(result, junk); } float vfpUFixedToFpS(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm) + uint64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = (uint16_t)val; + else if (width == 32) + val = (uint32_t)val; + else if (width != 64) + panic("Unsupported width %d", width); float scale = powf(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -646,11 +666,16 @@ vfpUFixedToFpS(bool flush, bool defaultNan, float vfpSFixedToFpS(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm) + int64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = sext<16>(val & mask(16)); + else if (width == 32) + val = sext<32>(val & mask(32)); + else if (width != 64) + panic("Unsupported width %d", width); + float scale = powf(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -658,106 +683,19 @@ vfpSFixedToFpS(bool flush, bool defaultNan, return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); } -uint64_t -vfpFpDToFixed(double val, bool isSigned, bool half, - uint8_t imm, bool rzero) -{ - int rmode = rzero ? FeRoundZero : fegetround(); - fesetround(FeRoundNearest); - val = val * pow(2.0, imm); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - fesetround(rmode); - feclearexcept(FeAllExceptions); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - double origVal = val; - val = rint(val); - int fpType = std::fpclassify(val); - if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { - if (fpType == FP_NAN) { - feraiseexcept(FeInvalid); - } - val = 0.0; - } else if (origVal != val) { - switch (rmode) { - case FeRoundNearest: - if (origVal - val > 0.5) - val += 1.0; - else if (val - origVal > 0.5) - val -= 1.0; - break; - case FeRoundDown: - if (origVal < val) - val -= 1.0; - break; - case FeRoundUpward: - if (origVal > val) - val += 1.0; - break; - } - feraiseexcept(FeInexact); - } - if (isSigned) { - if (half) { - if (val < (int16_t)(1 << 15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)(1 << 15); - } - if (val > (int16_t)mask(15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)mask(15); - } - return (int16_t)val; - } else { - if (val < (int32_t)(1 << 31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)(1 << 31); - } - if (val > (int32_t)mask(31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)mask(31); - } - return (int32_t)val; - } - } else { - if (half) { - if (val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if (val > mask(16)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(16); - } - return (uint16_t)val; - } else { - if (val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if (val > mask(32)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(32); - } - return (uint32_t)val; - } - } -} double vfpUFixedToFpD(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm) + uint64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = (uint16_t)val; + else if (width == 32) + val = (uint32_t)val; + else if (width != 64) + panic("Unsupported width %d", width); + double scale = pow(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -767,11 +705,16 @@ vfpUFixedToFpD(bool flush, bool defaultNan, double vfpSFixedToFpD(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm) + int64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = sext<16>(val & mask(16)); + else if (width == 32) + val = sext<32>(val & mask(32)); + else if (width != 64) + panic("Unsupported width %d", width); + double scale = pow(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -976,6 +919,85 @@ template double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, double op1, double op2) const; +// @TODO remove this function when we've finished switching all FMA code to use the new FPLIB +template <class fpType> +fpType +FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, + fpType (*func)(fpType, fpType, fpType), + bool flush, bool defaultNan, uint32_t rMode) const +{ + const bool single = (sizeof(fpType) == sizeof(float)); + fpType junk = 0.0; + + if (flush && (flushToZero(op1, op2) || flushToZero(op3))) + fpscr.idc = 1; + VfpSavedState state = prepFpState(rMode); + __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state) + : "m" (op1), "m" (op2), "m" (op3), "m" (state)); + fpType dest = func(op1, op2, op3); + __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); + + int fpClass = std::fpclassify(dest); + // Get NAN behavior right. This varies between x86 and ARM. + if (fpClass == FP_NAN) { + const uint64_t qnan = + single ? 0x7fc00000 : ULL(0x7ff8000000000000); + const bool nan1 = std::isnan(op1); + const bool nan2 = std::isnan(op2); + const bool nan3 = std::isnan(op3); + const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); + const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); + const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan); + if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) { + dest = bitsToFp(qnan, junk); + } else if (signal1) { + dest = bitsToFp(fpToBits(op1) | qnan, junk); + } else if (signal2) { + dest = bitsToFp(fpToBits(op2) | qnan, junk); + } else if (signal3) { + dest = bitsToFp(fpToBits(op3) | qnan, junk); + } else if (nan1) { + dest = op1; + } else if (nan2) { + dest = op2; + } else if (nan3) { + dest = op3; + } + } else if (flush && flushToZero(dest)) { + feraiseexcept(FeUnderflow); + } else if (( + (single && (dest == bitsToFp(0x00800000, junk) || + dest == bitsToFp(0x80800000, junk))) || + (!single && + (dest == bitsToFp(ULL(0x0010000000000000), junk) || + dest == bitsToFp(ULL(0x8010000000000000), junk))) + ) && rMode != VfpRoundZero) { + /* + * Correct for the fact that underflow is detected -before- rounding + * in ARM and -after- rounding in x86. + */ + fesetround(FeRoundZero); + __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3) + : "m" (op1), "m" (op2), "m" (op3)); + fpType temp = func(op1, op2, op2); + __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); + if (flush && flushToZero(temp)) { + dest = temp; + } + } + finishVfp(fpscr, state, flush); + return dest; +} + +template +float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3, + float (*func)(float, float, float), + bool flush, bool defaultNan, uint32_t rMode) const; +template +double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3, + double (*func)(double, double, double), + bool flush, bool defaultNan, uint32_t rMode) const; + template <class fpType> fpType FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2, diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh index 9babaae04..f17f90973 100644 --- a/src/arch/arm/insts/vfp.hh +++ b/src/arch/arm/insts/vfp.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -104,7 +104,8 @@ enum VfpRoundingMode VfpRoundNearest = 0, VfpRoundUpward = 1, VfpRoundDown = 2, - VfpRoundZero = 3 + VfpRoundZero = 3, + VfpRoundAway = 4 }; static inline float bitsToFp(uint64_t, float); @@ -212,7 +213,7 @@ isSnan(fpType val) typedef int VfpSavedState; VfpSavedState prepFpState(uint32_t rMode); -void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush); +void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask = FpscrExcMask); template <class fpType> fpType fixDest(FPSCR fpscr, fpType val, fpType op1); @@ -228,7 +229,11 @@ double fixFpSFpDDest(FPSCR fpscr, float val); uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, float op); -float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); +uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, double op); + +float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); +double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); static inline double makeDouble(uint32_t low, uint32_t high) @@ -249,19 +254,192 @@ highFromDouble(double val) return fpToBits(val) >> 32; } -uint64_t vfpFpSToFixed(float val, bool isSigned, bool half, - uint8_t imm, bool rzero = true); +static inline void +setFPExceptions(int exceptions) { + feclearexcept(FeAllExceptions); + feraiseexcept(exceptions); +} + +template <typename T> +uint64_t +vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool + useRmode = true, VfpRoundingMode roundMode = VfpRoundZero, + bool aarch64 = false) +{ + int rmode; + bool roundAwayFix = false; + + if (!useRmode) { + rmode = fegetround(); + } else { + switch (roundMode) + { + case VfpRoundNearest: + rmode = FeRoundNearest; + break; + case VfpRoundUpward: + rmode = FeRoundUpward; + break; + case VfpRoundDown: + rmode = FeRoundDown; + break; + case VfpRoundZero: + rmode = FeRoundZero; + break; + case VfpRoundAway: + // There is no equivalent rounding mode, use round down and we'll + // fix it later + rmode = FeRoundDown; + roundAwayFix = true; + break; + default: + panic("Unsupported roundMode %d\n", roundMode); + } + } + __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); + fesetround(FeRoundNearest); + val = val * pow(2.0, imm); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + fesetround(rmode); + feclearexcept(FeAllExceptions); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + T origVal = val; + val = rint(val); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + + int exceptions = fetestexcept(FeAllExceptions); + + int fpType = std::fpclassify(val); + if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { + if (fpType == FP_NAN) { + exceptions |= FeInvalid; + } + val = 0.0; + } else if (origVal != val) { + switch (rmode) { + case FeRoundNearest: + if (origVal - val > 0.5) + val += 1.0; + else if (val - origVal > 0.5) + val -= 1.0; + break; + case FeRoundDown: + if (roundAwayFix) { + // The ordering on the subtraction looks a bit odd in that we + // don't do the obvious origVal - val, instead we do + // -(val - origVal). This is required to get the corruct bit + // exact behaviour when very close to the 0.5 threshold. + volatile T error = val; + error -= origVal; + error = -error; + if ( (error > 0.5) || + ((error == 0.5) && (val >= 0)) ) + val += 1.0; + } else { + if (origVal < val) + val -= 1.0; + } + break; + case FeRoundUpward: + if (origVal > val) + val += 1.0; + break; + } + exceptions |= FeInexact; + } + + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + + if (isSigned) { + bool outOfRange = false; + int64_t result = (int64_t) val; + uint64_t finalVal; + + if (!aarch64) { + if (width == 16) { + finalVal = (int16_t)val; + } else if (width == 32) { + finalVal =(int32_t)val; + } else if (width == 64) { + finalVal = result; + } else { + panic("Unsupported width %d\n", width); + } + + // check if value is in range + int64_t minVal = ~mask(width-1); + if ((double)val < minVal) { + outOfRange = true; + finalVal = minVal; + } + int64_t maxVal = mask(width-1); + if ((double)val > maxVal) { + outOfRange = true; + finalVal = maxVal; + } + } else { + bool isNeg = val < 0; + finalVal = result & mask(width); + // If the result is supposed to be less than 64 bits check that the + // upper bits that got thrown away are just sign extension bits + if (width != 64) { + outOfRange = ((uint64_t) result >> (width - 1)) != + (isNeg ? mask(64-width+1) : 0); + } + // Check if the original floating point value doesn't matches the + // integer version we are also out of range. So create a saturated + // result. + if (isNeg) { + outOfRange |= val < result; + if (outOfRange) { + finalVal = 1LL << (width-1); + } + } else { + outOfRange |= val > result; + if (outOfRange) { + finalVal = mask(width-1); + } + } + } + + // Raise an exception if the value was out of range + if (outOfRange) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + } + setFPExceptions(exceptions); + return finalVal; + } else { + if ((double)val < 0) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + setFPExceptions(exceptions); + return 0; + } + + uint64_t result = ((uint64_t) val) & mask(width); + if (val > result) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + setFPExceptions(exceptions); + return mask(width); + } + + setFPExceptions(exceptions); + return result; + } +}; + + float vfpUFixedToFpS(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm); + uint64_t val, uint8_t width, uint8_t imm); float vfpSFixedToFpS(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm); + int64_t val, uint8_t width, uint8_t imm); -uint64_t vfpFpDToFixed(double val, bool isSigned, bool half, - uint8_t imm, bool rzero = true); double vfpUFixedToFpD(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm); + uint64_t val, uint8_t width, uint8_t imm); double vfpSFixedToFpD(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm); + int64_t val, uint8_t width, uint8_t imm); float fprSqrtEstimate(FPSCR &fpscr, float op); uint32_t unsignedRSqrtEstimate(uint32_t op); @@ -292,6 +470,20 @@ class VfpMacroOp : public PredMacroOp void nextIdxs(IntRegIndex &dest); }; +template <typename T> +static inline T +fpAdd(T a, T b) +{ + return a + b; +}; + +template <typename T> +static inline T +fpSub(T a, T b) +{ + return a - b; +}; + static inline float fpAddS(float a, float b) { @@ -328,6 +520,54 @@ fpDivD(double a, double b) return a / b; } +template <typename T> +static inline T +fpDiv(T a, T b) +{ + return a / b; +}; + +template <typename T> +static inline T +fpMulX(T a, T b) +{ + uint64_t opData; + uint32_t sign1; + uint32_t sign2; + const bool single = (sizeof(T) == sizeof(float)); + if (single) { + opData = (fpToBits(a)); + sign1 = opData>>31; + opData = (fpToBits(b)); + sign2 = opData>>31; + } else { + opData = (fpToBits(a)); + sign1 = opData>>63; + opData = (fpToBits(b)); + sign2 = opData>>63; + } + bool inf1 = (std::fpclassify(a) == FP_INFINITE); + bool inf2 = (std::fpclassify(b) == FP_INFINITE); + bool zero1 = (std::fpclassify(a) == FP_ZERO); + bool zero2 = (std::fpclassify(b) == FP_ZERO); + if ((inf1 && zero2) || (zero1 && inf2)) { + if(sign1 ^ sign2) + return (T)(-2.0); + else + return (T)(2.0); + } else { + return (a * b); + } +}; + + +template <typename T> +static inline T +fpMul(T a, T b) +{ + return a * b; +}; + static inline float fpMulS(float a, float b) { @@ -340,23 +580,140 @@ fpMulD(double a, double b) return a * b; } -static inline float -fpMaxS(float a, float b) +template <typename T> +static inline T +// @todo remove this when all calls to it have been replaced with the new fplib implementation +fpMulAdd(T op1, T op2, T addend) +{ + T result; + + if (sizeof(T) == sizeof(float)) + result = fmaf(op1, op2, addend); + else + result = fma(op1, op2, addend); + + // ARM doesn't generate signed nan's from this opperation, so fix up the result + if (std::isnan(result) && !std::isnan(op1) && + !std::isnan(op2) && !std::isnan(addend)) + { + uint64_t bitMask = ULL(0x1) << ((sizeof(T) * 8) - 1); + result = bitsToFp(fpToBits(result) & ~bitMask, op1); + } + return result; +} + +template <typename T> +static inline T +fpRIntX(T a, FPSCR &fpscr) +{ + T rVal; + + rVal = rint(a); + if (rVal != a && !std::isnan(a)) + fpscr.ixc = 1; + return (rVal); +}; + +template <typename T> +static inline T +fpMaxNum(T a, T b) { + const bool single = (sizeof(T) == sizeof(float)); + const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); + + if (std::isnan(a)) + return ((fpToBits(a) & qnan) == qnan) ? b : a; + if (std::isnan(b)) + return ((fpToBits(b) & qnan) == qnan) ? a : b; // Handle comparisons of +0 and -0. if (!std::signbit(a) && std::signbit(b)) return a; - return fmaxf(a, b); -} + return fmax(a, b); +}; -static inline float -fpMinS(float a, float b) +template <typename T> +static inline T +fpMax(T a, T b) { + if (std::isnan(a)) + return a; + if (std::isnan(b)) + return b; + return fpMaxNum<T>(a, b); +}; + +template <typename T> +static inline T +fpMinNum(T a, T b) +{ + const bool single = (sizeof(T) == sizeof(float)); + const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); + + if (std::isnan(a)) + return ((fpToBits(a) & qnan) == qnan) ? b : a; + if (std::isnan(b)) + return ((fpToBits(b) & qnan) == qnan) ? a : b; // Handle comparisons of +0 and -0. if (std::signbit(a) && !std::signbit(b)) return a; - return fminf(a, b); -} + return fmin(a, b); +}; + +template <typename T> +static inline T +fpMin(T a, T b) +{ + if (std::isnan(a)) + return a; + if (std::isnan(b)) + return b; + return fpMinNum<T>(a, b); +}; + +template <typename T> +static inline T +fpRSqrts(T a, T b) +{ + int fpClassA = std::fpclassify(a); + int fpClassB = std::fpclassify(b); + T aXb; + int fpClassAxB; + + if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) || + (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) { + return 1.5; + } + aXb = a*b; + fpClassAxB = std::fpclassify(aXb); + if(fpClassAxB == FP_SUBNORMAL) { + feraiseexcept(FeUnderflow); + return 1.5; + } + return (3.0 - (a * b)) / 2.0; +}; + +template <typename T> +static inline T +fpRecps(T a, T b) +{ + int fpClassA = std::fpclassify(a); + int fpClassB = std::fpclassify(b); + T aXb; + int fpClassAxB; + + if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) || + (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) { + return 2.0; + } + aXb = a*b; + fpClassAxB = std::fpclassify(aXb); + if(fpClassAxB == FP_SUBNORMAL) { + feraiseexcept(FeUnderflow); + return 2.0; + } + return 2.0 - (a * b); +}; + static inline float fpRSqrtsS(float a, float b) @@ -400,6 +757,23 @@ fpRecpsS(float a, float b) return 2.0 - (a * b); } +template <typename T> +static inline T +roundNEven(T a) { + T val; + + val = round(a); + if (a - val == 0.5) { + if ( (((int) a) & 1) == 0 ) val += 1.0; + } + else if (a - val == -0.5) { + if ( (((int) a) & 1) == 0 ) val -= 1.0; + } + return val; +} + + + class FpOp : public PredOp { protected: @@ -457,6 +831,12 @@ class FpOp : public PredOp template <class fpType> fpType + ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, + fpType (*func)(fpType, fpType, fpType), + bool flush, bool defaultNan, uint32_t rMode) const; + + template <class fpType> + fpType binaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType (*func)(fpType, fpType), bool flush, bool defaultNan, uint32_t rMode) const; @@ -478,6 +858,55 @@ class FpOp : public PredOp pcState.advance(); } } + + float + fpSqrt (FPSCR fpscr,float x) const + { + + return unaryOp(fpscr,x,sqrtf,fpscr.fz,fpscr.rMode); + + } + + double + fpSqrt (FPSCR fpscr,double x) const + { + + return unaryOp(fpscr,x,sqrt,fpscr.fz,fpscr.rMode); + + } +}; + +class FpCondCompRegOp : public FpOp +{ + protected: + IntRegIndex op1, op2; + ConditionCode condCode; + uint8_t defCc; + + FpCondCompRegOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode, uint8_t _defCc) : + FpOp(mnem, _machInst, __opClass), + op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class FpCondSelOp : public FpOp +{ + protected: + IntRegIndex dest, op1, op2; + ConditionCode condCode; + + FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode) : + FpOp(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; class FpRegRegOp : public FpOp @@ -550,6 +979,26 @@ class FpRegRegRegOp : public FpOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class FpRegRegRegRegOp : public FpOp +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex op2; + IntRegIndex op3; + + FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop) : + FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2), + op3(_op3) + { + setVfpMicroFlags(mode, flags); + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + class FpRegRegRegImmOp : public FpOp { protected: diff --git a/src/arch/arm/interrupts.cc b/src/arch/arm/interrupts.cc index c05ae984e..6682b75a0 100644 --- a/src/arch/arm/interrupts.cc +++ b/src/arch/arm/interrupts.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 ARM Limited + * Copyright (c) 2009, 2012-2013 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -38,9 +38,128 @@ */ #include "arch/arm/interrupts.hh" +#include "arch/arm/system.hh" ArmISA::Interrupts * ArmInterruptsParams::create() { return new ArmISA::Interrupts(this); } + +bool +ArmISA::Interrupts::takeInt(ThreadContext *tc, InterruptTypes int_type) const +{ + // Table G1-17~19 of ARM V8 ARM + InterruptMask mask; + bool highest_el_is_64 = ArmSystem::highestELIs64(tc); + + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + SCR scr; + HCR hcr; + hcr = tc->readMiscReg(MISCREG_HCR); + ExceptionLevel el = (ExceptionLevel) ((uint32_t) cpsr.el); + bool cpsr_mask_bit, scr_routing_bit, scr_fwaw_bit, hcr_mask_override_bit; + + if (!highest_el_is_64) + scr = tc->readMiscReg(MISCREG_SCR); + else + scr = tc->readMiscReg(MISCREG_SCR_EL3); + + bool is_secure = inSecureState(scr, cpsr); + + switch(int_type) { + case INT_FIQ: + cpsr_mask_bit = cpsr.f; + scr_routing_bit = scr.fiq; + scr_fwaw_bit = scr.fw; + hcr_mask_override_bit = hcr.fmo; + break; + case INT_IRQ: + cpsr_mask_bit = cpsr.i; + scr_routing_bit = scr.irq; + scr_fwaw_bit = 1; + hcr_mask_override_bit = hcr.imo; + break; + case INT_ABT: + cpsr_mask_bit = cpsr.a; + scr_routing_bit = scr.ea; + scr_fwaw_bit = scr.aw; + hcr_mask_override_bit = hcr.amo; + break; + default: + panic("Unhandled interrupt type!"); + } + + if (hcr.tge) + hcr_mask_override_bit = 1; + + if (!highest_el_is_64) { + // AArch32 + if (!scr_routing_bit) { + // SCR IRQ == 0 + if (!hcr_mask_override_bit) + mask = INT_MASK_M; + else { + if (!is_secure && (el == EL0 || el == EL1)) + mask = INT_MASK_T; + else + mask = INT_MASK_M; + } + } else { + // SCR IRQ == 1 + if ((!is_secure) && + (hcr_mask_override_bit || + (!scr_fwaw_bit && !hcr_mask_override_bit))) + mask = INT_MASK_T; + else + mask = INT_MASK_M; + } + } else { + // AArch64 + if (!scr_routing_bit) { + // SCR IRQ == 0 + if (!scr.rw) { + // SCR RW == 0 + if (!hcr_mask_override_bit) { + if (el == EL3) + mask = INT_MASK_P; + else + mask = INT_MASK_M; + } else { + if (el == EL3) + mask = INT_MASK_T; + else if (is_secure || el == EL2) + mask = INT_MASK_M; + else + mask = INT_MASK_T; + } + } else { + // SCR RW == 1 + if (!hcr_mask_override_bit) { + if (el == EL3 || el == EL2) + mask = INT_MASK_P; + else + mask = INT_MASK_M; + } else { + if (el == EL3) + mask = INT_MASK_P; + else if (is_secure || el == EL2) + mask = INT_MASK_M; + else + mask = INT_MASK_T; + } + } + } else { + // SCR IRQ == 1 + if (el == EL3) + mask = INT_MASK_M; + else + mask = INT_MASK_T; + } + } + + return ((mask == INT_MASK_T) || + ((mask == INT_MASK_M) && !cpsr_mask_bit)) && + (mask != INT_MASK_P); +} + diff --git a/src/arch/arm/interrupts.hh b/src/arch/arm/interrupts.hh index 7def6ddd6..8e6c2b261 100644 --- a/src/arch/arm/interrupts.hh +++ b/src/arch/arm/interrupts.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010,2012 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -47,6 +47,7 @@ #include "arch/arm/isa_traits.hh" #include "arch/arm/miscregs.hh" #include "arch/arm/registers.hh" +#include "arch/arm/utility.hh" #include "cpu/thread_context.hh" #include "debug/Interrupt.hh" #include "params/ArmInterrupts.hh" @@ -123,31 +124,79 @@ class Interrupts : public SimObject memset(interrupts, 0, sizeof(interrupts)); } + enum InterruptMask { + INT_MASK_M, // masked (subject to PSTATE.{A,I,F} mask bit + INT_MASK_T, // taken regardless of mask + INT_MASK_P // pending + }; + + bool takeInt(ThreadContext *tc, InterruptTypes int_type) const; + bool checkInterrupts(ThreadContext *tc) const { - if (!intStatus) + HCR hcr = tc->readMiscReg(MISCREG_HCR); + + if (!(intStatus || hcr.va || hcr.vi || hcr.vf)) return false; CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); - - return ((interrupts[INT_IRQ] && !cpsr.i) || - (interrupts[INT_FIQ] && !cpsr.f) || - (interrupts[INT_ABT] && !cpsr.a) || - (interrupts[INT_RST]) || - (interrupts[INT_SEV])); + SCR scr = tc->readMiscReg(MISCREG_SCR); + + bool isHypMode = cpsr.mode == MODE_HYP; + bool isSecure = inSecureState(scr, cpsr); + bool allowVIrq = !cpsr.i && hcr.imo && !isSecure && !isHypMode; + bool allowVFiq = !cpsr.f && hcr.fmo && !isSecure && !isHypMode; + bool allowVAbort = !cpsr.a && hcr.amo && !isSecure && !isHypMode; + + bool take_irq = takeInt(tc, INT_IRQ); + bool take_fiq = takeInt(tc, INT_FIQ); + bool take_ea = takeInt(tc, INT_ABT); + + return ((interrupts[INT_IRQ] && take_irq) || + (interrupts[INT_FIQ] && take_fiq) || + (interrupts[INT_ABT] && take_ea) || + ((interrupts[INT_VIRT_IRQ] || hcr.vi) && allowVIrq) || + ((interrupts[INT_VIRT_FIQ] || hcr.vf) && allowVFiq) || + (hcr.va && allowVAbort) || + (interrupts[INT_RST]) || + (interrupts[INT_SEV]) + ); } /** - * Check the raw interrupt state. * This function is used to check if a wfi operation should sleep. If there * is an interrupt pending, even if it's masked, wfi doesn't sleep. * @return any interrupts pending */ bool - checkRaw() const + checkWfiWake(HCR hcr, CPSR cpsr, SCR scr) const + { + uint64_t maskedIntStatus; + bool virtWake; + + maskedIntStatus = intStatus & ~((1 << INT_VIRT_IRQ) | + (1 << INT_VIRT_FIQ)); + virtWake = (hcr.vi || interrupts[INT_VIRT_IRQ]) && hcr.imo; + virtWake |= (hcr.vf || interrupts[INT_VIRT_FIQ]) && hcr.fmo; + virtWake |= hcr.va && hcr.amo; + virtWake &= (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr); + return maskedIntStatus || virtWake; + } + + uint32_t + getISR(HCR hcr, CPSR cpsr, SCR scr) { - return intStatus; + bool useHcrMux; + CPSR isr = 0; // ARM ARM states ISR reg uses same bit possitions as CPSR + + useHcrMux = (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr); + isr.i = (useHcrMux & hcr.imo) ? (interrupts[INT_VIRT_IRQ] || hcr.vi) + : interrupts[INT_IRQ]; + isr.f = (useHcrMux & hcr.fmo) ? (interrupts[INT_VIRT_FIQ] || hcr.vf) + : interrupts[INT_FIQ]; + isr.a = (useHcrMux & hcr.amo) ? hcr.va : interrupts[INT_ABT]; + return isr; } /** @@ -172,22 +221,45 @@ class Interrupts : public SimObject Fault getInterrupt(ThreadContext *tc) { - if (!intStatus) + HCR hcr = tc->readMiscReg(MISCREG_HCR); + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + SCR scr = tc->readMiscReg(MISCREG_SCR); + + // Calculate a few temp vars so we can work out if there's a pending + // virtual interrupt, and if its allowed to happen + // ARM ARM Issue C section B1.9.9, B1.9.11, and B1.9.13 + bool isHypMode = cpsr.mode == MODE_HYP; + bool isSecure = inSecureState(scr, cpsr); + bool allowVIrq = !cpsr.i && hcr.imo && !isSecure && !isHypMode; + bool allowVFiq = !cpsr.f && hcr.fmo && !isSecure && !isHypMode; + bool allowVAbort = !cpsr.a && hcr.amo && !isSecure && !isHypMode; + + if ( !(intStatus || (hcr.vi && allowVIrq) || (hcr.vf && allowVFiq) || + (hcr.va && allowVAbort)) ) return NoFault; - CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + bool take_irq = takeInt(tc, INT_IRQ); + bool take_fiq = takeInt(tc, INT_FIQ); + bool take_ea = takeInt(tc, INT_ABT); + - if (interrupts[INT_IRQ] && !cpsr.i) + if (interrupts[INT_IRQ] && take_irq) return new Interrupt; - if (interrupts[INT_FIQ] && !cpsr.f) + if ((interrupts[INT_VIRT_IRQ] || hcr.vi) && allowVIrq) + return new VirtualInterrupt; + if (interrupts[INT_FIQ] && take_fiq) return new FastInterrupt; - if (interrupts[INT_ABT] && !cpsr.a) - return new DataAbort(0, false, 0, - ArmFault::AsynchronousExternalAbort); + if ((interrupts[INT_VIRT_FIQ] || hcr.vf) && allowVFiq) + return new VirtualFastInterrupt; + if (interrupts[INT_ABT] && take_ea) + return new SystemError; + if (hcr.va && allowVAbort) + return new VirtualDataAbort(0, TlbEntry::DomainType::NoAccess, false, + ArmFault::AsynchronousExternalAbort); if (interrupts[INT_RST]) - return new Reset; + return new Reset; if (interrupts[INT_SEV]) - return new ArmSev; + return new ArmSev; panic("intStatus and interrupts not in sync\n"); } diff --git a/src/arch/arm/intregs.hh b/src/arch/arm/intregs.hh index 3fe00b765..fa18aa68d 100644 --- a/src/arch/arm/intregs.hh +++ b/src/arch/arm/intregs.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -83,6 +83,9 @@ enum IntRegIndex INTREG_R14_MON, INTREG_LR_MON = INTREG_R14_MON, + INTREG_R13_HYP, + INTREG_SP_HYP = INTREG_R13_HYP, + INTREG_R13_ABT, INTREG_SP_ABT = INTREG_R13_ABT, INTREG_R14_ABT, @@ -108,7 +111,7 @@ enum IntRegIndex INTREG_R14_FIQ, INTREG_LR_FIQ = INTREG_R14_FIQ, - INTREG_ZERO, // Dummy zero reg since there has to be one. + INTREG_ZERO, INTREG_UREG0, INTREG_UREG1, INTREG_UREG2, @@ -117,12 +120,54 @@ enum IntRegIndex INTREG_CONDCODES_V, INTREG_CONDCODES_GE, INTREG_FPCONDCODES, + INTREG_DUMMY, // Dummy reg used to throw away int reg results + + INTREG_SP0, + INTREG_SP1, + INTREG_SP2, + INTREG_SP3, NUM_INTREGS, - NUM_ARCH_INTREGS = INTREG_PC + 1, + NUM_ARCH_INTREGS = 32, + + /* AArch64 registers */ + INTREG_X0 = 0, + INTREG_X1, + INTREG_X2, + INTREG_X3, + INTREG_X4, + INTREG_X5, + INTREG_X6, + INTREG_X7, + INTREG_X8, + INTREG_X9, + INTREG_X10, + INTREG_X11, + INTREG_X12, + INTREG_X13, + INTREG_X14, + INTREG_X15, + INTREG_X16, + INTREG_X17, + INTREG_X18, + INTREG_X19, + INTREG_X20, + INTREG_X21, + INTREG_X22, + INTREG_X23, + INTREG_X24, + INTREG_X25, + INTREG_X26, + INTREG_X27, + INTREG_X28, + INTREG_X29, + INTREG_X30, + INTREG_X31, + + INTREG_SPX = NUM_INTREGS, /* All the aliased indexes. */ - + /* USR mode */ INTREG_R0_USR = INTREG_R0, INTREG_R1_USR = INTREG_R1, @@ -195,6 +240,25 @@ enum IntRegIndex INTREG_PC_ABT = INTREG_PC, INTREG_R15_ABT = INTREG_R15, + /* HYP mode */ + INTREG_R0_HYP = INTREG_R0, + INTREG_R1_HYP = INTREG_R1, + INTREG_R2_HYP = INTREG_R2, + INTREG_R3_HYP = INTREG_R3, + INTREG_R4_HYP = INTREG_R4, + INTREG_R5_HYP = INTREG_R5, + INTREG_R6_HYP = INTREG_R6, + INTREG_R7_HYP = INTREG_R7, + INTREG_R8_HYP = INTREG_R8, + INTREG_R9_HYP = INTREG_R9, + INTREG_R10_HYP = INTREG_R10, + INTREG_R11_HYP = INTREG_R11, + INTREG_R12_HYP = INTREG_R12, + INTREG_LR_HYP = INTREG_LR, + INTREG_R14_HYP = INTREG_R14, + INTREG_PC_HYP = INTREG_PC, + INTREG_R15_HYP = INTREG_R15, + /* UND mode */ INTREG_R0_UND = INTREG_R0, INTREG_R1_UND = INTREG_R1, @@ -244,11 +308,26 @@ enum IntRegIndex typedef IntRegIndex IntRegMap[NUM_ARCH_INTREGS]; +const IntRegMap IntReg64Map = { + INTREG_R0, INTREG_R1, INTREG_R2, INTREG_R3, + INTREG_R4, INTREG_R5, INTREG_R6, INTREG_R7, + INTREG_R8_USR, INTREG_R9_USR, INTREG_R10_USR, INTREG_R11_USR, + INTREG_R12_USR, INTREG_R13_USR, INTREG_R14_USR, INTREG_R13_HYP, + INTREG_R14_IRQ, INTREG_R13_IRQ, INTREG_R14_SVC, INTREG_R13_SVC, + INTREG_R14_ABT, INTREG_R13_ABT, INTREG_R14_UND, INTREG_R13_UND, + INTREG_R8_FIQ, INTREG_R9_FIQ, INTREG_R10_FIQ, INTREG_R11_FIQ, + INTREG_R12_FIQ, INTREG_R13_FIQ, INTREG_R14_FIQ, INTREG_ZERO +}; + const IntRegMap IntRegUsrMap = { INTREG_R0_USR, INTREG_R1_USR, INTREG_R2_USR, INTREG_R3_USR, INTREG_R4_USR, INTREG_R5_USR, INTREG_R6_USR, INTREG_R7_USR, INTREG_R8_USR, INTREG_R9_USR, INTREG_R10_USR, INTREG_R11_USR, - INTREG_R12_USR, INTREG_R13_USR, INTREG_R14_USR, INTREG_R15_USR + INTREG_R12_USR, INTREG_R13_USR, INTREG_R14_USR, INTREG_R15_USR, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -258,11 +337,33 @@ INTREG_USR(unsigned index) return IntRegUsrMap[index]; } +const IntRegMap IntRegHypMap = { + INTREG_R0_HYP, INTREG_R1_HYP, INTREG_R2_HYP, INTREG_R3_HYP, + INTREG_R4_HYP, INTREG_R5_HYP, INTREG_R6_HYP, INTREG_R7_HYP, + INTREG_R8_HYP, INTREG_R9_HYP, INTREG_R10_HYP, INTREG_R11_HYP, + INTREG_R12_HYP, INTREG_R13_HYP, INTREG_R14_HYP, INTREG_R15_HYP, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO +}; + +static inline IntRegIndex +INTREG_HYP(unsigned index) +{ + assert(index < NUM_ARCH_INTREGS); + return IntRegHypMap[index]; +} + const IntRegMap IntRegSvcMap = { INTREG_R0_SVC, INTREG_R1_SVC, INTREG_R2_SVC, INTREG_R3_SVC, INTREG_R4_SVC, INTREG_R5_SVC, INTREG_R6_SVC, INTREG_R7_SVC, INTREG_R8_SVC, INTREG_R9_SVC, INTREG_R10_SVC, INTREG_R11_SVC, - INTREG_R12_SVC, INTREG_R13_SVC, INTREG_R14_SVC, INTREG_R15_SVC + INTREG_R12_SVC, INTREG_R13_SVC, INTREG_R14_SVC, INTREG_R15_SVC, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -276,7 +377,11 @@ const IntRegMap IntRegMonMap = { INTREG_R0_MON, INTREG_R1_MON, INTREG_R2_MON, INTREG_R3_MON, INTREG_R4_MON, INTREG_R5_MON, INTREG_R6_MON, INTREG_R7_MON, INTREG_R8_MON, INTREG_R9_MON, INTREG_R10_MON, INTREG_R11_MON, - INTREG_R12_MON, INTREG_R13_MON, INTREG_R14_MON, INTREG_R15_MON + INTREG_R12_MON, INTREG_R13_MON, INTREG_R14_MON, INTREG_R15_MON, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -290,7 +395,11 @@ const IntRegMap IntRegAbtMap = { INTREG_R0_ABT, INTREG_R1_ABT, INTREG_R2_ABT, INTREG_R3_ABT, INTREG_R4_ABT, INTREG_R5_ABT, INTREG_R6_ABT, INTREG_R7_ABT, INTREG_R8_ABT, INTREG_R9_ABT, INTREG_R10_ABT, INTREG_R11_ABT, - INTREG_R12_ABT, INTREG_R13_ABT, INTREG_R14_ABT, INTREG_R15_ABT + INTREG_R12_ABT, INTREG_R13_ABT, INTREG_R14_ABT, INTREG_R15_ABT, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -304,7 +413,11 @@ const IntRegMap IntRegUndMap = { INTREG_R0_UND, INTREG_R1_UND, INTREG_R2_UND, INTREG_R3_UND, INTREG_R4_UND, INTREG_R5_UND, INTREG_R6_UND, INTREG_R7_UND, INTREG_R8_UND, INTREG_R9_UND, INTREG_R10_UND, INTREG_R11_UND, - INTREG_R12_UND, INTREG_R13_UND, INTREG_R14_UND, INTREG_R15_UND + INTREG_R12_UND, INTREG_R13_UND, INTREG_R14_UND, INTREG_R15_UND, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -318,7 +431,11 @@ const IntRegMap IntRegIrqMap = { INTREG_R0_IRQ, INTREG_R1_IRQ, INTREG_R2_IRQ, INTREG_R3_IRQ, INTREG_R4_IRQ, INTREG_R5_IRQ, INTREG_R6_IRQ, INTREG_R7_IRQ, INTREG_R8_IRQ, INTREG_R9_IRQ, INTREG_R10_IRQ, INTREG_R11_IRQ, - INTREG_R12_IRQ, INTREG_R13_IRQ, INTREG_R14_IRQ, INTREG_R15_IRQ + INTREG_R12_IRQ, INTREG_R13_IRQ, INTREG_R14_IRQ, INTREG_R15_IRQ, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -332,7 +449,11 @@ const IntRegMap IntRegFiqMap = { INTREG_R0_FIQ, INTREG_R1_FIQ, INTREG_R2_FIQ, INTREG_R3_FIQ, INTREG_R4_FIQ, INTREG_R5_FIQ, INTREG_R6_FIQ, INTREG_R7_FIQ, INTREG_R8_FIQ, INTREG_R9_FIQ, INTREG_R10_FIQ, INTREG_R11_FIQ, - INTREG_R12_FIQ, INTREG_R13_FIQ, INTREG_R14_FIQ, INTREG_R15_FIQ + INTREG_R12_FIQ, INTREG_R13_FIQ, INTREG_R14_FIQ, INTREG_R15_FIQ, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -351,6 +472,51 @@ intRegInMode(OperatingMode mode, int reg) return mode * intRegsPerMode + reg; } +static inline int +flattenIntRegModeIndex(int reg) +{ + int mode = reg / intRegsPerMode; + reg = reg % intRegsPerMode; + switch (mode) { + case MODE_USER: + case MODE_SYSTEM: + return INTREG_USR(reg); + case MODE_FIQ: + return INTREG_FIQ(reg); + case MODE_IRQ: + return INTREG_IRQ(reg); + case MODE_SVC: + return INTREG_SVC(reg); + case MODE_MON: + return INTREG_MON(reg); + case MODE_ABORT: + return INTREG_ABT(reg); + case MODE_HYP: + return INTREG_HYP(reg); + case MODE_UNDEFINED: + return INTREG_UND(reg); + default: + panic("%d: Flattening into an unknown mode: reg:%#x mode:%#x\n", + curTick(), reg, mode); + } +} + + +static inline IntRegIndex +makeSP(IntRegIndex reg) +{ + if (reg == INTREG_X31) + reg = INTREG_SPX; + return reg; +} + + +static inline bool +isSP(IntRegIndex reg) +{ + return reg == INTREG_SPX; +} + } #endif diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index 86be2803d..4f1ef91ec 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -51,12 +51,111 @@ namespace ArmISA { + +/** + * Some registers aliase with others, and therefore need to be translated. + * For each entry: + * The first value is the misc register that is to be looked up + * the second value is the lower part of the translation + * the third the upper part + */ +const struct ISA::MiscRegInitializerEntry + ISA::MiscRegSwitch[miscRegTranslateMax] = { + {MISCREG_CSSELR_EL1, {MISCREG_CSSELR, 0}}, + {MISCREG_SCTLR_EL1, {MISCREG_SCTLR, 0}}, + {MISCREG_SCTLR_EL2, {MISCREG_HSCTLR, 0}}, + {MISCREG_ACTLR_EL1, {MISCREG_ACTLR, 0}}, + {MISCREG_ACTLR_EL2, {MISCREG_HACTLR, 0}}, + {MISCREG_CPACR_EL1, {MISCREG_CPACR, 0}}, + {MISCREG_CPTR_EL2, {MISCREG_HCPTR, 0}}, + {MISCREG_HCR_EL2, {MISCREG_HCR, 0}}, + {MISCREG_MDCR_EL2, {MISCREG_HDCR, 0}}, + {MISCREG_HSTR_EL2, {MISCREG_HSTR, 0}}, + {MISCREG_HACR_EL2, {MISCREG_HACR, 0}}, + {MISCREG_TTBR0_EL1, {MISCREG_TTBR0, 0}}, + {MISCREG_TTBR1_EL1, {MISCREG_TTBR1, 0}}, + {MISCREG_TTBR0_EL2, {MISCREG_HTTBR, 0}}, + {MISCREG_VTTBR_EL2, {MISCREG_VTTBR, 0}}, + {MISCREG_TCR_EL1, {MISCREG_TTBCR, 0}}, + {MISCREG_TCR_EL2, {MISCREG_HTCR, 0}}, + {MISCREG_VTCR_EL2, {MISCREG_VTCR, 0}}, + {MISCREG_AFSR0_EL1, {MISCREG_ADFSR, 0}}, + {MISCREG_AFSR1_EL1, {MISCREG_AIFSR, 0}}, + {MISCREG_AFSR0_EL2, {MISCREG_HADFSR, 0}}, + {MISCREG_AFSR1_EL2, {MISCREG_HAIFSR, 0}}, + {MISCREG_ESR_EL2, {MISCREG_HSR, 0}}, + {MISCREG_FAR_EL1, {MISCREG_DFAR, MISCREG_IFAR}}, + {MISCREG_FAR_EL2, {MISCREG_HDFAR, MISCREG_HIFAR}}, + {MISCREG_HPFAR_EL2, {MISCREG_HPFAR, 0}}, + {MISCREG_PAR_EL1, {MISCREG_PAR, 0}}, + {MISCREG_MAIR_EL1, {MISCREG_PRRR, MISCREG_NMRR}}, + {MISCREG_MAIR_EL2, {MISCREG_HMAIR0, MISCREG_HMAIR1}}, + {MISCREG_AMAIR_EL1, {MISCREG_AMAIR0, MISCREG_AMAIR1}}, + {MISCREG_VBAR_EL1, {MISCREG_VBAR, 0}}, + {MISCREG_VBAR_EL2, {MISCREG_HVBAR, 0}}, + {MISCREG_CONTEXTIDR_EL1, {MISCREG_CONTEXTIDR, 0}}, + {MISCREG_TPIDR_EL0, {MISCREG_TPIDRURW, 0}}, + {MISCREG_TPIDRRO_EL0, {MISCREG_TPIDRURO, 0}}, + {MISCREG_TPIDR_EL1, {MISCREG_TPIDRPRW, 0}}, + {MISCREG_TPIDR_EL2, {MISCREG_HTPIDR, 0}}, + {MISCREG_TEECR32_EL1, {MISCREG_TEECR, 0}}, + {MISCREG_CNTFRQ_EL0, {MISCREG_CNTFRQ, 0}}, + {MISCREG_CNTPCT_EL0, {MISCREG_CNTPCT, 0}}, + {MISCREG_CNTVCT_EL0, {MISCREG_CNTVCT, 0}}, + {MISCREG_CNTVOFF_EL2, {MISCREG_CNTVOFF, 0}}, + {MISCREG_CNTKCTL_EL1, {MISCREG_CNTKCTL, 0}}, + {MISCREG_CNTHCTL_EL2, {MISCREG_CNTHCTL, 0}}, + {MISCREG_CNTP_TVAL_EL0, {MISCREG_CNTP_TVAL, 0}}, + {MISCREG_CNTP_CTL_EL0, {MISCREG_CNTP_CTL, 0}}, + {MISCREG_CNTP_CVAL_EL0, {MISCREG_CNTP_CVAL, 0}}, + {MISCREG_CNTV_TVAL_EL0, {MISCREG_CNTV_TVAL, 0}}, + {MISCREG_CNTV_CTL_EL0, {MISCREG_CNTV_CTL, 0}}, + {MISCREG_CNTV_CVAL_EL0, {MISCREG_CNTV_CVAL, 0}}, + {MISCREG_CNTHP_TVAL_EL2, {MISCREG_CNTHP_TVAL, 0}}, + {MISCREG_CNTHP_CTL_EL2, {MISCREG_CNTHP_CTL, 0}}, + {MISCREG_CNTHP_CVAL_EL2, {MISCREG_CNTHP_CVAL, 0}}, + {MISCREG_DACR32_EL2, {MISCREG_DACR, 0}}, + {MISCREG_IFSR32_EL2, {MISCREG_IFSR, 0}}, + {MISCREG_TEEHBR32_EL1, {MISCREG_TEEHBR, 0}}, + {MISCREG_SDER32_EL3, {MISCREG_SDER, 0}} +}; + + ISA::ISA(Params *p) - : SimObject(p) + : SimObject(p), system(NULL), lookUpMiscReg(NUM_MISCREGS, {0,0}) { SCTLR sctlr; sctlr = 0; miscRegs[MISCREG_SCTLR_RST] = sctlr; + + system = dynamic_cast<ArmSystem *>(p->system); + DPRINTFN("ISA system set to: %p %p\n", system, p->system); + + // Cache system-level properties + if (FullSystem && system) { + haveSecurity = system->haveSecurity(); + haveLPAE = system->haveLPAE(); + haveVirtualization = system->haveVirtualization(); + haveLargeAsid64 = system->haveLargeAsid64(); + physAddrRange64 = system->physAddrRange64(); + } else { + haveSecurity = haveLPAE = haveVirtualization = false; + haveLargeAsid64 = false; + physAddrRange64 = 32; // dummy value + } + + /** Fill in the miscReg translation table */ + for (uint32_t i = 0; i < miscRegTranslateMax; i++) { + struct MiscRegLUTEntry new_entry; + + uint32_t select = MiscRegSwitch[i].index; + new_entry = MiscRegSwitch[i].entry; + + lookUpMiscReg[select] = new_entry; + } + + preUnflattenMiscReg(); + clear(); } @@ -73,27 +172,42 @@ ISA::clear() SCTLR sctlr_rst = miscRegs[MISCREG_SCTLR_RST]; memset(miscRegs, 0, sizeof(miscRegs)); + + // Initialize configurable default values + miscRegs[MISCREG_MIDR] = p->midr; + miscRegs[MISCREG_MIDR_EL1] = p->midr; + miscRegs[MISCREG_VPIDR] = p->midr; + + if (FullSystem && system->highestELIs64()) { + // Initialize AArch64 state + clear64(p); + return; + } + + // Initialize AArch32 state... + CPSR cpsr = 0; cpsr.mode = MODE_USER; miscRegs[MISCREG_CPSR] = cpsr; updateRegMap(cpsr); SCTLR sctlr = 0; - sctlr.te = (bool)sctlr_rst.te; - sctlr.nmfi = (bool)sctlr_rst.nmfi; - sctlr.v = (bool)sctlr_rst.v; - sctlr.u = 1; + sctlr.te = (bool) sctlr_rst.te; + sctlr.nmfi = (bool) sctlr_rst.nmfi; + sctlr.v = (bool) sctlr_rst.v; + sctlr.u = 1; sctlr.xp = 1; sctlr.rao2 = 1; sctlr.rao3 = 1; - sctlr.rao4 = 1; - miscRegs[MISCREG_SCTLR] = sctlr; + sctlr.rao4 = 0xf; // SCTLR[6:3] + miscRegs[MISCREG_SCTLR_NS] = sctlr; miscRegs[MISCREG_SCTLR_RST] = sctlr_rst; + miscRegs[MISCREG_HCPTR] = 0; - /* Start with an event in the mailbox */ + // Start with an event in the mailbox miscRegs[MISCREG_SEV_MAILBOX] = 1; - // Separate Instruction and Data TLBs. + // Separate Instruction and Data TLBs miscRegs[MISCREG_TLBTR] = 1; MVFR0 mvfr0 = 0; @@ -119,7 +233,8 @@ ISA::clear() // Reset values of PRRR and NMRR are implementation dependent - miscRegs[MISCREG_PRRR] = + // @todo: PRRR and NMRR in secure state? + miscRegs[MISCREG_PRRR_NS] = (1 << 19) | // 19 (0 << 18) | // 18 (0 << 17) | // 17 @@ -132,7 +247,7 @@ ISA::clear() (2 << 4) | // 5:4 (1 << 2) | // 3:2 0; // 1:0 - miscRegs[MISCREG_NMRR] = + miscRegs[MISCREG_NMRR_NS] = (1 << 30) | // 31:30 (0 << 26) | // 27:26 (0 << 24) | // 25:24 @@ -151,8 +266,6 @@ ISA::clear() miscRegs[MISCREG_CPACR] = 0; - // Initialize configurable default values - miscRegs[MISCREG_MIDR] = p->midr; miscRegs[MISCREG_ID_PFR0] = p->id_pfr0; miscRegs[MISCREG_ID_PFR1] = p->id_pfr1; @@ -169,27 +282,132 @@ ISA::clear() miscRegs[MISCREG_ID_ISAR4] = p->id_isar4; miscRegs[MISCREG_ID_ISAR5] = p->id_isar5; - miscRegs[MISCREG_FPSID] = p->fpsid; + if (haveLPAE) { + TTBCR ttbcr = miscRegs[MISCREG_TTBCR_NS]; + ttbcr.eae = 0; + miscRegs[MISCREG_TTBCR_NS] = ttbcr; + // Enforce consistency with system-level settings + miscRegs[MISCREG_ID_MMFR0] = (miscRegs[MISCREG_ID_MMFR0] & ~0xf) | 0x5; + } + + if (haveSecurity) { + miscRegs[MISCREG_SCTLR_S] = sctlr; + miscRegs[MISCREG_SCR] = 0; + miscRegs[MISCREG_VBAR_S] = 0; + } else { + // we're always non-secure + miscRegs[MISCREG_SCR] = 1; + } //XXX We need to initialize the rest of the state. } +void +ISA::clear64(const ArmISAParams *p) +{ + CPSR cpsr = 0; + Addr rvbar = system->resetAddr64(); + switch (system->highestEL()) { + // Set initial EL to highest implemented EL using associated stack + // pointer (SP_ELx); set RVBAR_ELx to implementation defined reset + // value + case EL3: + cpsr.mode = MODE_EL3H; + miscRegs[MISCREG_RVBAR_EL3] = rvbar; + break; + case EL2: + cpsr.mode = MODE_EL2H; + miscRegs[MISCREG_RVBAR_EL2] = rvbar; + break; + case EL1: + cpsr.mode = MODE_EL1H; + miscRegs[MISCREG_RVBAR_EL1] = rvbar; + break; + default: + panic("Invalid highest implemented exception level"); + break; + } + + // Initialize rest of CPSR + cpsr.daif = 0xf; // Mask all interrupts + cpsr.ss = 0; + cpsr.il = 0; + miscRegs[MISCREG_CPSR] = cpsr; + updateRegMap(cpsr); + + // Initialize other control registers + miscRegs[MISCREG_MPIDR_EL1] = 0x80000000; + if (haveSecurity) { + miscRegs[MISCREG_SCTLR_EL3] = 0x30c50870; + miscRegs[MISCREG_SCR_EL3] = 0x00000030; // RES1 fields + // @todo: uncomment this to enable Virtualization + // } else if (haveVirtualization) { + // miscRegs[MISCREG_SCTLR_EL2] = 0x30c50870; + } else { + miscRegs[MISCREG_SCTLR_EL1] = 0x30c50870; + // Always non-secure + miscRegs[MISCREG_SCR_EL3] = 1; + } + + // Initialize configurable id registers + miscRegs[MISCREG_ID_AA64AFR0_EL1] = p->id_aa64afr0_el1; + miscRegs[MISCREG_ID_AA64AFR1_EL1] = p->id_aa64afr1_el1; + miscRegs[MISCREG_ID_AA64DFR0_EL1] = p->id_aa64dfr0_el1; + miscRegs[MISCREG_ID_AA64DFR1_EL1] = p->id_aa64dfr1_el1; + miscRegs[MISCREG_ID_AA64ISAR0_EL1] = p->id_aa64isar0_el1; + miscRegs[MISCREG_ID_AA64ISAR1_EL1] = p->id_aa64isar1_el1; + miscRegs[MISCREG_ID_AA64MMFR0_EL1] = p->id_aa64mmfr0_el1; + miscRegs[MISCREG_ID_AA64MMFR1_EL1] = p->id_aa64mmfr1_el1; + miscRegs[MISCREG_ID_AA64PFR0_EL1] = p->id_aa64pfr0_el1; + miscRegs[MISCREG_ID_AA64PFR1_EL1] = p->id_aa64pfr1_el1; + + // Enforce consistency with system-level settings... + + // EL3 + // (no AArch32/64 interprocessing support for now) + miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64PFR0_EL1], 15, 12, + haveSecurity ? 0x1 : 0x0); + // EL2 + // (no AArch32/64 interprocessing support for now) + miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64PFR0_EL1], 11, 8, + haveVirtualization ? 0x1 : 0x0); + // Large ASID support + miscRegs[MISCREG_ID_AA64MMFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64MMFR0_EL1], 7, 4, + haveLargeAsid64 ? 0x2 : 0x0); + // Physical address size + miscRegs[MISCREG_ID_AA64MMFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64MMFR0_EL1], 3, 0, + encodePhysAddrRange64(physAddrRange64)); +} + MiscReg ISA::readMiscRegNoEffect(int misc_reg) const { assert(misc_reg < NumMiscRegs); - int flat_idx; - if (misc_reg == MISCREG_SPSR) - flat_idx = flattenMiscIndex(misc_reg); - else - flat_idx = misc_reg; - MiscReg val = miscRegs[flat_idx]; + int flat_idx = flattenMiscIndex(misc_reg); // Note: indexes of AArch64 + // registers are left unchanged + MiscReg val; + + if (lookUpMiscReg[flat_idx].lower == 0 || flat_idx == MISCREG_SPSR + || flat_idx == MISCREG_SCTLR_EL1) { + if (flat_idx == MISCREG_SPSR) + flat_idx = flattenMiscIndex(MISCREG_SPSR); + if (flat_idx == MISCREG_SCTLR_EL1) + flat_idx = flattenMiscIndex(MISCREG_SCTLR); + val = miscRegs[flat_idx]; + } else + if (lookUpMiscReg[flat_idx].upper > 0) + val = ((miscRegs[lookUpMiscReg[flat_idx].lower] & mask(32)) + | (miscRegs[lookUpMiscReg[flat_idx].upper] << 32)); + else + val = miscRegs[lookUpMiscReg[flat_idx].lower]; - DPRINTF(MiscRegs, "Reading From misc reg %d (%d) : %#x\n", - misc_reg, flat_idx, val); return val; } @@ -197,33 +415,98 @@ ISA::readMiscRegNoEffect(int misc_reg) const MiscReg ISA::readMiscReg(int misc_reg, ThreadContext *tc) { - ArmSystem *arm_sys; + CPSR cpsr = 0; + PCState pc = 0; + SCR scr = 0; if (misc_reg == MISCREG_CPSR) { - CPSR cpsr = miscRegs[misc_reg]; - PCState pc = tc->pcState(); + cpsr = miscRegs[misc_reg]; + pc = tc->pcState(); cpsr.j = pc.jazelle() ? 1 : 0; cpsr.t = pc.thumb() ? 1 : 0; return cpsr; } - if (misc_reg >= MISCREG_CP15_UNIMP_START) - panic("Unimplemented CP15 register %s read.\n", - miscRegName[misc_reg]); - switch (misc_reg) { - case MISCREG_MPIDR: - arm_sys = dynamic_cast<ArmSystem*>(tc->getSystemPtr()); - assert(arm_sys); +#ifndef NDEBUG + if (!miscRegInfo[misc_reg][MISCREG_IMPLEMENTED]) { + if (miscRegInfo[misc_reg][MISCREG_WARN_NOT_FAIL]) + warn("Unimplemented system register %s read.\n", + miscRegName[misc_reg]); + else + panic("Unimplemented system register %s read.\n", + miscRegName[misc_reg]); + } +#endif - if (arm_sys->multiProc) { - return 0x80000000 | // multiprocessor extensions available - tc->cpuId(); + switch (unflattenMiscReg(misc_reg)) { + case MISCREG_HCR: + { + if (!haveVirtualization) + return 0; + else + return readMiscRegNoEffect(MISCREG_HCR); + } + case MISCREG_CPACR: + { + const uint32_t ones = (uint32_t)(-1); + CPACR cpacrMask = 0; + // Only cp10, cp11, and ase are implemented, nothing else should + // be readable? (straight copy from the write code) + cpacrMask.cp10 = ones; + cpacrMask.cp11 = ones; + cpacrMask.asedis = ones; + + // Security Extensions may limit the readability of CPACR + if (haveSecurity) { + scr = readMiscRegNoEffect(MISCREG_SCR); + cpsr = readMiscRegNoEffect(MISCREG_CPSR); + if (scr.ns && (cpsr.mode != MODE_MON)) { + NSACR nsacr = readMiscRegNoEffect(MISCREG_NSACR); + // NB: Skipping the full loop, here + if (!nsacr.cp10) cpacrMask.cp10 = 0; + if (!nsacr.cp11) cpacrMask.cp11 = 0; + } + } + MiscReg val = readMiscRegNoEffect(MISCREG_CPACR); + val &= cpacrMask; + DPRINTF(MiscRegs, "Reading misc reg %s: %#x\n", + miscRegName[misc_reg], val); + return val; + } + case MISCREG_MPIDR: + cpsr = readMiscRegNoEffect(MISCREG_CPSR); + scr = readMiscRegNoEffect(MISCREG_SCR); + if ((cpsr.mode == MODE_HYP) || inSecureState(scr, cpsr)) { + return getMPIDR(system, tc); + } else { + return readMiscReg(MISCREG_VMPIDR, tc); + } + break; + case MISCREG_MPIDR_EL1: + // @todo in the absence of v8 virtualization support just return MPIDR_EL1 + return getMPIDR(system, tc) & 0xffffffff; + case MISCREG_VMPIDR: + // top bit defined as RES1 + return readMiscRegNoEffect(misc_reg) | 0x80000000; + case MISCREG_ID_AFR0: // not implemented, so alias MIDR + case MISCREG_ID_DFR0: // not implemented, so alias MIDR + case MISCREG_REVIDR: // not implemented, so alias MIDR + case MISCREG_MIDR: + cpsr = readMiscRegNoEffect(MISCREG_CPSR); + scr = readMiscRegNoEffect(MISCREG_SCR); + if ((cpsr.mode == MODE_HYP) || inSecureState(scr, cpsr)) { + return readMiscRegNoEffect(misc_reg); } else { - return 0x80000000 | // multiprocessor extensions available - 0x40000000 | // in up system - tc->cpuId(); + return readMiscRegNoEffect(MISCREG_VPIDR); } break; + case MISCREG_JOSCR: // Jazelle trivial implementation, RAZ/WI + case MISCREG_JMCR: // Jazelle trivial implementation, RAZ/WI + case MISCREG_JIDR: // Jazelle trivial implementation, RAZ/WI + case MISCREG_AIDR: // AUX ID set to 0 + case MISCREG_TCMTR: // No TCM's + return 0; + case MISCREG_CLIDR: warn_once("The clidr register always reports 0 caches.\n"); warn_once("clidr LoUIS field of 0b001 to match current " @@ -276,6 +559,75 @@ ISA::readMiscReg(int misc_reg, ThreadContext *tc) return readMiscRegNoEffect(MISCREG_FPSCR) & ~FpscrQcMask; case MISCREG_FPSCR_EXC: return readMiscRegNoEffect(MISCREG_FPSCR) & ~FpscrExcMask; + case MISCREG_FPSR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioc = ones; + fpscrMask.dzc = ones; + fpscrMask.ofc = ones; + fpscrMask.ufc = ones; + fpscrMask.ixc = ones; + fpscrMask.idc = ones; + fpscrMask.qc = ones; + fpscrMask.v = ones; + fpscrMask.c = ones; + fpscrMask.z = ones; + fpscrMask.n = ones; + return readMiscRegNoEffect(MISCREG_FPSCR) & (uint32_t)fpscrMask; + } + case MISCREG_FPCR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioe = ones; + fpscrMask.dze = ones; + fpscrMask.ofe = ones; + fpscrMask.ufe = ones; + fpscrMask.ixe = ones; + fpscrMask.ide = ones; + fpscrMask.len = ones; + fpscrMask.stride = ones; + fpscrMask.rMode = ones; + fpscrMask.fz = ones; + fpscrMask.dn = ones; + fpscrMask.ahp = ones; + return readMiscRegNoEffect(MISCREG_FPSCR) & (uint32_t)fpscrMask; + } + case MISCREG_NZCV: + { + CPSR cpsr = 0; + cpsr.nz = tc->readIntReg(INTREG_CONDCODES_NZ); + cpsr.c = tc->readIntReg(INTREG_CONDCODES_C); + cpsr.v = tc->readIntReg(INTREG_CONDCODES_V); + return cpsr; + } + case MISCREG_DAIF: + { + CPSR cpsr = 0; + cpsr.daif = (uint8_t) ((CPSR) miscRegs[MISCREG_CPSR]).daif; + return cpsr; + } + case MISCREG_SP_EL0: + { + return tc->readIntReg(INTREG_SP0); + } + case MISCREG_SP_EL1: + { + return tc->readIntReg(INTREG_SP1); + } + case MISCREG_SP_EL2: + { + return tc->readIntReg(INTREG_SP2); + } + case MISCREG_SPSEL: + { + return miscRegs[MISCREG_CPSR] & 0x1; + } + case MISCREG_CURRENTEL: + { + return miscRegs[MISCREG_CPSR] & 0xc; + } case MISCREG_L2CTLR: { // mostly unimplemented, just set NumCPUs field from sim and return @@ -289,8 +641,120 @@ ISA::readMiscReg(int misc_reg, ThreadContext *tc) * Return 0 as we don't support debug architecture yet. */ return 0; - case MISCREG_DBGDSCR_INT: + case MISCREG_DBGDSCRint: return 0; + case MISCREG_ISR: + return tc->getCpuPtr()->getInterruptController()->getISR( + readMiscRegNoEffect(MISCREG_HCR), + readMiscRegNoEffect(MISCREG_CPSR), + readMiscRegNoEffect(MISCREG_SCR)); + case MISCREG_ISR_EL1: + return tc->getCpuPtr()->getInterruptController()->getISR( + readMiscRegNoEffect(MISCREG_HCR_EL2), + readMiscRegNoEffect(MISCREG_CPSR), + readMiscRegNoEffect(MISCREG_SCR_EL3)); + case MISCREG_DCZID_EL0: + return 0x04; // DC ZVA clear 64-byte chunks + case MISCREG_HCPTR: + { + MiscReg val = readMiscRegNoEffect(misc_reg); + // The trap bit associated with CP14 is defined as RAZ + val &= ~(1 << 14); + // If a CP bit in NSACR is 0 then the corresponding bit in + // HCPTR is RAO/WI + bool secure_lookup = haveSecurity && + inSecureState(readMiscRegNoEffect(MISCREG_SCR), + readMiscRegNoEffect(MISCREG_CPSR)); + if (!secure_lookup) { + MiscReg mask = readMiscRegNoEffect(MISCREG_NSACR); + val |= (mask ^ 0x7FFF) & 0xBFFF; + } + // Set the bits for unimplemented coprocessors to RAO/WI + val |= 0x33FF; + return (val); + } + case MISCREG_HDFAR: // alias for secure DFAR + return readMiscRegNoEffect(MISCREG_DFAR_S); + case MISCREG_HIFAR: // alias for secure IFAR + return readMiscRegNoEffect(MISCREG_IFAR_S); + case MISCREG_HVBAR: // bottom bits reserved + return readMiscRegNoEffect(MISCREG_HVBAR) & 0xFFFFFFE0; + case MISCREG_SCTLR: // Some bits hardwired + // The FI field (bit 21) is common between S/NS versions of the register + return (readMiscRegNoEffect(MISCREG_SCTLR_S) & (1 << 21)) | + (readMiscRegNoEffect(misc_reg) & 0x72DD39FF) | 0x00C00818; // V8 SCTLR + case MISCREG_SCTLR_EL1: + // The FI field (bit 21) is common between S/NS versions of the register + return (readMiscRegNoEffect(MISCREG_SCTLR_S) & (1 << 21)) | + (readMiscRegNoEffect(misc_reg) & 0x37DDDBFF) | 0x30D00800; // V8 SCTLR_EL1 + case MISCREG_SCTLR_EL3: + // The FI field (bit 21) is common between S/NS versions of the register + return (readMiscRegNoEffect(MISCREG_SCTLR_S) & (1 << 21)) | + (readMiscRegNoEffect(misc_reg) & 0x32CD183F) | 0x30C50830; // V8 SCTLR_EL3 + case MISCREG_HSCTLR: // FI comes from SCTLR + { + uint32_t mask = 1 << 27; + return (readMiscRegNoEffect(MISCREG_HSCTLR) & ~mask) | + (readMiscRegNoEffect(MISCREG_SCTLR) & mask); + } + case MISCREG_SCR: + { + CPSR cpsr = readMiscRegNoEffect(MISCREG_CPSR); + if (cpsr.width) { + return readMiscRegNoEffect(MISCREG_SCR); + } else { + return readMiscRegNoEffect(MISCREG_SCR_EL3); + } + } + // Generic Timer registers + case MISCREG_CNTFRQ: + case MISCREG_CNTFRQ_EL0: + inform_once("Read CNTFREQ_EL0 frequency\n"); + return getSystemCounter(tc)->freq(); + case MISCREG_CNTPCT: + case MISCREG_CNTPCT_EL0: + return getSystemCounter(tc)->value(); + case MISCREG_CNTVCT: + return getSystemCounter(tc)->value(); + case MISCREG_CNTVCT_EL0: + return getSystemCounter(tc)->value(); + case MISCREG_CNTP_CVAL: + case MISCREG_CNTP_CVAL_EL0: + return getArchTimer(tc, tc->cpuId())->compareValue(); + case MISCREG_CNTP_TVAL: + case MISCREG_CNTP_TVAL_EL0: + return getArchTimer(tc, tc->cpuId())->timerValue(); + case MISCREG_CNTP_CTL: + case MISCREG_CNTP_CTL_EL0: + return getArchTimer(tc, tc->cpuId())->control(); + // PL1 phys. timer, secure + // AArch64 + case MISCREG_CNTPS_CVAL_EL1: + case MISCREG_CNTPS_TVAL_EL1: + case MISCREG_CNTPS_CTL_EL1: + // PL2 phys. timer, non-secure + // AArch32 + case MISCREG_CNTHCTL: + case MISCREG_CNTHP_CVAL: + case MISCREG_CNTHP_TVAL: + case MISCREG_CNTHP_CTL: + // AArch64 + case MISCREG_CNTHCTL_EL2: + case MISCREG_CNTHP_CVAL_EL2: + case MISCREG_CNTHP_TVAL_EL2: + case MISCREG_CNTHP_CTL_EL2: + // Virtual timer + // AArch32 + case MISCREG_CNTV_CVAL: + case MISCREG_CNTV_TVAL: + case MISCREG_CNTV_CTL: + // AArch64 + // case MISCREG_CNTV_CVAL_EL2: + // case MISCREG_CNTV_TVAL_EL2: + // case MISCREG_CNTV_CTL_EL2: + panic("Generic Timer register not implemented\n"); + break; + } return readMiscRegNoEffect(misc_reg); } @@ -300,15 +764,28 @@ ISA::setMiscRegNoEffect(int misc_reg, const MiscReg &val) { assert(misc_reg < NumMiscRegs); - int flat_idx; - if (misc_reg == MISCREG_SPSR) - flat_idx = flattenMiscIndex(misc_reg); - else - flat_idx = misc_reg; - miscRegs[flat_idx] = val; + int flat_idx = flattenMiscIndex(misc_reg); // Note: indexes of AArch64 + // registers are left unchanged + + int flat_idx2 = lookUpMiscReg[flat_idx].upper; - DPRINTF(MiscRegs, "Writing to misc reg %d (%d) : %#x\n", misc_reg, - flat_idx, val); + if (flat_idx2 > 0) { + miscRegs[lookUpMiscReg[flat_idx].lower] = bits(val, 31, 0); + miscRegs[flat_idx2] = bits(val, 63, 32); + DPRINTF(MiscRegs, "Writing to misc reg %d (%d:%d) : %#x\n", + misc_reg, flat_idx, flat_idx2, val); + } else { + if (flat_idx == MISCREG_SPSR) + flat_idx = flattenMiscIndex(MISCREG_SPSR); + else if (flat_idx == MISCREG_SCTLR_EL1) + flat_idx = flattenMiscIndex(MISCREG_SCTLR); + else + flat_idx = (lookUpMiscReg[flat_idx].lower > 0) ? + lookUpMiscReg[flat_idx].lower : flat_idx; + miscRegs[flat_idx] = val; + DPRINTF(MiscRegs, "Writing to misc reg %d (%d) : %#x\n", + misc_reg, flat_idx, val); + } } void @@ -317,8 +794,13 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) MiscReg newVal = val; int x; + bool secure_lookup; + bool hyp; System *sys; ThreadContext *oc; + uint8_t target_el; + uint16_t asid; + SCR scr; if (misc_reg == MISCREG_CPSR) { updateRegMap(val); @@ -346,12 +828,18 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) } else { tc->pcState(pc); } - } else if (misc_reg >= MISCREG_CP15_UNIMP_START && - misc_reg < MISCREG_CP15_END) { - panic("Unimplemented CP15 register %s wrote with %#x.\n", - miscRegName[misc_reg], val); } else { - switch (misc_reg) { +#ifndef NDEBUG + if (!miscRegInfo[misc_reg][MISCREG_IMPLEMENTED]) { + if (miscRegInfo[misc_reg][MISCREG_WARN_NOT_FAIL]) + warn("Unimplemented system register %s write with %#x.\n", + miscRegName[misc_reg], val); + else + panic("Unimplemented system register %s write with %#x.\n", + miscRegName[misc_reg], val); + } +#endif + switch (unflattenMiscReg(misc_reg)) { case MISCREG_CPACR: { @@ -362,7 +850,61 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) cpacrMask.cp10 = ones; cpacrMask.cp11 = ones; cpacrMask.asedis = ones; + + // Security Extensions may limit the writability of CPACR + if (haveSecurity) { + scr = readMiscRegNoEffect(MISCREG_SCR); + CPSR cpsr = readMiscRegNoEffect(MISCREG_CPSR); + if (scr.ns && (cpsr.mode != MODE_MON)) { + NSACR nsacr = readMiscRegNoEffect(MISCREG_NSACR); + // NB: Skipping the full loop, here + if (!nsacr.cp10) cpacrMask.cp10 = 0; + if (!nsacr.cp11) cpacrMask.cp11 = 0; + } + } + + MiscReg old_val = readMiscRegNoEffect(MISCREG_CPACR); newVal &= cpacrMask; + newVal |= old_val & ~cpacrMask; + DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", + miscRegName[misc_reg], newVal); + } + break; + case MISCREG_CPACR_EL1: + { + const uint32_t ones = (uint32_t)(-1); + CPACR cpacrMask = 0; + cpacrMask.tta = ones; + cpacrMask.fpen = ones; + newVal &= cpacrMask; + DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", + miscRegName[misc_reg], newVal); + } + break; + case MISCREG_CPTR_EL2: + { + const uint32_t ones = (uint32_t)(-1); + CPTR cptrMask = 0; + cptrMask.tcpac = ones; + cptrMask.tta = ones; + cptrMask.tfp = ones; + newVal &= cptrMask; + cptrMask = 0; + cptrMask.res1_13_12_el2 = ones; + cptrMask.res1_9_0_el2 = ones; + newVal |= cptrMask; + DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", + miscRegName[misc_reg], newVal); + } + break; + case MISCREG_CPTR_EL3: + { + const uint32_t ones = (uint32_t)(-1); + CPTR cptrMask = 0; + cptrMask.tcpac = ones; + cptrMask.tta = ones; + cptrMask.tfp = ones; + newVal &= cptrMask; DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n", miscRegName[misc_reg], newVal); } @@ -370,6 +912,11 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) case MISCREG_CSSELR: warn_once("The csselr register isn't implemented.\n"); return; + + case MISCREG_DC_ZVA_Xt: + warn("Calling DC ZVA! Not Implemeted! Expect WEIRD results\n"); + return; + case MISCREG_FPSCR: { const uint32_t ones = (uint32_t)(-1); @@ -380,6 +927,12 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) fpscrMask.ufc = ones; fpscrMask.ixc = ones; fpscrMask.idc = ones; + fpscrMask.ioe = ones; + fpscrMask.dze = ones; + fpscrMask.ofe = ones; + fpscrMask.ufe = ones; + fpscrMask.ixe = ones; + fpscrMask.ide = ones; fpscrMask.len = ones; fpscrMask.stride = ones; fpscrMask.rMode = ones; @@ -392,26 +945,72 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) fpscrMask.z = ones; fpscrMask.n = ones; newVal = (newVal & (uint32_t)fpscrMask) | - (miscRegs[MISCREG_FPSCR] & ~(uint32_t)fpscrMask); + (readMiscRegNoEffect(MISCREG_FPSCR) & + ~(uint32_t)fpscrMask); tc->getDecoderPtr()->setContext(newVal); } break; + case MISCREG_FPSR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioc = ones; + fpscrMask.dzc = ones; + fpscrMask.ofc = ones; + fpscrMask.ufc = ones; + fpscrMask.ixc = ones; + fpscrMask.idc = ones; + fpscrMask.qc = ones; + fpscrMask.v = ones; + fpscrMask.c = ones; + fpscrMask.z = ones; + fpscrMask.n = ones; + newVal = (newVal & (uint32_t)fpscrMask) | + (readMiscRegNoEffect(MISCREG_FPSCR) & + ~(uint32_t)fpscrMask); + misc_reg = MISCREG_FPSCR; + } + break; + case MISCREG_FPCR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioe = ones; + fpscrMask.dze = ones; + fpscrMask.ofe = ones; + fpscrMask.ufe = ones; + fpscrMask.ixe = ones; + fpscrMask.ide = ones; + fpscrMask.len = ones; + fpscrMask.stride = ones; + fpscrMask.rMode = ones; + fpscrMask.fz = ones; + fpscrMask.dn = ones; + fpscrMask.ahp = ones; + newVal = (newVal & (uint32_t)fpscrMask) | + (readMiscRegNoEffect(MISCREG_FPSCR) & + ~(uint32_t)fpscrMask); + misc_reg = MISCREG_FPSCR; + } + break; case MISCREG_CPSR_Q: { assert(!(newVal & ~CpsrMaskQ)); - newVal = miscRegs[MISCREG_CPSR] | newVal; + newVal = readMiscRegNoEffect(MISCREG_CPSR) | newVal; misc_reg = MISCREG_CPSR; } break; case MISCREG_FPSCR_QC: { - newVal = miscRegs[MISCREG_FPSCR] | (newVal & FpscrQcMask); + newVal = readMiscRegNoEffect(MISCREG_FPSCR) | + (newVal & FpscrQcMask); misc_reg = MISCREG_FPSCR; } break; case MISCREG_FPSCR_EXC: { - newVal = miscRegs[MISCREG_FPSCR] | (newVal & FpscrExcMask); + newVal = readMiscRegNoEffect(MISCREG_FPSCR) | + (newVal & FpscrExcMask); misc_reg = MISCREG_FPSCR; } break; @@ -421,16 +1020,63 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) // bit 29 - valid only if fpexc[31] is 0 const uint32_t fpexcMask = 0x60000000; newVal = (newVal & fpexcMask) | - (miscRegs[MISCREG_FPEXC] & ~fpexcMask); + (readMiscRegNoEffect(MISCREG_FPEXC) & ~fpexcMask); + } + break; + case MISCREG_HCR: + { + if (!haveVirtualization) + return; + } + break; + case MISCREG_IFSR: + { + // ARM ARM (ARM DDI 0406C.b) B4.1.96 + const uint32_t ifsrMask = + mask(31, 13) | mask(11, 11) | mask(8, 6); + newVal = newVal & ~ifsrMask; + } + break; + case MISCREG_DFSR: + { + // ARM ARM (ARM DDI 0406C.b) B4.1.52 + const uint32_t dfsrMask = mask(31, 14) | mask(8, 8); + newVal = newVal & ~dfsrMask; + } + break; + case MISCREG_AMAIR0: + case MISCREG_AMAIR1: + { + // ARM ARM (ARM DDI 0406C.b) B4.1.5 + // Valid only with LPAE + if (!haveLPAE) + return; + DPRINTF(MiscRegs, "Writing AMAIR: %#x\n", newVal); } break; + case MISCREG_SCR: + tc->getITBPtr()->invalidateMiscReg(); + tc->getDTBPtr()->invalidateMiscReg(); + break; case MISCREG_SCTLR: { DPRINTF(MiscRegs, "Writing SCTLR: %#x\n", newVal); - SCTLR sctlr = miscRegs[MISCREG_SCTLR]; + MiscRegIndex sctlr_idx; + scr = readMiscRegNoEffect(MISCREG_SCR); + if (haveSecurity && !scr.ns) { + sctlr_idx = MISCREG_SCTLR_S; + } else { + sctlr_idx = MISCREG_SCTLR_NS; + // The FI field (bit 21) is common between S/NS versions + // of the register, we store this in the secure copy of + // the reg + miscRegs[MISCREG_SCTLR_S] &= ~(1 << 21); + miscRegs[MISCREG_SCTLR_S] |= newVal & (1 << 21); + } + SCTLR sctlr = miscRegs[sctlr_idx]; SCTLR new_sctlr = newVal; - new_sctlr.nmfi = (bool)sctlr.nmfi; - miscRegs[MISCREG_SCTLR] = (MiscReg)new_sctlr; + new_sctlr.nmfi = ((bool)sctlr.nmfi) && !haveVirtualization; + miscRegs[sctlr_idx] = (MiscReg)new_sctlr; tc->getITBPtr()->invalidateMiscReg(); tc->getDTBPtr()->invalidateMiscReg(); @@ -440,6 +1086,7 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) sys = tc->getSystemPtr(); for (x = 0; x < sys->numContexts(); x++) { oc = sys->getThreadContext(x); + // @todo: double check this for security SCTLR other_sctlr = oc->readMiscRegNoEffect(MISCREG_SCTLR); if (!other_sctlr.c && oc->status() != ThreadContext::Halted) return; @@ -479,96 +1126,317 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) case MISCREG_TLBTR: case MISCREG_MVFR0: case MISCREG_MVFR1: + + case MISCREG_ID_AA64AFR0_EL1: + case MISCREG_ID_AA64AFR1_EL1: + case MISCREG_ID_AA64DFR0_EL1: + case MISCREG_ID_AA64DFR1_EL1: + case MISCREG_ID_AA64ISAR0_EL1: + case MISCREG_ID_AA64ISAR1_EL1: + case MISCREG_ID_AA64MMFR0_EL1: + case MISCREG_ID_AA64MMFR1_EL1: + case MISCREG_ID_AA64PFR0_EL1: + case MISCREG_ID_AA64PFR1_EL1: // ID registers are constants. return; + // TLBI all entries, EL0&1 inner sharable (ignored) case MISCREG_TLBIALLIS: - case MISCREG_TLBIALL: + case MISCREG_TLBIALL: // TLBI all entries, EL0&1, + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; sys = tc->getSystemPtr(); for (x = 0; x < sys->numContexts(); x++) { oc = sys->getThreadContext(x); assert(oc->getITBPtr() && oc->getDTBPtr()); - oc->getITBPtr()->flushAll(); - oc->getDTBPtr()->flushAll(); + oc->getITBPtr()->flushAllSecurity(secure_lookup, target_el); + oc->getDTBPtr()->flushAllSecurity(secure_lookup, target_el); // If CheckerCPU is connected, need to notify it of a flush CheckerCPU *checker = oc->getCheckerCpuPtr(); if (checker) { - checker->getITBPtr()->flushAll(); - checker->getDTBPtr()->flushAll(); + checker->getITBPtr()->flushAllSecurity(secure_lookup, + target_el); + checker->getDTBPtr()->flushAllSecurity(secure_lookup, + target_el); } } return; + // TLBI all entries, EL0&1, instruction side case MISCREG_ITLBIALL: - tc->getITBPtr()->flushAll(); + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tc->getITBPtr()->flushAllSecurity(secure_lookup, target_el); return; + // TLBI all entries, EL0&1, data side case MISCREG_DTLBIALL: - tc->getDTBPtr()->flushAll(); + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tc->getDTBPtr()->flushAllSecurity(secure_lookup, target_el); return; + // TLBI based on VA, EL0&1 inner sharable (ignored) case MISCREG_TLBIMVAIS: case MISCREG_TLBIMVA: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; sys = tc->getSystemPtr(); for (x = 0; x < sys->numContexts(); x++) { oc = sys->getThreadContext(x); assert(oc->getITBPtr() && oc->getDTBPtr()); oc->getITBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), + secure_lookup, target_el); oc->getDTBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), + secure_lookup, target_el); CheckerCPU *checker = oc->getCheckerCpuPtr(); if (checker) { checker->getITBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), secure_lookup, target_el); checker->getDTBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), secure_lookup, target_el); } } return; + // TLBI by ASID, EL0&1, inner sharable case MISCREG_TLBIASIDIS: case MISCREG_TLBIASID: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; sys = tc->getSystemPtr(); for (x = 0; x < sys->numContexts(); x++) { oc = sys->getThreadContext(x); assert(oc->getITBPtr() && oc->getDTBPtr()); - oc->getITBPtr()->flushAsid(bits(newVal, 7,0)); - oc->getDTBPtr()->flushAsid(bits(newVal, 7,0)); + oc->getITBPtr()->flushAsid(bits(newVal, 7,0), + secure_lookup, target_el); + oc->getDTBPtr()->flushAsid(bits(newVal, 7,0), + secure_lookup, target_el); CheckerCPU *checker = oc->getCheckerCpuPtr(); if (checker) { - checker->getITBPtr()->flushAsid(bits(newVal, 7,0)); - checker->getDTBPtr()->flushAsid(bits(newVal, 7,0)); + checker->getITBPtr()->flushAsid(bits(newVal, 7,0), + secure_lookup, target_el); + checker->getDTBPtr()->flushAsid(bits(newVal, 7,0), + secure_lookup, target_el); } } return; + // TLBI by address, EL0&1, inner sharable (ignored) case MISCREG_TLBIMVAAIS: case MISCREG_TLBIMVAA: - sys = tc->getSystemPtr(); - for (x = 0; x < sys->numContexts(); x++) { - oc = sys->getThreadContext(x); - assert(oc->getITBPtr() && oc->getDTBPtr()); - oc->getITBPtr()->flushMva(mbits(newVal, 31,12)); - oc->getDTBPtr()->flushMva(mbits(newVal, 31,12)); - - CheckerCPU *checker = oc->getCheckerCpuPtr(); - if (checker) { - checker->getITBPtr()->flushMva(mbits(newVal, 31,12)); - checker->getDTBPtr()->flushMva(mbits(newVal, 31,12)); - } - } + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + hyp = 0; + tlbiMVA(tc, newVal, secure_lookup, hyp, target_el); + return; + // TLBI by address, EL2, hypervisor mode + case MISCREG_TLBIMVAH: + case MISCREG_TLBIMVAHIS: + assert32(tc); + target_el = 1; // aarch32, use hyp bit + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + hyp = 1; + tlbiMVA(tc, newVal, secure_lookup, hyp, target_el); return; + // TLBI by address and asid, EL0&1, instruction side only case MISCREG_ITLBIMVA: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; tc->getITBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), secure_lookup, target_el); return; + // TLBI by address and asid, EL0&1, data side only case MISCREG_DTLBIMVA: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; tc->getDTBPtr()->flushMvaAsid(mbits(newVal, 31, 12), - bits(newVal, 7,0)); + bits(newVal, 7,0), secure_lookup, target_el); return; + // TLBI by ASID, EL0&1, instrution side only case MISCREG_ITLBIASID: - tc->getITBPtr()->flushAsid(bits(newVal, 7,0)); + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tc->getITBPtr()->flushAsid(bits(newVal, 7,0), secure_lookup, + target_el); return; + // TLBI by ASID EL0&1 data size only case MISCREG_DTLBIASID: - tc->getDTBPtr()->flushAsid(bits(newVal, 7,0)); + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tc->getDTBPtr()->flushAsid(bits(newVal, 7,0), secure_lookup, + target_el); + return; + // Invalidate entire Non-secure Hyp/Non-Hyp Unified TLB + case MISCREG_TLBIALLNSNH: + case MISCREG_TLBIALLNSNHIS: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + hyp = 0; + tlbiALLN(tc, hyp, target_el); + return; + // TLBI all entries, EL2, hyp, + case MISCREG_TLBIALLH: + case MISCREG_TLBIALLHIS: + assert32(tc); + target_el = 1; // aarch32, use hyp bit + hyp = 1; + tlbiALLN(tc, hyp, target_el); + return; + // AArch64 TLBI: invalidate all entries EL3 + case MISCREG_TLBI_ALLE3IS: + case MISCREG_TLBI_ALLE3: + assert64(tc); + target_el = 3; + secure_lookup = true; + tlbiALL(tc, secure_lookup, target_el); + return; + // @todo: uncomment this to enable Virtualization + // case MISCREG_TLBI_ALLE2IS: + // case MISCREG_TLBI_ALLE2: + // TLBI all entries, EL0&1 + case MISCREG_TLBI_ALLE1IS: + case MISCREG_TLBI_ALLE1: + // AArch64 TLBI: invalidate all entries, stage 1, current VMID + case MISCREG_TLBI_VMALLE1IS: + case MISCREG_TLBI_VMALLE1: + // AArch64 TLBI: invalidate all entries, stages 1 & 2, current VMID + case MISCREG_TLBI_VMALLS12E1IS: + case MISCREG_TLBI_VMALLS12E1: + // @todo: handle VMID and stage 2 to enable Virtualization + assert64(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tlbiALL(tc, secure_lookup, target_el); + return; + // AArch64 TLBI: invalidate by VA and ASID, stage 1, current VMID + // VAEx(IS) and VALEx(IS) are the same because TLBs only store entries + // from the last level of translation table walks + // @todo: handle VMID to enable Virtualization + // TLBI all entries, EL0&1 + case MISCREG_TLBI_VAE3IS_Xt: + case MISCREG_TLBI_VAE3_Xt: + // TLBI by VA, EL3 regime stage 1, last level walk + case MISCREG_TLBI_VALE3IS_Xt: + case MISCREG_TLBI_VALE3_Xt: + assert64(tc); + target_el = 3; + asid = 0xbeef; // does not matter, tlbi is global + secure_lookup = true; + tlbiVA(tc, newVal, asid, secure_lookup, target_el); + return; + // TLBI by VA, EL2 + case MISCREG_TLBI_VAE2IS_Xt: + case MISCREG_TLBI_VAE2_Xt: + // TLBI by VA, EL2, stage1 last level walk + case MISCREG_TLBI_VALE2IS_Xt: + case MISCREG_TLBI_VALE2_Xt: + assert64(tc); + target_el = 2; + asid = 0xbeef; // does not matter, tlbi is global + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tlbiVA(tc, newVal, asid, secure_lookup, target_el); + return; + // TLBI by VA EL1 & 0, stage1, ASID, current VMID + case MISCREG_TLBI_VAE1IS_Xt: + case MISCREG_TLBI_VAE1_Xt: + case MISCREG_TLBI_VALE1IS_Xt: + case MISCREG_TLBI_VALE1_Xt: + assert64(tc); + asid = bits(newVal, 63, 48); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tlbiVA(tc, newVal, asid, secure_lookup, target_el); + return; + // AArch64 TLBI: invalidate by ASID, stage 1, current VMID + // @todo: handle VMID to enable Virtualization + case MISCREG_TLBI_ASIDE1IS_Xt: + case MISCREG_TLBI_ASIDE1_Xt: + assert64(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + sys = tc->getSystemPtr(); + for (x = 0; x < sys->numContexts(); x++) { + oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + asid = bits(newVal, 63, 48); + if (haveLargeAsid64) + asid &= mask(8); + oc->getITBPtr()->flushAsid(asid, secure_lookup, target_el); + oc->getDTBPtr()->flushAsid(asid, secure_lookup, target_el); + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushAsid(asid, + secure_lookup, target_el); + checker->getDTBPtr()->flushAsid(asid, + secure_lookup, target_el); + } + } + return; + // AArch64 TLBI: invalidate by VA, ASID, stage 1, current VMID + // VAAE1(IS) and VAALE1(IS) are the same because TLBs only store + // entries from the last level of translation table walks + // @todo: handle VMID to enable Virtualization + case MISCREG_TLBI_VAAE1IS_Xt: + case MISCREG_TLBI_VAAE1_Xt: + case MISCREG_TLBI_VAALE1IS_Xt: + case MISCREG_TLBI_VAALE1_Xt: + assert64(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + sys = tc->getSystemPtr(); + for (x = 0; x < sys->numContexts(); x++) { + // @todo: extra controls on TLBI broadcast? + oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + Addr va = ((Addr) bits(newVal, 43, 0)) << 12; + oc->getITBPtr()->flushMva(va, + secure_lookup, false, target_el); + oc->getDTBPtr()->flushMva(va, + secure_lookup, false, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushMva(va, + secure_lookup, false, target_el); + checker->getDTBPtr()->flushMva(va, + secure_lookup, false, target_el); + } + } + return; + // AArch64 TLBI: invalidate by IPA, stage 2, current VMID + case MISCREG_TLBI_IPAS2LE1IS_Xt: + case MISCREG_TLBI_IPAS2LE1_Xt: + case MISCREG_TLBI_IPAS2E1IS_Xt: + case MISCREG_TLBI_IPAS2E1_Xt: + assert64(tc); + // @todo: implement these as part of Virtualization + warn("Not doing anything for write of miscreg ITLB_IPAS2\n"); return; case MISCREG_ACTLR: warn("Not doing anything for write of miscreg ACTLR\n"); @@ -591,77 +1459,566 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) warn("Not doing anything for write to miscreg %s\n", miscRegName[misc_reg]); break; - case MISCREG_V2PCWPR: - case MISCREG_V2PCWPW: - case MISCREG_V2PCWUR: - case MISCREG_V2PCWUW: - case MISCREG_V2POWPR: - case MISCREG_V2POWPW: - case MISCREG_V2POWUR: - case MISCREG_V2POWUW: + case MISCREG_HSTR: // TJDBX, now redifined to be RES0 + { + HSTR hstrMask = 0; + hstrMask.tjdbx = 1; + newVal &= ~((uint32_t) hstrMask); + break; + } + case MISCREG_HCPTR: + { + // If a CP bit in NSACR is 0 then the corresponding bit in + // HCPTR is RAO/WI. Same applies to NSASEDIS + secure_lookup = haveSecurity && + inSecureState(readMiscRegNoEffect(MISCREG_SCR), + readMiscRegNoEffect(MISCREG_CPSR)); + if (!secure_lookup) { + MiscReg oldValue = readMiscRegNoEffect(MISCREG_HCPTR); + MiscReg mask = (readMiscRegNoEffect(MISCREG_NSACR) ^ 0x7FFF) & 0xBFFF; + newVal = (newVal & ~mask) | (oldValue & mask); + } + break; + } + case MISCREG_HDFAR: // alias for secure DFAR + misc_reg = MISCREG_DFAR_S; + break; + case MISCREG_HIFAR: // alias for secure IFAR + misc_reg = MISCREG_IFAR_S; + break; + case MISCREG_ATS1CPR: + case MISCREG_ATS1CPW: + case MISCREG_ATS1CUR: + case MISCREG_ATS1CUW: + case MISCREG_ATS12NSOPR: + case MISCREG_ATS12NSOPW: + case MISCREG_ATS12NSOUR: + case MISCREG_ATS12NSOUW: + case MISCREG_ATS1HR: + case MISCREG_ATS1HW: { RequestPtr req = new Request; - unsigned flags; - BaseTLB::Mode mode; + unsigned flags = 0; + BaseTLB::Mode mode = BaseTLB::Read; + TLB::ArmTranslationType tranType = TLB::NormalTran; Fault fault; switch(misc_reg) { - case MISCREG_V2PCWPR: - flags = TLB::MustBeOne; - mode = BaseTLB::Read; - break; - case MISCREG_V2PCWPW: - flags = TLB::MustBeOne; - mode = BaseTLB::Write; - break; - case MISCREG_V2PCWUR: - flags = TLB::MustBeOne | TLB::UserMode; - mode = BaseTLB::Read; - break; - case MISCREG_V2PCWUW: - flags = TLB::MustBeOne | TLB::UserMode; - mode = BaseTLB::Write; - break; - default: - panic("Security Extensions not implemented!"); + case MISCREG_ATS1CPR: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS1CPW: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS1CUR: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS1CUW: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS12NSOPR: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOPR"); + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS12NSOPW: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOPW"); + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS12NSOUR: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOUR"); + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS12NSOUW: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOUW"); + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS1HR: // only really useful from secure mode. + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Read; + break; + case MISCREG_ATS1HW: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Write; + break; } - warn("Translating via MISCREG in atomic mode! Fix Me!\n"); - req->setVirt(0, val, 1, flags, tc->pcState().pc(), - Request::funcMasterId); - fault = tc->getDTBPtr()->translateAtomic(req, tc, mode); + // If we're in timing mode then doing the translation in + // functional mode then we're slightly distorting performance + // results obtained from simulations. The translation should be + // done in the same mode the core is running in. NOTE: This + // can't be an atomic translation because that causes problems + // with unexpected atomic snoop requests. + warn("Translating via MISCREG(%d) in functional mode! Fix Me!\n", misc_reg); + req->setVirt(0, val, 1, flags, Request::funcMasterId, + tc->pcState().pc()); + req->setThreadContext(tc->contextId(), tc->threadId()); + fault = tc->getDTBPtr()->translateFunctional(req, tc, mode, tranType); + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + HCR hcr = readMiscRegNoEffect(MISCREG_HCR); + + MiscReg newVal; if (fault == NoFault) { - miscRegs[MISCREG_PAR] = - (req->getPaddr() & 0xfffff000) | - (tc->getDTBPtr()->getAttr() ); + Addr paddr = req->getPaddr(); + if (haveLPAE && (ttbcr.eae || tranType & TLB::HypMode || + ((tranType & TLB::S1S2NsTran) && hcr.vm) )) { + newVal = (paddr & mask(39, 12)) | + (tc->getDTBPtr()->getAttr()); + } else { + newVal = (paddr & 0xfffff000) | + (tc->getDTBPtr()->getAttr()); + } DPRINTF(MiscRegs, "MISCREG: Translated addr 0x%08x: PAR: 0x%08x\n", - val, miscRegs[MISCREG_PAR]); - } - else { + val, newVal); + } else { + ArmFault *armFault = reinterpret_cast<ArmFault *>(fault.get()); // Set fault bit and FSR - FSR fsr = miscRegs[MISCREG_DFSR]; - miscRegs[MISCREG_PAR] = - (fsr.ext << 6) | - (fsr.fsHigh << 5) | - (fsr.fsLow << 1) | - 0x1; // F bit + FSR fsr = armFault->getFsr(tc); + + newVal = ((fsr >> 9) & 1) << 11; + if (newVal) { + // LPAE - rearange fault status + newVal |= ((fsr >> 0) & 0x3f) << 1; + } else { + // VMSA - rearange fault status + newVal |= ((fsr >> 0) & 0xf) << 1; + newVal |= ((fsr >> 10) & 0x1) << 5; + newVal |= ((fsr >> 12) & 0x1) << 6; + } + newVal |= 0x1; // F bit + newVal |= ((armFault->iss() >> 7) & 0x1) << 8; + newVal |= armFault->isStage2() ? 0x200 : 0; + DPRINTF(MiscRegs, + "MISCREG: Translated addr 0x%08x fault fsr %#x: PAR: 0x%08x\n", + val, fsr, newVal); } + delete req; + setMiscRegNoEffect(MISCREG_PAR, newVal); return; } + case MISCREG_TTBCR: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + const uint32_t ones = (uint32_t)(-1); + TTBCR ttbcrMask = 0; + TTBCR ttbcrNew = newVal; + + // ARM DDI 0406C.b, ARMv7-32 + ttbcrMask.n = ones; // T0SZ + if (haveSecurity) { + ttbcrMask.pd0 = ones; + ttbcrMask.pd1 = ones; + } + ttbcrMask.epd0 = ones; + ttbcrMask.irgn0 = ones; + ttbcrMask.orgn0 = ones; + ttbcrMask.sh0 = ones; + ttbcrMask.ps = ones; // T1SZ + ttbcrMask.a1 = ones; + ttbcrMask.epd1 = ones; + ttbcrMask.irgn1 = ones; + ttbcrMask.orgn1 = ones; + ttbcrMask.sh1 = ones; + if (haveLPAE) + ttbcrMask.eae = ones; + + if (haveLPAE && ttbcrNew.eae) { + newVal = newVal & ttbcrMask; + } else { + newVal = (newVal & ttbcrMask) | (ttbcr & (~ttbcrMask)); + } + } + case MISCREG_TTBR0: + case MISCREG_TTBR1: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + if (haveLPAE) { + if (ttbcr.eae) { + // ARMv7 bit 63-56, 47-40 reserved, UNK/SBZP + // ARMv8 AArch32 bit 63-56 only + uint64_t ttbrMask = mask(63,56) | mask(47,40); + newVal = (newVal & (~ttbrMask)); + } + } + } case MISCREG_CONTEXTIDR: case MISCREG_PRRR: case MISCREG_NMRR: + case MISCREG_MAIR0: + case MISCREG_MAIR1: case MISCREG_DACR: + case MISCREG_VTTBR: + case MISCREG_SCR_EL3: + case MISCREG_SCTLR_EL1: + case MISCREG_SCTLR_EL2: + case MISCREG_SCTLR_EL3: + case MISCREG_TCR_EL1: + case MISCREG_TCR_EL2: + case MISCREG_TCR_EL3: + case MISCREG_TTBR0_EL1: + case MISCREG_TTBR1_EL1: + case MISCREG_TTBR0_EL2: + case MISCREG_TTBR0_EL3: tc->getITBPtr()->invalidateMiscReg(); tc->getDTBPtr()->invalidateMiscReg(); break; + case MISCREG_NZCV: + { + CPSR cpsr = val; + + tc->setIntReg(INTREG_CONDCODES_NZ, cpsr.nz); + tc->setIntReg(INTREG_CONDCODES_C, cpsr.c); + tc->setIntReg(INTREG_CONDCODES_V, cpsr.v); + } + break; + case MISCREG_DAIF: + { + CPSR cpsr = miscRegs[MISCREG_CPSR]; + cpsr.daif = (uint8_t) ((CPSR) newVal).daif; + newVal = cpsr; + misc_reg = MISCREG_CPSR; + } + break; + case MISCREG_SP_EL0: + tc->setIntReg(INTREG_SP0, newVal); + break; + case MISCREG_SP_EL1: + tc->setIntReg(INTREG_SP1, newVal); + break; + case MISCREG_SP_EL2: + tc->setIntReg(INTREG_SP2, newVal); + break; + case MISCREG_SPSEL: + { + CPSR cpsr = miscRegs[MISCREG_CPSR]; + cpsr.sp = (uint8_t) ((CPSR) newVal).sp; + newVal = cpsr; + misc_reg = MISCREG_CPSR; + } + break; + case MISCREG_CURRENTEL: + { + CPSR cpsr = miscRegs[MISCREG_CPSR]; + cpsr.el = (uint8_t) ((CPSR) newVal).el; + newVal = cpsr; + misc_reg = MISCREG_CPSR; + } + break; + case MISCREG_AT_S1E1R_Xt: + case MISCREG_AT_S1E1W_Xt: + case MISCREG_AT_S1E0R_Xt: + case MISCREG_AT_S1E0W_Xt: + case MISCREG_AT_S1E2R_Xt: + case MISCREG_AT_S1E2W_Xt: + case MISCREG_AT_S12E1R_Xt: + case MISCREG_AT_S12E1W_Xt: + case MISCREG_AT_S12E0R_Xt: + case MISCREG_AT_S12E0W_Xt: + case MISCREG_AT_S1E3R_Xt: + case MISCREG_AT_S1E3W_Xt: + { + RequestPtr req = new Request; + unsigned flags = 0; + BaseTLB::Mode mode = BaseTLB::Read; + TLB::ArmTranslationType tranType = TLB::NormalTran; + Fault fault; + switch(misc_reg) { + case MISCREG_AT_S1E1R_Xt: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S1E1W_Xt: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S1E0R_Xt: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S1E0W_Xt: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S1E2R_Xt: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S1E2W_Xt: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S12E0R_Xt: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S12E0W_Xt: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S12E1R_Xt: + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S12E1W_Xt: + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S1E3R_Xt: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; // There is no TZ mode defined. + mode = BaseTLB::Read; + break; + case MISCREG_AT_S1E3W_Xt: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; // There is no TZ mode defined. + mode = BaseTLB::Write; + break; + } + // If we're in timing mode then doing the translation in + // functional mode then we're slightly distorting performance + // results obtained from simulations. The translation should be + // done in the same mode the core is running in. NOTE: This + // can't be an atomic translation because that causes problems + // with unexpected atomic snoop requests. + warn("Translating via MISCREG(%d) in functional mode! Fix Me!\n", misc_reg); + req->setVirt(0, val, 1, flags, Request::funcMasterId, + tc->pcState().pc()); + req->setThreadContext(tc->contextId(), tc->threadId()); + fault = tc->getDTBPtr()->translateFunctional(req, tc, mode, + tranType); + + MiscReg newVal; + if (fault == NoFault) { + Addr paddr = req->getPaddr(); + uint64_t attr = tc->getDTBPtr()->getAttr(); + uint64_t attr1 = attr >> 56; + if (!attr1 || attr1 ==0x44) { + attr |= 0x100; + attr &= ~ uint64_t(0x80); + } + newVal = (paddr & mask(47, 12)) | attr; + DPRINTF(MiscRegs, + "MISCREG: Translated addr %#x: PAR_EL1: %#xx\n", + val, newVal); + } else { + ArmFault *armFault = reinterpret_cast<ArmFault *>(fault.get()); + // Set fault bit and FSR + FSR fsr = armFault->getFsr(tc); + + newVal = ((fsr >> 9) & 1) << 11; + // rearange fault status + newVal |= ((fsr >> 0) & 0x3f) << 1; + newVal |= 0x1; // F bit + newVal |= ((armFault->iss() >> 7) & 0x1) << 8; + newVal |= armFault->isStage2() ? 0x200 : 0; + DPRINTF(MiscRegs, + "MISCREG: Translated addr %#x fault fsr %#x: PAR: %#x\n", + val, fsr, newVal); + } + delete req; + setMiscRegNoEffect(MISCREG_PAR_EL1, newVal); + return; + } + case MISCREG_SPSR_EL3: + case MISCREG_SPSR_EL2: + case MISCREG_SPSR_EL1: + // Force bits 23:21 to 0 + newVal = val & ~(0x7 << 21); + break; case MISCREG_L2CTLR: warn("miscreg L2CTLR (%s) written with %#x. ignored...\n", miscRegName[misc_reg], uint32_t(val)); + break; + + // Generic Timer registers + case MISCREG_CNTFRQ: + case MISCREG_CNTFRQ_EL0: + getSystemCounter(tc)->setFreq(val); + break; + case MISCREG_CNTP_CVAL: + case MISCREG_CNTP_CVAL_EL0: + getArchTimer(tc, tc->cpuId())->setCompareValue(val); + break; + case MISCREG_CNTP_TVAL: + case MISCREG_CNTP_TVAL_EL0: + getArchTimer(tc, tc->cpuId())->setTimerValue(val); + break; + case MISCREG_CNTP_CTL: + case MISCREG_CNTP_CTL_EL0: + getArchTimer(tc, tc->cpuId())->setControl(val); + break; + // PL1 phys. timer, secure + // AArch64 + case MISCREG_CNTPS_CVAL_EL1: + case MISCREG_CNTPS_TVAL_EL1: + case MISCREG_CNTPS_CTL_EL1: + // PL2 phys. timer, non-secure + // AArch32 + case MISCREG_CNTHCTL: + case MISCREG_CNTHP_CVAL: + case MISCREG_CNTHP_TVAL: + case MISCREG_CNTHP_CTL: + // AArch64 + case MISCREG_CNTHCTL_EL2: + case MISCREG_CNTHP_CVAL_EL2: + case MISCREG_CNTHP_TVAL_EL2: + case MISCREG_CNTHP_CTL_EL2: + // Virtual timer + // AArch32 + case MISCREG_CNTV_CVAL: + case MISCREG_CNTV_TVAL: + case MISCREG_CNTV_CTL: + // AArch64 + // case MISCREG_CNTV_CVAL_EL2: + // case MISCREG_CNTV_TVAL_EL2: + // case MISCREG_CNTV_CTL_EL2: + panic("Generic Timer register not implemented\n"); + break; } } setMiscRegNoEffect(misc_reg, newVal); } +void +ISA::tlbiVA(ThreadContext *tc, MiscReg newVal, uint8_t asid, bool secure_lookup, + uint8_t target_el) +{ + if (haveLargeAsid64) + asid &= mask(8); + Addr va = ((Addr) bits(newVal, 43, 0)) << 12; + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushMvaAsid(va, asid, + secure_lookup, target_el); + oc->getDTBPtr()->flushMvaAsid(va, asid, + secure_lookup, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushMvaAsid( + va, asid, secure_lookup, target_el); + checker->getDTBPtr()->flushMvaAsid( + va, asid, secure_lookup, target_el); + } + } +} + +void +ISA::tlbiALL(ThreadContext *tc, bool secure_lookup, uint8_t target_el) +{ + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushAllSecurity(secure_lookup, target_el); + oc->getDTBPtr()->flushAllSecurity(secure_lookup, target_el); + + // If CheckerCPU is connected, need to notify it of a flush + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushAllSecurity(secure_lookup, + target_el); + checker->getDTBPtr()->flushAllSecurity(secure_lookup, + target_el); + } + } +} + +void +ISA::tlbiALLN(ThreadContext *tc, bool hyp, uint8_t target_el) +{ + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushAllNs(hyp, target_el); + oc->getDTBPtr()->flushAllNs(hyp, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushAllNs(hyp, target_el); + checker->getDTBPtr()->flushAllNs(hyp, target_el); + } + } +} + +void +ISA::tlbiMVA(ThreadContext *tc, MiscReg newVal, bool secure_lookup, bool hyp, + uint8_t target_el) +{ + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + oc->getDTBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + checker->getDTBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + } + } +} + +::GenericTimer::SystemCounter * +ISA::getSystemCounter(ThreadContext *tc) +{ + ::GenericTimer::SystemCounter *cnt = ((ArmSystem *) tc->getSystemPtr())-> + getSystemCounter(); + if (cnt == NULL) { + panic("System counter not available\n"); + } + return cnt; +} + +::GenericTimer::ArchTimer * +ISA::getArchTimer(ThreadContext *tc, int cpu_id) +{ + ::GenericTimer::ArchTimer *timer = ((ArmSystem *) tc->getSystemPtr())-> + getArchTimer(cpu_id); + if (timer == NULL) { + panic("Architected timer not available\n"); + } + return timer; +} + } ArmISA::ISA * diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index c747fc770..c72d5d50f 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -44,9 +44,11 @@ #define __ARCH_ARM_ISA_HH__ #include "arch/arm/registers.hh" +#include "arch/arm/system.hh" #include "arch/arm/tlb.hh" #include "arch/arm/types.hh" #include "debug/Checkpoint.hh" +#include "dev/arm/generic_timer.hh" #include "sim/sim_object.hh" struct ArmISAParams; @@ -56,45 +58,174 @@ class EventManager; namespace ArmISA { + + /** + * At the moment there are 57 registers which need to be aliased/ + * translated with other registers in the ISA. This enum helps with that + * translation. + */ + enum translateTable { + miscRegTranslateCSSELR_EL1, + miscRegTranslateSCTLR_EL1, + miscRegTranslateSCTLR_EL2, + miscRegTranslateACTLR_EL1, + miscRegTranslateACTLR_EL2, + miscRegTranslateCPACR_EL1, + miscRegTranslateCPTR_EL2, + miscRegTranslateHCR_EL2, + miscRegTranslateMDCR_EL2, + miscRegTranslateHSTR_EL2, + miscRegTranslateHACR_EL2, + miscRegTranslateTTBR0_EL1, + miscRegTranslateTTBR1_EL1, + miscRegTranslateTTBR0_EL2, + miscRegTranslateVTTBR_EL2, + miscRegTranslateTCR_EL1, + miscRegTranslateTCR_EL2, + miscRegTranslateVTCR_EL2, + miscRegTranslateAFSR0_EL1, + miscRegTranslateAFSR1_EL1, + miscRegTranslateAFSR0_EL2, + miscRegTranslateAFSR1_EL2, + miscRegTranslateESR_EL2, + miscRegTranslateFAR_EL1, + miscRegTranslateFAR_EL2, + miscRegTranslateHPFAR_EL2, + miscRegTranslatePAR_EL1, + miscRegTranslateMAIR_EL1, + miscRegTranslateMAIR_EL2, + miscRegTranslateAMAIR_EL1, + miscRegTranslateVBAR_EL1, + miscRegTranslateVBAR_EL2, + miscRegTranslateCONTEXTIDR_EL1, + miscRegTranslateTPIDR_EL0, + miscRegTranslateTPIDRRO_EL0, + miscRegTranslateTPIDR_EL1, + miscRegTranslateTPIDR_EL2, + miscRegTranslateTEECR32_EL1, + miscRegTranslateCNTFRQ_EL0, + miscRegTranslateCNTPCT_EL0, + miscRegTranslateCNTVCT_EL0, + miscRegTranslateCNTVOFF_EL2, + miscRegTranslateCNTKCTL_EL1, + miscRegTranslateCNTHCTL_EL2, + miscRegTranslateCNTP_TVAL_EL0, + miscRegTranslateCNTP_CTL_EL0, + miscRegTranslateCNTP_CVAL_EL0, + miscRegTranslateCNTV_TVAL_EL0, + miscRegTranslateCNTV_CTL_EL0, + miscRegTranslateCNTV_CVAL_EL0, + miscRegTranslateCNTHP_TVAL_EL2, + miscRegTranslateCNTHP_CTL_EL2, + miscRegTranslateCNTHP_CVAL_EL2, + miscRegTranslateDACR32_EL2, + miscRegTranslateIFSR32_EL2, + miscRegTranslateTEEHBR32_EL1, + miscRegTranslateSDER32_EL3, + miscRegTranslateMax + }; + class ISA : public SimObject { protected: + // Parent system + ArmSystem *system; + + // Cached copies of system-level properties + bool haveSecurity; + bool haveLPAE; + bool haveVirtualization; + bool haveLargeAsid64; + uint8_t physAddrRange64; + + /** Register translation entry used in lookUpMiscReg */ + struct MiscRegLUTEntry { + uint32_t lower; + uint32_t upper; + }; + + struct MiscRegInitializerEntry { + uint32_t index; + struct MiscRegLUTEntry entry; + }; + + /** Register table noting all translations */ + static const struct MiscRegInitializerEntry + MiscRegSwitch[miscRegTranslateMax]; + + /** Translation table accessible via the value of the register */ + std::vector<struct MiscRegLUTEntry> lookUpMiscReg; + MiscReg miscRegs[NumMiscRegs]; const IntRegIndex *intRegMap; void updateRegMap(CPSR cpsr) { - switch (cpsr.mode) { - case MODE_USER: - case MODE_SYSTEM: - intRegMap = IntRegUsrMap; - break; - case MODE_FIQ: - intRegMap = IntRegFiqMap; - break; - case MODE_IRQ: - intRegMap = IntRegIrqMap; - break; - case MODE_SVC: - intRegMap = IntRegSvcMap; - break; - case MODE_MON: - intRegMap = IntRegMonMap; - break; - case MODE_ABORT: - intRegMap = IntRegAbtMap; - break; - case MODE_UNDEFINED: - intRegMap = IntRegUndMap; - break; - default: - panic("Unrecognized mode setting in CPSR.\n"); + if (cpsr.width == 0) { + intRegMap = IntReg64Map; + } else { + switch (cpsr.mode) { + case MODE_USER: + case MODE_SYSTEM: + intRegMap = IntRegUsrMap; + break; + case MODE_FIQ: + intRegMap = IntRegFiqMap; + break; + case MODE_IRQ: + intRegMap = IntRegIrqMap; + break; + case MODE_SVC: + intRegMap = IntRegSvcMap; + break; + case MODE_MON: + intRegMap = IntRegMonMap; + break; + case MODE_ABORT: + intRegMap = IntRegAbtMap; + break; + case MODE_HYP: + intRegMap = IntRegHypMap; + break; + case MODE_UNDEFINED: + intRegMap = IntRegUndMap; + break; + default: + panic("Unrecognized mode setting in CPSR.\n"); + } } } + ::GenericTimer::SystemCounter * getSystemCounter(ThreadContext *tc); + ::GenericTimer::ArchTimer * getArchTimer(ThreadContext *tc, + int cpu_id); + + + private: + inline void assert32(ThreadContext *tc) { + CPSR cpsr M5_VAR_USED = readMiscReg(MISCREG_CPSR, tc); + assert(cpsr.width); + } + + inline void assert64(ThreadContext *tc) { + CPSR cpsr M5_VAR_USED = readMiscReg(MISCREG_CPSR, tc); + assert(!cpsr.width); + } + + void tlbiVA(ThreadContext *tc, MiscReg newVal, uint8_t asid, + bool secure_lookup, uint8_t target_el); + + void tlbiALL(ThreadContext *tc, bool secure_lookup, uint8_t target_el); + + void tlbiALLN(ThreadContext *tc, bool hyp, uint8_t target_el); + + void tlbiMVA(ThreadContext *tc, MiscReg newVal, bool secure_lookup, + bool hyp, uint8_t target_el); + public: void clear(); + void clear64(const ArmISAParams *p); MiscReg readMiscRegNoEffect(int misc_reg) const; MiscReg readMiscReg(int misc_reg, ThreadContext *tc); @@ -109,28 +240,28 @@ namespace ArmISA return intRegMap[reg]; } else if (reg < NUM_INTREGS) { return reg; - } else { - int mode = reg / intRegsPerMode; - reg = reg % intRegsPerMode; - switch (mode) { - case MODE_USER: - case MODE_SYSTEM: - return INTREG_USR(reg); - case MODE_FIQ: - return INTREG_FIQ(reg); - case MODE_IRQ: - return INTREG_IRQ(reg); - case MODE_SVC: - return INTREG_SVC(reg); - case MODE_MON: - return INTREG_MON(reg); - case MODE_ABORT: - return INTREG_ABT(reg); - case MODE_UNDEFINED: - return INTREG_UND(reg); + } else if (reg == INTREG_SPX) { + CPSR cpsr = miscRegs[MISCREG_CPSR]; + ExceptionLevel el = opModeToEL( + (OperatingMode) (uint8_t) cpsr.mode); + if (!cpsr.sp && el != EL0) + return INTREG_SP0; + switch (el) { + case EL3: + return INTREG_SP3; + // @todo: uncomment this to enable Virtualization + // case EL2: + // return INTREG_SP2; + case EL1: + return INTREG_SP1; + case EL0: + return INTREG_SP0; default: - panic("Flattening into an unknown mode.\n"); + panic("Invalid exception level"); + break; } + } else { + return flattenIntRegModeIndex(reg); } } @@ -150,47 +281,127 @@ namespace ArmISA int flattenMiscIndex(int reg) const { + int flat_idx = reg; + if (reg == MISCREG_SPSR) { - int spsr_idx = NUM_MISCREGS; CPSR cpsr = miscRegs[MISCREG_CPSR]; switch (cpsr.mode) { + case MODE_EL0T: + warn("User mode does not have SPSR\n"); + flat_idx = MISCREG_SPSR; + break; + case MODE_EL1T: + case MODE_EL1H: + flat_idx = MISCREG_SPSR_EL1; + break; + case MODE_EL2T: + case MODE_EL2H: + flat_idx = MISCREG_SPSR_EL2; + break; + case MODE_EL3T: + case MODE_EL3H: + flat_idx = MISCREG_SPSR_EL3; + break; case MODE_USER: warn("User mode does not have SPSR\n"); - spsr_idx = MISCREG_SPSR; + flat_idx = MISCREG_SPSR; break; case MODE_FIQ: - spsr_idx = MISCREG_SPSR_FIQ; + flat_idx = MISCREG_SPSR_FIQ; break; case MODE_IRQ: - spsr_idx = MISCREG_SPSR_IRQ; + flat_idx = MISCREG_SPSR_IRQ; break; case MODE_SVC: - spsr_idx = MISCREG_SPSR_SVC; + flat_idx = MISCREG_SPSR_SVC; break; case MODE_MON: - spsr_idx = MISCREG_SPSR_MON; + flat_idx = MISCREG_SPSR_MON; break; case MODE_ABORT: - spsr_idx = MISCREG_SPSR_ABT; + flat_idx = MISCREG_SPSR_ABT; + break; + case MODE_HYP: + flat_idx = MISCREG_SPSR_HYP; break; case MODE_UNDEFINED: - spsr_idx = MISCREG_SPSR_UND; + flat_idx = MISCREG_SPSR_UND; break; default: warn("Trying to access SPSR in an invalid mode: %d\n", cpsr.mode); - spsr_idx = MISCREG_SPSR; + flat_idx = MISCREG_SPSR; break; } - return spsr_idx; + } else if (miscRegInfo[reg][MISCREG_MUTEX]) { + // Mutually exclusive CP15 register + switch (reg) { + case MISCREG_PRRR_MAIR0: + case MISCREG_PRRR_MAIR0_NS: + case MISCREG_PRRR_MAIR0_S: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + // If the muxed reg has been flattened, work out the + // offset and apply it to the unmuxed reg + int idxOffset = reg - MISCREG_PRRR_MAIR0; + if (ttbcr.eae) + flat_idx = flattenMiscIndex(MISCREG_MAIR0 + + idxOffset); + else + flat_idx = flattenMiscIndex(MISCREG_PRRR + + idxOffset); + } + break; + case MISCREG_NMRR_MAIR1: + case MISCREG_NMRR_MAIR1_NS: + case MISCREG_NMRR_MAIR1_S: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + // If the muxed reg has been flattened, work out the + // offset and apply it to the unmuxed reg + int idxOffset = reg - MISCREG_NMRR_MAIR1; + if (ttbcr.eae) + flat_idx = flattenMiscIndex(MISCREG_MAIR1 + + idxOffset); + else + flat_idx = flattenMiscIndex(MISCREG_NMRR + + idxOffset); + } + break; + case MISCREG_PMXEVTYPER_PMCCFILTR: + { + PMSELR pmselr = miscRegs[MISCREG_PMSELR]; + if (pmselr.sel == 31) + flat_idx = flattenMiscIndex(MISCREG_PMCCFILTR); + else + flat_idx = flattenMiscIndex(MISCREG_PMXEVTYPER); + } + break; + default: + panic("Unrecognized misc. register.\n"); + break; + } + } else { + if (miscRegInfo[reg][MISCREG_BANKED]) { + bool secureReg = haveSecurity && + inSecureState(miscRegs[MISCREG_SCR], + miscRegs[MISCREG_CPSR]); + flat_idx += secureReg ? 2 : 1; + } } - return reg; + return flat_idx; } void serialize(std::ostream &os) { DPRINTF(Checkpoint, "Serializing Arm Misc Registers\n"); SERIALIZE_ARRAY(miscRegs, NumMiscRegs); + + SERIALIZE_SCALAR(haveSecurity); + SERIALIZE_SCALAR(haveLPAE); + SERIALIZE_SCALAR(haveVirtualization); + SERIALIZE_SCALAR(haveLargeAsid64); + SERIALIZE_SCALAR(physAddrRange64); } void unserialize(Checkpoint *cp, const std::string §ion) { @@ -198,6 +409,12 @@ namespace ArmISA UNSERIALIZE_ARRAY(miscRegs, NumMiscRegs); CPSR tmp_cpsr = miscRegs[MISCREG_CPSR]; updateRegMap(tmp_cpsr); + + UNSERIALIZE_SCALAR(haveSecurity); + UNSERIALIZE_SCALAR(haveLPAE); + UNSERIALIZE_SCALAR(haveVirtualization); + UNSERIALIZE_SCALAR(haveLargeAsid64); + UNSERIALIZE_SCALAR(physAddrRange64); } void startup(ThreadContext *tc) {} diff --git a/src/arch/arm/isa/bitfields.isa b/src/arch/arm/isa/bitfields.isa index 5a8b5db6d..6006cfb2d 100644 --- a/src/arch/arm/isa/bitfields.isa +++ b/src/arch/arm/isa/bitfields.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -73,6 +73,7 @@ def bitfield SEVEN_AND_FOUR sevenAndFour; def bitfield THUMB thumb; def bitfield BIGTHUMB bigThumb; +def bitfield AARCH64 aarch64; // Other def bitfield COND_CODE condCode; diff --git a/src/arch/arm/isa/decoder/aarch64.isa b/src/arch/arm/isa/decoder/aarch64.isa new file mode 100644 index 000000000..a6c0fa2df --- /dev/null +++ b/src/arch/arm/isa/decoder/aarch64.isa @@ -0,0 +1,48 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// The 64 bit ARM decoder +// -------------------------- +// + + +Aarch64::aarch64(); + diff --git a/src/arch/arm/isa/decoder/arm.isa b/src/arch/arm/isa/decoder/arm.isa index 4bd9d5cf4..f0c0dec18 100644 --- a/src/arch/arm/isa/decoder/arm.isa +++ b/src/arch/arm/isa/decoder/arm.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -73,7 +73,11 @@ format DataOp { 0x9: ArmBlxReg::armBlxReg(); } 0x5: ArmSatAddSub::armSatAddSub(); - 0x7: Breakpoint::bkpt(); + 0x6: ArmERet::armERet(); + 0x7: decode OPCODE_22 { + 0: Breakpoint::bkpt(); + 1: ArmSmcHyp::armSmcHyp(); + } } 0x1: ArmHalfWordMultAndMultAcc::armHalfWordMultAndMultAcc(); } @@ -105,6 +109,10 @@ format DataOp { } 0x6: decode CPNUM { 0xa, 0xb: ExtensionRegLoadStore::extensionRegLoadStore(); + 0xf: decode OPCODE_20 { + 0: Mcrr15::Mcrr15(); + 1: Mrrc15::Mrrc15(); + } } 0x7: decode OPCODE_24 { 0: decode OPCODE_4 { diff --git a/src/arch/arm/isa/decoder/decoder.isa b/src/arch/arm/isa/decoder/decoder.isa index cf7d17871..94685b943 100644 --- a/src/arch/arm/isa/decoder/decoder.isa +++ b/src/arch/arm/isa/decoder/decoder.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -41,8 +41,12 @@ // Authors: Gabe Black decode THUMB default Unknown::unknown() { -0: -##include "arm.isa" +0: decode AARCH64 { + 0: + ##include "arm.isa" + 1: + ##include "aarch64.isa" +} 1: ##include "thumb.isa" } diff --git a/src/arch/arm/isa/decoder/thumb.isa b/src/arch/arm/isa/decoder/thumb.isa index f54cc728d..31495793e 100644 --- a/src/arch/arm/isa/decoder/thumb.isa +++ b/src/arch/arm/isa/decoder/thumb.isa @@ -95,8 +95,14 @@ decode BIGTHUMB { 0xa, 0xb: ExtensionRegLoadStore::extensionRegLoadStre(); 0xf: decode HTOPCODE_9_4 { 0x00: Unknown::undefined(); - 0x04: WarnUnimpl::mcrr(); // mcrr2 - 0x05: WarnUnimpl::mrrc(); // mrrc2 + 0x04: decode LTCOPROC { + 0xf: Mcrr15::Mcrr15(); + default: WarnUnimpl::mcrr(); // mcrr2 + } + 0x05: decode LTCOPROC { + 0xf: Mrrc15::Mrrc15(); + default: WarnUnimpl::mrrc(); // mrrc2 + } 0x02, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e: WarnUnimpl::stc(); // stc2 diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa new file mode 100644 index 000000000..3ed70ce81 --- /dev/null +++ b/src/arch/arm/isa/formats/aarch64.isa @@ -0,0 +1,2035 @@ +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black +// Thomas Grocutt +// Mbou Eyole +// Giacomo Gabrielli + +output header {{ +namespace Aarch64 +{ + StaticInstPtr decodeDataProcImm(ExtMachInst machInst); + StaticInstPtr decodeBranchExcSys(ExtMachInst machInst); + StaticInstPtr decodeLoadsStores(ExtMachInst machInst); + StaticInstPtr decodeDataProcReg(ExtMachInst machInst); + + StaticInstPtr decodeFpAdvSIMD(ExtMachInst machInst); + StaticInstPtr decodeFp(ExtMachInst machInst); + StaticInstPtr decodeAdvSIMD(ExtMachInst machInst); + StaticInstPtr decodeAdvSIMDScalar(ExtMachInst machInst); + + StaticInstPtr decodeGem5Ops(ExtMachInst machInst); +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeDataProcImm(ExtMachInst machInst) + { + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rdsp = makeSP(rd); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + + uint8_t opc = bits(machInst, 30, 29); + bool sf = bits(machInst, 31); + bool n = bits(machInst, 22); + uint8_t immr = bits(machInst, 21, 16); + uint8_t imms = bits(machInst, 15, 10); + switch (bits(machInst, 25, 23)) { + case 0x0: + case 0x1: + { + uint64_t immlo = bits(machInst, 30, 29); + uint64_t immhi = bits(machInst, 23, 5); + uint64_t imm = (immlo << 0) | (immhi << 2); + if (bits(machInst, 31) == 0) + return new AdrXImm(machInst, rd, INTREG_ZERO, sext<21>(imm)); + else + return new AdrpXImm(machInst, rd, INTREG_ZERO, + sext<33>(imm << 12)); + } + case 0x2: + case 0x3: + { + uint32_t imm12 = bits(machInst, 21, 10); + uint8_t shift = bits(machInst, 23, 22); + uint32_t imm; + if (shift == 0x0) + imm = imm12 << 0; + else if (shift == 0x1) + imm = imm12 << 12; + else + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new AddXImm(machInst, rdsp, rnsp, imm); + case 0x1: + return new AddXImmCc(machInst, rd, rnsp, imm); + case 0x2: + return new SubXImm(machInst, rdsp, rnsp, imm); + case 0x3: + return new SubXImmCc(machInst, rd, rnsp, imm); + } + } + case 0x4: + { + if (!sf && n) + return new Unknown64(machInst); + // len = MSB(n:NOT(imms)), len < 1 is undefined. + uint8_t len = 0; + if (n) { + len = 6; + } else if (imms == 0x3f || imms == 0x3e) { + return new Unknown64(machInst); + } else { + len = findMsbSet(imms ^ 0x3f); + } + // Generate r, s, and size. + uint64_t r = bits(immr, len - 1, 0); + uint64_t s = bits(imms, len - 1, 0); + uint8_t size = 1 << len; + if (s == size - 1) + return new Unknown64(machInst); + // Generate the pattern with s 1s, rotated by r, with size bits. + uint64_t pattern = mask(s + 1); + if (r) { + pattern = (pattern >> r) | (pattern << (size - r)); + pattern &= mask(size); + } + uint8_t width = sf ? 64 : 32; + // Replicate that to fill up the immediate. + for (unsigned i = 1; i < (width / size); i *= 2) + pattern |= (pattern << (i * size)); + uint64_t imm = pattern; + + switch (opc) { + case 0x0: + return new AndXImm(machInst, rdsp, rn, imm); + case 0x1: + return new OrrXImm(machInst, rdsp, rn, imm); + case 0x2: + return new EorXImm(machInst, rdsp, rn, imm); + case 0x3: + return new AndXImmCc(machInst, rd, rn, imm); + } + } + case 0x5: + { + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + uint32_t imm16 = bits(machInst, 20, 5); + uint32_t hw = bits(machInst, 22, 21); + switch (opc) { + case 0x0: + return new Movn(machInst, rd, imm16, hw * 16); + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new Movz(machInst, rd, imm16, hw * 16); + case 0x3: + return new Movk(machInst, rd, imm16, hw * 16); + } + } + case 0x6: + if ((sf != n) || (!sf && (bits(immr, 5) || bits(imms, 5)))) + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new Sbfm64(machInst, rd, rn, immr, imms); + case 0x1: + return new Bfm64(machInst, rd, rn, immr, imms); + case 0x2: + return new Ubfm64(machInst, rd, rn, immr, imms); + case 0x3: + return new Unknown64(machInst); + } + case 0x7: + { + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + if (opc || bits(machInst, 21)) + return new Unknown64(machInst); + else + return new Extr64(machInst, rd, rn, rm, imms); + } + } + return new FailUnimplemented("Unhandled Case8", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeBranchExcSys(ExtMachInst machInst) + { + switch (bits(machInst, 30, 29)) { + case 0x0: + { + int64_t imm = sext<26>(bits(machInst, 25, 0)) << 2; + if (bits(machInst, 31) == 0) + return new B64(machInst, imm); + else + return new Bl64(machInst, imm); + } + case 0x1: + { + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + if (bits(machInst, 25) == 0) { + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + if (bits(machInst, 24) == 0) + return new Cbz64(machInst, imm, rt); + else + return new Cbnz64(machInst, imm, rt); + } else { + uint64_t bitmask = 0x1; + bitmask <<= bits(machInst, 23, 19); + int64_t imm = sext<14>(bits(machInst, 18, 5)) << 2; + if (bits(machInst, 31)) + bitmask <<= 32; + if (bits(machInst, 24) == 0) + return new Tbz64(machInst, bitmask, imm, rt); + else + return new Tbnz64(machInst, bitmask, imm, rt); + } + } + case 0x2: + // bit 30:26=10101 + if (bits(machInst, 31) == 0) { + if (bits(machInst, 25, 24) || bits(machInst, 4)) + return new Unknown64(machInst); + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + ConditionCode condCode = + (ConditionCode)(uint8_t)(bits(machInst, 3, 0)); + return new BCond64(machInst, imm, condCode); + } else if (bits(machInst, 25, 24) == 0x0) { + if (bits(machInst, 4, 2)) + return new Unknown64(machInst); + uint8_t decVal = (bits(machInst, 1, 0) << 0) | + (bits(machInst, 23, 21) << 2); + switch (decVal) { + case 0x01: + return new Svc64(machInst); + case 0x02: + return new FailUnimplemented("hvc", machInst); + case 0x03: + return new Smc64(machInst); + case 0x04: + return new FailUnimplemented("brk", machInst); + case 0x08: + return new FailUnimplemented("hlt", machInst); + case 0x15: + return new FailUnimplemented("dcps1", machInst); + case 0x16: + return new FailUnimplemented("dcps2", machInst); + case 0x17: + return new FailUnimplemented("dcps3", machInst); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 25, 22) == 0x4) { + // bit 31:22=1101010100 + bool l = bits(machInst, 21); + uint8_t op0 = bits(machInst, 20, 19); + uint8_t op1 = bits(machInst, 18, 16); + uint8_t crn = bits(machInst, 15, 12); + uint8_t crm = bits(machInst, 11, 8); + uint8_t op2 = bits(machInst, 7, 5); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + switch (op0) { + case 0x0: + if (rt != 0x1f || l) + return new Unknown64(machInst); + if (crn == 0x2 && op1 == 0x3) { + switch (op2) { + case 0x0: + return new NopInst(machInst); + case 0x1: + return new YieldInst(machInst); + case 0x2: + return new WfeInst(machInst); + case 0x3: + return new WfiInst(machInst); + case 0x4: + return new SevInst(machInst); + case 0x5: + return new SevlInst(machInst); + default: + return new Unknown64(machInst); + } + } else if (crn == 0x3 && op1 == 0x3) { + switch (op2) { + case 0x2: + return new Clrex64(machInst); + case 0x4: + return new Dsb64(machInst); + case 0x5: + return new Dmb64(machInst); + case 0x6: + return new Isb64(machInst); + default: + return new Unknown64(machInst); + } + } else if (crn == 0x4) { + // MSR immediate + switch (op1 << 3 | op2) { + case 0x5: + // SP + return new MsrSP64(machInst, + (IntRegIndex) MISCREG_SPSEL, + INTREG_ZERO, + crm & 0x1); + case 0x1e: + // DAIFSet + return new MsrDAIFSet64( + machInst, + (IntRegIndex) MISCREG_DAIF, + INTREG_ZERO, + crm); + case 0x1f: + // DAIFClr + return new MsrDAIFClr64( + machInst, + (IntRegIndex) MISCREG_DAIF, + INTREG_ZERO, + crm); + default: + return new Unknown64(machInst); + } + } else { + return new Unknown64(machInst); + } + break; + case 0x1: + case 0x2: + case 0x3: + { + // bit 31:22=1101010100, 20:19=11 + bool read = l; + MiscRegIndex miscReg = + decodeAArch64SysReg(op0, op1, crn, crm, op2); + if (read) { + if ((miscReg == MISCREG_DC_CIVAC_Xt) || + (miscReg == MISCREG_DC_CVAC_Xt) || + (miscReg == MISCREG_DC_ZVA_Xt)) { + return new Unknown64(machInst); + } + } + // Check for invalid registers + if (miscReg == MISCREG_UNKNOWN) { + return new Unknown64(machInst); + } else if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) { + if (miscReg == MISCREG_NZCV) { + if (read) + return new MrsNZCV64(machInst, rt, (IntRegIndex) miscReg); + else + return new MsrNZCV64(machInst, (IntRegIndex) miscReg, rt); + } + uint32_t iss = msrMrs64IssBuild(read, op0, op1, crn, crm, op2, rt); + if (miscReg == MISCREG_DC_ZVA_Xt && !read) + return new Dczva(machInst, rt, (IntRegIndex) miscReg, iss); + + if (read) + return new Mrs64(machInst, rt, (IntRegIndex) miscReg, iss); + else + return new Msr64(machInst, (IntRegIndex) miscReg, rt, iss); + } else if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + read ? "mrs" : "msr", miscRegName[miscReg]); + return new WarnUnimplemented(read ? "mrs" : "msr", + machInst, full_mnem); + } else { + return new FailUnimplemented(csprintf("%s %s", + read ? "mrs" : "msr", miscRegName[miscReg]).c_str(), + machInst); + } + } + break; + } + } else if (bits(machInst, 25) == 0x1) { + uint8_t opc = bits(machInst, 24, 21); + uint8_t op2 = bits(machInst, 20, 16); + uint8_t op3 = bits(machInst, 15, 10); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + uint8_t op4 = bits(machInst, 4, 0); + if (op2 != 0x1f || op3 != 0x0 || op4 != 0x0) + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new Br64(machInst, rn); + case 0x1: + return new Blr64(machInst, rn); + case 0x2: + return new Ret64(machInst, rn); + case 0x4: + if (rn != 0x1f) + return new Unknown64(machInst); + return new Eret64(machInst); + case 0x5: + if (rn != 0x1f) + return new Unknown64(machInst); + return new FailUnimplemented("dret", machInst); + } + } + default: + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case7", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeLoadsStores(ExtMachInst machInst) + { + // bit 27,25=10 + switch (bits(machInst, 29, 28)) { + case 0x0: + if (bits(machInst, 26) == 0) { + if (bits(machInst, 24) != 0) + return new Unknown64(machInst); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rt2 = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + IntRegIndex rs = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + uint8_t opc = (bits(machInst, 15) << 0) | + (bits(machInst, 23, 21) << 1); + uint8_t size = bits(machInst, 31, 30); + switch (opc) { + case 0x0: + switch (size) { + case 0x0: + return new STXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new STXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new STXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new STXRX64(machInst, rt, rnsp, rs); + } + case 0x1: + switch (size) { + case 0x0: + return new STLXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new STLXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new STLXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new STLXRX64(machInst, rt, rnsp, rs); + } + case 0x2: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new STXPW64(machInst, rs, rt, rt2, rnsp); + case 0x3: + return new STXPX64(machInst, rs, rt, rt2, rnsp); + } + + case 0x3: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new STLXPW64(machInst, rs, rt, rt2, rnsp); + case 0x3: + return new STLXPX64(machInst, rs, rt, rt2, rnsp); + } + + case 0x4: + switch (size) { + case 0x0: + return new LDXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new LDXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new LDXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new LDXRX64(machInst, rt, rnsp, rs); + } + case 0x5: + switch (size) { + case 0x0: + return new LDAXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new LDAXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new LDAXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new LDAXRX64(machInst, rt, rnsp, rs); + } + case 0x6: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new LDXPW64(machInst, rt, rt2, rnsp); + case 0x3: + return new LDXPX64(machInst, rt, rt2, rnsp); + } + + case 0x7: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new LDAXPW64(machInst, rt, rt2, rnsp); + case 0x3: + return new LDAXPX64(machInst, rt, rt2, rnsp); + } + + case 0x9: + switch (size) { + case 0x0: + return new STLRB64(machInst, rt, rnsp); + case 0x1: + return new STLRH64(machInst, rt, rnsp); + case 0x2: + return new STLRW64(machInst, rt, rnsp); + case 0x3: + return new STLRX64(machInst, rt, rnsp); + } + case 0xd: + switch (size) { + case 0x0: + return new LDARB64(machInst, rt, rnsp); + case 0x1: + return new LDARH64(machInst, rt, rnsp); + case 0x2: + return new LDARW64(machInst, rt, rnsp); + case 0x3: + return new LDARX64(machInst, rt, rnsp); + } + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 31)) { + return new Unknown64(machInst); + } else { + return decodeNeonMem(machInst); + } + case 0x1: + { + if (bits(machInst, 24) != 0) + return new Unknown64(machInst); + uint8_t switchVal = (bits(machInst, 26) << 0) | + (bits(machInst, 31, 30) << 1); + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + switch (switchVal) { + case 0x0: + return new LDRWL64_LIT(machInst, rt, imm); + case 0x1: + return new LDRSFP64_LIT(machInst, rt, imm); + case 0x2: + return new LDRXL64_LIT(machInst, rt, imm); + case 0x3: + return new LDRDFP64_LIT(machInst, rt, imm); + case 0x4: + return new LDRSWL64_LIT(machInst, rt, imm); + case 0x5: + return new BigFpMemLit("ldr", machInst, rt, imm); + case 0x6: + return new PRFM64_LIT(machInst, rt, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + uint8_t opc = bits(machInst, 31, 30); + if (opc >= 3) + return new Unknown64(machInst); + uint32_t size = 0; + bool fp = bits(machInst, 26); + bool load = bits(machInst, 22); + if (fp) { + size = 4 << opc; + } else { + if ((opc == 1) && !load) + return new Unknown64(machInst); + size = (opc == 0 || opc == 1) ? 4 : 8; + } + uint8_t type = bits(machInst, 24, 23); + int64_t imm = sext<7>(bits(machInst, 21, 15)) * size; + + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rt2 = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + + bool noAlloc = (type == 0); + bool signExt = !noAlloc && !fp && opc == 1; + PairMemOp::AddrMode mode; + const char *mnemonic = NULL; + switch (type) { + case 0x0: + case 0x2: + mode = PairMemOp::AddrMd_Offset; + break; + case 0x1: + mode = PairMemOp::AddrMd_PostIndex; + break; + case 0x3: + mode = PairMemOp::AddrMd_PreIndex; + break; + default: + return new Unknown64(machInst); + } + if (load) { + if (noAlloc) + mnemonic = "ldnp"; + else if (signExt) + mnemonic = "ldpsw"; + else + mnemonic = "ldp"; + } else { + if (noAlloc) + mnemonic = "stnp"; + else + mnemonic = "stp"; + } + + return new LdpStp(mnemonic, machInst, size, fp, load, noAlloc, + signExt, false, false, imm, mode, rn, rt, rt2); + } + // bit 29:27=111, 25=0 + case 0x3: + { + uint8_t switchVal = (bits(machInst, 23, 22) << 0) | + (bits(machInst, 26) << 2) | + (bits(machInst, 31, 30) << 3); + if (bits(machInst, 24) == 1) { + uint64_t imm12 = bits(machInst, 21, 10); + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + switch (switchVal) { + case 0x00: + return new STRB64_IMM(machInst, rt, rnsp, imm12); + case 0x01: + return new LDRB64_IMM(machInst, rt, rnsp, imm12); + case 0x02: + return new LDRSBX64_IMM(machInst, rt, rnsp, imm12); + case 0x03: + return new LDRSBW64_IMM(machInst, rt, rnsp, imm12); + case 0x04: + return new STRBFP64_IMM(machInst, rt, rnsp, imm12); + case 0x05: + return new LDRBFP64_IMM(machInst, rt, rnsp, imm12); + case 0x06: + return new BigFpMemImm("str", machInst, false, + rt, rnsp, imm12 << 4); + case 0x07: + return new BigFpMemImm("ldr", machInst, true, + rt, rnsp, imm12 << 4); + case 0x08: + return new STRH64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x09: + return new LDRH64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0a: + return new LDRSHX64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0b: + return new LDRSHW64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0c: + return new STRHFP64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0d: + return new LDRHFP64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x10: + return new STRW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x11: + return new LDRW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x12: + return new LDRSW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x14: + return new STRSFP64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x15: + return new LDRSFP64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x18: + return new STRX64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x19: + return new LDRX64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1a: + return new PRFM64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1c: + return new STRDFP64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1d: + return new LDRDFP64_IMM(machInst, rt, rnsp, imm12 << 3); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 11, 10) != 0x2) + return new Unknown64(machInst); + if (!bits(machInst, 14)) + return new Unknown64(machInst); + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + ArmExtendType type = + (ArmExtendType)(uint32_t)bits(machInst, 15, 13); + uint8_t s = bits(machInst, 12); + switch (switchVal) { + case 0x00: + return new STRB64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x01: + return new LDRB64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x02: + return new LDRSBX64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x03: + return new LDRSBW64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x04: + return new STRBFP64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x05: + return new LDRBFP64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x6: + return new BigFpMemReg("str", machInst, false, + rt, rnsp, rm, type, s * 4); + case 0x7: + return new BigFpMemReg("ldr", machInst, true, + rt, rnsp, rm, type, s * 4); + case 0x08: + return new STRH64_REG(machInst, rt, rnsp, rm, type, s); + case 0x09: + return new LDRH64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0a: + return new LDRSHX64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0b: + return new LDRSHW64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0c: + return new STRHFP64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0d: + return new LDRHFP64_REG(machInst, rt, rnsp, rm, type, s); + case 0x10: + return new STRW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x11: + return new LDRW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x12: + return new LDRSW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x14: + return new STRSFP64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x15: + return new LDRSFP64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x18: + return new STRX64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x19: + return new LDRX64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1a: + return new PRFM64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1c: + return new STRDFP64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1d: + return new LDRDFP64_REG(machInst, rt, rnsp, rm, type, s * 3); + default: + return new Unknown64(machInst); + } + } else { + // bit 29:27=111, 25:24=00, 21=0 + switch (bits(machInst, 11, 10)) { + case 0x0: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STURB64_IMM(machInst, rt, rnsp, imm); + case 0x01: + return new LDURB64_IMM(machInst, rt, rnsp, imm); + case 0x02: + return new LDURSBX64_IMM(machInst, rt, rnsp, imm); + case 0x03: + return new LDURSBW64_IMM(machInst, rt, rnsp, imm); + case 0x04: + return new STURBFP64_IMM(machInst, rt, rnsp, imm); + case 0x05: + return new LDURBFP64_IMM(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemImm("stur", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemImm("ldur", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STURH64_IMM(machInst, rt, rnsp, imm); + case 0x09: + return new LDURH64_IMM(machInst, rt, rnsp, imm); + case 0x0a: + return new LDURSHX64_IMM(machInst, rt, rnsp, imm); + case 0x0b: + return new LDURSHW64_IMM(machInst, rt, rnsp, imm); + case 0x0c: + return new STURHFP64_IMM(machInst, rt, rnsp, imm); + case 0x0d: + return new LDURHFP64_IMM(machInst, rt, rnsp, imm); + case 0x10: + return new STURW64_IMM(machInst, rt, rnsp, imm); + case 0x11: + return new LDURW64_IMM(machInst, rt, rnsp, imm); + case 0x12: + return new LDURSW64_IMM(machInst, rt, rnsp, imm); + case 0x14: + return new STURSFP64_IMM(machInst, rt, rnsp, imm); + case 0x15: + return new LDURSFP64_IMM(machInst, rt, rnsp, imm); + case 0x18: + return new STURX64_IMM(machInst, rt, rnsp, imm); + case 0x19: + return new LDURX64_IMM(machInst, rt, rnsp, imm); + case 0x1a: + return new PRFUM64_IMM(machInst, rt, rnsp, imm); + case 0x1c: + return new STURDFP64_IMM(machInst, rt, rnsp, imm); + case 0x1d: + return new LDURDFP64_IMM(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + // bit 29:27=111, 25:24=00, 21=0, 11:10=01 + case 0x1: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STRB64_POST(machInst, rt, rnsp, imm); + case 0x01: + return new LDRB64_POST(machInst, rt, rnsp, imm); + case 0x02: + return new LDRSBX64_POST(machInst, rt, rnsp, imm); + case 0x03: + return new LDRSBW64_POST(machInst, rt, rnsp, imm); + case 0x04: + return new STRBFP64_POST(machInst, rt, rnsp, imm); + case 0x05: + return new LDRBFP64_POST(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemPost("str", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemPost("ldr", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STRH64_POST(machInst, rt, rnsp, imm); + case 0x09: + return new LDRH64_POST(machInst, rt, rnsp, imm); + case 0x0a: + return new LDRSHX64_POST(machInst, rt, rnsp, imm); + case 0x0b: + return new LDRSHW64_POST(machInst, rt, rnsp, imm); + case 0x0c: + return new STRHFP64_POST(machInst, rt, rnsp, imm); + case 0x0d: + return new LDRHFP64_POST(machInst, rt, rnsp, imm); + case 0x10: + return new STRW64_POST(machInst, rt, rnsp, imm); + case 0x11: + return new LDRW64_POST(machInst, rt, rnsp, imm); + case 0x12: + return new LDRSW64_POST(machInst, rt, rnsp, imm); + case 0x14: + return new STRSFP64_POST(machInst, rt, rnsp, imm); + case 0x15: + return new LDRSFP64_POST(machInst, rt, rnsp, imm); + case 0x18: + return new STRX64_POST(machInst, rt, rnsp, imm); + case 0x19: + return new LDRX64_POST(machInst, rt, rnsp, imm); + case 0x1c: + return new STRDFP64_POST(machInst, rt, rnsp, imm); + case 0x1d: + return new LDRDFP64_POST(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STTRB64_IMM(machInst, rt, rnsp, imm); + case 0x01: + return new LDTRB64_IMM(machInst, rt, rnsp, imm); + case 0x02: + return new LDTRSBX64_IMM(machInst, rt, rnsp, imm); + case 0x03: + return new LDTRSBW64_IMM(machInst, rt, rnsp, imm); + case 0x08: + return new STTRH64_IMM(machInst, rt, rnsp, imm); + case 0x09: + return new LDTRH64_IMM(machInst, rt, rnsp, imm); + case 0x0a: + return new LDTRSHX64_IMM(machInst, rt, rnsp, imm); + case 0x0b: + return new LDTRSHW64_IMM(machInst, rt, rnsp, imm); + case 0x10: + return new STTRW64_IMM(machInst, rt, rnsp, imm); + case 0x11: + return new LDTRW64_IMM(machInst, rt, rnsp, imm); + case 0x12: + return new LDTRSW64_IMM(machInst, rt, rnsp, imm); + case 0x18: + return new STTRX64_IMM(machInst, rt, rnsp, imm); + case 0x19: + return new LDTRX64_IMM(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + case 0x3: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STRB64_PRE(machInst, rt, rnsp, imm); + case 0x01: + return new LDRB64_PRE(machInst, rt, rnsp, imm); + case 0x02: + return new LDRSBX64_PRE(machInst, rt, rnsp, imm); + case 0x03: + return new LDRSBW64_PRE(machInst, rt, rnsp, imm); + case 0x04: + return new STRBFP64_PRE(machInst, rt, rnsp, imm); + case 0x05: + return new LDRBFP64_PRE(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemPre("str", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemPre("ldr", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STRH64_PRE(machInst, rt, rnsp, imm); + case 0x09: + return new LDRH64_PRE(machInst, rt, rnsp, imm); + case 0x0a: + return new LDRSHX64_PRE(machInst, rt, rnsp, imm); + case 0x0b: + return new LDRSHW64_PRE(machInst, rt, rnsp, imm); + case 0x0c: + return new STRHFP64_PRE(machInst, rt, rnsp, imm); + case 0x0d: + return new LDRHFP64_PRE(machInst, rt, rnsp, imm); + case 0x10: + return new STRW64_PRE(machInst, rt, rnsp, imm); + case 0x11: + return new LDRW64_PRE(machInst, rt, rnsp, imm); + case 0x12: + return new LDRSW64_PRE(machInst, rt, rnsp, imm); + case 0x14: + return new STRSFP64_PRE(machInst, rt, rnsp, imm); + case 0x15: + return new LDRSFP64_PRE(machInst, rt, rnsp, imm); + case 0x18: + return new STRX64_PRE(machInst, rt, rnsp, imm); + case 0x19: + return new LDRX64_PRE(machInst, rt, rnsp, imm); + case 0x1c: + return new STRDFP64_PRE(machInst, rt, rnsp, imm); + case 0x1d: + return new LDRDFP64_PRE(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + } + } + } + } + return new FailUnimplemented("Unhandled Case1", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeDataProcReg(ExtMachInst machInst) + { + uint8_t switchVal = (bits(machInst, 28) << 1) | + (bits(machInst, 24) << 0); + switch (switchVal) { + case 0x0: + { + uint8_t switchVal = (bits(machInst, 21) << 0) | + (bits(machInst, 30, 29) << 1); + ArmShiftType type = (ArmShiftType)(uint8_t)bits(machInst, 23, 22); + uint8_t imm6 = bits(machInst, 15, 10); + bool sf = bits(machInst, 31); + if (!sf && (imm6 & 0x20)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + + switch (switchVal) { + case 0x0: + return new AndXSReg(machInst, rd, rn, rm, imm6, type); + case 0x1: + return new BicXSReg(machInst, rd, rn, rm, imm6, type); + case 0x2: + return new OrrXSReg(machInst, rd, rn, rm, imm6, type); + case 0x3: + return new OrnXSReg(machInst, rd, rn, rm, imm6, type); + case 0x4: + return new EorXSReg(machInst, rd, rn, rm, imm6, type); + case 0x5: + return new EonXSReg(machInst, rd, rn, rm, imm6, type); + case 0x6: + return new AndXSRegCc(machInst, rd, rn, rm, imm6, type); + case 0x7: + return new BicXSRegCc(machInst, rd, rn, rm, imm6, type); + } + } + case 0x1: + { + uint8_t switchVal = bits(machInst, 30, 29); + if (bits(machInst, 21) == 0) { + ArmShiftType type = + (ArmShiftType)(uint8_t)bits(machInst, 23, 22); + if (type == ROR) + return new Unknown64(machInst); + uint8_t imm6 = bits(machInst, 15, 10); + if (!bits(machInst, 31) && bits(imm6, 5)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (switchVal) { + case 0x0: + return new AddXSReg(machInst, rd, rn, rm, imm6, type); + case 0x1: + return new AddXSRegCc(machInst, rd, rn, rm, imm6, type); + case 0x2: + return new SubXSReg(machInst, rd, rn, rm, imm6, type); + case 0x3: + return new SubXSRegCc(machInst, rd, rn, rm, imm6, type); + } + } else { + if (bits(machInst, 23, 22) != 0 || bits(machInst, 12, 10) > 0x4) + return new Unknown64(machInst); + ArmExtendType type = + (ArmExtendType)(uint8_t)bits(machInst, 15, 13); + uint8_t imm3 = bits(machInst, 12, 10); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rdsp = makeSP(rd); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + + switch (switchVal) { + case 0x0: + return new AddXEReg(machInst, rdsp, rnsp, rm, type, imm3); + case 0x1: + return new AddXERegCc(machInst, rd, rnsp, rm, type, imm3); + case 0x2: + return new SubXEReg(machInst, rdsp, rnsp, rm, type, imm3); + case 0x3: + return new SubXERegCc(machInst, rd, rnsp, rm, type, imm3); + } + } + } + case 0x2: + { + if (bits(machInst, 21) == 1) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (bits(machInst, 23, 22)) { + case 0x0: + { + if (bits(machInst, 15, 10)) + return new Unknown64(machInst); + uint8_t switchVal = bits(machInst, 30, 29); + switch (switchVal) { + case 0x0: + return new AdcXSReg(machInst, rd, rn, rm, 0, LSL); + case 0x1: + return new AdcXSRegCc(machInst, rd, rn, rm, 0, LSL); + case 0x2: + return new SbcXSReg(machInst, rd, rn, rm, 0, LSL); + case 0x3: + return new SbcXSRegCc(machInst, rd, rn, rm, 0, LSL); + } + } + case 0x1: + { + if ((bits(machInst, 4) == 1) || + (bits(machInst, 10) == 1) || + (bits(machInst, 29) == 0)) { + return new Unknown64(machInst); + } + ConditionCode cond = + (ConditionCode)(uint8_t)bits(machInst, 15, 12); + uint8_t flags = bits(machInst, 3, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + if (bits(machInst, 11) == 0) { + IntRegIndex rm = + (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + if (bits(machInst, 30) == 0) { + return new CcmnReg64(machInst, rn, rm, cond, flags); + } else { + return new CcmpReg64(machInst, rn, rm, cond, flags); + } + } else { + uint8_t imm5 = bits(machInst, 20, 16); + if (bits(machInst, 30) == 0) { + return new CcmnImm64(machInst, rn, imm5, cond, flags); + } else { + return new CcmpImm64(machInst, rn, imm5, cond, flags); + } + } + } + case 0x2: + { + if (bits(machInst, 29) == 1 || + bits(machInst, 11) == 1) { + return new Unknown64(machInst); + } + uint8_t switchVal = (bits(machInst, 10) << 0) | + (bits(machInst, 30) << 1); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + ConditionCode cond = + (ConditionCode)(uint8_t)bits(machInst, 15, 12); + switch (switchVal) { + case 0x0: + return new Csel64(machInst, rd, rn, rm, cond); + case 0x1: + return new Csinc64(machInst, rd, rn, rm, cond); + case 0x2: + return new Csinv64(machInst, rd, rn, rm, cond); + case 0x3: + return new Csneg64(machInst, rd, rn, rm, cond); + } + } + case 0x3: + if (bits(machInst, 30) == 0) { + if (bits(machInst, 29) != 0) + return new Unknown64(machInst); + uint8_t switchVal = bits(machInst, 15, 10); + switch (switchVal) { + case 0x2: + return new Udiv64(machInst, rd, rn, rm); + case 0x3: + return new Sdiv64(machInst, rd, rn, rm); + case 0x8: + return new Lslv64(machInst, rd, rn, rm); + case 0x9: + return new Lsrv64(machInst, rd, rn, rm); + case 0xa: + return new Asrv64(machInst, rd, rn, rm); + case 0xb: + return new Rorv64(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } else { + if (bits(machInst, 20, 16) != 0 || + bits(machInst, 29) != 0) { + return new Unknown64(machInst); + } + uint8_t switchVal = bits(machInst, 15, 10); + switch (switchVal) { + case 0x0: + return new Rbit64(machInst, rd, rn); + case 0x1: + return new Rev1664(machInst, rd, rn); + case 0x2: + if (bits(machInst, 31) == 0) + return new Rev64(machInst, rd, rn); + else + return new Rev3264(machInst, rd, rn); + case 0x3: + if (bits(machInst, 31) != 1) + return new Unknown64(machInst); + return new Rev64(machInst, rd, rn); + case 0x4: + return new Clz64(machInst, rd, rn); + case 0x5: + return new Cls64(machInst, rd, rn); + } + } + } + } + case 0x3: + { + if (bits(machInst, 30, 29) != 0x0 || + (bits(machInst, 23, 21) != 0 && bits(machInst, 31) == 0)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex ra = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (bits(machInst, 23, 21)) { + case 0x0: + if (bits(machInst, 15) == 0) + return new Madd64(machInst, rd, ra, rn, rm); + else + return new Msub64(machInst, rd, ra, rn, rm); + case 0x1: + if (bits(machInst, 15) == 0) + return new Smaddl64(machInst, rd, ra, rn, rm); + else + return new Smsubl64(machInst, rd, ra, rn, rm); + case 0x2: + if (bits(machInst, 15) != 0) + return new Unknown64(machInst); + return new Smulh64(machInst, rd, rn, rm); + case 0x5: + if (bits(machInst, 15) == 0) + return new Umaddl64(machInst, rd, ra, rn, rm); + else + return new Umsubl64(machInst, rd, ra, rn, rm); + case 0x6: + if (bits(machInst, 15) != 0) + return new Unknown64(machInst); + return new Umulh64(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } + } + return new FailUnimplemented("Unhandled Case2", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeAdvSIMD(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 10) == 0) { + return decodeNeonIndexedElem(machInst); + } else if (bits(machInst, 23) == 1) { + return new Unknown64(machInst); + } else { + if (bits(machInst, 22, 19)) { + return decodeNeonShiftByImm(machInst); + } else { + return decodeNeonModImm(machInst); + } + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 10) == 1) { + return decodeNeon3Same(machInst); + } else if (bits(machInst, 11) == 0) { + return decodeNeon3Diff(machInst); + } else if (bits(machInst, 20, 17) == 0x0) { + return decodeNeon2RegMisc(machInst); + } else if (bits(machInst, 20, 17) == 0x8) { + return decodeNeonAcrossLanes(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 24) || + bits(machInst, 21) || + bits(machInst, 15)) { + return new Unknown64(machInst); + } else if (bits(machInst, 10) == 1) { + if (bits(machInst, 23, 22)) + return new Unknown64(machInst); + return decodeNeonCopy(machInst); + } else if (bits(machInst, 29) == 1) { + return decodeNeonExt(machInst); + } else if (bits(machInst, 11) == 1) { + return decodeNeonZipUzpTrn(machInst); + } else if (bits(machInst, 23, 22) == 0x0) { + return decodeNeonTblTbx(machInst); + } else { + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case3", machInst); + } +} +}}; + + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + // bit 30=0, 28:25=1111 + decodeFp(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + IntRegIndex ra = (IntRegIndex)(uint32_t)bits(machInst, 14, 10); + uint8_t switchVal = (bits(machInst, 23, 21) << 1) | + (bits(machInst, 15) << 0); + switch (switchVal) { + case 0x0: // FMADD Sd = Sa + Sn*Sm + return new FMAddS(machInst, rd, rn, rm, ra); + case 0x1: // FMSUB Sd = Sa + (-Sn)*Sm + return new FMSubS(machInst, rd, rn, rm, ra); + case 0x2: // FNMADD Sd = (-Sa) + (-Sn)*Sm + return new FNMAddS(machInst, rd, rn, rm, ra); + case 0x3: // FNMSUB Sd = (-Sa) + Sn*Sm + return new FNMSubS(machInst, rd, rn, rm, ra); + case 0x4: // FMADD Dd = Da + Dn*Dm + return new FMAddD(machInst, rd, rn, rm, ra); + case 0x5: // FMSUB Dd = Da + (-Dn)*Dm + return new FMSubD(machInst, rd, rn, rm, ra); + case 0x6: // FNMADD Dd = (-Da) + (-Dn)*Dm + return new FNMAddD(machInst, rd, rn, rm, ra); + case 0x7: // FNMSUB Dd = (-Da) + Dn*Dm + return new FNMSubD(machInst, rd, rn, rm, ra); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 21) == 0) { + bool s = bits(machInst, 29); + if (s) + return new Unknown64(machInst); + uint8_t switchVal = bits(machInst, 20, 16); + uint8_t type = bits(machInst, 23, 22); + uint8_t scale = bits(machInst, 15, 10); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + if (bits(machInst, 18, 17) == 3 && scale != 0) + return new Unknown64(machInst); + // 30:24=0011110, 21=0 + switch (switchVal) { + case 0x00: + return new FailUnimplemented("fcvtns", machInst); + case 0x01: + return new FailUnimplemented("fcvtnu", machInst); + case 0x02: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // SCVTF Sd = convertFromInt(Wn/(2^fbits)) + return new FcvtSFixedFpSW(machInst, rd, rn, scale); + case 1: // SCVTF Dd = convertFromInt(Wn/(2^fbits)) + return new FcvtSFixedFpDW(machInst, rd, rn, scale); + case 4: // SCVTF Sd = convertFromInt(Xn/(2^fbits)) + return new FcvtSFixedFpSX(machInst, rd, rn, scale); + case 5: // SCVTF Dd = convertFromInt(Xn/(2^fbits)) + return new FcvtSFixedFpDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x03: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // UCVTF Sd = convertFromInt(Wn/(2^fbits)) + return new FcvtUFixedFpSW(machInst, rd, rn, scale); + case 1: // UCVTF Dd = convertFromInt(Wn/(2^fbits)) + return new FcvtUFixedFpDW(machInst, rd, rn, scale); + case 4: // UCVTF Sd = convertFromInt(Xn/(2^fbits)) + return new FcvtUFixedFpSX(machInst, rd, rn, scale); + case 5: // UCVTF Dd = convertFromInt(Xn/(2^fbits)) + return new FcvtUFixedFpDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x04: + return new FailUnimplemented("fcvtas", machInst); + case 0x05: + return new FailUnimplemented("fcvtau", machInst); + case 0x08: + return new FailUnimplemented("fcvtps", machInst); + case 0x09: + return new FailUnimplemented("fcvtpu", machInst); + case 0x0e: + return new FailUnimplemented("fmov elem. to 64", machInst); + case 0x0f: + return new FailUnimplemented("fmov 64 bit", machInst); + case 0x10: + return new FailUnimplemented("fcvtms", machInst); + case 0x11: + return new FailUnimplemented("fcvtmu", machInst); + case 0x18: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // FCVTZS Wd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpSFixedSW(machInst, rd, rn, scale); + case 1: // FCVTZS Wd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpSFixedDW(machInst, rd, rn, scale); + case 4: // FCVTZS Xd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpSFixedSX(machInst, rd, rn, scale); + case 5: // FCVTZS Xd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpSFixedDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x19: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // FCVTZU Wd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpUFixedSW(machInst, rd, rn, scale); + case 1: // FCVTZU Wd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpUFixedDW(machInst, rd, rn, scale); + case 4: // FCVTZU Xd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpUFixedSX(machInst, rd, rn, scale); + case 5: // FCVTZU Xd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpUFixedDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + } + } else { + // 30=0, 28:24=11110, 21=1 + uint8_t type = bits(machInst, 23, 22); + uint8_t imm8 = bits(machInst, 20, 13); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + switch (bits(machInst, 11, 10)) { + case 0x0: + if (bits(machInst, 12) == 1) { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 9, 5)) { + return new Unknown64(machInst); + } + // 31:29=000, 28:24=11110, 21=1, 12:10=100 + if (type == 0) { + // FMOV S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,5) + // :imm8<5:0>:Zeros(19) + uint32_t imm = vfp_modified_imm(imm8, false); + return new FmovImmS(machInst, rd, imm); + } else if (type == 1) { + // FMOV D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,8) + // :imm8<5:0>:Zeros(48) + uint64_t imm = vfp_modified_imm(imm8, true); + return new FmovImmD(machInst, rd, imm); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 13) == 1) { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 15, 14) || + bits(machInst, 23) || + bits(machInst, 2, 0)) { + return new Unknown64(machInst); + } + uint8_t switchVal = (bits(machInst, 4, 3) << 0) | + (bits(machInst, 22) << 2); + IntRegIndex rm = (IntRegIndex)(uint32_t) + bits(machInst, 20, 16); + // 28:23=000111100, 21=1, 15:10=001000, 2:0=000 + switch (switchVal) { + case 0x0: + // FCMP flags = compareQuiet(Sn,Sm) + return new FCmpRegS(machInst, rn, rm); + case 0x1: + // FCMP flags = compareQuiet(Sn,0.0) + return new FCmpImmS(machInst, rn, 0); + case 0x2: + // FCMPE flags = compareSignaling(Sn,Sm) + return new FCmpERegS(machInst, rn, rm); + case 0x3: + // FCMPE flags = compareSignaling(Sn,0.0) + return new FCmpEImmS(machInst, rn, 0); + case 0x4: + // FCMP flags = compareQuiet(Dn,Dm) + return new FCmpRegD(machInst, rn, rm); + case 0x5: + // FCMP flags = compareQuiet(Dn,0.0) + return new FCmpImmD(machInst, rn, 0); + case 0x6: + // FCMPE flags = compareSignaling(Dn,Dm) + return new FCmpERegD(machInst, rn, rm); + case 0x7: + // FCMPE flags = compareSignaling(Dn,0.0) + return new FCmpEImmD(machInst, rn, 0); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 14) == 1) { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t opcode = bits(machInst, 20, 15); + // Bits 31:24=00011110, 21=1, 14:10=10000 + switch (opcode) { + case 0x0: + if (type == 0) + // FMOV Sd = Sn + return new FmovRegS(machInst, rd, rn); + else if (type == 1) + // FMOV Dd = Dn + return new FmovRegD(machInst, rd, rn); + break; + case 0x1: + if (type == 0) + // FABS Sd = abs(Sn) + return new FAbsS(machInst, rd, rn); + else if (type == 1) + // FABS Dd = abs(Dn) + return new FAbsD(machInst, rd, rn); + break; + case 0x2: + if (type == 0) + // FNEG Sd = -Sn + return new FNegS(machInst, rd, rn); + else if (type == 1) + // FNEG Dd = -Dn + return new FNegD(machInst, rd, rn); + break; + case 0x3: + if (type == 0) + // FSQRT Sd = sqrt(Sn) + return new FSqrtS(machInst, rd, rn); + else if (type == 1) + // FSQRT Dd = sqrt(Dn) + return new FSqrtD(machInst, rd, rn); + break; + case 0x4: + if (type == 1) + // FCVT Sd = convertFormat(Dn) + return new FcvtFpDFpS(machInst, rd, rn); + else if (type == 3) + // FCVT Sd = convertFormat(Hn) + return new FcvtFpHFpS(machInst, rd, rn); + break; + case 0x5: + if (type == 0) + // FCVT Dd = convertFormat(Sn) + return new FCvtFpSFpD(machInst, rd, rn); + else if (type == 3) + // FCVT Dd = convertFormat(Hn) + return new FcvtFpHFpD(machInst, rd, rn); + break; + case 0x7: + if (type == 0) + // FCVT Hd = convertFormat(Sn) + return new FcvtFpSFpH(machInst, rd, rn); + else if (type == 1) + // FCVT Hd = convertFormat(Dn) + return new FcvtFpDFpH(machInst, rd, rn); + break; + case 0x8: + if (type == 0) // FRINTN Sd = roundToIntegralTiesToEven(Sn) + return new FRIntNS(machInst, rd, rn); + else if (type == 1) // FRINTN Dd = roundToIntegralTiesToEven(Dn) + return new FRIntND(machInst, rd, rn); + break; + case 0x9: + if (type == 0) // FRINTP Sd = roundToIntegralTowardPlusInf(Sn) + return new FRIntPS(machInst, rd, rn); + else if (type == 1) // FRINTP Dd = roundToIntegralTowardPlusInf(Dn) + return new FRIntPD(machInst, rd, rn); + break; + case 0xa: + if (type == 0) // FRINTM Sd = roundToIntegralTowardMinusInf(Sn) + return new FRIntMS(machInst, rd, rn); + else if (type == 1) // FRINTM Dd = roundToIntegralTowardMinusInf(Dn) + return new FRIntMD(machInst, rd, rn); + break; + case 0xb: + if (type == 0) // FRINTZ Sd = roundToIntegralTowardZero(Sn) + return new FRIntZS(machInst, rd, rn); + else if (type == 1) // FRINTZ Dd = roundToIntegralTowardZero(Dn) + return new FRIntZD(machInst, rd, rn); + break; + case 0xc: + if (type == 0) // FRINTA Sd = roundToIntegralTiesToAway(Sn) + return new FRIntAS(machInst, rd, rn); + else if (type == 1) // FRINTA Dd = roundToIntegralTiesToAway(Dn) + return new FRIntAD(machInst, rd, rn); + break; + case 0xe: + if (type == 0) // FRINTX Sd = roundToIntegralExact(Sn) + return new FRIntXS(machInst, rd, rn); + else if (type == 1) // FRINTX Dd = roundToIntegralExact(Dn) + return new FRIntXD(machInst, rd, rn); + break; + case 0xf: + if (type == 0) // FRINTI Sd = roundToIntegral(Sn) + return new FRIntIS(machInst, rd, rn); + else if (type == 1) // FRINTI Dd = roundToIntegral(Dn) + return new FRIntID(machInst, rd, rn); + break; + default: + return new Unknown64(machInst); + } + return new Unknown64(machInst); + } else if (bits(machInst, 15) == 1) { + return new Unknown64(machInst); + } else { + if (bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t rmode = bits(machInst, 20, 19); + uint8_t switchVal1 = bits(machInst, 18, 16); + uint8_t switchVal2 = (type << 1) | bits(machInst, 31); + // 30:24=0011110, 21=1, 15:10=000000 + switch (switchVal1) { + case 0x0: + switch ((switchVal2 << 2) | rmode) { + case 0x0: //FCVTNS Wd = convertToIntExactTiesToEven(Sn) + return new FcvtFpSIntWSN(machInst, rd, rn); + case 0x1: //FCVTPS Wd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpSIntWSP(machInst, rd, rn); + case 0x2: //FCVTMS Wd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpSIntWSM(machInst, rd, rn); + case 0x3: //FCVTZS Wd = convertToIntExactTowardZero(Sn) + return new FcvtFpSIntWSZ(machInst, rd, rn); + case 0x4: //FCVTNS Xd = convertToIntExactTiesToEven(Sn) + return new FcvtFpSIntXSN(machInst, rd, rn); + case 0x5: //FCVTPS Xd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpSIntXSP(machInst, rd, rn); + case 0x6: //FCVTMS Xd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpSIntXSM(machInst, rd, rn); + case 0x7: //FCVTZS Xd = convertToIntExactTowardZero(Sn) + return new FcvtFpSIntXSZ(machInst, rd, rn); + case 0x8: //FCVTNS Wd = convertToIntExactTiesToEven(Dn) + return new FcvtFpSIntWDN(machInst, rd, rn); + case 0x9: //FCVTPS Wd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpSIntWDP(machInst, rd, rn); + case 0xA: //FCVTMS Wd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpSIntWDM(machInst, rd, rn); + case 0xB: //FCVTZS Wd = convertToIntExactTowardZero(Dn) + return new FcvtFpSIntWDZ(machInst, rd, rn); + case 0xC: //FCVTNS Xd = convertToIntExactTiesToEven(Dn) + return new FcvtFpSIntXDN(machInst, rd, rn); + case 0xD: //FCVTPS Xd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpSIntXDP(machInst, rd, rn); + case 0xE: //FCVTMS Xd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpSIntXDM(machInst, rd, rn); + case 0xF: //FCVTZS Xd = convertToIntExactTowardZero(Dn) + return new FcvtFpSIntXDZ(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x1: + switch ((switchVal2 << 2) | rmode) { + case 0x0: //FCVTNU Wd = convertToIntExactTiesToEven(Sn) + return new FcvtFpUIntWSN(machInst, rd, rn); + case 0x1: //FCVTPU Wd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpUIntWSP(machInst, rd, rn); + case 0x2: //FCVTMU Wd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpUIntWSM(machInst, rd, rn); + case 0x3: //FCVTZU Wd = convertToIntExactTowardZero(Sn) + return new FcvtFpUIntWSZ(machInst, rd, rn); + case 0x4: //FCVTNU Xd = convertToIntExactTiesToEven(Sn) + return new FcvtFpUIntXSN(machInst, rd, rn); + case 0x5: //FCVTPU Xd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpUIntXSP(machInst, rd, rn); + case 0x6: //FCVTMU Xd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpUIntXSM(machInst, rd, rn); + case 0x7: //FCVTZU Xd = convertToIntExactTowardZero(Sn) + return new FcvtFpUIntXSZ(machInst, rd, rn); + case 0x8: //FCVTNU Wd = convertToIntExactTiesToEven(Dn) + return new FcvtFpUIntWDN(machInst, rd, rn); + case 0x9: //FCVTPU Wd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpUIntWDP(machInst, rd, rn); + case 0xA: //FCVTMU Wd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpUIntWDM(machInst, rd, rn); + case 0xB: //FCVTZU Wd = convertToIntExactTowardZero(Dn) + return new FcvtFpUIntWDZ(machInst, rd, rn); + case 0xC: //FCVTNU Xd = convertToIntExactTiesToEven(Dn) + return new FcvtFpUIntXDN(machInst, rd, rn); + case 0xD: //FCVTPU Xd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpUIntXDP(machInst, rd, rn); + case 0xE: //FCVTMU Xd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpUIntXDM(machInst, rd, rn); + case 0xF: //FCVTZU Xd = convertToIntExactTowardZero(Dn) + return new FcvtFpUIntXDZ(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x2: + if (rmode != 0) + return new Unknown64(machInst); + switch (switchVal2) { + case 0: // SCVTF Sd = convertFromInt(Wn) + return new FcvtWSIntFpS(machInst, rd, rn); + case 1: // SCVTF Sd = convertFromInt(Xn) + return new FcvtXSIntFpS(machInst, rd, rn); + case 2: // SCVTF Dd = convertFromInt(Wn) + return new FcvtWSIntFpD(machInst, rd, rn); + case 3: // SCVTF Dd = convertFromInt(Xn) + return new FcvtXSIntFpD(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x3: + switch (switchVal2) { + case 0: // UCVTF Sd = convertFromInt(Wn) + return new FcvtWUIntFpS(machInst, rd, rn); + case 1: // UCVTF Sd = convertFromInt(Xn) + return new FcvtXUIntFpS(machInst, rd, rn); + case 2: // UCVTF Dd = convertFromInt(Wn) + return new FcvtWUIntFpD(machInst, rd, rn); + case 3: // UCVTF Dd = convertFromInt(Xn) + return new FcvtXUIntFpD(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x4: + if (rmode != 0) + return new Unknown64(machInst); + switch (switchVal2) { + case 0: // FCVTAS Wd = convertToIntExactTiesToAway(Sn) + return new FcvtFpSIntWSA(machInst, rd, rn); + case 1: // FCVTAS Xd = convertToIntExactTiesToAway(Sn) + return new FcvtFpSIntXSA(machInst, rd, rn); + case 2: // FCVTAS Wd = convertToIntExactTiesToAway(Dn) + return new FcvtFpSIntWDA(machInst, rd, rn); + case 3: // FCVTAS Wd = convertToIntExactTiesToAway(Dn) + return new FcvtFpSIntXDA(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x5: + switch (switchVal2) { + case 0: // FCVTAU Wd = convertToIntExactTiesToAway(Sn) + return new FcvtFpUIntWSA(machInst, rd, rn); + case 1: // FCVTAU Xd = convertToIntExactTiesToAway(Sn) + return new FcvtFpUIntXSA(machInst, rd, rn); + case 2: // FCVTAU Wd = convertToIntExactTiesToAway(Dn) + return new FcvtFpUIntWDA(machInst, rd, rn); + case 3: // FCVTAU Xd = convertToIntExactTiesToAway(Dn) + return new FcvtFpUIntXDA(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x06: + switch (switchVal2) { + case 0: // FMOV Wd = Sn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovRegCoreW(machInst, rd, rn); + case 3: // FMOV Xd = Dn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovRegCoreX(machInst, rd, rn); + case 5: // FMOV Xd = Vn<127:64> + if (rmode != 1) + return new Unknown64(machInst); + return new FmovURegCoreX(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + break; + case 0x07: + switch (switchVal2) { + case 0: // FMOV Sd = Wn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovCoreRegW(machInst, rd, rn); + case 3: // FMOV Xd = Dn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovCoreRegX(machInst, rd, rn); + case 5: // FMOV Xd = Vn<127:64> + if (rmode != 1) + return new Unknown64(machInst); + return new FmovUCoreRegX(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + break; + default: // Warning! missing cases in switch statement above, that still need to be added + return new Unknown64(machInst); + } + } + case 0x1: + { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 23)) { + return new Unknown64(machInst); + } + IntRegIndex rm = (IntRegIndex)(uint32_t) bits(machInst, 20, 16); + IntRegIndex rn = (IntRegIndex)(uint32_t) bits(machInst, 9, 5); + uint8_t imm = (IntRegIndex)(uint32_t) bits(machInst, 3, 0); + ConditionCode cond = + (ConditionCode)(uint8_t)(bits(machInst, 15, 12)); + uint8_t switchVal = (bits(machInst, 4) << 0) | + (bits(machInst, 22) << 1); + // 31:23=000111100, 21=1, 11:10=01 + switch (switchVal) { + case 0x0: + // FCCMP flags = if cond the compareQuiet(Sn,Sm) else #nzcv + return new FCCmpRegS(machInst, rn, rm, cond, imm); + case 0x1: + // FCCMP flags = if cond then compareSignaling(Sn,Sm) + // else #nzcv + return new FCCmpERegS(machInst, rn, rm, cond, imm); + case 0x2: + // FCCMP flags = if cond then compareQuiet(Dn,Dm) else #nzcv + return new FCCmpRegD(machInst, rn, rm, cond, imm); + case 0x3: + // FCCMP flags = if cond then compareSignaling(Dn,Dm) + // else #nzcv + return new FCCmpERegD(machInst, rn, rm, cond, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 23)) { + return new Unknown64(machInst); + } + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + uint8_t switchVal = (bits(machInst, 15, 12) << 0) | + (bits(machInst, 22) << 4); + switch (switchVal) { + case 0x00: // FMUL Sd = Sn * Sm + return new FMulS(machInst, rd, rn, rm); + case 0x10: // FMUL Dd = Dn * Dm + return new FMulD(machInst, rd, rn, rm); + case 0x01: // FDIV Sd = Sn / Sm + return new FDivS(machInst, rd, rn, rm); + case 0x11: // FDIV Dd = Dn / Dm + return new FDivD(machInst, rd, rn, rm); + case 0x02: // FADD Sd = Sn + Sm + return new FAddS(machInst, rd, rn, rm); + case 0x12: // FADD Dd = Dn + Dm + return new FAddD(machInst, rd, rn, rm); + case 0x03: // FSUB Sd = Sn - Sm + return new FSubS(machInst, rd, rn, rm); + case 0x13: // FSUB Dd = Dn - Dm + return new FSubD(machInst, rd, rn, rm); + case 0x04: // FMAX Sd = max(Sn, Sm) + return new FMaxS(machInst, rd, rn, rm); + case 0x14: // FMAX Dd = max(Dn, Dm) + return new FMaxD(machInst, rd, rn, rm); + case 0x05: // FMIN Sd = min(Sn, Sm) + return new FMinS(machInst, rd, rn, rm); + case 0x15: // FMIN Dd = min(Dn, Dm) + return new FMinD(machInst, rd, rn, rm); + case 0x06: // FMAXNM Sd = maxNum(Sn, Sm) + return new FMaxNMS(machInst, rd, rn, rm); + case 0x16: // FMAXNM Dd = maxNum(Dn, Dm) + return new FMaxNMD(machInst, rd, rn, rm); + case 0x07: // FMINNM Sd = minNum(Sn, Sm) + return new FMinNMS(machInst, rd, rn, rm); + case 0x17: // FMINNM Dd = minNum(Dn, Dm) + return new FMinNMD(machInst, rd, rn, rm); + case 0x08: // FNMUL Sd = -(Sn * Sm) + return new FNMulS(machInst, rd, rn, rm); + case 0x18: // FNMUL Dd = -(Dn * Dm) + return new FNMulD(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } + case 0x3: + { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t type = bits(machInst, 23, 22); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + ConditionCode cond = + (ConditionCode)(uint8_t)(bits(machInst, 15, 12)); + if (type == 0) // FCSEL Sd = if cond then Sn else Sm + return new FCSelS(machInst, rd, rn, rm, cond); + else if (type == 1) // FCSEL Dd = if cond then Dn else Dm + return new FCSelD(machInst, rd, rn, rm, cond); + else + return new Unknown64(machInst); + } + } + } + return new FailUnimplemented("Unhandled Case4", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeAdvSIMDScalar(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 10) == 0) { + return decodeNeonScIndexedElem(machInst); + } else if (bits(machInst, 23) == 0) { + return decodeNeonScShiftByImm(machInst); + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 10) == 1) { + return decodeNeonSc3Same(machInst); + } else if (bits(machInst, 11) == 0) { + return decodeNeonSc3Diff(machInst); + } else if (bits(machInst, 20, 17) == 0x0) { + return decodeNeonSc2RegMisc(machInst); + } else if (bits(machInst, 20, 17) == 0x8) { + return decodeNeonScPwise(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 23, 22) == 0 && + bits(machInst, 15) == 0 && + bits(machInst, 10) == 1) { + return decodeNeonScCopy(machInst); + } else { + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case6", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeFpAdvSIMD(ExtMachInst machInst) + { + + if (bits(machInst, 28) == 0) { + if (bits(machInst, 31) == 0) { + return decodeAdvSIMD(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 30) == 0) { + return decodeFp(machInst); + } else if (bits(machInst, 31) == 0) { + return decodeAdvSIMDScalar(machInst); + } else { + return new Unknown64(machInst); + } + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeGem5Ops(ExtMachInst machInst) + { + const uint32_t m5func = bits(machInst, 23, 16); + switch (m5func) { + case 0x00: return new Arm(machInst); + case 0x01: return new Quiesce(machInst); + case 0x02: return new QuiesceNs64(machInst); + case 0x03: return new QuiesceCycles64(machInst); + case 0x04: return new QuiesceTime64(machInst); + case 0x07: return new Rpns64(machInst); + case 0x09: return new WakeCPU64(machInst); + case 0x10: return new Deprecated_ivlb(machInst); + case 0x11: return new Deprecated_ivle(machInst); + case 0x20: return new Deprecated_exit (machInst); + case 0x21: return new M5exit64(machInst); + case 0x31: return new Loadsymbol(machInst); + case 0x30: return new Initparam64(machInst); + case 0x40: return new Resetstats64(machInst); + case 0x41: return new Dumpstats64(machInst); + case 0x42: return new Dumpresetstats64(machInst); + case 0x43: return new M5checkpoint64(machInst); + case 0x4F: return new M5writefile64(machInst); + case 0x50: return new M5readfile64(machInst); + case 0x51: return new M5break(machInst); + case 0x52: return new M5switchcpu(machInst); + case 0x53: return new M5addsymbol64(machInst); + case 0x54: return new M5panic(machInst); + case 0x5a: return new M5workbegin64(machInst); + case 0x5b: return new M5workend64(machInst); + default: return new Unknown64(machInst); + } + } +} +}}; + +def format Aarch64() {{ + decode_block = ''' + { + using namespace Aarch64; + if (bits(machInst, 27) == 0x0) { + if (bits(machInst, 28) == 0x0) + return new Unknown64(machInst); + else if (bits(machInst, 26) == 0) + // bit 28:26=100 + return decodeDataProcImm(machInst); + else + // bit 28:26=101 + return decodeBranchExcSys(machInst); + } else if (bits(machInst, 25) == 0) { + // bit 27=1, 25=0 + return decodeLoadsStores(machInst); + } else if (bits(machInst, 26) == 0) { + // bit 27:25=101 + return decodeDataProcReg(machInst); + } else if (bits(machInst, 24) == 1 && + bits(machInst, 31, 28) == 0xF) { + return decodeGem5Ops(machInst); + } else { + // bit 27:25=111 + return decodeFpAdvSIMD(machInst); + } + } + ''' +}}; diff --git a/src/arch/arm/isa/formats/branch.isa b/src/arch/arm/isa/formats/branch.isa index f1b17ec90..513506d31 100644 --- a/src/arch/arm/isa/formats/branch.isa +++ b/src/arch/arm/isa/formats/branch.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -101,7 +101,7 @@ def format Thumb16CondBranchAndSvc() {{ return new B(machInst, sext<9>(bits(machInst, 7, 0) << 1), (ConditionCode)(uint32_t)bits(machInst, 11, 8)); } else if (bits(machInst, 8)) { - return new Svc(machInst); + return new Svc(machInst, bits(machInst, 7, 0)); } else { // This space will not be allocated in the future. return new Unknown(machInst); @@ -127,7 +127,7 @@ def format Thumb32BranchesAndMiscCtrl() {{ // Permanently undefined. return new Unknown(machInst); } else { - return new WarnUnimplemented("smc", machInst); + return new Smc(machInst); } } else if ((op & 0x38) != 0x38) { const uint32_t s = bits(machInst, 26); @@ -141,20 +141,26 @@ def format Thumb32BranchesAndMiscCtrl() {{ return new B(machInst, imm, (ConditionCode)(uint32_t)bits(machInst, 25, 22)); } else { + // HIGH: 12-11=10, LOW: 15-14=00, 12=0 switch (op) { case 0x38: - { - const IntRegIndex rn = - (IntRegIndex)(uint32_t)bits(machInst, 19, 16); - const uint8_t byteMask = bits(machInst, 11, 8); - return new MsrCpsrReg(machInst, rn, byteMask); - } case 0x39: { const IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 19, 16); const uint8_t byteMask = bits(machInst, 11, 8); - return new MsrSpsrReg(machInst, rn, byteMask); + const bool r = bits(machInst, 20); + if (bits(machInst, 5)) { + const uint8_t sysM = (bits(machInst, 4) << 4) | + byteMask; + return new MsrBankedReg(machInst, rn, sysM, r); + } else { + if (r) { + return new MsrSpsrReg(machInst, rn, byteMask); + } else { + return new MsrCpsrReg(machInst, rn, byteMask); + } + } } case 0x3a: { @@ -196,11 +202,11 @@ def format Thumb32BranchesAndMiscCtrl() {{ case 0x2: return new Clrex(machInst); case 0x4: - return new Dsb(machInst); + return new Dsb(machInst, 0); case 0x5: - return new Dmb(machInst); + return new Dmb(machInst, 0); case 0x6: - return new Isb(machInst); + return new Isb(machInst, 0); default: break; } @@ -208,28 +214,44 @@ def format Thumb32BranchesAndMiscCtrl() {{ } case 0x3c: { - // On systems that don't support bxj, bxj == bx - return new BxReg(machInst, + return new BxjReg(machInst, (IntRegIndex)(uint32_t)bits(machInst, 19, 16), COND_UC); } case 0x3d: { const uint32_t imm32 = bits(machInst, 7, 0); - return new SubsImmPclr(machInst, INTREG_PC, INTREG_LR, - imm32, false); + if (imm32 == 0) { + return new Eret(machInst); + } else { + return new SubsImmPclr(machInst, INTREG_PC, + INTREG_LR, imm32, false); + } } case 0x3e: + case 0x3f: { + const IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 11, 8); - return new MrsCpsr(machInst, rd); + const bool r = bits(machInst, 20); + if (bits(machInst, 5)) { + const uint8_t sysM = (bits(machInst, 4) << 4) | + bits(machInst, 11, 8); + return new MrsBankedReg(machInst, rd, sysM, r); + } else { + if (r) { + return new MrsSpsr(machInst, rd); + } else { + return new MrsCpsr(machInst, rd); + } + } } - case 0x3f: + case 0xfe: { - const IntRegIndex rd = - (IntRegIndex)(uint32_t)bits(machInst, 11, 8); - return new MrsSpsr(machInst, rd); + uint32_t imm16 = (bits(machInst, 19, 16) << 12) | + (bits(machInst, 11, 0) << 0); + return new Hvc(machInst, imm16); } } break; diff --git a/src/arch/arm/isa/formats/formats.isa b/src/arch/arm/isa/formats/formats.isa index 90144c101..44e9c5b5e 100644 --- a/src/arch/arm/isa/formats/formats.isa +++ b/src/arch/arm/isa/formats/formats.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -44,6 +44,12 @@ //Include the basic format ##include "basic.isa" +//Include support for decoding AArch64 instructions +##include "aarch64.isa" + +//Include support for decoding AArch64 NEON instructions +##include "neon64.isa" + //Include support for predicated instructions ##include "pred.isa" diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa index 6d779e541..ccd4589a3 100644 --- a/src/arch/arm/isa/formats/fp.isa +++ b/src/arch/arm/isa/formats/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -151,8 +151,7 @@ let {{ if (singleAll) { size = bits(machInst, 7, 6); bool t = bits(machInst, 5); - unsigned eBytes = (1 << size); - align = (eBytes - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; if (width == 1) { regs = t ? 2 : 1; inc = 1; @@ -164,7 +163,7 @@ let {{ case 1: case 2: if (bits(machInst, 4)) - align = width * eBytes - 1; + align = size + width - 1; break; case 3: break; @@ -173,20 +172,19 @@ let {{ if (bits(machInst, 4) == 0) return new Unknown(machInst); size = 2; - align = 0xf; + align = 0x4; } else if (size == 2) { if (bits(machInst, 4)) - align = 7; + align = 0x3; } else { if (bits(machInst, 4)) - align = 4 * eBytes - 1; + align = size + 2; } break; } } else { size = bits(machInst, 11, 10); - unsigned eBytes = (1 << size); - align = (eBytes - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; regs = width; unsigned indexAlign = bits(machInst, 7, 4); // If width is 1, inc is always 1. That's overridden later. @@ -219,13 +217,13 @@ let {{ break; case 2: if (bits(indexAlign, 1, 0)) - align = 3; + align = 2; break; } break; case 2: if (bits(indexAlign, 0)) - align = (2 * eBytes) - 1; + align = size + 1; break; case 3: break; @@ -234,11 +232,11 @@ let {{ case 0: case 1: if (bits(indexAlign, 0)) - align = (4 * eBytes) - 1; + align = size + 2; break; case 2: if (bits(indexAlign, 0)) - align = (4 << bits(indexAlign, 1, 0)) - 1; + align = bits(indexAlign, 1, 0) + 2; break; } break; @@ -252,9 +250,9 @@ let {{ align = bits(machInst, 5, 4); if (align == 0) { // @align wasn't specified, so alignment can be turned off. - align = ((1 << size) - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; } else { - align = ((4 << align) - 1); + align = align + 2; } switch (width) { case 1: @@ -588,6 +586,23 @@ let {{ } } case 0xc: + if (b) { + if (!u) { + if (bits(c, 1) == 0) { + if (q) { + return new NVfmaQFp<float>(machInst, vd, vn, vm); + } else { + return new NVfmaDFp<float>(machInst, vd, vn, vm); + } + } else { + if (q) { + return new NVfmsQFp<float>(machInst, vd, vn, vm); + } else { + return new NVfmsDFp<float>(machInst, vd, vn, vm); + } + } + } + } return new Unknown(machInst); case 0xd: if (b) { @@ -1827,7 +1842,7 @@ let {{ break; case 0x1: { - if (offset == 0 || vd + offset/2 > NumFloatArchRegs) { + if (offset == 0 || vd + offset/2 > NumFloatV7ArchRegs) { break; } switch (bits(opcode, 1, 0)) { @@ -1951,8 +1966,9 @@ let {{ } else if (a == 0x7) { const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - uint32_t specReg = bits(machInst, 19, 16); - switch (specReg) { + uint32_t reg = bits(machInst, 19, 16); + uint32_t specReg; + switch (reg) { case 0: specReg = MISCREG_FPSID; break; @@ -1974,7 +1990,9 @@ let {{ if (specReg == MISCREG_FPSCR) { return new VmsrFpscr(machInst, (IntRegIndex)specReg, rt); } else { - return new Vmsr(machInst, (IntRegIndex)specReg, rt); + uint32_t iss = mcrMrcIssBuild(0, bits(machInst, 3, 0), rt, + reg, a, bits(machInst, 7, 5)); + return new Vmsr(machInst, (IntRegIndex)specReg, rt, iss); } } } else if (l == 0 && c == 1) { @@ -2041,8 +2059,9 @@ let {{ } else if (a == 7) { const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - uint32_t specReg = bits(machInst, 19, 16); - switch (specReg) { + uint32_t reg = bits(machInst, 19, 16); + uint32_t specReg; + switch (reg) { case 0: specReg = MISCREG_FPSID; break; @@ -2070,7 +2089,9 @@ let {{ } else if (specReg == MISCREG_FPSCR) { return new VmrsFpscr(machInst, rt, (IntRegIndex)specReg); } else { - return new Vmrs(machInst, rt, (IntRegIndex)specReg); + uint32_t iss = mcrMrcIssBuild(l, bits(machInst, 3, 0), rt, + reg, a, bits(machInst, 7, 5)); + return new Vmrs(machInst, rt, (IntRegIndex)specReg, iss); } } } else { @@ -2235,6 +2256,44 @@ let {{ } } break; + case 0x9: + if ((opc3 & 0x1) == 0) { + if (single) { + return decodeVfpRegRegRegOp<VfnmaS>( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp<VfnmaD>( + machInst, vd, vn, vm, true); + } + } else { + if (single) { + return decodeVfpRegRegRegOp<VfnmsS>( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp<VfnmsD>( + machInst, vd, vn, vm, true); + } + } + break; + case 0xa: + if ((opc3 & 0x1) == 0) { + if (single) { + return decodeVfpRegRegRegOp<VfmaS>( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp<VfmaD>( + machInst, vd, vn, vm, true); + } + } else { + if (single) { + return decodeVfpRegRegRegOp<VfmsS>( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp<VfmsD>( + machInst, vd, vn, vm, true); + } + } + break; case 0xb: if ((opc3 & 0x1) == 0) { const uint32_t baseImm = diff --git a/src/arch/arm/isa/formats/mem.isa b/src/arch/arm/isa/formats/mem.isa index f7830eff3..abac27021 100644 --- a/src/arch/arm/isa/formats/mem.isa +++ b/src/arch/arm/isa/formats/mem.isa @@ -282,7 +282,7 @@ def format Thumb32SrsRfe() {{ } } else { const uint32_t mode = bits(machInst, 4, 0); - if (badMode((OperatingMode)mode)) + if (badMode32((OperatingMode)mode)) return new Unknown(machInst); if (!add && !wb) { return new %(srs)s(machInst, mode, diff --git a/src/arch/arm/isa/formats/misc.isa b/src/arch/arm/isa/formats/misc.isa index 00a37d17b..647f9846d 100644 --- a/src/arch/arm/isa/formats/misc.isa +++ b/src/arch/arm/isa/formats/misc.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -36,19 +36,42 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Authors: Gabe Black +// Giacomo Gabrielli + +def format ArmERet() {{ + decode_block = "return new Eret(machInst);" +}}; def format Svc() {{ - decode_block = "return new Svc(machInst);" + decode_block = "return new Svc(machInst, bits(machInst, 23, 0));" +}}; + +def format ArmSmcHyp() {{ + decode_block = ''' + { + if (bits(machInst, 21)) + { + return new Smc(machInst); + } else { + uint32_t imm16 = (bits(machInst, 19, 8) << 4) | + (bits(machInst, 3, 0) << 0); + return new Hvc(machInst, imm16); + } + } + ''' }}; def format ArmMsrMrs() {{ decode_block = ''' { const uint8_t byteMask = bits(machInst, 19, 16); + const uint8_t sysM = byteMask | (bits(machInst, 8) << 4); const IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 3, 0); const IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); const uint32_t opcode = bits(machInst, 24, 21); const bool useImm = bits(machInst, 25); + const bool r = bits(machInst, 22); + const bool isBanked = bits(machInst, 9); const uint32_t unrotated = bits(machInst, 7, 0); const uint32_t rotation = (bits(machInst, 11, 8) << 1); @@ -56,20 +79,36 @@ def format ArmMsrMrs() {{ switch (opcode) { case 0x8: - return new MrsCpsr(machInst, rd); + if (isBanked) { + return new MrsBankedReg(machInst, rd, sysM, r!=0); + } else { + return new MrsCpsr(machInst, rd); + } case 0x9: if (useImm) { return new MsrCpsrImm(machInst, imm, byteMask); } else { - return new MsrCpsrReg(machInst, rn, byteMask); + if (isBanked) { + return new MsrBankedReg(machInst, rn, sysM, r!=0); + } else { + return new MsrCpsrReg(machInst, rn, byteMask); + } } case 0xa: - return new MrsSpsr(machInst, rd); + if (isBanked) { + return new MrsBankedReg(machInst, rd, sysM, r!=0); + } else { + return new MrsSpsr(machInst, rd); + } case 0xb: if (useImm) { return new MsrSpsrImm(machInst, imm, byteMask); } else { - return new MsrSpsrReg(machInst, rn, byteMask); + if (isBanked) { + return new MsrBankedReg(machInst, rn, sysM, r!=0); + } else { + return new MsrSpsrReg(machInst, rn, byteMask); + } } default: return new Unknown(machInst); @@ -99,16 +138,17 @@ let {{ switch (miscReg) { case MISCREG_NOP: return new NopInst(machInst); - case NUM_MISCREGS: + case MISCREG_CP14_UNIMPL: return new FailUnimplemented( csprintf("miscreg crn:%d opc1:%d crm:%d opc2:%d %s unknown", crn, opc1, crm, opc2, isRead ? "read" : "write").c_str(), machInst); default: + uint32_t iss = mcrMrcIssBuild(isRead, crm, rt, crn, opc1, opc2); if (isRead) { - return new Mrc14(machInst, rt, (IntRegIndex)miscReg); + return new Mrc14(machInst, rt, (IntRegIndex)miscReg, iss); } else { - return new Mcr14(machInst, (IntRegIndex)miscReg, rt); + return new Mcr14(machInst, (IntRegIndex)miscReg, rt, iss); } } } @@ -123,8 +163,8 @@ def format McrMrc14() {{ let {{ header_output = ''' - StaticInstPtr - decodeMcrMrc15(ExtMachInst machInst); + StaticInstPtr decodeMcrMrc14(ExtMachInst machInst); + StaticInstPtr decodeMcrMrc15(ExtMachInst machInst); ''' decoder_output = ''' StaticInstPtr @@ -136,107 +176,50 @@ let {{ const uint32_t crm = bits(machInst, 3, 0); const MiscRegIndex miscReg = decodeCP15Reg(crn, opc1, crm, opc2); const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - const bool isRead = bits(machInst, 20); + uint32_t iss = mcrMrcIssBuild(isRead, crm, rt, crn, opc1, opc2); switch (miscReg) { case MISCREG_NOP: return new NopInst(machInst); - case NUM_MISCREGS: + case MISCREG_CP15_UNIMPL: return new FailUnimplemented( csprintf("miscreg crn:%d opc1:%d crm:%d opc2:%d %s unknown", crn, opc1, crm, opc2, isRead ? "read" : "write").c_str(), machInst); - case MISCREG_DCCISW: - return new WarnUnimplemented( - isRead ? "mrc dccisw" : "mcr dcisw", machInst); - case MISCREG_DCCIMVAC: - return new WarnUnimplemented( - isRead ? "mrc dccimvac" : "mcr dccimvac", machInst); - case MISCREG_DCIMVAC: - return new WarnUnimplemented( - isRead ? "mrc dcimvac" : "mcr dcimvac", machInst); case MISCREG_DCCMVAC: return new FlushPipeInst( isRead ? "mrc dccmvac" : "mcr dccmvac", machInst); - case MISCREG_DCCMVAU: - return new WarnUnimplemented( - isRead ? "mrc dccmvau" : "mcr dccmvau", machInst); case MISCREG_CP15ISB: - return new Isb(machInst); + return new Isb(machInst, iss); case MISCREG_CP15DSB: - return new Dsb(machInst); + return new Dsb(machInst, iss); case MISCREG_CP15DMB: - return new Dmb(machInst); - case MISCREG_ICIALLUIS: - return new WarnUnimplemented( - isRead ? "mrc icialluis" : "mcr icialluis", machInst); - case MISCREG_ICIMVAU: - return new WarnUnimplemented( - isRead ? "mrc icimvau" : "mcr icimvau", machInst); - case MISCREG_BPIMVA: - return new WarnUnimplemented( - isRead ? "mrc bpimva" : "mcr bpimva", machInst); - case MISCREG_BPIALLIS: - return new WarnUnimplemented( - isRead ? "mrc bpiallis" : "mcr bpiallis", machInst); - case MISCREG_BPIALL: - return new WarnUnimplemented( - isRead ? "mrc bpiall" : "mcr bpiall", machInst); - case MISCREG_L2LATENCY: - return new WarnUnimplemented( - isRead ? "mrc l2latency" : "mcr l2latency", machInst); - case MISCREG_CRN15: - return new WarnUnimplemented( - isRead ? "mrc crn15" : "mcr crn15", machInst); - - // Write only. - case MISCREG_TLBIALLIS: - case MISCREG_TLBIMVAIS: - case MISCREG_TLBIASIDIS: - case MISCREG_TLBIMVAAIS: - case MISCREG_ITLBIALL: - case MISCREG_ITLBIMVA: - case MISCREG_ITLBIASID: - case MISCREG_DTLBIALL: - case MISCREG_DTLBIMVA: - case MISCREG_DTLBIASID: - case MISCREG_TLBIALL: - case MISCREG_TLBIMVA: - case MISCREG_TLBIASID: - case MISCREG_TLBIMVAA: - if (isRead) { - return new Unknown(machInst); - } else { - return new Mcr15(machInst, (IntRegIndex)miscReg, rt); - } + return new Dmb(machInst, iss); + default: + if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + isRead ? "mrc" : "mcr", miscRegName[miscReg]); + warn("\\tinstruction '%s' unimplemented\\n", full_mnem); - // Read only in user mode. - case MISCREG_TPIDRURO: - if (isRead) { - return new Mrc15User(machInst, rt, (IntRegIndex)miscReg); - } else { - return new Mcr15(machInst, (IntRegIndex)miscReg, rt); + // Remove the warn flag and set the implemented flag. This + // prevents the instruction warning a second time, it also + // means the instruction is actually generated. Actually + // creating the instruction to access an register that isn't + // implemented sounds a bit silly, but its required to get + // the correct behaviour for hyp traps and undef exceptions. + miscRegInfo[miscReg][MISCREG_IMPLEMENTED] = true; + miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL] = false; } - // Read/write in user mode. - case MISCREG_TPIDRURW: - if (isRead) { - return new Mrc15User(machInst, rt, (IntRegIndex)miscReg); + if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) { + if (isRead) + return new Mrc15(machInst, rt, (IntRegIndex)miscReg, iss); + return new Mcr15(machInst, (IntRegIndex)miscReg, rt, iss); } else { - return new Mcr15User(machInst, (IntRegIndex)miscReg, rt); - } - - // Read/write, priveleged only. - default: - if (miscReg >= MISCREG_CP15_UNIMP_START) return new FailUnimplemented(csprintf("%s %s", isRead ? "mrc" : "mcr", miscRegName[miscReg]).c_str(), machInst); - if (isRead) { - return new Mrc15(machInst, rt, (IntRegIndex)miscReg); - } else { - return new Mcr15(machInst, (IntRegIndex)miscReg, rt); } } } @@ -248,3 +231,70 @@ def format McrMrc15() {{ return decodeMcrMrc15(machInst); ''' }}; + +let {{ + header_output = ''' + StaticInstPtr + decodeMcrrMrrc15(ExtMachInst machInst); + ''' + decoder_output = ''' + StaticInstPtr + decodeMcrrMrrc15(ExtMachInst machInst) + { + const uint32_t crm = bits(machInst, 3, 0); + const uint32_t opc1 = bits(machInst, 7, 4); + const MiscRegIndex miscReg = decodeCP15Reg64(crm, opc1); + const IntRegIndex rt = (IntRegIndex) (uint32_t) bits(machInst, 15, 12); + const IntRegIndex rt2 = (IntRegIndex) (uint32_t) bits(machInst, 19, 16); + + const bool isRead = bits(machInst, 20); + + switch (miscReg) { + case MISCREG_CP15_UNIMPL: + return new FailUnimplemented( + csprintf("miscreg crm:%d opc1:%d 64-bit %s unknown", + crm, opc1, isRead ? "read" : "write").c_str(), + machInst); + default: + if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + isRead ? "mrrc" : "mcrr", miscRegName[miscReg]); + warn("\\tinstruction '%s' unimplemented\\n", full_mnem); + + // Remove the warn flag and set the implemented flag. This + // prevents the instruction warning a second time, it also + // means the instruction is actually generated. Actually + // creating the instruction to access an register that isn't + // implemented sounds a bit silly, but its required to get + // the correct behaviour for hyp traps and undef exceptions. + miscRegInfo[miscReg][MISCREG_IMPLEMENTED] = true; + miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL] = false; + } + + if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) { + uint32_t iss = mcrrMrrcIssBuild(isRead, crm, rt, rt2, opc1); + + if (isRead) + return new Mrrc15(machInst, (IntRegIndex) miscReg, rt2, rt, iss); + return new Mcrr15(machInst, rt2, rt, (IntRegIndex) miscReg, iss); + } else { + return new FailUnimplemented(csprintf("%s %s", + isRead ? "mrrc" : "mcrr", miscRegName[miscReg]).c_str(), + machInst); + } + } + } + ''' +}}; + +def format Mcrr15() {{ + decode_block = ''' + return decodeMcrrMrrc15(machInst); + ''' +}}; + +def format Mrrc15() {{ + decode_block = ''' + return decodeMcrrMrrc15(machInst); + ''' +}}; diff --git a/src/arch/arm/isa/formats/neon64.isa b/src/arch/arm/isa/formats/neon64.isa new file mode 100644 index 000000000..72bbd0c60 --- /dev/null +++ b/src/arch/arm/isa/formats/neon64.isa @@ -0,0 +1,2626 @@ +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Giacomo Gabrielli +// Mbou Eyole + +output header {{ +namespace Aarch64 +{ + // AdvSIMD three same + StaticInstPtr decodeNeon3Same(ExtMachInst machInst); + // AdvSIMD three different + StaticInstPtr decodeNeon3Diff(ExtMachInst machInst); + // AdvSIMD two-reg misc + StaticInstPtr decodeNeon2RegMisc(ExtMachInst machInst); + // AdvSIMD across lanes + StaticInstPtr decodeNeonAcrossLanes(ExtMachInst machInst); + // AdvSIMD copy + StaticInstPtr decodeNeonCopy(ExtMachInst machInst); + // AdvSIMD vector x indexed element + StaticInstPtr decodeNeonIndexedElem(ExtMachInst machInst); + // AdvSIMD modified immediate + StaticInstPtr decodeNeonModImm(ExtMachInst machInst); + // AdvSIMD shift by immediate + StaticInstPtr decodeNeonShiftByImm(ExtMachInst machInst); + // AdvSIMD TBL/TBX + StaticInstPtr decodeNeonTblTbx(ExtMachInst machInst); + // AdvSIMD ZIP/UZP/TRN + StaticInstPtr decodeNeonZipUzpTrn(ExtMachInst machInst); + // AdvSIMD EXT + StaticInstPtr decodeNeonExt(ExtMachInst machInst); + + // AdvSIMD scalar three same + StaticInstPtr decodeNeonSc3Same(ExtMachInst machInst); + // AdvSIMD scalar three different + StaticInstPtr decodeNeonSc3Diff(ExtMachInst machInst); + // AdvSIMD scalar two-reg misc + StaticInstPtr decodeNeonSc2RegMisc(ExtMachInst machInst); + // AdvSIMD scalar pairwise + StaticInstPtr decodeNeonScPwise(ExtMachInst machInst); + // AdvSIMD scalar copy + StaticInstPtr decodeNeonScCopy(ExtMachInst machInst); + // AdvSIMD scalar x indexed element + StaticInstPtr decodeNeonScIndexedElem(ExtMachInst machInst); + // AdvSIMD scalar shift by immediate + StaticInstPtr decodeNeonScShiftByImm(ExtMachInst machInst); + + // AdvSIMD load/store + StaticInstPtr decodeNeonMem(ExtMachInst machInst); +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeNeon3Same(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + + switch (opcode) { + case 0x00: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UhaddDX, UhaddQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<ShaddDX, ShaddQX>( + q, size, machInst, vd, vn, vm); + case 0x01: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UqaddDX, UqaddQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SqaddDX, SqaddQX>( + q, size, machInst, vd, vn, vm); + case 0x02: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UrhaddDX, UrhaddQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SrhaddDX, SrhaddQX>( + q, size, machInst, vd, vn, vm); + case 0x03: + switch (size) { + case 0x0: + if (u) { + if (q) + return new EorQX<uint64_t>(machInst, vd, vn, vm); + else + return new EorDX<uint64_t>(machInst, vd, vn, vm); + } else { + if (q) + return new AndQX<uint64_t>(machInst, vd, vn, vm); + else + return new AndDX<uint64_t>(machInst, vd, vn, vm); + } + case 0x1: + if (u) { + if (q) + return new BslQX<uint64_t>(machInst, vd, vn, vm); + else + return new BslDX<uint64_t>(machInst, vd, vn, vm); + } else { + if (q) + return new BicQX<uint64_t>(machInst, vd, vn, vm); + else + return new BicDX<uint64_t>(machInst, vd, vn, vm); + } + case 0x2: + if (u) { + if (q) + return new BitQX<uint64_t>(machInst, vd, vn, vm); + else + return new BitDX<uint64_t>(machInst, vd, vn, vm); + } else { + if (q) + return new OrrQX<uint64_t>(machInst, vd, vn, vm); + else + return new OrrDX<uint64_t>(machInst, vd, vn, vm); + } + case 0x3: + if (u) { + if (q) + return new BifQX<uint64_t>(machInst, vd, vn, vm); + else + return new BifDX<uint64_t>(machInst, vd, vn, vm); + } else { + if (q) + return new OrnQX<uint64_t>(machInst, vd, vn, vm); + else + return new OrnDX<uint64_t>(machInst, vd, vn, vm); + } + } + case 0x04: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UhsubDX, UhsubQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<ShsubDX, ShsubQX>( + q, size, machInst, vd, vn, vm); + case 0x05: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UqsubDX, UqsubQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SqsubDX, SqsubQX>( + q, size, machInst, vd, vn, vm); + case 0x06: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<CmhiDX, CmhiQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<CmgtDX, CmgtQX>( + q, size, machInst, vd, vn, vm); + case 0x07: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<CmhsDX, CmhsQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<CmgeDX, CmgeQX>( + q, size, machInst, vd, vn, vm); + case 0x08: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UshlDX, UshlQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SshlDX, SshlQX>( + q, size, machInst, vd, vn, vm); + case 0x09: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UqshlDX, UqshlQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SqshlDX, SqshlQX>( + q, size, machInst, vd, vn, vm); + case 0x0a: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UrshlDX, UrshlQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SrshlDX, SrshlQX>( + q, size, machInst, vd, vn, vm); + case 0x0b: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<UqrshlDX, UqrshlQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg<SqrshlDX, SqrshlQX>( + q, size, machInst, vd, vn, vm); + case 0x0c: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmaxDX, UmaxQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmaxDX, SmaxQX>( + q, size, machInst, vd, vn, vm); + case 0x0d: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UminDX, UminQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SminDX, SminQX>( + q, size, machInst, vd, vn, vm); + case 0x0e: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UabdDX, UabdQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SabdDX, SabdQX>( + q, size, machInst, vd, vn, vm); + case 0x0f: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UabaDX, UabaQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SabaDX, SabaQX>( + q, size, machInst, vd, vn, vm); + case 0x10: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<SubDX, SubQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeXReg<AddDX, AddQX>( + q, size, machInst, vd, vn, vm); + case 0x11: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg<CmeqDX, CmeqQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeXReg<CmtstDX, CmtstQX>( + q, size, machInst, vd, vn, vm); + case 0x12: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<MlsDX, MlsQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg<MlaDX, MlaQX>( + q, size, machInst, vd, vn, vm); + case 0x13: + if (size == 0x3 || (size != 0x0 && bits(machInst, 29))) + return new Unknown64(machInst); + if (u) { + if (q) + return new PmulQX<uint8_t>(machInst, vd, vn, vm); + else + return new PmulDX<uint8_t>(machInst, vd, vn, vm); + } else { + return decodeNeonUThreeSReg<MulDX, MulQX>( + q, size, machInst, vd, vn, vm); + } + case 0x14: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmaxpDX, UmaxpQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmaxpDX, SmaxpQX>( + q, size, machInst, vd, vn, vm); + case 0x15: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UminpDX, UminpQX>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SminpDX, SminpQX>( + q, size, machInst, vd, vn, vm); + case 0x16: + if (size == 0x3 || size == 0x0) + return new Unknown64(machInst); + if (u) { + if (q) + return decodeNeonSThreeHAndWReg<SqrdmulhQX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg<SqrdmulhDX>( + size, machInst, vd, vn, vm); + } else { + if (q) + return decodeNeonSThreeHAndWReg<SqdmulhQX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg<SqdmulhDX>( + size, machInst, vd, vn, vm); + } + case 0x17: + if (u || size_q == 0x6) + return new Unknown64(machInst); + else + return decodeNeonUThreeXReg<AddpDX, AddpQX>( + q, size, machInst, vd, vn, vm); + case 0x18: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FmaxnmpDX, FmaxnmpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FmaxnmDX, FmaxnmQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg<FminnmpDX, FminnmpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FminnmDX, FminnmQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x19: + if (size < 0x2) { + if (u || sz_q == 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg<FmlaDX, FmlaQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u || sz_q == 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg<FmlsDX, FmlsQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FaddpDX, FaddpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FaddDX, FaddQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg<FabdDX, FabdQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FsubDX, FsubQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1b: + if (size < 0x2 && sz_q != 0x2) { + if (u) + return decodeNeonUThreeFpReg<FmulDX, FmulQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FmulxDX, FmulxQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + return new Unknown64(machInst); + } + case 0x1c: + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FcmgeDX, FcmgeQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FcmeqDX, FcmeqQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg<FcmgtDX, FcmgtQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1d: + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FacgeDX, FacgeQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } else { + if (u) + return decodeNeonUThreeFpReg<FacgtDX, FacgtQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1e: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FmaxpDX, FmaxpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FmaxDX, FmaxQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg<FminpDX, FminpQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FminDX, FminQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1f: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg<FdivDX, FdivQX>( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg<FrecpsDX, FrecpsQX>( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg<FrsqrtsDX, FrsqrtsQX>( + q, size & 0x1, machInst, vd, vn, vm); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeon3Diff(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x0: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UaddlX, Uaddl2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SaddlX, Saddl2X>( + q, size, machInst, vd, vn, vm); + case 0x1: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UaddwX, Uaddw2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SaddwX, Saddw2X>( + q, size, machInst, vd, vn, vm); + case 0x2: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UsublX, Usubl2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SsublX, Ssubl2X>( + q, size, machInst, vd, vn, vm); + case 0x3: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UsubwX, Usubw2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SsubwX, Ssubw2X>( + q, size, machInst, vd, vn, vm); + case 0x4: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<RaddhnX, Raddhn2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg<AddhnX, Addhn2X>( + q, size, machInst, vd, vn, vm); + case 0x5: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UabalX, Uabal2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SabalX, Sabal2X>( + q, size, machInst, vd, vn, vm); + case 0x6: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<RsubhnX, Rsubhn2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg<SubhnX, Subhn2X>( + q, size, machInst, vd, vn, vm); + case 0x7: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UabdlX, Uabdl2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SabdlX, Sabdl2X>( + q, size, machInst, vd, vn, vm); + case 0x8: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmlalX, Umlal2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmlalX, Smlal2X>( + q, size, machInst, vd, vn, vm); + case 0x9: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return decodeNeonSThreeHAndWReg<Sqdmlal2X>( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg<SqdmlalX>( + size, machInst, vd, vn, vm); + } + } + case 0xa: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmlslX, Umlsl2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmlslX, Smlsl2X>( + q, size, machInst, vd, vn, vm); + case 0xb: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return decodeNeonSThreeHAndWReg<Sqdmlsl2X>( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg<SqdmlslX>( + size, machInst, vd, vn, vm); + } + } + case 0xc: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg<UmullX, Umull2X>( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg<SmullX, Smull2X>( + q, size, machInst, vd, vn, vm); + case 0xd: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return decodeNeonSThreeHAndWReg<Sqdmull2X>( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg<SqdmullX>( + size, machInst, vd, vn, vm); + } + } + case 0xe: + if (u || size != 0) { + return new Unknown64(machInst); + } else { + if (q) + return new Pmull2X<uint8_t>(machInst, vd, vn, vm); + else + return new PmullX<uint8_t>(machInst, vd, vn, vm); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeon2RegMisc(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + uint8_t op = (uint8_t)((bits(machInst, 12) << 1) | + bits(machInst, 29)); + uint8_t switchVal = opcode | ((u ? 1 : 0) << 5); + + switch (switchVal) { + case 0x00: + if (op + size >= 3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<Rev64DX, Rev64QX>( + q, size, machInst, vd, vn); + case 0x01: + if (op + size >= 3) + return new Unknown64(machInst); + if (q) + return new Rev16QX<uint8_t>(machInst, vd, vn); + else + return new Rev16DX<uint8_t>(machInst, vd, vn); + case 0x02: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<SaddlpDX, SaddlpQX>( + q, size, machInst, vd, vn); + case 0x03: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonUTwoMiscXReg<SuqaddDX, SuqaddQX>( + q, size, machInst, vd, vn); + case 0x04: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<ClsDX, ClsQX>( + q, size, machInst, vd, vn); + case 0x05: + if (size != 0x0) + return new Unknown64(machInst); + if (q) + return new CntQX<uint8_t>(machInst, vd, vn); + else + return new CntDX<uint8_t>(machInst, vd, vn); + case 0x06: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<SadalpDX, SadalpQX>( + q, size, machInst, vd, vn); + case 0x07: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<SqabsDX, SqabsQX>( + q, size, machInst, vd, vn); + case 0x08: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmgtZeroDX, CmgtZeroQX>( + q, size, machInst, vd, vn); + case 0x09: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmeqZeroDX, CmeqZeroQX>( + q, size, machInst, vd, vn); + case 0x0a: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmltZeroDX, CmltZeroQX>( + q, size, machInst, vd, vn); + case 0x0b: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<AbsDX, AbsQX>( + q, size, machInst, vd, vn); + case 0x0c: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmgtZeroDX, FcmgtZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x0d: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmeqZeroDX, FcmeqZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x0e: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmltZeroDX, FcmltZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x0f: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FabsDX, FabsQX>( + q, size & 0x1, machInst, vd, vn); + case 0x12: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<XtnX, Xtn2X>( + q, size, machInst, vd, vn); + case 0x14: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<SqxtnX, Sqxtn2X>( + q, size, machInst, vd, vn); + case 0x16: + if (size > 0x1) + return new Unknown64(machInst); + if (q) { + if (size) + return new Fcvtn2X<uint32_t>(machInst, vd, vn); + else + return new Fcvtn2X<uint16_t>(machInst, vd, vn); + } else { + if (size) + return new FcvtnX<uint32_t>(machInst, vd, vn); + else + return new FcvtnX<uint16_t>(machInst, vd, vn); + } + case 0x17: + if (size > 0x1) + return new Unknown64(machInst); + if (q) { + if (size) + return new Fcvtl2X<uint32_t>(machInst, vd, vn); + else + return new Fcvtl2X<uint16_t>(machInst, vd, vn); + } else { + if (size) + return new FcvtlX<uint32_t>(machInst, vd, vn); + else + return new FcvtlX<uint16_t>(machInst, vd, vn); + } + case 0x18: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FrintnDX, FrintnQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FrintpDX, FrintpQX>( + q, size & 0x1, machInst, vd, vn); + case 0x19: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FrintmDX, FrintmQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FrintzDX, FrintzQX>( + q, size & 0x1, machInst, vd, vn); + case 0x1a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FcvtnsDX, FcvtnsQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FcvtpsDX, FcvtpsQX>( + q, size & 0x1, machInst, vd, vn); + case 0x1b: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FcvtmsDX, FcvtmsQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FcvtzsIntDX, FcvtzsIntQX>( + q, size & 0x1, machInst, vd, vn); + case 0x1c: + if (size < 0x2) { + if (sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcvtasDX, FcvtasQX>( + q, size & 0x1, machInst, vd, vn); + } else { + if (size & 0x1) + return new Unknown64(machInst); + if (q) + return new UrecpeQX<uint32_t>(machInst, vd, vn); + else + return new UrecpeDX<uint32_t>(machInst, vd, vn); + } + case 0x1d: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) { + if (size & 0x1) + return new ScvtfIntDQX<uint64_t>(machInst, vd, vn); + else + return new ScvtfIntSQX<uint32_t>(machInst, vd, vn); + } else { + if (size & 0x1) + return new Unknown(machInst); + else + return new ScvtfIntDX<uint32_t>(machInst, vd, vn); + } + } else { + return decodeNeonUTwoMiscFpReg<FrecpeDX, FrecpeQX>( + q, size & 0x1, machInst, vd, vn); + } + case 0x20: + if (op + size >= 3) + return new Unknown64(machInst); + if (q) { + if (size & 0x1) + return new Rev32QX<uint16_t>(machInst, vd, vn); + else + return new Rev32QX<uint8_t>(machInst, vd, vn); + } else { + if (size & 0x1) + return new Rev32DX<uint16_t>(machInst, vd, vn); + else + return new Rev32DX<uint8_t>(machInst, vd, vn); + } + case 0x22: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<UaddlpDX, UaddlpQX>( + q, size, machInst, vd, vn); + case 0x23: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonUTwoMiscXReg<UsqaddDX, UsqaddQX>( + q, size, machInst, vd, vn); + return new Unknown64(machInst); + case 0x24: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<ClzDX, ClzQX>( + q, size, machInst, vd, vn); + case 0x25: + if (size == 0x0) { + if (q) + return new MvnQX<uint64_t>(machInst, vd, vn); + else + return new MvnDX<uint64_t>(machInst, vd, vn); + } else if (size == 0x1) { + if (q) + return new RbitQX<uint8_t>(machInst, vd, vn); + else + return new RbitDX<uint8_t>(machInst, vd, vn); + } else { + return new Unknown64(machInst); + } + case 0x26: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<UadalpDX, UadalpQX>( + q, size, machInst, vd, vn); + case 0x27: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<SqnegDX, SqnegQX>( + q, size, machInst, vd, vn); + case 0x28: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmgeZeroDX, CmgeZeroQX>( + q, size, machInst, vd, vn); + case 0x29: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<CmleZeroDX, CmleZeroQX>( + q, size, machInst, vd, vn); + case 0x2b: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg<NegDX, NegQX>( + q, size, machInst, vd, vn); + case 0x2c: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmgeZeroDX, FcmgeZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x2d: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FcmleZeroDX, FcmleZeroQX>( + q, size & 0x1, machInst, vd, vn); + case 0x2f: + if (size < 0x2 || size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FnegDX, FnegQX>( + q, size & 0x1, machInst, vd, vn); + case 0x32: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg<SqxtunX, Sqxtun2X>( + q, size, machInst, vd, vn); + case 0x33: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<ShllX, Shll2X>( + q, size, machInst, vd, vn); + case 0x34: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg<UqxtnX, Uqxtn2X>( + q, size, machInst, vd, vn); + case 0x36: + if (size != 0x1) + return new Unknown64(machInst); + if (q) + return new Fcvtxn2X<uint32_t>(machInst, vd, vn); + else + return new FcvtxnX<uint32_t>(machInst, vd, vn); + case 0x38: + if (size > 0x1 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FrintaDX, FrintaQX>( + q, size & 0x1, machInst, vd, vn); + case 0x39: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FrintxDX, FrintxQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FrintiDX, FrintiQX>( + q, size & 0x1, machInst, vd, vn); + case 0x3a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FcvtnuDX, FcvtnuQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FcvtpuDX, FcvtpuQX>( + q, size & 0x1, machInst, vd, vn); + case 0x3b: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<FcvtmuDX, FcvtmuQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FcvtzuIntDX, FcvtzuIntQX>( + q, size & 0x1, machInst, vd, vn); + case 0x3c: + if (size < 0x2) { + return decodeNeonUTwoMiscFpReg<FcvtauDX, FcvtauQX>( + q, size & 0x1, machInst, vd, vn); + } else if (size == 0x2) { + if (q) + return new UrsqrteQX<uint32_t>(machInst, vd, vn); + else + return new UrsqrteDX<uint32_t>(machInst, vd, vn); + } else { + return new Unknown64(machInst); + } + case 0x3d: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg<UcvtfIntDX, UcvtfIntQX>( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg<FrsqrteDX, FrsqrteQX>( + q, size & 0x1, machInst, vd, vn); + case 0x3f: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg<FsqrtDX, FsqrtQX>( + q, size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonAcrossLanes(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + uint8_t switchVal = opcode | ((u ? 1 : 0) << 5); + + switch (switchVal) { + case 0x03: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesLongReg<SaddlvDX, SaddlvQX, + SaddlvBQX>( + q, size, machInst, vd, vn); + case 0x0a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesReg<SmaxvDX, SmaxvQX>( + q, size, machInst, vd, vn); + case 0x1a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesReg<SminvDX, SminvQX>( + q, size, machInst, vd, vn); + case 0x1b: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg<AddvDX, AddvQX>( + q, size, machInst, vd, vn); + case 0x23: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesLongReg<UaddlvDX, UaddlvQX, + UaddlvBQX>( + q, size, machInst, vd, vn); + case 0x2a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg<UmaxvDX, UmaxvQX>( + q, size, machInst, vd, vn); + case 0x2c: + if (sz_q != 0x1) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) + return new FmaxnmvQX<uint32_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } else { + if (q) + return new FminnmvQX<uint32_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x2f: + if (sz_q != 0x1) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) + return new FmaxvQX<uint32_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } else { + if (q) + return new FminvQX<uint32_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x3a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg<UminvDX, UminvQX>( + q, size, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonCopy(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op = bits(machInst, 29); + uint8_t imm5 = bits(machInst, 20, 16); + uint8_t imm4 = bits(machInst, 14, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t imm5_pos = findLsbSet(imm5); + uint8_t index1 = 0, index2 = 0; + + if (op) { + if (!q || (imm4 & mask(imm5_pos))) + return new Unknown64(machInst); + + index1 = bits(imm5, 4, imm5_pos + 1); // dst + index2 = bits(imm4, 3, imm5_pos); // src + + switch (imm5_pos) { + case 0: + return new InsElemX<uint8_t>(machInst, vd, vn, index1, index2); + case 1: + return new InsElemX<uint16_t>(machInst, vd, vn, index1, index2); + case 2: + return new InsElemX<uint32_t>(machInst, vd, vn, index1, index2); + case 3: + return new InsElemX<uint64_t>(machInst, vd, vn, index1, index2); + default: + return new Unknown64(machInst); + } + } + + switch (imm4) { + case 0x0: + index1 = bits(imm5, 4, imm5_pos + 1); + switch (imm5_pos) { + case 0: + if (q) + return new DupElemQX<uint8_t>(machInst, vd, vn, index1); + else + return new DupElemDX<uint8_t>(machInst, vd, vn, index1); + case 1: + if (q) + return new DupElemQX<uint16_t>(machInst, vd, vn, index1); + else + return new DupElemDX<uint16_t>(machInst, vd, vn, index1); + case 2: + if (q) + return new DupElemQX<uint32_t>(machInst, vd, vn, index1); + else + return new DupElemDX<uint32_t>(machInst, vd, vn, index1); + case 3: + if (q) + return new DupElemQX<uint64_t>(machInst, vd, vn, index1); + else + return new Unknown64(machInst); + default: + return new Unknown64(machInst); + } + case 0x1: + switch (imm5) { + case 0x1: + if (q) + return new DupGprWQX<uint8_t>(machInst, vd, vn); + else + return new DupGprWDX<uint8_t>(machInst, vd, vn); + case 0x2: + if (q) + return new DupGprWQX<uint16_t>(machInst, vd, vn); + else + return new DupGprWDX<uint16_t>(machInst, vd, vn); + case 0x4: + if (q) + return new DupGprWQX<uint32_t>(machInst, vd, vn); + else + return new DupGprWDX<uint32_t>(machInst, vd, vn); + case 0x8: + if (q) + return new DupGprXQX<uint64_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x3: + index1 = imm5 >> (imm5_pos + 1); + switch (imm5_pos) { + case 0: + return new InsGprWX<uint8_t>(machInst, vd, vn, index1); + case 1: + return new InsGprWX<uint16_t>(machInst, vd, vn, index1); + case 2: + return new InsGprWX<uint32_t>(machInst, vd, vn, index1); + case 3: + return new InsGprXX<uint64_t>(machInst, vd, vn, index1); + default: + return new Unknown64(machInst); + } + case 0x5: + index1 = bits(imm5, 4, imm5_pos + 1); + switch (imm5_pos) { + case 0: + if (q) + return new SmovXX<int8_t>(machInst, vd, vn, index1); + else + return new SmovWX<int8_t>(machInst, vd, vn, index1); + case 1: + if (q) + return new SmovXX<int16_t>(machInst, vd, vn, index1); + else + return new SmovWX<int16_t>(machInst, vd, vn, index1); + case 2: + if (q) + return new SmovXX<int32_t>(machInst, vd, vn, index1); + else + return new Unknown64(machInst); + default: + return new Unknown64(machInst); + } + case 0x7: + index1 = imm5 >> (imm5_pos + 1); + + if ((q && imm5_pos != 3) || (!q && imm5_pos >= 3)) + return new Unknown64(machInst); + + switch (imm5_pos) { + case 0: + return new UmovWX<uint8_t>(machInst, vd, vn, index1); + case 1: + return new UmovWX<uint16_t>(machInst, vd, vn, index1); + case 2: + return new UmovWX<uint32_t>(machInst, vd, vn, index1); + case 3: + return new UmovXX<uint64_t>(machInst, vd, vn, index1); + default: + return new Unknown64(machInst); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonIndexedElem(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t L = bits(machInst, 21); + uint8_t M = bits(machInst, 20); + uint8_t opcode = bits(machInst, 15, 12); + uint8_t H = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm_bf = (IntRegIndex) (uint8_t) bits(machInst, 19, 16); + + uint8_t index = 0; + uint8_t index_fp = 0; + uint8_t vmh = 0; + uint8_t sz = size & 0x1; + uint8_t sz_q = (sz << 1) | bits(machInst, 30); + uint8_t sz_L = (sz << 1) | L; + + // Index and 2nd register operand for integer instructions + if (size == 0x1) { + index = (H << 2) | (L << 1) | M; + // vmh = 0; + } else if (size == 0x2) { + index = (H << 1) | L; + vmh = M; + } + IntRegIndex vm = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + // Index and 2nd register operand for FP instructions + vmh = M; + if ((size & 0x1) == 0) { + index_fp = (H << 1) | L; + } else if (L == 0) { + index_fp = H; + } + IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + switch (opcode) { + case 0x0: + if (!u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmHAndWReg<MlaElemDX, MlaElemQX>( + q, size, machInst, vd, vn, vm, index); + case 0x1: + if (!u && size >= 2 && sz_q != 0x2 && sz_L != 0x3) + return decodeNeonUThreeImmFpReg<FmlaElemDX, FmlaElemQX>( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return new Unknown64(machInst); + case 0x2: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg<UmlalElemX, UmlalElem2X>( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg<SmlalElemX, SmlalElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0x3: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmlalElemX, + SqdmlalElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0x4: + if (u && !(size == 0x0 || size == 0x3)) + return decodeNeonUThreeImmHAndWReg<MlsElemDX, MlsElemQX>( + q, size, machInst, vd, vn, vm, index); + else + return new Unknown64(machInst); + case 0x5: + if (!u && size >= 0x2 && sz_L != 0x3 && sz_q != 0x2) + return decodeNeonUThreeImmFpReg<FmlsElemDX, FmlsElemQX>( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return new Unknown64(machInst); + case 0x6: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg<UmlslElemX, UmlslElem2X>( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg<SmlslElemX, SmlslElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0x7: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmlslElemX, + SqdmlslElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0x8: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmHAndWReg<MulElemDX, MulElemQX>( + q, size, machInst, vd, vn, vm, index); + case 0x9: + if (size >= 2 && sz_q != 0x2 && sz_L != 0x3) { + if (u) + return decodeNeonUThreeImmFpReg<FmulxElemDX, FmulxElemQX>( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return decodeNeonUThreeImmFpReg<FmulElemDX, FmulElemQX>( + q, sz, machInst, vd, vn, vm_fp, index_fp); + } else { + return new Unknown64(machInst); + } + case 0xa: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg<UmullElemX, UmullElem2X>( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg<SmullElemX, SmullElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0xb: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmullElemX, SqdmullElem2X>( + q, size, machInst, vd, vn, vm, index); + case 0xc: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmulhElemDX, SqdmulhElemQX>( + q, size, machInst, vd, vn, vm, index); + case 0xd: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX, SqrdmulhElemQX>( + q, size, machInst, vd, vn, vm, index); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonModImm(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op = bits(machInst, 29); + uint8_t abcdefgh = (bits(machInst, 18, 16) << 5) | + bits(machInst, 9, 5); + uint8_t cmode = bits(machInst, 15, 12); + uint8_t o2 = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + + if (o2 == 0x1 || (op == 0x1 && cmode == 0xf && !q)) + return new Unknown64(machInst); + + bool immValid = true; + const uint64_t bigImm = simd_modified_imm(op, cmode, abcdefgh, + immValid, + true /* isAarch64 */); + if (!immValid) { + return new Unknown(machInst); + } + + if (op) { + if (bits(cmode, 3) == 0) { + if (bits(cmode, 0) == 0) { + if (q) + return new MvniQX<uint64_t>(machInst, vd, bigImm); + else + return new MvniDX<uint64_t>(machInst, vd, bigImm); + } else { + if (q) + return new BicImmQX<uint64_t>(machInst, vd, bigImm); + else + return new BicImmDX<uint64_t>(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 2) == 1) { + switch (bits(cmode, 1, 0)) { + case 0: + case 1: + if (q) + return new MvniQX<uint64_t>(machInst, vd, bigImm); + else + return new MvniDX<uint64_t>(machInst, vd, bigImm); + case 2: + if (q) + return new MoviQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + case 3: + if (q) + return new FmovQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 0) == 0) { + if (q) + return new MvniQX<uint64_t>(machInst, vd, bigImm); + else + return new MvniDX<uint64_t>(machInst, vd, bigImm); + } else { + if (q) + return new BicImmQX<uint64_t>(machInst, vd, + bigImm); + else + return new BicImmDX<uint64_t>(machInst, vd, + bigImm); + } + } + } + } else { + if (bits(cmode, 3) == 0) { + if (bits(cmode, 0) == 0) { + if (q) + return new MoviQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + } else { + if (q) + return new OrrImmQX<uint64_t>(machInst, vd, bigImm); + else + return new OrrImmDX<uint64_t>(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 2) == 1) { + if (bits(cmode, 1, 0) == 0x3) { + if (q) + return new FmovQX<uint32_t>(machInst, vd, bigImm); + else + return new FmovDX<uint32_t>(machInst, vd, bigImm); + } else { + if (q) + return new MoviQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 0) == 0) { + if (q) + return new MoviQX<uint64_t>(machInst, vd, bigImm); + else + return new MoviDX<uint64_t>(machInst, vd, bigImm); + } else { + if (q) + return new OrrImmQX<uint64_t>(machInst, vd, + bigImm); + else + return new OrrImmDX<uint64_t>(machInst, vd, bigImm); + } + } + } + } + return new Unknown(machInst); + } + + StaticInstPtr + decodeNeonShiftByImm(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t immh = bits(machInst, 22, 19); + uint8_t immb = bits(machInst, 18, 16); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t immh3 = bits(machInst, 22); + uint8_t immh3_q = (immh3 << 1) | q; + uint8_t op_u = (bits(machInst, 12) << 1) | u; + uint8_t size = findMsbSet(immh); + int shiftAmt = 0; + + switch (opcode) { + case 0x00: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg<UshrDX, UshrQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SshrDX, SshrQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x02: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg<UsraDX, UsraQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SsraDX, SsraQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x04: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg<UrshrDX, UrshrQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SrshrDX, SrshrQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x06: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg<UrsraDX, UrsraQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SrsraDX, SrsraQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x08: + if (u && !(immh3_q == 0x2)) { + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + return decodeNeonUTwoShiftXReg<SriDX, SriQX>( + q, size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0a: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftXReg<SliDX, SliQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftXReg<ShlDX, ShlQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x0c: + if (u && !(immh3_q == 0x2 || op_u == 0x0)) { + shiftAmt = ((immh << 3) | immb) - (8 << size); + return decodeNeonSTwoShiftXReg<SqshluDX, SqshluQX>( + q, size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0e: + if (immh3_q == 0x2 || op_u == 0x0) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftXReg<UqshlImmDX, UqshlImmQX>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg<SqshlImmDX, SqshlImmQX>( + q, size, machInst, vd, vn, shiftAmt); + case 0x10: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonSTwoShiftSReg<SqshrunX, Sqshrun2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftSReg<ShrnX, Shrn2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x11: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonSTwoShiftSReg<SqrshrunX, Sqrshrun2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftSReg<RshrnX, Rshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x12: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftSReg<UqshrnX, Uqshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg<SqshrnX, Sqshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x13: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftSReg<UqrshrnX, Uqrshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg<SqrshrnX, Sqrshrn2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x14: + if (immh3) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftSReg<UshllX, Ushll2X>( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg<SshllX, Sshll2X>( + q, size, machInst, vd, vn, shiftAmt); + case 0x1c: + if (immh < 0x4 || immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) { + return decodeNeonUTwoShiftFpReg<UcvtfFixedDX, UcvtfFixedQX>( + q, size & 0x1, machInst, vd, vn, shiftAmt); + } else { + if (q) { + if (size & 0x1) + return new ScvtfFixedDQX<uint64_t>(machInst, vd, vn, + shiftAmt); + else + return new ScvtfFixedSQX<uint32_t>(machInst, vd, vn, + shiftAmt); + } else { + if (size & 0x1) + return new Unknown(machInst); + else + return new ScvtfFixedDX<uint32_t>(machInst, vd, vn, + shiftAmt); + } + } + case 0x1f: + if (immh < 0x4 || immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftFpReg<FcvtzuFixedDX, FcvtzuFixedQX>( + q, size & 0x1, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftFpReg<FcvtzsFixedDX, FcvtzsFixedQX>( + q, size & 0x1, machInst, vd, vn, shiftAmt); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonTblTbx(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t switchVal = bits(machInst, 14, 12); + + switch (switchVal) { + case 0x0: + if (q) + return new Tbl1QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbl1DX<uint8_t>(machInst, vd, vn, vm); + case 0x1: + if (q) + return new Tbx1QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbx1DX<uint8_t>(machInst, vd, vn, vm); + case 0x2: + if (q) + return new Tbl2QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbl2DX<uint8_t>(machInst, vd, vn, vm); + case 0x3: + if (q) + return new Tbx2QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbx2DX<uint8_t>(machInst, vd, vn, vm); + case 0x4: + if (q) + return new Tbl3QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbl3DX<uint8_t>(machInst, vd, vn, vm); + case 0x5: + if (q) + return new Tbx3QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbx3DX<uint8_t>(machInst, vd, vn, vm); + case 0x6: + if (q) + return new Tbl4QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbl4DX<uint8_t>(machInst, vd, vn, vm); + case 0x7: + if (q) + return new Tbx4QX<uint8_t>(machInst, vd, vn, vm); + else + return new Tbx4DX<uint8_t>(machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeNeonZipUzpTrn(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 14, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x1: + return decodeNeonUThreeXReg<Uzp1DX, Uzp1QX>( + q, size, machInst, vd, vn, vm); + case 0x2: + return decodeNeonUThreeXReg<Trn1DX, Trn1QX>( + q, size, machInst, vd, vn, vm); + case 0x3: + return decodeNeonUThreeXReg<Zip1DX, Zip1QX>( + q, size, machInst, vd, vn, vm); + case 0x5: + return decodeNeonUThreeXReg<Uzp2DX, Uzp2QX>( + q, size, machInst, vd, vn, vm); + case 0x6: + return decodeNeonUThreeXReg<Trn2DX, Trn2QX>( + q, size, machInst, vd, vn, vm); + case 0x7: + return decodeNeonUThreeXReg<Zip2DX, Zip2QX>( + q, size, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + return new Unknown64(machInst); + } + + StaticInstPtr + decodeNeonExt(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op2 = bits(machInst, 23, 22); + uint8_t imm4 = bits(machInst, 14, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + if (op2 != 0 || (q == 0x0 && bits(imm4, 3) == 0x1)) + return new Unknown64(machInst); + + uint8_t index = q ? imm4 : imm4 & 0x7; + + if (q) { + return new ExtQX<uint8_t>(machInst, vd, vn, vm, index); + } else { + return new ExtDX<uint8_t>(machInst, vd, vn, vm, index); + } + } + + StaticInstPtr + decodeNeonSc3Same(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 11); + uint8_t s = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x01: + if (u) + return decodeNeonUThreeUReg<UqaddScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg<SqaddScX>( + size, machInst, vd, vn, vm); + case 0x05: + if (u) + return decodeNeonUThreeUReg<UqsubScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg<SqsubScX>( + size, machInst, vd, vn, vm); + case 0x06: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmhiDX<uint64_t>(machInst, vd, vn, vm); + else + return new CmgtDX<int64_t>(machInst, vd, vn, vm); + case 0x07: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmhsDX<uint64_t>(machInst, vd, vn, vm); + else + return new CmgeDX<int64_t>(machInst, vd, vn, vm); + case 0x08: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return new UshlDX<uint64_t>(machInst, vd, vn, vm); + else + return new SshlDX<int64_t>(machInst, vd, vn, vm); + case 0x09: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeUReg<UqshlScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg<SqshlScX>( + size, machInst, vd, vn, vm); + case 0x0a: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return new UrshlDX<uint64_t>(machInst, vd, vn, vm); + else + return new SrshlDX<int64_t>(machInst, vd, vn, vm); + case 0x0b: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeUReg<UqrshlScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg<SqrshlScX>( + size, machInst, vd, vn, vm); + case 0x10: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new SubDX<uint64_t>(machInst, vd, vn, vm); + else + return new AddDX<uint64_t>(machInst, vd, vn, vm); + case 0x11: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmeqDX<uint64_t>(machInst, vd, vn, vm); + else + return new CmtstDX<uint64_t>(machInst, vd, vn, vm); + case 0x16: + if (size == 0x3 || size == 0x0) + return new Unknown64(machInst); + if (u) + return decodeNeonSThreeHAndWReg<SqrdmulhScX>( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg<SqdmulhScX>( + size, machInst, vd, vn, vm); + case 0x1a: + if (!u || size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeScFpReg<FabdScX>( + size & 0x1, machInst, vd, vn, vm); + case 0x1b: + if (u || size > 0x1) + return new Unknown64(machInst); + else + return decodeNeonUThreeScFpReg<FmulxScX>( + size & 0x1, machInst, vd, vn, vm); + case 0x1c: + if (size < 0x2) { + if (u) + return decodeNeonUThreeScFpReg<FcmgeScX>( + size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeScFpReg<FcmeqScX>( + size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeScFpReg<FcmgtScX>( + size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1d: + if (!u) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUThreeScFpReg<FacgeScX>( + size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeScFpReg<FacgtScX>( + size & 0x1, machInst, vd, vn, vm); + case 0x1f: + if (u) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUThreeScFpReg<FrecpsScX>( + size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeScFpReg<FrsqrtsScX>( + size & 0x1, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonSc3Diff(ExtMachInst machInst) + { + if (bits(machInst, 29)) + return new Unknown64(machInst); + + uint8_t size = bits(machInst, 23, 22); + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + + uint8_t opcode = bits(machInst, 15, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x9: + return decodeNeonSThreeHAndWReg<SqdmlalScX>(size, machInst, vd, vn, vm); + case 0xb: + return decodeNeonSThreeHAndWReg<SqdmlslScX>(size, machInst, vd, vn, vm); + case 0xd: + return decodeNeonSThreeHAndWReg<SqdmullScX>(size, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonSc2RegMisc(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t switchVal = opcode | ((u ? 1 : 0) << 5); + switch (switchVal) { + case 0x03: + return decodeNeonUTwoMiscUReg<SuqaddScX>(size, machInst, vd, vn); + case 0x07: + return decodeNeonSTwoMiscUReg<SqabsScX>(size, machInst, vd, vn); + case 0x08: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmgtZeroDX<int64_t>(machInst, vd, vn); + case 0x09: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmeqZeroDX<int64_t>(machInst, vd, vn); + case 0x0a: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmltZeroDX<int64_t>(machInst, vd, vn); + case 0x0b: + if (size != 0x3) + return new Unknown64(machInst); + else + return new AbsDX<int64_t>(machInst, vd, vn); + case 0x0c: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmgtZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x0d: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmeqZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x0e: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmltZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x14: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new SqxtnScX<int8_t>(machInst, vd, vn); + case 0x1: + return new SqxtnScX<int16_t>(machInst, vd, vn); + case 0x2: + return new SqxtnScX<int32_t>(machInst, vd, vn); + } + } + case 0x1a: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtnsScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FcvtpsScX>( + size & 0x1, machInst, vd, vn); + case 0x1b: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtmsScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FcvtzsIntScX>( + size & 0x1, machInst, vd, vn); + case 0x1c: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtasScX>( + size & 0x1, machInst, vd, vn); + else + return new Unknown64(machInst); + case 0x1d: + if (size < 0x2) { + if (size & 0x1) + return new ScvtfIntScDX<uint64_t>(machInst, vd, vn); + else + return new ScvtfIntScSX<uint32_t>(machInst, vd, vn); + } else { + return decodeNeonUTwoMiscScFpReg<FrecpeScX>( + size & 0x1, machInst, vd, vn); + } + case 0x1f: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FrecpxX>( + size & 0x1, machInst, vd, vn); + case 0x23: + return decodeNeonUTwoMiscUReg<UsqaddScX>(size, machInst, vd, vn); + case 0x27: + return decodeNeonSTwoMiscUReg<SqnegScX>(size, machInst, vd, vn); + case 0x28: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmgeZeroDX<int64_t>(machInst, vd, vn); + case 0x29: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmleZeroDX<int64_t>(machInst, vd, vn); + case 0x2b: + if (size != 0x3) + return new Unknown64(machInst); + else + return new NegDX<int64_t>(machInst, vd, vn); + case 0x2c: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmgeZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x2d: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg<FcmleZeroScX>( + size & 0x1, machInst, vd, vn); + case 0x32: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new SqxtunScX<int8_t>(machInst, vd, vn); + case 0x1: + return new SqxtunScX<int16_t>(machInst, vd, vn); + case 0x2: + return new SqxtunScX<int32_t>(machInst, vd, vn); + } + } + case 0x34: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new UqxtnScX<uint8_t>(machInst, vd, vn); + case 0x1: + return new UqxtnScX<uint16_t>(machInst, vd, vn); + case 0x2: + return new UqxtnScX<uint32_t>(machInst, vd, vn); + } + } + case 0x36: + if (size != 0x1) { + return new Unknown64(machInst); + } else { + return new FcvtxnScX<uint32_t>(machInst, vd, vn); + } + case 0x3a: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtnuScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FcvtpuScX>( + size & 0x1, machInst, vd, vn); + case 0x3b: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtmuScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FcvtzuIntScX>( + size & 0x1, machInst, vd, vn); + case 0x3c: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<FcvtauScX>( + size & 0x1, machInst, vd, vn); + else + return new Unknown64(machInst); + case 0x3d: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg<UcvtfIntScX>( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg<FrsqrteScX>( + size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonScPwise(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + if (!u) { + if (opcode == 0x1b && size == 0x3) + return new AddpScQX<uint64_t>(machInst, vd, vn); + else + return new Unknown64(machInst); + } + + uint8_t switchVal = (opcode << 0) | (size << 5); + switch (switchVal) { + case 0x0c: + case 0x2c: + return decodeNeonUTwoMiscPwiseScFpReg<FmaxnmpScDX, FmaxnmpScQX>( + size & 0x1, machInst, vd, vn); + case 0x0d: + case 0x2d: + return decodeNeonUTwoMiscPwiseScFpReg<FaddpScDX, FaddpScQX>( + size & 0x1, machInst, vd, vn); + case 0x0f: + case 0x2f: + return decodeNeonUTwoMiscPwiseScFpReg<FmaxpScDX, FmaxpScQX>( + size & 0x1, machInst, vd, vn); + case 0x4c: + case 0x6c: + return decodeNeonUTwoMiscPwiseScFpReg<FminnmpScDX, FminnmpScQX>( + size & 0x1, machInst, vd, vn); + case 0x4f: + case 0x6f: + return decodeNeonUTwoMiscPwiseScFpReg<FminpScDX, FminpScQX>( + size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonScCopy(ExtMachInst machInst) + { + if (bits(machInst, 14, 11) != 0 || bits(machInst, 29)) + return new Unknown64(machInst); + + uint8_t imm5 = bits(machInst, 20, 16); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t size = findLsbSet(imm5); + uint8_t index = bits(imm5, 4, size + 1); + + return decodeNeonUTwoShiftUReg<DupElemScX>( + size, machInst, vd, vn, index); + } + + StaticInstPtr + decodeNeonScIndexedElem(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t L = bits(machInst, 21); + uint8_t M = bits(machInst, 20); + uint8_t opcode = bits(machInst, 15, 12); + uint8_t H = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm_bf = (IntRegIndex) (uint8_t) bits(machInst, 19, 16); + + uint8_t index = 0; + uint8_t index_fp = 0; + uint8_t vmh = 0; + uint8_t sz_L = bits(machInst, 22, 21); + + // Index and 2nd register operand for integer instructions + if (size == 0x1) { + index = (H << 2) | (L << 1) | M; + // vmh = 0; + } else if (size == 0x2) { + index = (H << 1) | L; + vmh = M; + } else if (size == 0x3) { + index = H; + vmh = M; + } + IntRegIndex vm = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + // Index and 2nd register operand for FP instructions + vmh = M; + if ((size & 0x1) == 0) { + index_fp = (H << 1) | L; + } else if (L == 0) { + index_fp = H; + } + IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + if (u && opcode != 9) + return new Unknown64(machInst); + + switch (opcode) { + case 0x1: + if (size < 2 || sz_L == 0x3) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmScFpReg<FmlaElemScX>( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + case 0x3: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmlalElemScX>( + size, machInst, vd, vn, vm, index); + case 0x5: + if (size < 2 || sz_L == 0x3) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmScFpReg<FmlsElemScX>( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + case 0x7: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmlslElemScX>( + size, machInst, vd, vn, vm, index); + case 0x9: + if (size < 2 || sz_L == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmScFpReg<FmulxElemScX>( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + else + return decodeNeonUThreeImmScFpReg<FmulElemScX>( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + case 0xb: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmullElemScX>( + size, machInst, vd, vn, vm, index); + case 0xc: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqdmulhElemScX>( + size, machInst, vd, vn, vm, index); + case 0xd: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>( + size, machInst, vd, vn, vm, index); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonScShiftByImm(ExtMachInst machInst) + { + bool u = bits(machInst, 29); + uint8_t immh = bits(machInst, 22, 19); + uint8_t immb = bits(machInst, 18, 16); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t immh3 = bits(machInst, 22); + uint8_t size = findMsbSet(immh); + int shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + + if (immh == 0x0) + return new Unknown64(machInst); + + switch (opcode) { + case 0x00: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UshrDX<uint64_t>(machInst, vd, vn, shiftAmt); + else + return new SshrDX<int64_t>(machInst, vd, vn, shiftAmt); + case 0x02: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UsraDX<uint64_t>(machInst, vd, vn, shiftAmt); + else + return new SsraDX<int64_t>(machInst, vd, vn, shiftAmt); + case 0x04: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UrshrDX<uint64_t>(machInst, vd, vn, shiftAmt); + else + return new SrshrDX<int64_t>(machInst, vd, vn, shiftAmt); + case 0x06: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UrsraDX<uint64_t>(machInst, vd, vn, shiftAmt); + else + return new SrsraDX<int64_t>(machInst, vd, vn, shiftAmt); + case 0x08: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new SriDX<uint64_t>(machInst, vd, vn, shiftAmt); + else + return new Unknown64(machInst); + case 0x0a: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return new SliDX<uint64_t>(machInst, vd, vn, shiftAmt); + else + return new ShlDX<uint64_t>(machInst, vd, vn, shiftAmt); + case 0x0c: + if (u) { + shiftAmt = ((immh << 3) | immb) - (8 << size); + return decodeNeonSTwoShiftUReg<SqshluScX>( + size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0e: + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftUReg<UqshlImmScX>( + size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftUReg<SqshlImmScX>( + size, machInst, vd, vn, shiftAmt); + case 0x10: + if (!u || immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + return decodeNeonSTwoShiftUSReg<SqshrunScX>( + size, machInst, vd, vn, shiftAmt); + case 0x11: + if (!u || immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + return decodeNeonSTwoShiftUSReg<SqrshrunScX>( + size, machInst, vd, vn, shiftAmt); + case 0x12: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftUSReg<UqshrnScX>( + size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftUSReg<SqshrnScX>( + size, machInst, vd, vn, shiftAmt); + case 0x13: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftUSReg<UqrshrnScX>( + size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftUSReg<SqrshrnScX>( + size, machInst, vd, vn, shiftAmt); + case 0x1c: + if (immh < 0x4) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) { + return decodeNeonUTwoShiftUFpReg<UcvtfFixedScX>( + size & 0x1, machInst, vd, vn, shiftAmt); + } else { + if (size & 0x1) + return new ScvtfFixedScDX<uint64_t>(machInst, vd, vn, + shiftAmt); + else + return new ScvtfFixedScSX<uint32_t>(machInst, vd, vn, + shiftAmt); + } + case 0x1f: + if (immh < 0x4) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftUFpReg<FcvtzuFixedScX>( + size & 0x1, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftUFpReg<FcvtzsFixedScX>( + size & 0x1, machInst, vd, vn, shiftAmt); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonMem(ExtMachInst machInst) + { + uint8_t dataSize = bits(machInst, 30) ? 128 : 64; + bool multiple = bits(machInst, 24, 23) < 0x2; + bool load = bits(machInst, 22); + + uint8_t numStructElems = 0; + uint8_t numRegs = 0; + + if (multiple) { // AdvSIMD load/store multiple structures + uint8_t opcode = bits(machInst, 15, 12); + uint8_t eSize = bits(machInst, 11, 10); + bool wb = !(bits(machInst, 20, 16) == 0x0 && !bits(machInst, 23)); + + switch (opcode) { + case 0x0: // LD/ST4 (4 regs) + numStructElems = 4; + numRegs = 4; + break; + case 0x2: // LD/ST1 (4 regs) + numStructElems = 1; + numRegs = 4; + break; + case 0x4: // LD/ST3 (3 regs) + numStructElems = 3; + numRegs = 3; + break; + case 0x6: // LD/ST1 (3 regs) + numStructElems = 1; + numRegs = 3; + break; + case 0x7: // LD/ST1 (1 reg) + numStructElems = 1; + numRegs = 1; + break; + case 0x8: // LD/ST2 (2 regs) + numStructElems = 2; + numRegs = 2; + break; + case 0xa: // LD/ST1 (2 regs) + numStructElems = 1; + numRegs = 2; + break; + default: + return new Unknown64(machInst); + } + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + if (load) { + return new VldMult64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, numRegs, wb); + } else { + return new VstMult64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, numRegs, wb); + } + } else { // AdvSIMD load/store single structure + uint8_t scale = bits(machInst, 15, 14); + uint8_t numStructElems = (((uint8_t) bits(machInst, 13) << 1) | + (uint8_t) bits(machInst, 21)) + 1; + uint8_t index = 0; + bool wb = !(bits(machInst, 20, 16) == 0x0 && !bits(machInst, 23)); + bool replicate = false; + + switch (scale) { + case 0x0: + index = ((uint8_t) bits(machInst, 30) << 3) | + ((uint8_t) bits(machInst, 12) << 2) | + (uint8_t) bits(machInst, 11, 10); + break; + case 0x1: + index = ((uint8_t) bits(machInst, 30) << 2) | + ((uint8_t) bits(machInst, 12) << 1) | + (uint8_t) bits(machInst, 11); + break; + case 0x2: + if (bits(machInst, 10) == 0x0) { + index = ((uint8_t) bits(machInst, 30) << 1) | + bits(machInst, 12); + } else { + index = (uint8_t) bits(machInst, 30); + scale = 0x3; + } + break; + case 0x3: + scale = bits(machInst, 11, 10); + replicate = true; + break; + default: + return new Unknown64(machInst); + } + + uint8_t eSize = scale; + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + if (load) { + return new VldSingle64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, index, wb, replicate); + } else { + return new VstSingle64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, index, wb, replicate); + } + } + } +} +}}; diff --git a/src/arch/arm/isa/formats/uncond.isa b/src/arch/arm/isa/formats/uncond.isa index 4a18a55bb..c376cd9ce 100644 --- a/src/arch/arm/isa/formats/uncond.isa +++ b/src/arch/arm/isa/formats/uncond.isa @@ -99,11 +99,11 @@ def format ArmUnconditional() {{ case 0x1: return new Clrex(machInst); case 0x4: - return new Dsb(machInst); + return new Dsb(machInst, 0); case 0x5: - return new Dmb(machInst); + return new Dmb(machInst, 0); case 0x6: - return new Isb(machInst); + return new Isb(machInst, 0); } } } else if (bits(op2, 0) == 0) { @@ -166,7 +166,7 @@ def format ArmUnconditional() {{ const uint32_t val = ((machInst >> 20) & 0x5); if (val == 0x4) { const uint32_t mode = bits(machInst, 4, 0); - if (badMode((OperatingMode)mode)) + if (badMode32((OperatingMode)mode)) return new Unknown(machInst); switch (bits(machInst, 24, 21)) { case 0x2: @@ -250,17 +250,10 @@ def format ArmUnconditional() {{ "ldc, ldc2 (immediate)", machInst); } } - if (op1 == 0xC5) { - return new WarnUnimplemented( - "mrrc, mrrc2", machInst); - } } else { if (bits(op1, 4, 3) != 0 || bits(op1, 1) == 1) { return new WarnUnimplemented( "stc, stc2", machInst); - } else if (op1 == 0xC4) { - return new WarnUnimplemented( - "mcrr, mcrrc", machInst); } } } diff --git a/src/arch/arm/isa/formats/unimp.isa b/src/arch/arm/isa/formats/unimp.isa index 1c9a4b402..8e346112c 100644 --- a/src/arch/arm/isa/formats/unimp.isa +++ b/src/arch/arm/isa/formats/unimp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -85,6 +85,9 @@ output header {{ private: /// Have we warned on this instruction yet? mutable bool warned; + /// Full mnemonic for MRC and MCR instructions including the + /// coproc. register name + std::string fullMnemonic; public: /// Constructor @@ -96,6 +99,16 @@ output header {{ flags[IsNonSpeculative] = true; } + WarnUnimplemented(const char *_mnemonic, ExtMachInst _machInst, + const std::string& _fullMnemonic) + : ArmStaticInst(_mnemonic, _machInst, No_OpClass), warned(false), + fullMnemonic(_fullMnemonic) + { + // don't call execute() (which panics) if we're on a + // speculative path + flags[IsNonSpeculative] = true; + } + %(BasicExecDeclare)s std::string @@ -147,10 +160,7 @@ output exec {{ FailUnimplemented::execute(%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(machInst, false, mnemonic); + return new UndefinedInstruction(machInst, false, mnemonic); } Fault @@ -158,7 +168,8 @@ output exec {{ Trace::InstRecord *traceData) const { if (!warned) { - warn("\tinstruction '%s' unimplemented\n", mnemonic); + warn("\tinstruction '%s' unimplemented\n", + fullMnemonic.size() ? fullMnemonic.c_str() : mnemonic); warned = true; } diff --git a/src/arch/arm/isa/includes.isa b/src/arch/arm/isa/includes.isa index 5dd13d623..a2ce84345 100644 --- a/src/arch/arm/isa/includes.isa +++ b/src/arch/arm/isa/includes.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -50,10 +50,16 @@ output header {{ #include <sstream> #include "arch/arm/insts/branch.hh" +#include "arch/arm/insts/branch64.hh" +#include "arch/arm/insts/data64.hh" +#include "arch/arm/insts/fplib.hh" #include "arch/arm/insts/macromem.hh" #include "arch/arm/insts/mem.hh" +#include "arch/arm/insts/mem64.hh" #include "arch/arm/insts/misc.hh" +#include "arch/arm/insts/misc64.hh" #include "arch/arm/insts/mult.hh" +#include "arch/arm/insts/neon64_mem.hh" #include "arch/arm/insts/pred_inst.hh" #include "arch/arm/insts/static_inst.hh" #include "arch/arm/insts/vfp.hh" @@ -63,6 +69,7 @@ output header {{ }}; output decoder {{ +#include <string> #include "arch/arm/decoder.hh" #include "arch/arm/faults.hh" #include "arch/arm/intregs.hh" diff --git a/src/arch/arm/isa/insts/aarch64.isa b/src/arch/arm/isa/insts/aarch64.isa new file mode 100644 index 000000000..6fcf9b5d2 --- /dev/null +++ b/src/arch/arm/isa/insts/aarch64.isa @@ -0,0 +1,58 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + movzCode = 'Dest64 = ((uint64_t)imm1) << imm2;' + movzIop = InstObjParams("movz", "Movz", "RegImmImmOp", movzCode, []) + header_output += RegImmImmOpDeclare.subst(movzIop) + decoder_output += RegImmImmOpConstructor.subst(movzIop) + exec_output += BasicExecute.subst(movzIop) + + movkCode = 'Dest64 = insertBits(Dest64, imm2 + 15, imm2, imm1);' + movkIop = InstObjParams("movk", "Movk", "RegImmImmOp", movkCode, []) + header_output += RegImmImmOpDeclare.subst(movkIop) + decoder_output += RegImmImmOpConstructor.subst(movkIop) + exec_output += BasicExecute.subst(movkIop) + + movnCode = 'Dest64 = ~(((uint64_t)imm1) << imm2);' + movnIop = InstObjParams("movn", "Movn", "RegImmImmOp", movnCode, []) + header_output += RegImmImmOpDeclare.subst(movnIop) + decoder_output += RegImmImmOpConstructor.subst(movnIop) + exec_output += BasicExecute.subst(movnIop) +}}; diff --git a/src/arch/arm/isa/insts/branch.isa b/src/arch/arm/isa/insts/branch.isa index e360f4581..3ee9d88e4 100644 --- a/src/arch/arm/isa/insts/branch.isa +++ b/src/arch/arm/isa/insts/branch.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -48,7 +48,7 @@ let {{ bCode = ''' NPC = (uint32_t)(PC + imm); ''' - br_tgt_code = '''pcs.instNPC(branchPC.instPC() + imm);''' + br_tgt_code = '''pcs.instNPC((uint32_t)(branchPC.instPC() + imm));''' instFlags = ["IsDirectControl"] if (link): bCode += ''' @@ -86,9 +86,9 @@ let {{ Name += "Imm" # Since we're switching ISAs, the target ISA will be the opposite # of the current ISA. Thumb is whether the target is ARM. - newPC = '(Thumb ? (roundDown(PC, 4) + imm) : (PC + imm))' + newPC = '(uint32_t)(Thumb ? (roundDown(PC, 4) + imm) : (PC + imm))' br_tgt_code = ''' - pcs.instNPC((branchPC.thumb() ? (roundDown(branchPC.instPC(),4) + imm) : + pcs.instNPC((uint32_t)(branchPC.thumb() ? (roundDown(branchPC.instPC(),4) + imm) : (branchPC.instPC() + imm))); ''' base = "BranchImmCond" @@ -150,7 +150,26 @@ let {{ if imm: decoder_output += BranchTarget.subst(blxIop) - #Ignore BXJ for now + bxjcode = ''' + HSTR hstr = Hstr; + CPSR cpsr = Cpsr; + SCR scr = Scr; + + if (ArmSystem::haveVirtualization(xc->tcBase()) && hstr.tjdbx && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + fault = new HypervisorTrap(machInst, op1, EC_TRAPPED_BXJ); + } + IWNPC = Op1; + ''' + + bxjIop = InstObjParams("bxj", "BxjReg", "BranchRegCond", + {"code": bxjcode, + "predicate_test": predicateTest, + "is_ras_pop": "op1 == INTREG_LR" }, + ["IsIndirectControl"]) + header_output += BranchRegCondDeclare.subst(bxjIop) + decoder_output += BranchRegCondConstructor.subst(bxjIop) + exec_output += PredOpExecute.subst(bxjIop) #CBNZ, CBZ. These are always unconditional as far as predicates for (mnem, test) in (("cbz", "=="), ("cbnz", "!=")): diff --git a/src/arch/arm/isa/insts/branch64.isa b/src/arch/arm/isa/insts/branch64.isa new file mode 100644 index 000000000..89cee6c22 --- /dev/null +++ b/src/arch/arm/isa/insts/branch64.isa @@ -0,0 +1,248 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black +// Giacomo Gabrielli + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # B, BL + for (mnem, link) in (("b", False), ("bl", True)): + bCode = ('NPC = purifyTaggedAddr(RawPC + imm, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsDirectControl', 'IsUncondControl'] + if (link): + bCode += 'XLR = RawPC + 4;\n' + instFlags += ['IsCall'] + + bIop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImm64", bCode, instFlags) + header_output += BranchImm64Declare.subst(bIop) + decoder_output += BranchImm64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # BR, BLR + for (mnem, link) in (("br", False), ("blr", True)): + bCode = ('NPC = purifyTaggedAddr(XOp1, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsIndirectControl', 'IsUncondControl'] + if (link): + bCode += 'XLR = RawPC + 4;\n' + instFlags += ['IsCall'] + + bIop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchReg64", bCode, instFlags) + header_output += BranchReg64Declare.subst(bIop) + decoder_output += BranchReg64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # B conditional + bCode = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) + NPC = purifyTaggedAddr(RawPC + imm, xc->tcBase(), + currEL(xc->tcBase())); + else + NPC = NPC; + ''' + bIop = InstObjParams("b", "BCond64", "BranchImmCond64", bCode, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmCond64Declare.subst(bIop) + decoder_output += BranchImmCond64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # RET + bCode = ('NPC = purifyTaggedAddr(XOp1, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsIndirectControl', 'IsUncondControl', 'IsReturn'] + + bIop = InstObjParams('ret', 'Ret64', "BranchRet64", bCode, instFlags) + header_output += BranchReg64Declare.subst(bIop) + decoder_output += BranchReg64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # ERET + bCode = '''Addr newPc; + CPSR cpsr = Cpsr; + CPSR spsr = Spsr; + + ExceptionLevel curr_el = opModeToEL((OperatingMode) (uint8_t) cpsr.mode); + switch (curr_el) { + case EL3: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL3); + break; + case EL2: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL2); + break; + case EL1: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL1); + break; + default: + return new UndefinedInstruction(machInst, false, mnemonic); + break; + } + if (spsr.width && (newPc & mask(2))) { + // To avoid PC Alignment fault when returning to AArch32 + if (spsr.t) + newPc = newPc & ~mask(1); + else + newPc = newPc & ~mask(2); + } + spsr.q = 0; + spsr.it1 = 0; + spsr.j = 0; + spsr.res0_23_22 = 0; + spsr.ge = 0; + spsr.it2 = 0; + spsr.t = 0; + + OperatingMode mode = (OperatingMode) (uint8_t) spsr.mode; + bool illegal = false; + ExceptionLevel target_el; + if (badMode(mode)) { + illegal = true; + } else { + target_el = opModeToEL(mode); + if (((target_el == EL2) && + !ArmSystem::haveVirtualization(xc->tcBase())) || + (target_el > curr_el) || + (spsr.width == 1)) { + illegal = true; + } else { + bool known = true; + bool from32 = (spsr.width == 1); + bool to32 = false; + if (false) { // TODO: !haveAArch32EL + to32 = false; + } else if (!ArmSystem::highestELIs64(xc->tcBase())) { + to32 = true; + } else { + bool scr_rw, hcr_rw; + if (ArmSystem::haveSecurity(xc->tcBase())) { + SCR scr = xc->tcBase()->readMiscReg(MISCREG_SCR_EL3); + scr_rw = scr.rw; + } else { + scr_rw = true; + } + + if (ArmSystem::haveVirtualization(xc->tcBase())) { + HCR hcr = xc->tcBase()->readMiscReg(MISCREG_HCR_EL2); + hcr_rw = hcr.rw; + } else { + hcr_rw = scr_rw; + } + + switch (target_el) { + case EL3: + to32 = false; + break; + case EL2: + to32 = !scr_rw; + break; + case EL1: + to32 = !scr_rw || !hcr_rw; + break; + case EL0: + if (curr_el == EL0) { + to32 = cpsr.width; + } else if (!scr_rw || !hcr_rw) { + // EL0 using AArch32 if EL1 using AArch32 + to32 = true; + } else { + known = false; + to32 = false; + } + } + } + if (known) + illegal = (from32 != to32); + } + } + + if (illegal) { + uint8_t old_mode = cpsr.mode; + spsr.mode = old_mode; // Preserve old mode when invalid + spsr.il = 1; + } else { + if (cpsr.width != spsr.width) + panic("AArch32/AArch64 interprocessing not supported yet"); + } + Cpsr = spsr; + + CondCodesNZ = spsr.nz; + CondCodesC = spsr.c; + CondCodesV = spsr.v; + NPC = purifyTaggedAddr(newPc, xc->tcBase(), + opModeToEL((OperatingMode) (uint8_t) spsr.mode)); + LLSCLock = 0; // Clear exclusive monitor + SevMailbox = 1; //Set Event Register + ''' + instFlags = ['IsSerializeAfter', 'IsNonSpeculative', 'IsSquashAfter'] + bIop = InstObjParams('eret', 'Eret64', "BranchEret64", bCode, instFlags) + header_output += BasicDeclare.subst(bIop) + decoder_output += BasicConstructor64.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # CBNZ, CBZ + for (mnem, test) in (("cbz", "=="), ("cbnz", "!=")): + code = ('NPC = (Op164 %(test)s 0) ? ' + 'purifyTaggedAddr(RawPC + imm, xc->tcBase(), ' + 'currEL(xc->tcBase())) : NPC;\n') + code = code % {"test": test} + iop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImmReg64", code, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmReg64Declare.subst(iop) + decoder_output += BranchImmReg64Constructor.subst(iop) + exec_output += BasicExecute.subst(iop) + + # TBNZ, TBZ + for (mnem, test) in (("tbz", "=="), ("tbnz", "!=")): + code = ('NPC = ((Op164 & imm1) %(test)s 0) ? ' + 'purifyTaggedAddr(RawPC + imm2, xc->tcBase(), ' + 'currEL(xc->tcBase())) : NPC;\n') + code = code % {"test": test} + iop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImmImmReg64", code, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmImmReg64Declare.subst(iop) + decoder_output += BranchImmImmReg64Constructor.subst(iop) + exec_output += BasicExecute.subst(iop) +}}; diff --git a/src/arch/arm/isa/insts/data.isa b/src/arch/arm/isa/insts/data.isa index be56554b0..881676496 100644 --- a/src/arch/arm/isa/insts/data.isa +++ b/src/arch/arm/isa/insts/data.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -257,7 +257,8 @@ let {{ CPSR old_cpsr = Cpsr; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Spsr, 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; diff --git a/src/arch/arm/isa/insts/data64.isa b/src/arch/arm/isa/insts/data64.isa new file mode 100644 index 000000000..77d7541ca --- /dev/null +++ b/src/arch/arm/isa/insts/data64.isa @@ -0,0 +1,465 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + def createCcCode64(carry, overflow): + code = "" + code += ''' + uint16_t _iz, _in; + _in = bits(resTemp, intWidth - 1); + _iz = ((resTemp & mask(intWidth)) == 0); + CondCodesNZ = (_in << 1) | _iz; + DPRINTF(Arm, "(in, iz) = (%%d, %%d)\\n", _in, _iz); + ''' + if overflow and overflow != "none": + code += ''' + uint16_t _iv; + _iv = %s & 1; + CondCodesV = _iv; + DPRINTF(Arm, "(iv) = (%%d)\\n", _iv); + ''' % overflow + if carry and carry != "none": + code += ''' + uint16_t _ic; + _ic = %s & 1; + CondCodesC = _ic; + DPRINTF(Arm, "(ic) = (%%d)\\n", _ic); + ''' % carry + return code + + oldC = 'CondCodesC' + oldV = 'CondCodesV' + # Dicts of ways to set the carry flag. + carryCode64 = { + "none": "none", + "add": 'findCarry(intWidth, resTemp, Op164, secOp)', + "sub": 'findCarry(intWidth, resTemp, Op164, ~secOp)', + "logic": '0' + } + # Dict of ways to set the overflow flag. + overflowCode64 = { + "none": "none", + "add": 'findOverflow(intWidth, resTemp, Op164, secOp)', + "sub": 'findOverflow(intWidth, resTemp, Op164, ~secOp)', + "logic": '0' + } + + immOp2 = "uint64_t secOp M5_VAR_USED = imm;" + sRegOp2 = "uint64_t secOp M5_VAR_USED = " + \ + "shiftReg64(Op264, shiftAmt, shiftType, intWidth);" + eRegOp2 = "uint64_t secOp M5_VAR_USED = " + \ + "extendReg64(Op264, extendType, shiftAmt, intWidth);" + + def buildDataWork(mnem, code, flagType, suffix, buildCc, buildNonCc, + base, templateBase): + code = ''' + uint64_t resTemp M5_VAR_USED = 0; + ''' + code + ccCode = createCcCode64(carryCode64[flagType], overflowCode64[flagType]) + Name = mnem.capitalize() + suffix + iop = InstObjParams(mnem, Name, base, code) + iopCc = InstObjParams(mnem + "s", Name + "Cc", base, code + ccCode) + + def subst(iop): + global header_output, decoder_output, exec_output + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + if buildNonCc: + subst(iop) + if buildCc: + subst(iopCc) + + def buildXImmDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XImm"): + buildDataWork(mnem, immOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXImmOp", "DataXImm") + + def buildXSRegDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XSReg"): + buildDataWork(mnem, sRegOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXSRegOp", "DataXSReg") + + def buildXERegDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XEReg"): + buildDataWork(mnem, eRegOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXERegOp", "DataXEReg") + + def buildDataInst(mnem, code, flagType = "logic", + buildCc = True, buildNonCc = True): + buildXImmDataInst(mnem, code, flagType, buildCc, buildNonCc) + buildXSRegDataInst(mnem, code, flagType, buildCc, buildNonCc) + buildXERegDataInst(mnem, code, flagType, buildCc, buildNonCc) + + buildXImmDataInst("adr", "Dest64 = RawPC + imm", buildCc = False); + buildXImmDataInst("adrp", "Dest64 = (RawPC & ~mask(12)) + imm", + buildCc = False); + buildDataInst("and", "Dest64 = resTemp = Op164 & secOp;") + buildDataInst("eor", "Dest64 = Op164 ^ secOp;", buildCc = False) + buildXSRegDataInst("eon", "Dest64 = Op164 ^ ~secOp;", buildCc = False) + buildDataInst("sub", "Dest64 = resTemp = Op164 - secOp;", "sub") + buildDataInst("add", "Dest64 = resTemp = Op164 + secOp;", "add") + buildXSRegDataInst("adc", + "Dest64 = resTemp = Op164 + secOp + %s;" % oldC, "add") + buildXSRegDataInst("sbc", + "Dest64 = resTemp = Op164 - secOp - !%s;" % oldC, "sub") + buildDataInst("orr", "Dest64 = Op164 | secOp;", buildCc = False) + buildXSRegDataInst("orn", "Dest64 = Op164 | ~secOp;", buildCc = False) + buildXSRegDataInst("bic", "Dest64 = resTemp = Op164 & ~secOp;") + + def buildDataXImmInst(mnem, code, optArgs = []): + global header_output, decoder_output, exec_output + classNamePrefix = mnem[0].upper() + mnem[1:] + templateBase = "DataXImm" + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", code, optArgs) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + def buildDataXRegInst(mnem, regOps, code, optArgs = [], + overrideOpClass=None): + global header_output, decoder_output, exec_output + templateBase = "DataX%dReg" % regOps + classNamePrefix = mnem[0].upper() + mnem[1:] + if overrideOpClass: + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", + { 'code': code, 'op_class': overrideOpClass}, + optArgs) + else: + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", code, optArgs) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + buildDataXRegInst("madd", 3, "Dest64 = Op164 + Op264 * Op364", + overrideOpClass="IntMultOp") + buildDataXRegInst("msub", 3, "Dest64 = Op164 - Op264 * Op364", + overrideOpClass="IntMultOp") + buildDataXRegInst("smaddl", 3, + "XDest = XOp1 + sext<32>(WOp2) * sext<32>(WOp3)", + overrideOpClass="IntMultOp") + buildDataXRegInst("smsubl", 3, + "XDest = XOp1 - sext<32>(WOp2) * sext<32>(WOp3)", + overrideOpClass="IntMultOp") + buildDataXRegInst("smulh", 2, ''' + uint64_t op1H = (int32_t)(XOp1 >> 32); + uint64_t op1L = (uint32_t)XOp1; + uint64_t op2H = (int32_t)(XOp2 >> 32); + uint64_t op2L = (uint32_t)XOp2; + uint64_t mid1 = ((op1L * op2L) >> 32) + op1H * op2L; + uint64_t mid2 = op1L * op2H; + uint64_t result = ((uint64_t)(uint32_t)mid1 + (uint32_t)mid2) >> 32; + result += shiftReg64(mid1, 32, ASR, intWidth); + result += shiftReg64(mid2, 32, ASR, intWidth); + XDest = result + op1H * op2H; + ''', overrideOpClass="IntMultOp") + buildDataXRegInst("umaddl", 3, "XDest = XOp1 + WOp2 * WOp3", + overrideOpClass="IntMultOp") + buildDataXRegInst("umsubl", 3, "XDest = XOp1 - WOp2 * WOp3", + overrideOpClass="IntMultOp") + buildDataXRegInst("umulh", 2, ''' + uint64_t op1H = (uint32_t)(XOp1 >> 32); + uint64_t op1L = (uint32_t)XOp1; + uint64_t op2H = (uint32_t)(XOp2 >> 32); + uint64_t op2L = (uint32_t)XOp2; + uint64_t mid1 = ((op1L * op2L) >> 32) + op1H * op2L; + uint64_t mid2 = op1L * op2H; + uint64_t result = ((uint64_t)(uint32_t)mid1 + (uint32_t)mid2) >> 32; + result += mid1 >> 32; + result += mid2 >> 32; + XDest = result + op1H * op2H; + ''', overrideOpClass="IntMultOp") + + buildDataXRegInst("asrv", 2, + "Dest64 = shiftReg64(Op164, Op264, ASR, intWidth)") + buildDataXRegInst("lslv", 2, + "Dest64 = shiftReg64(Op164, Op264, LSL, intWidth)") + buildDataXRegInst("lsrv", 2, + "Dest64 = shiftReg64(Op164, Op264, LSR, intWidth)") + buildDataXRegInst("rorv", 2, + "Dest64 = shiftReg64(Op164, Op264, ROR, intWidth)") + buildDataXRegInst("sdiv", 2, ''' + int64_t op1 = Op164; + int64_t op2 = Op264; + if (intWidth == 32) { + op1 = sext<32>(op1); + op2 = sext<32>(op2); + } + Dest64 = op2 == -1 ? -op1 : op2 ? op1 / op2 : 0; + ''', overrideOpClass="IntDivOp") + buildDataXRegInst("udiv", 2, "Dest64 = Op264 ? Op164 / Op264 : 0", + overrideOpClass="IntDivOp") + + buildDataXRegInst("cls", 1, ''' + uint64_t op1 = Op164; + if (bits(op1, intWidth - 1)) + op1 ^= mask(intWidth); + Dest64 = (op1 == 0) ? intWidth - 1 : (intWidth - 2 - findMsbSet(op1)); + ''') + buildDataXRegInst("clz", 1, ''' + Dest64 = (Op164 == 0) ? intWidth : (intWidth - 1 - findMsbSet(Op164)); + ''') + buildDataXRegInst("rbit", 1, ''' + uint64_t result = Op164; + uint64_t lBit = 1ULL << (intWidth - 1); + uint64_t rBit = 1ULL; + while (lBit > rBit) { + uint64_t maskBits = lBit | rBit; + uint64_t testBits = result & maskBits; + // If these bits are different, swap them by toggling them. + if (testBits && testBits != maskBits) + result ^= maskBits; + lBit >>= 1; rBit <<= 1; + } + Dest64 = result; + ''') + buildDataXRegInst("rev", 1, ''' + if (intWidth == 32) + Dest64 = betole<uint32_t>(Op164); + else + Dest64 = betole<uint64_t>(Op164); + ''') + buildDataXRegInst("rev16", 1, ''' + int count = intWidth / 16; + uint64_t result = 0; + for (unsigned i = 0; i < count; i++) { + uint16_t hw = Op164 >> (i * 16); + result |= (uint64_t)betole<uint16_t>(hw) << (i * 16); + } + Dest64 = result; + ''') + buildDataXRegInst("rev32", 1, ''' + int count = intWidth / 32; + uint64_t result = 0; + for (unsigned i = 0; i < count; i++) { + uint32_t hw = Op164 >> (i * 32); + result |= (uint64_t)betole<uint32_t>(hw) << (i * 32); + } + Dest64 = result; + ''') + + msrMrs64EnabledCheckCode = ''' + // Check for read/write access right + if (!can%sAArch64SysReg(flat_idx, Scr64, cpsr, xc->tcBase())) { + if (flat_idx == MISCREG_DAIF || + flat_idx == MISCREG_DC_ZVA_Xt || + flat_idx == MISCREG_DC_CVAC_Xt || + flat_idx == MISCREG_DC_CIVAC_Xt + ) + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + return new UndefinedInstruction(machInst, false, mnemonic); + } + + // Check for traps to supervisor (FP/SIMD regs) + if (el <= EL1 && msrMrs64TrapToSup(flat_idx, el, Cpacr64)) + return new SupervisorTrap(machInst, 0x1E00000, EC_TRAPPED_SIMD_FP); + + bool is_vfp_neon = false; + + // Check for traps to hypervisor + if ((ArmSystem::haveVirtualization(xc->tcBase()) && el <= EL2) && + msrMrs64TrapToHyp(flat_idx, %s, CptrEl264, Hcr64, &is_vfp_neon)) { + return new HypervisorTrap(machInst, is_vfp_neon ? 0x1E00000 : imm, + is_vfp_neon ? EC_TRAPPED_SIMD_FP : EC_TRAPPED_MSR_MRS_64); + } + + // Check for traps to secure monitor + if ((ArmSystem::haveSecurity(xc->tcBase()) && el <= EL3) && + msrMrs64TrapToMon(flat_idx, CptrEl364, el, &is_vfp_neon)) { + return new SecureMonitorTrap(machInst, + is_vfp_neon ? 0x1E00000 : imm, + is_vfp_neon ? EC_TRAPPED_SIMD_FP : EC_TRAPPED_MSR_MRS_64); + } + ''' + + buildDataXImmInst("mrs", ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()-> + flattenMiscIndex(op1); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + %s + XDest = MiscOp1_ud; + ''' % (msrMrs64EnabledCheckCode % ('Read', 'true'),), + ["IsSerializeBefore"]) + + buildDataXRegInst("mrsNZCV", 1, ''' + CPSR cpsr = 0; + cpsr.nz = CondCodesNZ; + cpsr.c = CondCodesC; + cpsr.v = CondCodesV; + XDest = cpsr; + ''') + + buildDataXImmInst("msr", ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()-> + flattenMiscIndex(dest); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + %s + MiscDest_ud = XOp1; + ''' % (msrMrs64EnabledCheckCode % ('Write', 'false'),), + ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXRegInst("msrNZCV", 1, ''' + CPSR cpsr = XOp1; + CondCodesNZ = cpsr.nz; + CondCodesC = cpsr.c; + CondCodesV = cpsr.v; + ''') + + msrdczva_ea_code = ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + ''' + + msrdczva_ea_code += msrMrs64EnabledCheckCode % ('Write', 'false') + msrdczva_ea_code += ''' + Request::Flags memAccessFlags = Request::CACHE_BLOCK_ZERO|ArmISA::TLB::MustBeOne; + EA = XBase; + assert(!(Dczid & 0x10)); + uint64_t op_size = power(2, Dczid + 2); + EA &= ~(op_size - 1); + + ''' + + msrDCZVAIop = InstObjParams("dczva", "Dczva", "SysDC64", + { "ea_code" : msrdczva_ea_code, + "memacc_code" : ";", "use_uops" : 0, + "op_wb" : ";", "fa_code" : ";"}, ['IsStore', 'IsMemRef']); + header_output += DCStore64Declare.subst(msrDCZVAIop); + decoder_output += DCStore64Constructor.subst(msrDCZVAIop); + exec_output += DCStore64Execute.subst(msrDCZVAIop); + exec_output += DCStore64InitiateAcc.subst(msrDCZVAIop); + exec_output += Store64CompleteAcc.subst(msrDCZVAIop); + + + + buildDataXImmInst("msrSP", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + MiscDest_ud = imm; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXImmInst("msrDAIFSet", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + } + CPSR cpsr = Cpsr; + cpsr.daif = cpsr.daif | imm; + Cpsr = cpsr; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXImmInst("msrDAIFClr", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + } + CPSR cpsr = Cpsr; + cpsr.daif = cpsr.daif & ~imm; + Cpsr = cpsr; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + def buildDataXCompInst(mnem, instType, suffix, code): + global header_output, decoder_output, exec_output + templateBase = "DataXCond%s" % instType + iop = InstObjParams(mnem, mnem.capitalize() + suffix + "64", + templateBase + "Op", code) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + def buildDataXCondImmInst(mnem, code): + buildDataXCompInst(mnem, "CompImm", "Imm", code) + def buildDataXCondRegInst(mnem, code): + buildDataXCompInst(mnem, "CompReg", "Reg", code) + def buildDataXCondSelInst(mnem, code): + buildDataXCompInst(mnem, "Sel", "", code) + + def condCompCode(flagType, op, imm): + ccCode = createCcCode64(carryCode64[flagType], overflowCode64[flagType]) + opDecl = "uint64_t secOp M5_VAR_USED = imm;" + if not imm: + opDecl = "uint64_t secOp M5_VAR_USED = Op264;" + return opDecl + ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + uint64_t resTemp = Op164 ''' + op + ''' secOp; + ''' + ccCode + ''' + } else { + CondCodesNZ = (defCc >> 2) & 0x3; + CondCodesC = (defCc >> 1) & 0x1; + CondCodesV = defCc & 0x1; + } + ''' + + buildDataXCondImmInst("ccmn", condCompCode("add", "+", True)) + buildDataXCondImmInst("ccmp", condCompCode("sub", "-", True)) + buildDataXCondRegInst("ccmn", condCompCode("add", "+", False)) + buildDataXCondRegInst("ccmp", condCompCode("sub", "-", False)) + + condSelCode = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + Dest64 = Op164; + } else { + Dest64 = %(altVal)s; + } + ''' + buildDataXCondSelInst("csel", condSelCode % {"altVal" : "Op264"}) + buildDataXCondSelInst("csinc", condSelCode % {"altVal" : "Op264 + 1"}) + buildDataXCondSelInst("csinv", condSelCode % {"altVal" : "~Op264"}) + buildDataXCondSelInst("csneg", condSelCode % {"altVal" : "-Op264"}) +}}; diff --git a/src/arch/arm/isa/insts/div.isa b/src/arch/arm/isa/insts/div.isa index 1ff6ef9e4..0896ea94f 100644 --- a/src/arch/arm/isa/insts/div.isa +++ b/src/arch/arm/isa/insts/div.isa @@ -40,12 +40,6 @@ let {{ sdivCode = ''' if (Op2_sw == 0) { - if (((SCTLR)Sctlr).dz) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); - } Dest_sw = 0; } else if (Op1_sw == INT_MIN && Op2_sw == -1) { Dest_sw = INT_MIN; @@ -63,12 +57,6 @@ let {{ udivCode = ''' if (Op2_uw == 0) { - if (((SCTLR)Sctlr).dz) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); - } Dest_uw = 0; } else { Dest_uw = Op1_uw / Op2_uw; diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa index b701995f4..60f030c3d 100644 --- a/src/arch/arm/isa/insts/fp.isa +++ b/src/arch/arm/isa/insts/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -191,14 +191,17 @@ let {{ decoder_output = "" exec_output = "" - vmsrIop = InstObjParams("vmsr", "Vmsr", "FpRegRegOp", - { "code": vmsrEnabledCheckCode + \ - "MiscDest = Op1;", + vmsrCode = vmsrEnabledCheckCode + ''' + MiscDest = Op1; + ''' + + vmsrIop = InstObjParams("vmsr", "Vmsr", "FpRegRegImmOp", + { "code": vmsrCode, "predicate_test": predicateTest, "op_class": "SimdFloatMiscOp" }, ["IsSerializeAfter","IsNonSpeculative"]) - header_output += FpRegRegOpDeclare.subst(vmsrIop); - decoder_output += FpRegRegOpConstructor.subst(vmsrIop); + header_output += FpRegRegImmOpDeclare.subst(vmsrIop); + decoder_output += FpRegRegImmOpConstructor.subst(vmsrIop); exec_output += PredOpExecute.subst(vmsrIop); vmsrFpscrCode = vmsrEnabledCheckCode + ''' @@ -215,14 +218,36 @@ let {{ decoder_output += FpRegRegOpConstructor.subst(vmsrFpscrIop); exec_output += PredOpExecute.subst(vmsrFpscrIop); - vmrsIop = InstObjParams("vmrs", "Vmrs", "FpRegRegOp", - { "code": vmrsEnabledCheckCode + \ - "Dest = MiscOp1;", + vmrsCode = vmrsEnabledCheckCode + ''' + CPSR cpsr = Cpsr; + SCR scr = Scr; + if (!inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + HCR hcr = Hcr; + bool hypTrap = false; + switch(xc->tcBase()->flattenMiscIndex(op1)) { + case MISCREG_FPSID: + hypTrap = hcr.tid0; + break; + case MISCREG_MVFR0: + case MISCREG_MVFR1: + hypTrap = hcr.tid3; + break; + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP10_MRC_VMRS); + } + } + Dest = MiscOp1; + ''' + + vmrsIop = InstObjParams("vmrs", "Vmrs", "FpRegRegImmOp", + { "code": vmrsCode, "predicate_test": predicateTest, "op_class": "SimdFloatMiscOp" }, ["IsSerializeBefore"]) - header_output += FpRegRegOpDeclare.subst(vmrsIop); - decoder_output += FpRegRegOpConstructor.subst(vmrsIop); + header_output += FpRegRegImmOpDeclare.subst(vmrsIop); + decoder_output += FpRegRegImmOpConstructor.subst(vmrsIop); exec_output += PredOpExecute.subst(vmrsIop); vmrsFpscrIop = InstObjParams("vmrs", "VmrsFpscr", "FpRegRegOp", @@ -323,7 +348,7 @@ let {{ decoder_output += FpRegRegOpConstructor.subst(vmovRegQIop); exec_output += PredOpExecute.subst(vmovRegQIop); - vmovCoreRegBCode = vfpEnabledCheckCode + ''' + vmovCoreRegBCode = simdEnabledCheckCode + ''' FpDest_uw = insertBits(FpDest_uw, imm * 8 + 7, imm * 8, Op1_ub); ''' vmovCoreRegBIop = InstObjParams("vmov", "VmovCoreRegB", "FpRegRegImmOp", @@ -334,7 +359,7 @@ let {{ decoder_output += FpRegRegImmOpConstructor.subst(vmovCoreRegBIop); exec_output += PredOpExecute.subst(vmovCoreRegBIop); - vmovCoreRegHCode = vfpEnabledCheckCode + ''' + vmovCoreRegHCode = simdEnabledCheckCode + ''' FpDest_uw = insertBits(FpDest_uw, imm * 16 + 15, imm * 16, Op1_uh); ''' vmovCoreRegHIop = InstObjParams("vmov", "VmovCoreRegH", "FpRegRegImmOp", @@ -453,6 +478,17 @@ let {{ singleCode = singleSimpleCode + ''' FpscrExc = fpscr; ''' + singleTernOp = vfpEnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + VfpSavedState state = prepFpState(fpscr.rMode); + float cOp1 = FpOp1; + float cOp2 = FpOp2; + float cOp3 = FpDestP0; + FpDestP0 = ternaryOp(fpscr, %(palam)s, %(op)s, + fpscr.fz, fpscr.dn, fpscr.rMode); + finishVfp(fpscr, state, fpscr.fz); + FpscrExc = fpscr; + ''' singleBinOp = "binaryOp(fpscr, FpOp1, FpOp2," + \ "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)" singleUnaryOp = "unaryOp(fpscr, FpOp1, %(func)s, fpscr.fz, fpscr.rMode)" @@ -463,6 +499,19 @@ let {{ FpDestP1_uw = dblHi(dest); FpscrExc = fpscr; ''' + doubleTernOp = vfpEnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + VfpSavedState state = prepFpState(fpscr.rMode); + double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw); + double cOp2 = dbl(FpOp2P0_uw, FpOp2P1_uw); + double cOp3 = dbl(FpDestP0_uw, FpDestP1_uw); + double cDest = ternaryOp(fpscr, %(palam)s, %(op)s, + fpscr.fz, fpscr.dn, fpscr.rMode); + FpDestP0_uw = dblLow(cDest); + FpDestP1_uw = dblHi(cDest); + finishVfp(fpscr, state, fpscr.fz); + FpscrExc = fpscr; + ''' doubleBinOp = ''' binaryOp(fpscr, dbl(FpOp1P0_uw, FpOp1P1_uw), dbl(FpOp2P0_uw, FpOp2P1_uw), @@ -473,6 +522,37 @@ let {{ fpscr.fz, fpscr.rMode) ''' + def buildTernaryFpOp(Name, base, opClass, singleOp, doubleOp, paramStr): + global header_output, decoder_output, exec_output + + code = singleTernOp % { "op": singleOp, "palam": paramStr } + sIop = InstObjParams(Name.lower() + "s", Name + "S", base, + { "code": code, + "predicate_test": predicateTest, + "op_class": opClass }, []) + code = doubleTernOp % { "op": doubleOp, "palam": paramStr } + dIop = InstObjParams(Name.lower() + "d", Name + "D", base, + { "code": code, + "predicate_test": predicateTest, + "op_class": opClass }, []) + + declareTempl = eval(base + "Declare"); + constructorTempl = eval(base + "Constructor"); + + for iop in sIop, dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += PredOpExecute.subst(iop) + + buildTernaryFpOp("Vfma", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd<float>", "fpMulAdd<double>", " cOp1, cOp2, cOp3" ) + buildTernaryFpOp("Vfms", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd<float>", "fpMulAdd<double>", "-cOp1, cOp2, cOp3" ) + buildTernaryFpOp("Vfnma", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd<float>", "fpMulAdd<double>", "-cOp1, cOp2, -cOp3" ) + buildTernaryFpOp("Vfnms", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd<float>", "fpMulAdd<double>", " cOp1, cOp2, -cOp3" ) + def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp): global header_output, decoder_output, exec_output @@ -830,7 +910,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); vfpFlushToZero(fpscr, FpOp1); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, 0, false); + FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, 0, false); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -849,7 +929,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, false, false, 0, false); + uint64_t result = vfpFpToFixed<double>(cOp1, false, 32, 0, false); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -868,7 +948,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); vfpFlushToZero(fpscr, FpOp1); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, 0, false); + FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, 0, false); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -887,7 +967,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - int64_t result = vfpFpDToFixed(cOp1, true, false, 0, false); + int64_t result = vfpFpToFixed<double>(cOp1, true, 32, 0, false); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -907,7 +987,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, 0); + FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, 0); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -927,7 +1007,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, false, false, 0); + uint64_t result = vfpFpToFixed<double>(cOp1, false, 32, 0); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -947,7 +1027,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, 0); + FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, 0); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -967,7 +1047,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - int64_t result = vfpFpDToFixed(cOp1, true, false, 0); + int64_t result = vfpFpToFixed<double>(cOp1, true, 32, 0); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -1333,7 +1413,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, imm); + FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, imm); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1352,7 +1432,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, true, false, imm); + uint64_t mid = vfpFpToFixed<double>(cOp1, true, 32, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1372,7 +1452,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, imm); + FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, imm); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1391,7 +1471,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, false, false, imm); + uint64_t mid = vfpFpToFixed<double>(cOp1, false, 32, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1410,7 +1490,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_sw) : "m" (FpOp1_sw)); - FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sw, false, imm); + FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sw, 32, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1428,7 +1508,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm); + double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, 32, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1447,7 +1527,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_uw) : "m" (FpOp1_uw)); - FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uw, false, imm); + FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uw, 32, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1465,7 +1545,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm); + double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, 32, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1485,7 +1565,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sh = vfpFpSToFixed(FpOp1, true, true, imm); + FpDest_sh = vfpFpToFixed<float>(FpOp1, true, 16, imm); __asm__ __volatile__("" :: "m" (FpDest_sh)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1505,7 +1585,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, true, true, imm); + uint64_t result = vfpFpToFixed<double>(cOp1, true, 16, imm); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -1526,7 +1606,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uh = vfpFpSToFixed(FpOp1, false, true, imm); + FpDest_uh = vfpFpToFixed<float>(FpOp1, false, 16, imm); __asm__ __volatile__("" :: "m" (FpDest_uh)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1546,7 +1626,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, false, true, imm); + uint64_t mid = vfpFpToFixed<double>(cOp1, false, 16, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1566,7 +1646,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_sh) : "m" (FpOp1_sh)); - FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sh, true, imm); + FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sh, 16, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1585,7 +1665,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm); + double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, 16, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1605,7 +1685,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_uh) : "m" (FpOp1_uh)); - FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uh, true, imm); + FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uh, 16, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1624,7 +1704,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm); + double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, 16, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); diff --git a/src/arch/arm/isa/insts/fp64.isa b/src/arch/arm/isa/insts/fp64.isa new file mode 100644 index 000000000..95dec5062 --- /dev/null +++ b/src/arch/arm/isa/insts/fp64.isa @@ -0,0 +1,811 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Thomas Grocutt +// Edmund Grimley Evans + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + fmovImmSCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = bits(imm, 31, 0); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp", + { "code": fmovImmSCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegImmOpDeclare.subst(fmovImmSIop); + decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop); + exec_output += BasicExecute.subst(fmovImmSIop); + + fmovImmDCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = bits(imm, 31, 0); + AA64FpDestP1_uw = bits(imm, 63, 32); + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp", + { "code": fmovImmDCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegImmOpDeclare.subst(fmovImmDIop); + decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop); + exec_output += BasicExecute.subst(fmovImmDIop); + + fmovRegSCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = AA64FpOp1P0_uw; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp", + { "code": fmovRegSCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegSIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop); + exec_output += BasicExecute.subst(fmovRegSIop); + + fmovRegDCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = AA64FpOp1P0_uw; + AA64FpDestP1_uw = AA64FpOp1P1_uw; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp", + { "code": fmovRegDCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop); + exec_output += BasicExecute.subst(fmovRegDIop); + + fmovCoreRegWCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = WOp1_uw; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp", + { "code": fmovCoreRegWCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop); + exec_output += BasicExecute.subst(fmovCoreRegWIop); + + fmovCoreRegXCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = XOp1_ud; + AA64FpDestP1_uw = XOp1_ud >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp", + { "code": fmovCoreRegXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop); + exec_output += BasicExecute.subst(fmovCoreRegXIop); + + fmovUCoreRegXCode = vfp64EnabledCheckCode + ''' + AA64FpDestP2_uw = XOp1_ud; + AA64FpDestP3_uw = XOp1_ud >> 32; + ''' + fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp", + { "code": fmovUCoreRegXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop); + exec_output += BasicExecute.subst(fmovUCoreRegXIop); + + fmovRegCoreWCode = vfp64EnabledCheckCode + ''' + WDest = AA64FpOp1P0_uw; + ''' + fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp", + { "code": fmovRegCoreWCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop); + exec_output += BasicExecute.subst(fmovRegCoreWIop); + + fmovRegCoreXCode = vfp64EnabledCheckCode + ''' + XDest = ( ((uint64_t) AA64FpOp1P1_uw) << 32) | AA64FpOp1P0_uw; + ''' + fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp", + { "code": fmovRegCoreXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop); + exec_output += BasicExecute.subst(fmovRegCoreXIop); + + fmovURegCoreXCode = vfp64EnabledCheckCode + ''' + XDest = ( ((uint64_t) AA64FpOp1P3_uw) << 32) | AA64FpOp1P2_uw; + ''' + fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp", + { "code": fmovURegCoreXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop); + exec_output += BasicExecute.subst(fmovURegCoreXIop); +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + singleIntConvCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + singleIntConvCode2 = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cOp2 = AA64FpOp2P0_uw; + uint32_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + singleBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \ + "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)" + singleUnaryOp = "unaryOp(fpscr, AA64FpOp1P0, %(func)s, fpscr.fz, fpscr.rMode)" + + doubleIntConvCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw; + uint64_t cDest = %(op)s; + AA64FpDestP0_uw = cDest & 0xFFFFFFFF; + AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + doubleIntConvCode2 = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw; + uint64_t cOp2 = ((uint64_t) AA64FpOp2P1_uw) << 32 | AA64FpOp2P0_uw; + uint64_t cDest = %(op)s; + AA64FpDestP0_uw = cDest & 0xFFFFFFFF; + AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + doubleBinOp = ''' + binaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), + dbl(AA64FpOp2P0_uw, AA64FpOp2P1_uw), + %(func)s, fpscr.fz, fpscr.dn, fpscr.rMode); + ''' + doubleUnaryOp = ''' + unaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), %(func)s, + fpscr.fz, fpscr.rMode) + ''' + + def buildTernaryFpOp(name, opClass, sOp, dOp): + global header_output, decoder_output, exec_output + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + ''' + if isDouble: + code += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32; + uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32; + uint64_t cDest; + ''' "cDest = " + dOp + ";" + ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' + else: + code += ''' + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cOp2 = AA64FpOp2P0_uw; + uint32_t cOp3 = AA64FpOp3P0_uw; + uint32_t cDest; + ''' "cDest = " + sOp + ";" + ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' + code += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + iop = InstObjParams(name.lower(), name + ("D" if isDouble else "S"), + "FpRegRegRegRegOp", + { "code": code, "op_class": opClass }, []) + + header_output += AA64FpRegRegRegRegOpDeclare.subst(iop) + decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildTernaryFpOp("FMAdd", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)", + "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" ) + buildTernaryFpOp("FMSub", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)", + "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" ) + buildTernaryFpOp("FNMAdd", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), fplibNeg<uint32_t>(cOp1), cOp2, fpscr)", + "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" ) + buildTernaryFpOp("FNMSub", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)", + "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" ) + + def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp): + global header_output, decoder_output, exec_output + + code = singleIntConvCode2 % { "op": singleOp } + sIop = InstObjParams(name, Name + "S", base, + { "code": code, + "op_class": opClass }, []) + + code = doubleIntConvCode2 % { "op": doubleOp } + dIop = InstObjParams(name, Name + "D", base, + { "code": code, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + for iop in sIop, dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "SimdFloatAddOp", + "fplibAdd<uint32_t>(cOp1, cOp2, fpscr)", + "fplibAdd<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "SimdFloatAddOp", + "fplibSub<uint32_t>(cOp1, cOp2, fpscr)", + "fplibSub<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "SimdFloatDivOp", + "fplibDiv<uint32_t>(cOp1, cOp2, fpscr)", + "fplibDiv<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "SimdFloatMultOp", + "fplibMul<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMul<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "SimdFloatMultOp", + "fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))", + "fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))") + buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMin<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMin<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMax<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMax<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)") + + def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None): + if doubleOp is None: + doubleOp = singleOp + global header_output, decoder_output, exec_output + + code = singleIntConvCode % { "op": singleOp } + sIop = InstObjParams(name, Name + "S", base, + { "code": code, + "op_class": opClass }, []) + code = doubleIntConvCode % { "op": doubleOp } + dIop = InstObjParams(name, Name + "D", base, + { "code": code, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + for iop in sIop, dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "SimdFloatSqrtOp", + "fplibSqrt<uint32_t>(cOp1, fpscr)", "fplibSqrt<uint64_t>(cOp1, fpscr)") + + def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp, + doubleOp = None, isIntConv = True): + if doubleOp is None: + doubleOp = singleOp + global header_output, decoder_output, exec_output + + if isIntConv: + sCode = singleIntConvCode + dCode = doubleIntConvCode + else: + sCode = singleCode + dCode = doubleCode + + for code, op, suffix in [[sCode, singleOp, "S"], + [dCode, doubleOp, "D"]]: + iop = InstObjParams(name, Name + suffix, base, + { "code": code % { "op": op }, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "SimdFloatMiscOp", + "fplibNeg<uint32_t>(cOp1)", "fplibNeg<uint64_t>(cOp1)") + buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "SimdFloatMiscOp", + "fplibAbs<uint32_t>(cOp1)", "fplibAbs<uint64_t>(cOp1)") + buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)") + buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)") + buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)") + buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)") + buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)") + buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)") + buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), true, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)") +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # Creates the integer to floating point instructions, including variants for + # signed/unsigned, float/double, etc + for regL, regOpL, width in [["W", "w", 32], + ["X", "d", 64]]: + for isDouble in True, False: + for us, usCode in [["U", "uint%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)], + ["S", "int%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)]]: + fcvtIntFpDCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s + ''' %(usCode) + + if isDouble: + fcvtIntFpDCode += ''' + uint64_t cDest = fplibFixedToFP<uint64_t>(cSrc, 0, + %s, FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' % ("true" if us == "U" else "false") + else: + fcvtIntFpDCode += ''' + uint32_t cDest = fplibFixedToFP<uint32_t>(cSrc, 0, + %s, FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' % ("true" if us == "U" else "false") + fcvtIntFpDCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else "S") + mnem = "%scvtf" %(us.lower()) + fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp", + { "code": fcvtIntFpDCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop); + exec_output += BasicExecute.subst(fcvtIntFpDIop); + + # Generates the floating point to integer conversion instructions in various + # variants, eg signed/unsigned + def buildFpCvtIntOp(isDouble, isSigned, isXReg): + global header_output, decoder_output, exec_output + + for rmode, roundingMode in [["N", "FPRounding_TIEEVEN"], + ["P", "FPRounding_POSINF"], + ["M", "FPRounding_NEGINF"], + ["Z", "FPRounding_ZERO"], + ["A", "FPRounding_TIEAWAY"]]: + fcvtFpIntCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc;''' + if isDouble: + fcvtFpIntCode += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + ''' + else: + fcvtFpIntCode += "uint32_t cOp1 = AA64FpOp1P0_uw;" + + fcvtFpIntCode += ''' + %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 0, %s, %s, fpscr); + FpscrExc = fpscr; + ''' %("X" if isXReg else "W", + "64" if isDouble else "32", + "64" if isXReg else "32", + "false" if isSigned else "true", + roundingMode) + + instName = "FcvtFp%sInt%s%s%s" %("S" if isSigned else "U", + "X" if isXReg else "W", + "D" if isDouble else "S", rmode) + mnem = "fcvt%s%s" %(rmode, "s" if isSigned else "u") + fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp", + { "code": fcvtFpIntCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop); + decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop); + exec_output += BasicExecute.subst(fcvtFpIntIop); + + # Now actually do the building with the different variants + for isDouble in True, False: + for isSigned in True, False: + for isXReg in True, False: + buildFpCvtIntOp(isDouble, isSigned, isXReg) + + fcvtFpSFpDCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cDest = fplibConvert<uint32_t, uint64_t>(AA64FpOp1P0_uw, + FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp", + { "code": fcvtFpSFpDCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop); + exec_output += BasicExecute.subst(fcvtFpSFpDIop); + + fcvtFpDFpSCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + AA64FpDestP0_uw = fplibConvert<uint64_t, uint32_t>(cOp1, + FPCRRounding(fpscr), fpscr); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp", + {"code": fcvtFpDFpSCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop); + exec_output += BasicExecute.subst(fcvtFpDFpSIop); + + # Half precision to single or double precision conversion + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s cDest = fplibConvert<uint16_t, uint%s_t>(AA64FpOp1P0_uw, + FPCRRounding(fpscr), fpscr); + ''' % ("uint64_t" if isDouble else "uint32_t", + "64" if isDouble else "32") + if isDouble: + code += ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' + else: + code += ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' + code += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "FcvtFpHFp%s" %("D" if isDouble else "S") + fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp", + { "code": code, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop); + exec_output += BasicExecute.subst(fcvtFpHFpIop); + + # single or double precision to Half precision conversion + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s; + AA64FpDestP0_uw = fplibConvert<uint%s_t, uint16_t>(cOp1, + FPCRRounding(fpscr), fpscr); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' % ("uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32" + if isDouble else "uint32_t cOp1 = AA64FpOp1P0_uw", + "64" if isDouble else "32") + + instName = "FcvtFp%sFpH" %("D" if isDouble else "S") + fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp", + { "code": code, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop); + exec_output += BasicExecute.subst(fcvtFpFpHIop); + + # Build the various versions of the floating point compare instructions + def buildFCmpOp(isQuiet, isDouble, isImm): + global header_output, decoder_output, exec_output + + fcmpCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s cOp1 = %s; + ''' % ("uint64_t" if isDouble else "uint32_t", + "AA64FpDestP0_uw | (uint64_t)AA64FpDestP1_uw << 32" + if isDouble else "AA64FpDestP0_uw") + if isImm: + fcmpCode += ''' + %s cOp2 = imm; + ''' % ("uint64_t" if isDouble else "uint32_t") + else: + fcmpCode += ''' + %s cOp2 = %s; + ''' % ("uint64_t" if isDouble else "uint32_t", + "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32" + if isDouble else "AA64FpOp1P0_uw") + fcmpCode += ''' + int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr); + CondCodesNZ = cc >> 2 & 3; + CondCodesC = cc >> 1 & 1; + CondCodesV = cc & 1; + FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; + ''' % ("64" if isDouble else "32", "false" if isQuiet else "true") + + typeName = "Imm" if isImm else "Reg" + instName = "FCmp%s%s%s" %("" if isQuiet else "E", typeName, + "D" if isDouble else "S") + fcmpIop = InstObjParams("fcmp%s" %("" if isQuiet else "e"), instName, + "FpReg%sOp" %(typeName), + {"code": fcmpCode, + "op_class": "SimdFloatCmpOp"}, []) + + declareTemp = eval("FpReg%sOpDeclare" %(typeName)); + constructorTemp = eval("AA64FpReg%sOpConstructor" %(typeName)); + header_output += declareTemp.subst(fcmpIop); + decoder_output += constructorTemp.subst(fcmpIop); + exec_output += BasicExecute.subst(fcmpIop); + + for isQuiet in True, False: + for isDouble in True, False: + for isImm in True, False: + buildFCmpOp(isQuiet, isDouble, isImm) + + # Build the various versions of the conditional floating point compare + # instructions + def buildFCCmpOp(isQuiet, isDouble): + global header_output, decoder_output, exec_output + + fccmpCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + %s cOp1 = %s; + %s cOp2 = %s; + int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr); + CondCodesNZ = cc >> 2 & 3; + CondCodesC = cc >> 1 & 1; + CondCodesV = cc & 1; + } else { + CondCodesNZ = (defCc >> 2) & 0x3; + CondCodesC = (defCc >> 1) & 0x1; + CondCodesV = defCc & 0x1; + } + FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; + ''' % ("uint64_t" if isDouble else "uint32_t", + "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32" + if isDouble else "AA64FpOp1P0_uw", + "uint64_t" if isDouble else "uint32_t", + "AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32" + if isDouble else "AA64FpOp2P0_uw", + "64" if isDouble else "32", "false" if isQuiet else "true") + + instName = "FCCmp%sReg%s" %("" if isQuiet else "E", + "D" if isDouble else "S") + fccmpIop = InstObjParams("fccmp%s" %("" if isQuiet else "e"), + instName, "FpCondCompRegOp", + {"code": fccmpCode, + "op_class": "SimdFloatCmpOp"}, []) + header_output += DataXCondCompRegDeclare.subst(fccmpIop); + decoder_output += DataXCondCompRegConstructor.subst(fccmpIop); + exec_output += BasicExecute.subst(fccmpIop); + + for isQuiet in True, False: + for isDouble in True, False: + buildFCCmpOp(isQuiet, isDouble) + +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # Generates the variants of the floating to fixed point instructions + def buildFpCvtFixedOp(isSigned, isDouble, isXReg): + global header_output, decoder_output, exec_output + + fcvtFpFixedCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + ''' + if isDouble: + fcvtFpFixedCode += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + ''' + else: + fcvtFpFixedCode += "uint32_t cOp1 = AA64FpOp1P0_uw;" + fcvtFpFixedCode += ''' + %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 64 - imm, %s, + FPRounding_ZERO, fpscr); + FpscrExc = fpscr; + ''' %("X" if isXReg else "W", + "64" if isDouble else "32", + "64" if isXReg else "32", + "false" if isSigned else "true") + + instName = "FcvtFp%sFixed%s%s" %("S" if isSigned else "U", + "D" if isDouble else "S", + "X" if isXReg else "W") + mnem = "fcvtz%s" %("s" if isSigned else "u") + fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp", + { "code": fcvtFpFixedCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop); + decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop); + exec_output += BasicExecute.subst(fcvtFpFixedIop); + + # Generates the variants of the fixed to floating point instructions + def buildFixedCvtFpOp(isSigned, isDouble, isXReg): + global header_output, decoder_output, exec_output + + srcRegType = "X" if isXReg else "W" + fcvtFixedFpCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s result = fplibFixedToFP<uint%s_t>((%s%s_t)%sOp1, 64 - imm, + %s, FPCRRounding(fpscr), fpscr); + ''' %("uint64_t" if isDouble else "uint32_t", + "64" if isDouble else "32", + "int" if isSigned else "uint", "64" if isXReg else "32", + srcRegType, + "false" if isSigned else "true") + if isDouble: + fcvtFixedFpCode += ''' + AA64FpDestP0_uw = result; + AA64FpDestP1_uw = result >> 32; + ''' + else: + fcvtFixedFpCode += ''' + AA64FpDestP0_uw = result; + AA64FpDestP1_uw = 0; + ''' + fcvtFixedFpCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "Fcvt%sFixedFp%s%s" %("S" if isSigned else "U", + "D" if isDouble else "S", + srcRegType) + mnem = "%scvtf" %("s" if isSigned else "u") + fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp", + { "code": fcvtFixedFpCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop); + decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop); + exec_output += BasicExecute.subst(fcvtFixedFpIop); + + # loop over the variants building the instructions for each + for isXReg in True, False: + for isDouble in True, False: + for isSigned in True, False: + buildFpCvtFixedOp(isSigned, isDouble, isXReg) + buildFixedCvtFpOp(isSigned, isDouble, isXReg) +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + for isDouble in True, False: + code = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + AA64FpDestP0_uw = AA64FpOp1P0_uw; + ''' + if isDouble: + code += ''' + AA64FpDestP1_uw = AA64FpOp1P1_uw; + } else { + AA64FpDestP0_uw = AA64FpOp2P0_uw; + AA64FpDestP1_uw = AA64FpOp2P1_uw; + } + ''' + else: + code += ''' + } else { + AA64FpDestP0_uw = AA64FpOp2P0_uw; + } + AA64FpDestP1_uw = 0; + ''' + code += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + + iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"), + "FpCondSelOp", code) + header_output += DataXCondSelDeclare.subst(iop) + decoder_output += DataXCondSelConstructor.subst(iop) + exec_output += BasicExecute.subst(iop) +}}; diff --git a/src/arch/arm/isa/insts/insts.isa b/src/arch/arm/isa/insts/insts.isa index c01e87df8..9d90f7779 100644 --- a/src/arch/arm/isa/insts/insts.isa +++ b/src/arch/arm/isa/insts/insts.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -37,6 +37,9 @@ // // Authors: Gabe Black +//AArch64 instructions +##include "aarch64.isa" + //Basic forms of various templates ##include "basic.isa" @@ -46,8 +49,15 @@ //Loads of a single item ##include "ldr.isa" +//Loads of a single item, AArch64 +##include "ldr64.isa" + //Miscellaneous instructions that don't fit elsewhere ##include "misc.isa" +##include "misc64.isa" + +//Stores of a single item, AArch64 +##include "str64.isa" //Stores of a single item ##include "str.isa" @@ -61,8 +71,12 @@ //Data processing instructions ##include "data.isa" +//AArch64 data processing instructions +##include "data64.isa" + //Branches ##include "branch.isa" +##include "branch64.isa" //Multiply ##include "mult.isa" @@ -72,9 +86,14 @@ //VFP ##include "fp.isa" +##include "fp64.isa" //Neon ##include "neon.isa" +//AArch64 Neon +##include "neon64.isa" +##include "neon64_mem.isa" + //m5 Psuedo-ops ##include "m5ops.isa" diff --git a/src/arch/arm/isa/insts/ldr.isa b/src/arch/arm/isa/insts/ldr.isa index f599fa4b9..6bfe40118 100644 --- a/src/arch/arm/isa/insts/ldr.isa +++ b/src/arch/arm/isa/insts/ldr.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -38,6 +38,7 @@ // Authors: Gabe Black let {{ + import math header_output = "" decoder_output = "" @@ -78,7 +79,8 @@ let {{ newDecoder, newExec) = self.fillTemplates(self.name, self.Name, codeBlobs, self.memFlags, instFlags, base, - wbDecl, pcDecl, self.rasPop) + wbDecl, pcDecl, self.rasPop, + self.size, self.sign) header_output += newHeader decoder_output += newDecoder @@ -160,7 +162,7 @@ let {{ self.size, self.sign, self.user) # Add memory request flags where necessary - self.memFlags.append("%d" % (self.size - 1)) + self.memFlags.append("%d" % int(math.log(self.size, 2))) if self.user: self.memFlags.append("ArmISA::TLB::UserMode") diff --git a/src/arch/arm/isa/insts/ldr64.isa b/src/arch/arm/isa/insts/ldr64.isa new file mode 100644 index 000000000..78460f661 --- /dev/null +++ b/src/arch/arm/isa/insts/ldr64.isa @@ -0,0 +1,446 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + class LoadInst64(LoadStoreInst): + execBase = 'Load64' + micro = False + + def __init__(self, mnem, Name, size=4, sign=False, user=False, + literal=False, flavor="normal", top=False): + super(LoadInst64, self).__init__() + + self.name = mnem + self.Name = Name + self.size = size + self.sign = sign + self.user = user + self.literal = literal + self.flavor = flavor + self.top = top + + self.memFlags = ["ArmISA::TLB::MustBeOne"] + self.instFlags = [] + self.codeBlobs = {"postacc_code" : ""} + + # Add memory request flags where necessary + if self.user: + self.memFlags.append("ArmISA::TLB::UserMode") + + if self.flavor == "dprefetch": + self.memFlags.append("Request::PREFETCH") + self.instFlags = ['IsDataPrefetch'] + elif self.flavor == "iprefetch": + self.memFlags.append("Request::PREFETCH") + self.instFlags = ['IsInstPrefetch'] + if self.micro: + self.instFlags.append("IsMicroop") + + if self.flavor in ("acexp", "exp"): + # For exclusive pair ops alignment check is based on total size + self.memFlags.append("%d" % int(math.log(self.size, 2) + 1)) + elif not (self.size == 16 and self.top): + # Only the first microop should perform alignment checking. + self.memFlags.append("%d" % int(math.log(self.size, 2))) + + if self.flavor not in ("acquire", "acex", "exclusive", + "acexp", "exp"): + self.memFlags.append("ArmISA::TLB::AllowUnaligned") + + if self.flavor in ("acquire", "acex", "acexp"): + self.instFlags.extend(["IsMemBarrier", + "IsWriteBarrier", + "IsReadBarrier"]) + if self.flavor in ("acex", "exclusive", "exp", "acexp"): + self.memFlags.append("Request::LLSC") + + def buildEACode(self): + # Address computation code + eaCode = "" + if self.flavor == "fp": + eaCode += vfp64EnabledCheckCode + + if self.literal: + eaCode += "EA = RawPC" + else: + eaCode += SPAlignmentCheckCode + "EA = XBase" + + if self.size == 16: + if self.top: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 0 : 8)" + else: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 8 : 0)" + if not self.post: + eaCode += self.offset + eaCode += ";" + + self.codeBlobs["ea_code"] = eaCode + + def emitHelper(self, base='Memory64', wbDecl=None): + global header_output, decoder_output, exec_output + + # If this is a microop itself, don't allow anything that would + # require further microcoding. + if self.micro: + assert not wbDecl + + fa_code = None + if not self.micro and self.flavor in ("normal", "widen", "acquire"): + fa_code = ''' + fault->annotate(ArmFault::SAS, %s); + fault->annotate(ArmFault::SSE, %s); + fault->annotate(ArmFault::SRT, dest); + fault->annotate(ArmFault::SF, %s); + fault->annotate(ArmFault::AR, %s); + ''' % ("0" if self.size == 1 else + "1" if self.size == 2 else + "2" if self.size == 4 else "3", + "true" if self.sign else "false", + "true" if (self.size == 8 or + self.flavor == "widen") else "false", + "true" if self.flavor == "acquire" else "false") + + (newHeader, newDecoder, newExec) = \ + self.fillTemplates(self.name, self.Name, self.codeBlobs, + self.memFlags, self.instFlags, + base, wbDecl, faCode=fa_code) + + header_output += newHeader + decoder_output += newDecoder + exec_output += newExec + + class LoadImmInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadImmInst64, self).__init__(*args, **kargs) + self.offset = " + imm" + + self.wbDecl = "MicroAddXiUop(machInst, base, base, imm);" + + class LoadRegInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadRegInst64, self).__init__(*args, **kargs) + self.offset = " + extendReg64(XOffset, type, shiftAmt, 64)" + + self.wbDecl = \ + "MicroAddXERegUop(machInst, base, base, " + \ + " offset, type, shiftAmt);" + + class LoadRawRegInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadRawRegInst64, self).__init__(*args, **kargs) + self.offset = "" + + class LoadSingle64(LoadInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor in ("dprefetch", "iprefetch"): + accCode = 'uint64_t temp M5_VAR_USED = Mem%s;' + elif self.flavor == "fp": + if self.size in (1, 2, 4): + accCode = ''' + AA64FpDestP0_uw = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + elif self.size == 8 or (self.size == 16 and not self.top): + accCode = ''' + uint64_t data = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP0_uw = (uint32_t)data; + AA64FpDestP1_uw = (data >> 32); + ''' + # Only zero out the other half if this isn't part of a + # pair of 8 byte loads implementing a 16 byte load. + if self.size == 8: + accCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + elif self.size == 16 and self.top: + accCode = ''' + uint64_t data = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP2_uw = (uint32_t)data; + AA64FpDestP3_uw = (data >> 32); + ''' + elif self.flavor == "widen" or self.size == 8: + accCode = "XDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));" + else: + accCode = "WDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));" + if self.size == 16: + accCode = accCode % buildMemSuffix(self.sign, 8) + else: + accCode = accCode % buildMemSuffix(self.sign, self.size) + + self.codeBlobs["memacc_code"] = accCode + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class LoadDouble64(LoadInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor == "fp": + accCode = ''' + uint64_t data = cSwap(Mem_ud, isBigEndian64(xc->tcBase())); + AA64FpDestP0_uw = (uint32_t)data; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + AA64FpDest2P0_uw = (data >> 32); + AA64FpDest2P1_uw = 0; + AA64FpDest2P2_uw = 0; + AA64FpDest2P3_uw = 0; + ''' + else: + if self.sign: + if self.size == 4: + accCode = ''' + uint64_t data = cSwap(Mem_ud, + isBigEndian64(xc->tcBase())); + XDest = sext<32>((uint32_t)data); + XDest2 = sext<32>(data >> 32); + ''' + elif self.size == 8: + accCode = ''' + XDest = sext<64>(Mem_tud.a); + XDest2 = sext<64>(Mem_tud.b); + ''' + else: + if self.size == 4: + accCode = ''' + uint64_t data = cSwap(Mem_ud, + isBigEndian64(xc->tcBase())); + XDest = (uint32_t)data; + XDest2 = data >> 32; + ''' + elif self.size == 8: + accCode = ''' + XDest = Mem_tud.a; + XDest2 = Mem_tud.b; + ''' + self.codeBlobs["memacc_code"] = accCode + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class LoadImm64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryImm64' + writeback = False + post = False + + class LoadPre64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPreIndex64' + writeback = True + post = False + + class LoadPost64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPostIndex64' + writeback = True + post = True + + class LoadReg64(LoadRegInst64, LoadSingle64): + decConstBase = 'LoadStoreReg64' + base = 'ArmISA::MemoryReg64' + writeback = False + post = False + + class LoadRaw64(LoadRawRegInst64, LoadSingle64): + decConstBase = 'LoadStoreRaw64' + base = 'ArmISA::MemoryRaw64' + writeback = False + post = False + + class LoadEx64(LoadRawRegInst64, LoadSingle64): + decConstBase = 'LoadStoreEx64' + base = 'ArmISA::MemoryEx64' + writeback = False + post = False + + class LoadLit64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreLit64' + base = 'ArmISA::MemoryLiteral64' + writeback = False + post = False + + def buildLoads64(mnem, NameBase, size, sign, flavor="normal"): + LoadImm64(mnem, NameBase + "_IMM", size, sign, flavor=flavor).emit() + LoadPre64(mnem, NameBase + "_PRE", size, sign, flavor=flavor).emit() + LoadPost64(mnem, NameBase + "_POST", size, sign, flavor=flavor).emit() + LoadReg64(mnem, NameBase + "_REG", size, sign, flavor=flavor).emit() + + buildLoads64("ldrb", "LDRB64", 1, False) + buildLoads64("ldrsb", "LDRSBW64", 1, True) + buildLoads64("ldrsb", "LDRSBX64", 1, True, flavor="widen") + buildLoads64("ldrh", "LDRH64", 2, False) + buildLoads64("ldrsh", "LDRSHW64", 2, True) + buildLoads64("ldrsh", "LDRSHX64", 2, True, flavor="widen") + buildLoads64("ldrsw", "LDRSW64", 4, True, flavor="widen") + buildLoads64("ldr", "LDRW64", 4, False) + buildLoads64("ldr", "LDRX64", 8, False) + buildLoads64("ldr", "LDRBFP64", 1, False, flavor="fp") + buildLoads64("ldr", "LDRHFP64", 2, False, flavor="fp") + buildLoads64("ldr", "LDRSFP64", 4, False, flavor="fp") + buildLoads64("ldr", "LDRDFP64", 8, False, flavor="fp") + + LoadImm64("prfm", "PRFM64_IMM", 8, flavor="dprefetch").emit() + LoadReg64("prfm", "PRFM64_REG", 8, flavor="dprefetch").emit() + LoadLit64("prfm", "PRFM64_LIT", 8, literal=True, flavor="dprefetch").emit() + LoadImm64("prfum", "PRFUM64_IMM", 8, flavor="dprefetch").emit() + + LoadImm64("ldurb", "LDURB64_IMM", 1, False).emit() + LoadImm64("ldursb", "LDURSBW64_IMM", 1, True).emit() + LoadImm64("ldursb", "LDURSBX64_IMM", 1, True, flavor="widen").emit() + LoadImm64("ldurh", "LDURH64_IMM", 2, False).emit() + LoadImm64("ldursh", "LDURSHW64_IMM", 2, True).emit() + LoadImm64("ldursh", "LDURSHX64_IMM", 2, True, flavor="widen").emit() + LoadImm64("ldursw", "LDURSW64_IMM", 4, True, flavor="widen").emit() + LoadImm64("ldur", "LDURW64_IMM", 4, False).emit() + LoadImm64("ldur", "LDURX64_IMM", 8, False).emit() + LoadImm64("ldur", "LDURBFP64_IMM", 1, flavor="fp").emit() + LoadImm64("ldur", "LDURHFP64_IMM", 2, flavor="fp").emit() + LoadImm64("ldur", "LDURSFP64_IMM", 4, flavor="fp").emit() + LoadImm64("ldur", "LDURDFP64_IMM", 8, flavor="fp").emit() + + LoadImm64("ldtrb", "LDTRB64_IMM", 1, False, True).emit() + LoadImm64("ldtrsb", "LDTRSBW64_IMM", 1, True, True).emit() + LoadImm64("ldtrsb", "LDTRSBX64_IMM", 1, True, True, flavor="widen").emit() + LoadImm64("ldtrh", "LDTRH64_IMM", 2, False, True).emit() + LoadImm64("ldtrsh", "LDTRSHW64_IMM", 2, True, True).emit() + LoadImm64("ldtrsh", "LDTRSHX64_IMM", 2, True, True, flavor="widen").emit() + LoadImm64("ldtrsw", "LDTRSW64_IMM", 4, True, flavor="widen").emit() + LoadImm64("ldtr", "LDTRW64_IMM", 4, False, True).emit() + LoadImm64("ldtr", "LDTRX64_IMM", 8, False, True).emit() + + LoadLit64("ldrsw", "LDRSWL64_LIT", 4, True, \ + literal=True, flavor="widen").emit() + LoadLit64("ldr", "LDRWL64_LIT", 4, False, literal=True).emit() + LoadLit64("ldr", "LDRXL64_LIT", 8, False, literal=True).emit() + LoadLit64("ldr", "LDRSFP64_LIT", 4, literal=True, flavor="fp").emit() + LoadLit64("ldr", "LDRDFP64_LIT", 8, literal=True, flavor="fp").emit() + + LoadRaw64("ldar", "LDARX64", 8, flavor="acquire").emit() + LoadRaw64("ldar", "LDARW64", 4, flavor="acquire").emit() + LoadRaw64("ldarh", "LDARH64", 2, flavor="acquire").emit() + LoadRaw64("ldarb", "LDARB64", 1, flavor="acquire").emit() + + LoadEx64("ldaxr", "LDAXRX64", 8, flavor="acex").emit() + LoadEx64("ldaxr", "LDAXRW64", 4, flavor="acex").emit() + LoadEx64("ldaxrh", "LDAXRH64", 2, flavor="acex").emit() + LoadEx64("ldaxrb", "LDAXRB64", 1, flavor="acex").emit() + + LoadEx64("ldxr", "LDXRX64", 8, flavor="exclusive").emit() + LoadEx64("ldxr", "LDXRW64", 4, flavor="exclusive").emit() + LoadEx64("ldxrh", "LDXRH64", 2, flavor="exclusive").emit() + LoadEx64("ldxrb", "LDXRB64", 1, flavor="exclusive").emit() + + class LoadImmU64(LoadImm64): + decConstBase = 'LoadStoreImmU64' + micro = True + + class LoadImmDU64(LoadImmInst64, LoadDouble64): + decConstBase = 'LoadStoreImmDU64' + base = 'ArmISA::MemoryDImm64' + micro = True + post = False + writeback = False + + class LoadImmDouble64(LoadImmInst64, LoadDouble64): + decConstBase = 'LoadStoreImmDU64' + base = 'ArmISA::MemoryDImm64' + micro = False + post = False + writeback = False + + class LoadRegU64(LoadReg64): + decConstBase = 'LoadStoreRegU64' + micro = True + + class LoadLitU64(LoadLit64): + decConstBase = 'LoadStoreLitU64' + micro = True + + LoadImmDouble64("ldaxp", "LDAXPW64", 4, flavor="acexp").emit() + LoadImmDouble64("ldaxp", "LDAXPX64", 8, flavor="acexp").emit() + LoadImmDouble64("ldxp", "LDXPW64", 4, flavor="exp").emit() + LoadImmDouble64("ldxp", "LDXPX64", 8, flavor="exp").emit() + + LoadImmU64("ldrxi_uop", "MicroLdrXImmUop", 8).emit() + LoadRegU64("ldrxr_uop", "MicroLdrXRegUop", 8).emit() + LoadLitU64("ldrxl_uop", "MicroLdrXLitUop", 8, literal=True).emit() + LoadImmU64("ldrfpxi_uop", "MicroLdrFpXImmUop", 8, flavor="fp").emit() + LoadRegU64("ldrfpxr_uop", "MicroLdrFpXRegUop", 8, flavor="fp").emit() + LoadLitU64("ldrfpxl_uop", "MicroLdrFpXLitUop", 8, literal=True, + flavor="fp").emit() + LoadImmU64("ldrqbfpxi_uop", "MicroLdrQBFpXImmUop", + 16, flavor="fp", top = False).emit() + LoadRegU64("ldrqbfpxr_uop", "MicroLdrQBFpXRegUop", + 16, flavor="fp", top = False).emit() + LoadLitU64("ldrqbfpxl_uop", "MicroLdrQBFpXLitUop", + 16, literal=True, flavor="fp", top = False).emit() + LoadImmU64("ldrqtfpxi_uop", "MicroLdrQTFpXImmUop", + 16, flavor="fp", top = True).emit() + LoadRegU64("ldrqtfpxr_uop", "MicroLdrQTFpXRegUop", + 16, flavor="fp", top = True).emit() + LoadLitU64("ldrqtfpxl_uop", "MicroLdrQTFpXLitUop", + 16, literal=True, flavor="fp", top = True).emit() + LoadImmDU64("ldrduxi_uop", "MicroLdrDUXImmUop", 4, sign=False).emit() + LoadImmDU64("ldrdsxi_uop", "MicroLdrDSXImmUop", 4, sign=True).emit() + LoadImmDU64("ldrdfpxi_uop", "MicroLdrDFpXImmUop", 4, flavor="fp").emit() +}}; diff --git a/src/arch/arm/isa/insts/m5ops.isa b/src/arch/arm/isa/insts/m5ops.isa index 06ed34af8..928d1be0d 100644 --- a/src/arch/arm/isa/insts/m5ops.isa +++ b/src/arch/arm/isa/insts/m5ops.isa @@ -1,5 +1,5 @@ // -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -58,6 +58,7 @@ let {{ armCode = ''' PseudoInst::arm(xc->tcBase()); ''' + armIop = InstObjParams("arm", "Arm", "PredOp", { "code": armCode, "predicate_test": predicateTest }, @@ -69,6 +70,7 @@ let {{ quiesceCode = ''' PseudoInst::quiesce(xc->tcBase()); ''' + quiesceIop = InstObjParams("quiesce", "Quiesce", "PredOp", { "code": quiesceCode, "predicate_test": predicateTest }, @@ -81,6 +83,10 @@ let {{ PseudoInst::quiesceNs(xc->tcBase(), join32to64(R1, R0)); ''' + quiesceNsCode64 = ''' + PseudoInst::quiesceNs(xc->tcBase(), X0); + ''' + quiesceNsIop = InstObjParams("quiesceNs", "QuiesceNs", "PredOp", { "code": quiesceNsCode, "predicate_test": predicateTest }, @@ -89,10 +95,22 @@ let {{ decoder_output += BasicConstructor.subst(quiesceNsIop) exec_output += QuiescePredOpExecute.subst(quiesceNsIop) + quiesceNsIop = InstObjParams("quiesceNs", "QuiesceNs64", "PredOp", + { "code": quiesceNsCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsQuiesce"]) + header_output += BasicDeclare.subst(quiesceNsIop) + decoder_output += BasicConstructor.subst(quiesceNsIop) + exec_output += QuiescePredOpExecute.subst(quiesceNsIop) + quiesceCyclesCode = ''' PseudoInst::quiesceCycles(xc->tcBase(), join32to64(R1, R0)); ''' + quiesceCyclesCode64 = ''' + PseudoInst::quiesceCycles(xc->tcBase(), X0); + ''' + quiesceCyclesIop = InstObjParams("quiesceCycles", "QuiesceCycles", "PredOp", { "code": quiesceCyclesCode, "predicate_test": predicateTest }, @@ -101,12 +119,23 @@ let {{ decoder_output += BasicConstructor.subst(quiesceCyclesIop) exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop) + quiesceCyclesIop = InstObjParams("quiesceCycles", "QuiesceCycles64", "PredOp", + { "code": quiesceCyclesCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsQuiesce", "IsUnverifiable"]) + header_output += BasicDeclare.subst(quiesceCyclesIop) + decoder_output += BasicConstructor.subst(quiesceCyclesIop) + exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop) + quiesceTimeCode = ''' uint64_t qt_val = PseudoInst::quiesceTime(xc->tcBase()); R0 = bits(qt_val, 31, 0); R1 = bits(qt_val, 63, 32); ''' + quiesceTimeCode64 = ''' + X0 = PseudoInst::quiesceTime(xc->tcBase()); + ''' quiesceTimeIop = InstObjParams("quiesceTime", "QuiesceTime", "PredOp", { "code": quiesceTimeCode, "predicate_test": predicateTest }, @@ -115,12 +144,23 @@ let {{ decoder_output += BasicConstructor.subst(quiesceTimeIop) exec_output += PredOpExecute.subst(quiesceTimeIop) + quiesceTimeIop = InstObjParams("quiesceTime", "QuiesceTime64", "PredOp", + { "code": quiesceTimeCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(quiesceTimeIop) + decoder_output += BasicConstructor.subst(quiesceTimeIop) + exec_output += PredOpExecute.subst(quiesceTimeIop) + rpnsCode = ''' uint64_t rpns_val = PseudoInst::rpns(xc->tcBase()); R0 = bits(rpns_val, 31, 0); R1 = bits(rpns_val, 63, 32); ''' + rpnsCode64 = ''' + X0 = PseudoInst::rpns(xc->tcBase()); + ''' rpnsIop = InstObjParams("rpns", "Rpns", "PredOp", { "code": rpnsCode, "predicate_test": predicateTest }, @@ -129,10 +169,22 @@ let {{ decoder_output += BasicConstructor.subst(rpnsIop) exec_output += PredOpExecute.subst(rpnsIop) + rpnsIop = InstObjParams("rpns", "Rpns64", "PredOp", + { "code": rpnsCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(rpnsIop) + decoder_output += BasicConstructor.subst(rpnsIop) + exec_output += PredOpExecute.subst(rpnsIop) + wakeCpuCode = ''' PseudoInst::wakeCPU(xc->tcBase(), join32to64(R1,R0)); ''' + wakeCpuCode64 = ''' + PseudoInst::wakeCPU(xc->tcBase(), X0); + ''' + wakeCPUIop = InstObjParams("wakeCPU", "WakeCPU", "PredOp", { "code": wakeCpuCode, "predicate_test": predicateTest }, @@ -141,6 +193,14 @@ let {{ decoder_output += BasicConstructor.subst(wakeCPUIop) exec_output += PredOpExecute.subst(wakeCPUIop) + wakeCPUIop = InstObjParams("wakeCPU", "WakeCPU64", "PredOp", + { "code": wakeCpuCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(wakeCPUIop) + decoder_output += BasicConstructor.subst(wakeCPUIop) + exec_output += PredOpExecute.subst(wakeCPUIop) + deprecated_ivlbIop = InstObjParams("deprecated_ivlb", "Deprecated_ivlb", "PredOp", { "code": '''warn_once("Obsolete M5 ivlb instruction encountered.\\n");''', "predicate_test": predicateTest }) @@ -171,6 +231,11 @@ let {{ m5exit_code = ''' PseudoInst::m5exit(xc->tcBase(), join32to64(R1, R0)); ''' + + m5exit_code64 = ''' + PseudoInst::m5exit(xc->tcBase(), X0); + ''' + m5exitIop = InstObjParams("m5exit", "M5exit", "PredOp", { "code": m5exit_code, "predicate_test": predicateTest }, @@ -190,6 +255,14 @@ let {{ decoder_output += BasicConstructor.subst(m5failIop) exec_output += PredOpExecute.subst(m5failIop) + m5exitIop = InstObjParams("m5exit", "M5exit64", "PredOp", + { "code": m5exit_code64, + "predicate_test": predicateTest }, + ["No_OpClass", "IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5exitIop) + decoder_output += BasicConstructor.subst(m5exitIop) + exec_output += PredOpExecute.subst(m5exitIop) + loadsymbolCode = ''' PseudoInst::loadsymbol(xc->tcBase()); ''' @@ -208,6 +281,10 @@ let {{ R1 = bits(ip_val, 63, 32); ''' + initparamCode64 = ''' + X0 = PseudoInst::initParam(xc->tcBase()); + ''' + initparamIop = InstObjParams("initparam", "Initparam", "PredOp", { "code": initparamCode, "predicate_test": predicateTest }, @@ -216,10 +293,21 @@ let {{ decoder_output += BasicConstructor.subst(initparamIop) exec_output += PredOpExecute.subst(initparamIop) + initparamIop = InstObjParams("initparam", "Initparam64", "PredOp", + { "code": initparamCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(initparamIop) + decoder_output += BasicConstructor.subst(initparamIop) + exec_output += PredOpExecute.subst(initparamIop) + resetstats_code = ''' PseudoInst::resetstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + resetstats_code64 = ''' + PseudoInst::resetstats(xc->tcBase(), X0, X1); + ''' resetstatsIop = InstObjParams("resetstats", "Resetstats", "PredOp", { "code": resetstats_code, "predicate_test": predicateTest }, @@ -228,9 +316,22 @@ let {{ decoder_output += BasicConstructor.subst(resetstatsIop) exec_output += PredOpExecute.subst(resetstatsIop) + resetstatsIop = InstObjParams("resetstats", "Resetstats64", "PredOp", + { "code": resetstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(resetstatsIop) + decoder_output += BasicConstructor.subst(resetstatsIop) + exec_output += PredOpExecute.subst(resetstatsIop) + dumpstats_code = ''' PseudoInst::dumpstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + dumpstats_code64 = ''' + PseudoInst::dumpstats(xc->tcBase(), X0, X1); + ''' + dumpstatsIop = InstObjParams("dumpstats", "Dumpstats", "PredOp", { "code": dumpstats_code, "predicate_test": predicateTest }, @@ -239,9 +340,22 @@ let {{ decoder_output += BasicConstructor.subst(dumpstatsIop) exec_output += PredOpExecute.subst(dumpstatsIop) + dumpstatsIop = InstObjParams("dumpstats", "Dumpstats64", "PredOp", + { "code": dumpstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(dumpstatsIop) + decoder_output += BasicConstructor.subst(dumpstatsIop) + exec_output += PredOpExecute.subst(dumpstatsIop) + dumpresetstats_code = ''' PseudoInst::dumpresetstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + dumpresetstats_code64 = ''' + PseudoInst::dumpresetstats(xc->tcBase(), X0, X1); + ''' + dumpresetstatsIop = InstObjParams("dumpresetstats", "Dumpresetstats", "PredOp", { "code": dumpresetstats_code, "predicate_test": predicateTest }, @@ -250,9 +364,22 @@ let {{ decoder_output += BasicConstructor.subst(dumpresetstatsIop) exec_output += PredOpExecute.subst(dumpresetstatsIop) + dumpresetstatsIop = InstObjParams("dumpresetstats", "Dumpresetstats64", "PredOp", + { "code": dumpresetstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(dumpresetstatsIop) + decoder_output += BasicConstructor.subst(dumpresetstatsIop) + exec_output += PredOpExecute.subst(dumpresetstatsIop) + m5checkpoint_code = ''' PseudoInst::m5checkpoint(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + m5checkpoint_code64 = ''' + PseudoInst::m5checkpoint(xc->tcBase(), X0, X1); + ''' + m5checkpointIop = InstObjParams("m5checkpoint", "M5checkpoint", "PredOp", { "code": m5checkpoint_code, "predicate_test": predicateTest }, @@ -261,11 +388,27 @@ let {{ decoder_output += BasicConstructor.subst(m5checkpointIop) exec_output += PredOpExecute.subst(m5checkpointIop) + m5checkpointIop = InstObjParams("m5checkpoint", "M5checkpoint64", "PredOp", + { "code": m5checkpoint_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(m5checkpointIop) + decoder_output += BasicConstructor.subst(m5checkpointIop) + exec_output += PredOpExecute.subst(m5checkpointIop) + m5readfileCode = ''' int n = 4; uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); R0 = PseudoInst::readfile(xc->tcBase(), R0, join32to64(R3,R2), offset); ''' + + m5readfileCode64 = ''' + int n = 4; + uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); + n = 6; + X0 = PseudoInst::readfile(xc->tcBase(), (uint32_t)X0, X1, offset); + ''' + m5readfileIop = InstObjParams("m5readfile", "M5readfile", "PredOp", { "code": m5readfileCode, "predicate_test": predicateTest }, @@ -274,6 +417,14 @@ let {{ decoder_output += BasicConstructor.subst(m5readfileIop) exec_output += PredOpExecute.subst(m5readfileIop) + m5readfileIop = InstObjParams("m5readfile", "M5readfile64", "PredOp", + { "code": m5readfileCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(m5readfileIop) + decoder_output += BasicConstructor.subst(m5readfileIop) + exec_output += PredOpExecute.subst(m5readfileIop) + m5writefileCode = ''' int n = 4; uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); @@ -282,6 +433,16 @@ let {{ R0 = PseudoInst::writefile(xc->tcBase(), R0, join32to64(R3,R2), offset, filenameAddr); ''' + + m5writefileCode64 = ''' + int n = 4; + uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); + n = 6; + Addr filenameAddr = getArgument(xc->tcBase(), n, sizeof(Addr), false); + X0 = PseudoInst::writefile(xc->tcBase(), (uint32_t)X0, X1, offset, + filenameAddr); + ''' + m5writefileIop = InstObjParams("m5writefile", "M5writefile", "PredOp", { "code": m5writefileCode, "predicate_test": predicateTest }, @@ -290,6 +451,14 @@ let {{ decoder_output += BasicConstructor.subst(m5writefileIop) exec_output += PredOpExecute.subst(m5writefileIop) + m5writefileIop = InstObjParams("m5writefile", "M5writefile64", "PredOp", + { "code": m5writefileCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5writefileIop) + decoder_output += BasicConstructor.subst(m5writefileIop) + exec_output += PredOpExecute.subst(m5writefileIop) + m5breakIop = InstObjParams("m5break", "M5break", "PredOp", { "code": "PseudoInst::debugbreak(xc->tcBase());", "predicate_test": predicateTest }, @@ -309,6 +478,9 @@ let {{ m5addsymbolCode = ''' PseudoInst::addsymbol(xc->tcBase(), join32to64(R1, R0), R2); ''' + m5addsymbolCode64 = ''' + PseudoInst::addsymbol(xc->tcBase(), X0, (uint32_t)X1); + ''' m5addsymbolIop = InstObjParams("m5addsymbol", "M5addsymbol", "PredOp", { "code": m5addsymbolCode, "predicate_test": predicateTest }, @@ -317,8 +489,17 @@ let {{ decoder_output += BasicConstructor.subst(m5addsymbolIop) exec_output += PredOpExecute.subst(m5addsymbolIop) + m5addsymbolIop = InstObjParams("m5addsymbol", "M5addsymbol64", "PredOp", + { "code": m5addsymbolCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5addsymbolIop) + decoder_output += BasicConstructor.subst(m5addsymbolIop) + exec_output += PredOpExecute.subst(m5addsymbolIop) + m5panicCode = '''panic("M5 panic instruction called at pc=%#x.", xc->pcState().pc());''' + m5panicIop = InstObjParams("m5panic", "M5panic", "PredOp", { "code": m5panicCode, "predicate_test": predicateTest }, @@ -332,6 +513,13 @@ let {{ join32to64(R1, R0), join32to64(R3, R2) );''' + + m5workbeginCode64 = '''PseudoInst::workbegin( + xc->tcBase(), + X0, + X1 + );''' + m5workbeginIop = InstObjParams("m5workbegin", "M5workbegin", "PredOp", { "code": m5workbeginCode, "predicate_test": predicateTest }, @@ -340,11 +528,26 @@ let {{ decoder_output += BasicConstructor.subst(m5workbeginIop) exec_output += PredOpExecute.subst(m5workbeginIop) + m5workbeginIop = InstObjParams("m5workbegin", "M5workbegin64", "PredOp", + { "code": m5workbeginCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5workbeginIop) + decoder_output += BasicConstructor.subst(m5workbeginIop) + exec_output += PredOpExecute.subst(m5workbeginIop) + m5workendCode = '''PseudoInst::workend( xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2) );''' + + m5workendCode64 = '''PseudoInst::workend( + xc->tcBase(), + X0, + X1 + );''' + m5workendIop = InstObjParams("m5workend", "M5workend", "PredOp", { "code": m5workendCode, "predicate_test": predicateTest }, @@ -353,4 +556,11 @@ let {{ decoder_output += BasicConstructor.subst(m5workendIop) exec_output += PredOpExecute.subst(m5workendIop) + m5workendIop = InstObjParams("m5workend", "M5workend64", "PredOp", + { "code": m5workendCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5workendIop) + decoder_output += BasicConstructor.subst(m5workendIop) + exec_output += PredOpExecute.subst(m5workendIop) }}; diff --git a/src/arch/arm/isa/insts/macromem.isa b/src/arch/arm/isa/insts/macromem.isa index db36a3fff..f164595dd 100644 --- a/src/arch/arm/isa/insts/macromem.isa +++ b/src/arch/arm/isa/insts/macromem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -91,7 +91,8 @@ let {{ SCTLR sctlr = Sctlr; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Spsr, 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -158,8 +159,8 @@ let {{ header_output = decoder_output = exec_output = '' - loadIops = (microLdrUopIop, microLdrRetUopIop, - microLdrFpUopIop, microLdrDBFpUopIop, microLdrDTFpUopIop) + loadIops = (microLdrUopIop, microLdrRetUopIop, microLdrFpUopIop, + microLdrDBFpUopIop, microLdrDTFpUopIop) storeIops = (microStrUopIop, microStrFpUopIop, microStrDBFpUopIop, microStrDTFpUopIop) for iop in loadIops + storeIops: @@ -178,7 +179,7 @@ let {{ let {{ exec_output = header_output = '' - eaCode = 'EA = URa + imm;' + eaCode = 'EA = XURa + imm;' for size in (1, 2, 3, 4, 6, 8, 12, 16): # Set up the memory access. @@ -592,6 +593,26 @@ let {{ URa = URb + shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC); ''' + microAddXiUopIop = InstObjParams('addxi_uop', 'MicroAddXiUop', + 'MicroIntImmXOp', + 'XURa = XURb + imm;', + ['IsMicroop']) + + microAddXiSpAlignUopIop = InstObjParams('addxi_uop', 'MicroAddXiSpAlignUop', + 'MicroIntImmXOp', ''' + if (isSP((IntRegIndex) urb) && bits(XURb, 3, 0) && + SPAlignmentCheckEnabled(xc->tcBase())) { + return new SPAlignmentFault(); + } + XURa = XURb + imm; + ''', ['IsMicroop']) + + microAddXERegUopIop = InstObjParams('addxr_uop', 'MicroAddXERegUop', + 'MicroIntRegXOp', + 'XURa = XURb + ' + \ + 'extendReg64(XURc, type, shiftAmt, 64);', + ['IsMicroop']) + microAddUopIop = InstObjParams('add_uop', 'MicroAddUop', 'MicroIntRegOp', {'code': microAddUopCode, @@ -604,6 +625,11 @@ let {{ 'predicate_test': predicateTest}, ['IsMicroop']) + microSubXiUopIop = InstObjParams('subxi_uop', 'MicroSubXiUop', + 'MicroIntImmXOp', + 'XURa = XURb - imm;', + ['IsMicroop']) + microSubUopCode = ''' URa = URb - shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC); ''' @@ -631,8 +657,8 @@ let {{ SCTLR sctlr = Sctlr; pNPC = URa; CPSR new_cpsr = - cpsrWriteByInstr(cpsrOrCondCodes, URb, - 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(cpsrOrCondCodes, URb, Scr, Nsacr, + 0xF, true, sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; NextThumb = new_cpsr.t; NextJazelle = new_cpsr.j; @@ -651,25 +677,37 @@ let {{ ['IsMicroop']) header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \ + MicroIntImmDeclare.subst(microAddXiUopIop) + \ + MicroIntImmDeclare.subst(microAddXiSpAlignUopIop) + \ MicroIntImmDeclare.subst(microSubiUopIop) + \ + MicroIntImmDeclare.subst(microSubXiUopIop) + \ MicroIntRegDeclare.subst(microAddUopIop) + \ MicroIntRegDeclare.subst(microSubUopIop) + \ + MicroIntXERegDeclare.subst(microAddXERegUopIop) + \ MicroIntMovDeclare.subst(microUopRegMovIop) + \ MicroIntMovDeclare.subst(microUopRegMovRetIop) + \ MicroSetPCCPSRDeclare.subst(microUopSetPCCPSRIop) decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \ + MicroIntImmXConstructor.subst(microAddXiUopIop) + \ + MicroIntImmXConstructor.subst(microAddXiSpAlignUopIop) + \ MicroIntImmConstructor.subst(microSubiUopIop) + \ + MicroIntImmXConstructor.subst(microSubXiUopIop) + \ MicroIntRegConstructor.subst(microAddUopIop) + \ MicroIntRegConstructor.subst(microSubUopIop) + \ + MicroIntXERegConstructor.subst(microAddXERegUopIop) + \ MicroIntMovConstructor.subst(microUopRegMovIop) + \ MicroIntMovConstructor.subst(microUopRegMovRetIop) + \ MicroSetPCCPSRConstructor.subst(microUopSetPCCPSRIop) exec_output = PredOpExecute.subst(microAddiUopIop) + \ + BasicExecute.subst(microAddXiUopIop) + \ + BasicExecute.subst(microAddXiSpAlignUopIop) + \ PredOpExecute.subst(microSubiUopIop) + \ + BasicExecute.subst(microSubXiUopIop) + \ PredOpExecute.subst(microAddUopIop) + \ PredOpExecute.subst(microSubUopIop) + \ + BasicExecute.subst(microAddXERegUopIop) + \ PredOpExecute.subst(microUopRegMovIop) + \ PredOpExecute.subst(microUopRegMovRetIop) + \ PredOpExecute.subst(microUopSetPCCPSRIop) @@ -681,6 +719,25 @@ let {{ header_output = MacroMemDeclare.subst(iop) decoder_output = MacroMemConstructor.subst(iop) + iop = InstObjParams("ldpstp", "LdpStp", 'PairMemOp', "", []) + header_output += PairMemDeclare.subst(iop) + decoder_output += PairMemConstructor.subst(iop) + + iopImm = InstObjParams("bigfpmemimm", "BigFpMemImm", "BigFpMemImmOp", "") + iopPre = InstObjParams("bigfpmempre", "BigFpMemPre", "BigFpMemPreOp", "") + iopPost = InstObjParams("bigfpmempost", "BigFpMemPost", "BigFpMemPostOp", "") + for iop in (iopImm, iopPre, iopPost): + header_output += BigFpMemImmDeclare.subst(iop) + decoder_output += BigFpMemImmConstructor.subst(iop) + + iop = InstObjParams("bigfpmemreg", "BigFpMemReg", "BigFpMemRegOp", "") + header_output += BigFpMemRegDeclare.subst(iop) + decoder_output += BigFpMemRegConstructor.subst(iop) + + iop = InstObjParams("bigfpmemlit", "BigFpMemLit", "BigFpMemLitOp", "") + header_output += BigFpMemLitDeclare.subst(iop) + decoder_output += BigFpMemLitConstructor.subst(iop) + iop = InstObjParams("vldmult", "VldMult", 'VldMultOp', "", []) header_output += VMemMultDeclare.subst(iop) decoder_output += VMemMultConstructor.subst(iop) diff --git a/src/arch/arm/isa/insts/mem.isa b/src/arch/arm/isa/insts/mem.isa index c39f1b14f..aed6bab0d 100644 --- a/src/arch/arm/isa/insts/mem.isa +++ b/src/arch/arm/isa/insts/mem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -48,8 +48,8 @@ let {{ self.constructTemplate = eval(self.decConstBase + 'Constructor') def fillTemplates(self, name, Name, codeBlobs, memFlags, instFlags, - base = 'Memory', wbDecl = None, pcDecl = None, - rasPop = False): + base='Memory', wbDecl=None, pcDecl=None, + rasPop=False, size=4, sign=False, faCode=None): # Make sure flags are in lists (convert to lists if not). memFlags = makeList(memFlags) instFlags = makeList(instFlags) @@ -63,6 +63,22 @@ let {{ codeBlobs["ea_code"] = eaCode + if faCode: + # For AArch64 the fa_code snippet comes already assembled here + codeBlobs["fa_code"] = faCode + elif wbDecl == None: + codeBlobs["fa_code"] = ''' + if (dest != INTREG_PC) { + fault->annotate(ArmFault::SAS, %s); + fault->annotate(ArmFault::SSE, %s); + fault->annotate(ArmFault::SRT, dest); + } + ''' %("0" if size == 1 else + "1" if size == 2 else "2", + "true" if sign else "false") + else: + codeBlobs["fa_code"] = '' + macroName = Name instFlagsCopy = list(instFlags) codeBlobsCopy = dict(codeBlobs) @@ -108,6 +124,7 @@ let {{ "use_uops" : use_uops, "use_pc" : use_pc, "use_wb" : use_wb, + "fa_code" : '', "is_ras_pop" : is_ras_pop }, ['IsMacroop']) header_output += self.declareTemplate.subst(iop) @@ -176,8 +193,13 @@ let {{ return Name def buildMemSuffix(sign, size): - if size == 4: - memSuffix = '' + if size == 8: + memSuffix = '_ud' + elif size == 4: + if sign: + memSuffix = '_sw' + else: + memSuffix = '_uw' elif size == 2: if sign: memSuffix = '_sh' diff --git a/src/arch/arm/isa/insts/misc.isa b/src/arch/arm/isa/insts/misc.isa index b8425a240..678a125fb 100644 --- a/src/arch/arm/isa/insts/misc.isa +++ b/src/arch/arm/isa/insts/misc.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -40,21 +40,102 @@ let {{ svcCode = ''' - if (FullSystem) { - fault = new SupervisorCall; - } else { - fault = new SupervisorCall(machInst); - } + fault = new SupervisorCall(machInst, imm); ''' - svcIop = InstObjParams("svc", "Svc", "PredOp", + svcIop = InstObjParams("svc", "Svc", "ImmOp", { "code": svcCode, "predicate_test": predicateTest }, ["IsSyscall", "IsNonSpeculative", "IsSerializeAfter"]) - header_output = BasicDeclare.subst(svcIop) - decoder_output = BasicConstructor.subst(svcIop) + header_output = ImmOpDeclare.subst(svcIop) + decoder_output = ImmOpConstructor.subst(svcIop) exec_output = PredOpExecute.subst(svcIop) + smcCode = ''' + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr; + + if ((cpsr.mode != MODE_USER) && FullSystem) { + if (ArmSystem::haveVirtualization(xc->tcBase()) && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP) && hcr.tsc) { + fault = new HypervisorTrap(machInst, 0, EC_SMC_TO_HYP); + } else { + if (scr.scd) { + fault = disabledFault(); + } else { + fault = new SecureMonitorCall(machInst); + } + } + } else { + fault = disabledFault(); + } + ''' + + smcIop = InstObjParams("smc", "Smc", "PredOp", + { "code": smcCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(smcIop) + decoder_output += BasicConstructor.subst(smcIop) + exec_output += PredOpExecute.subst(smcIop) + + hvcCode = ''' + CPSR cpsr = Cpsr; + SCR scr = Scr; + + // Filter out the various cases where this instruction isn't defined + if (!FullSystem || !ArmSystem::haveVirtualization(xc->tcBase()) || + (cpsr.mode == MODE_USER) || + (ArmSystem::haveSecurity(xc->tcBase()) && (!scr.ns || !scr.hce))) { + fault = disabledFault(); + } else { + fault = new HypervisorCall(machInst, imm); + } + ''' + + hvcIop = InstObjParams("hvc", "Hvc", "ImmOp", + { "code": hvcCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += ImmOpDeclare.subst(hvcIop) + decoder_output += ImmOpConstructor.subst(hvcIop) + exec_output += PredOpExecute.subst(hvcIop) + + eretCode = ''' + SCTLR sctlr = Sctlr; + CPSR old_cpsr = Cpsr; + old_cpsr.nz = CondCodesNZ; + old_cpsr.c = CondCodesC; + old_cpsr.v = CondCodesV; + old_cpsr.ge = CondCodesGE; + + CPSR new_cpsr = cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, + true, sctlr.nmfi, xc->tcBase()); + Cpsr = ~CondCodesMask & new_cpsr; + CondCodesNZ = new_cpsr.nz; + CondCodesC = new_cpsr.c; + CondCodesV = new_cpsr.v; + CondCodesGE = new_cpsr.ge; + + NextThumb = (new_cpsr).t; + NextJazelle = (new_cpsr).j; + NextItState = (((new_cpsr).it2 << 2) & 0xFC) + | ((new_cpsr).it1 & 0x3); + + NPC = (old_cpsr.mode == MODE_HYP) ? ElrHyp : LR; + ''' + + eretIop = InstObjParams("eret", "Eret", "PredOp", + { "code": eretCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(eretIop) + decoder_output += BasicConstructor.subst(eretIop) + exec_output += PredOpExecute.subst(eretIop) + + + }}; let {{ @@ -87,6 +168,59 @@ let {{ decoder_output += MrsConstructor.subst(mrsSpsrIop) exec_output += PredOpExecute.subst(mrsSpsrIop) + mrsBankedRegCode = ''' + bool isIntReg; + int regIdx; + + if (decodeMrsMsrBankedReg(byteMask, r, isIntReg, regIdx, Cpsr, Scr, Nsacr)) { + if (isIntReg) { + Dest = DecodedBankedIntReg; + } else { + Dest = xc->readMiscReg(regIdx); + } + } else { + return new UndefinedInstruction(machInst, false, mnemonic); + } + ''' + mrsBankedRegIop = InstObjParams("mrs", "MrsBankedReg", "MrsOp", + { "code": mrsBankedRegCode, + "predicate_test": predicateTest }, + ["IsSerializeBefore"]) + header_output += MrsBankedRegDeclare.subst(mrsBankedRegIop) + decoder_output += MrsBankedRegConstructor.subst(mrsBankedRegIop) + exec_output += PredOpExecute.subst(mrsBankedRegIop) + + msrBankedRegCode = ''' + bool isIntReg; + int regIdx; + + if (decodeMrsMsrBankedReg(byteMask, r, isIntReg, regIdx, Cpsr, Scr, Nsacr)) { + if (isIntReg) { + // This is a bit nasty, you would have thought that + // DecodedBankedIntReg wouldn't be written to unless the + // conditions on the IF statements above are met, however if + // you look at the generated C code you'll find that they are. + // However this is safe as DecodedBankedIntReg (which is used + // in operands.isa to get the index of DecodedBankedIntReg) + // will return INTREG_DUMMY if its not a valid integer + // register, so redirecting the write to somewhere we don't + // care about. + DecodedBankedIntReg = Op1; + } else { + xc->setMiscReg(regIdx, Op1); + } + } else { + return new UndefinedInstruction(machInst, false, mnemonic); + } + ''' + msrBankedRegIop = InstObjParams("msr", "MsrBankedReg", "MsrRegOp", + { "code": msrBankedRegCode, + "predicate_test": predicateTest }, + ["IsSerializeAfter"]) + header_output += MsrBankedRegDeclare.subst(msrBankedRegIop) + decoder_output += MsrBankedRegConstructor.subst(msrBankedRegIop) + exec_output += PredOpExecute.subst(msrBankedRegIop) + msrCpsrRegCode = ''' SCTLR sctlr = Sctlr; CPSR old_cpsr = Cpsr; @@ -96,7 +230,8 @@ let {{ old_cpsr.ge = CondCodesGE; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Op1, byteMask, false, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Op1, Scr, Nsacr, byteMask, false, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -128,7 +263,8 @@ let {{ old_cpsr.v = CondCodesV; old_cpsr.ge = CondCodesGE; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, imm, byteMask, false, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, imm, Scr, Nsacr, byteMask, false, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -488,12 +624,10 @@ let {{ decoder_output += BasicConstructor.subst(bkptIop) exec_output += BasicExecute.subst(bkptIop) - nopIop = InstObjParams("nop", "NopInst", "PredOp", \ - { "code" : "", "predicate_test" : predicateTest }, - ['IsNop']) + nopIop = InstObjParams("nop", "NopInst", "ArmStaticInst", "", ['IsNop']) header_output += BasicDeclare.subst(nopIop) - decoder_output += BasicConstructor.subst(nopIop) - exec_output += PredOpExecute.subst(nopIop) + decoder_output += BasicConstructor64.subst(nopIop) + exec_output += BasicExecute.subst(nopIop) yieldIop = InstObjParams("yield", "YieldInst", "PredOp", \ { "code" : "", "predicate_test" : predicateTest }) @@ -502,14 +636,31 @@ let {{ exec_output += PredOpExecute.subst(yieldIop) wfeCode = ''' - // WFE Sleeps if SevMailbox==0 and no unmasked interrupts are pending + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr64; + SCTLR sctlr = Sctlr; + + // WFE Sleeps if SevMailbox==0 and no unmasked interrupts are pending, + ThreadContext *tc = xc->tcBase(); if (SevMailbox == 1) { SevMailbox = 0; - PseudoInst::quiesceSkip(xc->tcBase()); - } else if (xc->tcBase()->getCpuPtr()->getInterruptController()->checkInterrupts(xc->tcBase())) { - PseudoInst::quiesceSkip(xc->tcBase()); + PseudoInst::quiesceSkip(tc); + } else if (tc->getCpuPtr()->getInterruptController()->checkInterrupts(tc)) { + PseudoInst::quiesceSkip(tc); + } else if (cpsr.el == EL0 && !sctlr.ntwe) { + PseudoInst::quiesceSkip(tc); + fault = new SupervisorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveVirtualization(tc) && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP) && + hcr.twe) { + PseudoInst::quiesceSkip(tc); + fault = new HypervisorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveSecurity(tc) && cpsr.el != EL3 && scr.twe) { + PseudoInst::quiesceSkip(tc); + fault = new SecureMonitorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); } else { - PseudoInst::quiesce(xc->tcBase()); + PseudoInst::quiesce(tc); } ''' wfePredFixUpCode = ''' @@ -528,12 +679,30 @@ let {{ exec_output += QuiescePredOpExecuteWithFixup.subst(wfeIop) wfiCode = ''' + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr64; + SCTLR sctlr = Sctlr; + // WFI doesn't sleep if interrupts are pending (masked or not) - if (xc->tcBase()->getCpuPtr()->getInterruptController()->checkRaw()) { - PseudoInst::quiesceSkip(xc->tcBase()); + ThreadContext *tc = xc->tcBase(); + if (tc->getCpuPtr()->getInterruptController()->checkWfiWake(hcr, cpsr, + scr)) { + PseudoInst::quiesceSkip(tc); + } else if (cpsr.el == EL0 && !sctlr.ntwi) { + PseudoInst::quiesceSkip(tc); + fault = new SupervisorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveVirtualization(tc) && hcr.twi && + (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr)) { + PseudoInst::quiesceSkip(tc); + fault = new HypervisorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveSecurity(tc) && cpsr.el != EL3 && scr.twi) { + PseudoInst::quiesceSkip(tc); + fault = new SecureMonitorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); } else { - PseudoInst::quiesce(xc->tcBase()); + PseudoInst::quiesce(tc); } + tc->getCpuPtr()->clearInterrupt(INT_ABT, 0); ''' wfiIop = InstObjParams("wfi", "WfiInst", "PredOp", \ { "code" : wfiCode, "predicate_test" : predicateTest }, @@ -564,6 +733,16 @@ let {{ decoder_output += BasicConstructor.subst(sevIop) exec_output += PredOpExecute.subst(sevIop) + sevlCode = ''' + SevMailbox = 1; + ''' + sevlIop = InstObjParams("sevl", "SevlInst", "PredOp", \ + { "code" : sevlCode, "predicate_test" : predicateTest }, + ["IsNonSpeculative", "IsSquashAfter", "IsUnverifiable"]) + header_output += BasicDeclare.subst(sevlIop) + decoder_output += BasicConstructor.subst(sevlIop) + exec_output += BasicExecute.subst(sevlIop) + itIop = InstObjParams("it", "ItInst", "PredOp", \ { "code" : ";", "predicate_test" : predicateTest }, []) @@ -571,10 +750,7 @@ let {{ decoder_output += BasicConstructor.subst(itIop) exec_output += PredOpExecute.subst(itIop) unknownCode = ''' - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(machInst, true); + return new UndefinedInstruction(machInst, true); ''' unknownIop = InstObjParams("unknown", "Unknown", "UnknownOp", \ { "code": unknownCode, @@ -626,108 +802,152 @@ let {{ exec_output += PredOpExecute.subst(bfiIop) mrc14code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + MiscRegIndex miscReg = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(op1); + if (!canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (mcrMrc14TrapToHyp((const MiscRegIndex) op1, Hcr, Cpsr, Scr, Hdcr, + Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP14_MCR_MRC); } Dest = MiscOp1; ''' - mrc14Iop = InstObjParams("mrc", "Mrc14", "RegRegOp", + mrc14Iop = InstObjParams("mrc", "Mrc14", "RegRegImmOp", { "code": mrc14code, "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc14Iop) - decoder_output += RegRegOpConstructor.subst(mrc14Iop) + header_output += RegRegImmOpDeclare.subst(mrc14Iop) + decoder_output += RegRegImmOpConstructor.subst(mrc14Iop) exec_output += PredOpExecute.subst(mrc14Iop) mcr14code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + MiscRegIndex miscReg = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest); + if (!canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (mcrMrc14TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, + Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP14_MCR_MRC); } MiscDest = Op1; ''' - mcr14Iop = InstObjParams("mcr", "Mcr14", "RegRegOp", + mcr14Iop = InstObjParams("mcr", "Mcr14", "RegRegImmOp", { "code": mcr14code, "predicate_test": predicateTest }, ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr14Iop) - decoder_output += RegRegOpConstructor.subst(mcr14Iop) + header_output += RegRegImmOpDeclare.subst(mcr14Iop) + decoder_output += RegRegImmOpConstructor.subst(mcr14Iop) exec_output += PredOpExecute.subst(mcr14Iop) - mrc14UserIop = InstObjParams("mrc", "Mrc14User", "RegRegOp", - { "code": "Dest = MiscOp1;", - "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc14UserIop) - decoder_output += RegRegOpConstructor.subst(mrc14UserIop) - exec_output += PredOpExecute.subst(mrc14UserIop) - - mcr14UserIop = InstObjParams("mcr", "Mcr14User", "RegRegOp", - { "code": "MiscDest = Op1", - "predicate_test": predicateTest }, - ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr14UserIop) - decoder_output += RegRegOpConstructor.subst(mcr14UserIop) - exec_output += PredOpExecute.subst(mcr14UserIop) - mrc15code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + int preFlatOp1 = flattenMiscRegNsBanked(op1, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatOp1); + bool hypTrap = mcrMrc15TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, Hstr, + Hcptr, imm); + bool canRead = canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // if we're in non secure PL1 mode then we can trap regargless of whether + // the register is accessable, in other modes we trap if only if the register + // IS accessable. + if (!canRead & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); } - Dest = MiscOp1; + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCR_MRC); + } + Dest = MiscNsBankedOp1; ''' - mrc15Iop = InstObjParams("mrc", "Mrc15", "RegRegOp", + mrc15Iop = InstObjParams("mrc", "Mrc15", "RegRegImmOp", { "code": mrc15code, "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc15Iop) - decoder_output += RegRegOpConstructor.subst(mrc15Iop) + header_output += RegRegImmOpDeclare.subst(mrc15Iop) + decoder_output += RegRegImmOpConstructor.subst(mrc15Iop) exec_output += PredOpExecute.subst(mrc15Iop) mcr15code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + int preFlatDest = flattenMiscRegNsBanked(dest, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatDest); + bool hypTrap = mcrMrc15TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, Hstr, + Hcptr, imm); + bool canWrite = canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // if we're in non secure PL1 mode then we can trap regargless of whether + // the register is accessable, in other modes we trap if only if the register + // IS accessable. + if (!canWrite & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); } - MiscDest = Op1; + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCR_MRC); + } + MiscNsBankedDest = Op1; ''' - mcr15Iop = InstObjParams("mcr", "Mcr15", "RegRegOp", + mcr15Iop = InstObjParams("mcr", "Mcr15", "RegRegImmOp", { "code": mcr15code, "predicate_test": predicateTest }, ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr15Iop) - decoder_output += RegRegOpConstructor.subst(mcr15Iop) + header_output += RegRegImmOpDeclare.subst(mcr15Iop) + decoder_output += RegRegImmOpConstructor.subst(mcr15Iop) exec_output += PredOpExecute.subst(mcr15Iop) - mrc15UserIop = InstObjParams("mrc", "Mrc15User", "RegRegOp", - { "code": "Dest = MiscOp1;", - "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc15UserIop) - decoder_output += RegRegOpConstructor.subst(mrc15UserIop) - exec_output += PredOpExecute.subst(mrc15UserIop) - - mcr15UserIop = InstObjParams("mcr", "Mcr15User", "RegRegOp", - { "code": "MiscDest = Op1", - "predicate_test": predicateTest }, - ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr15UserIop) - decoder_output += RegRegOpConstructor.subst(mcr15UserIop) - exec_output += PredOpExecute.subst(mcr15UserIop) + + mrrc15code = ''' + int preFlatOp1 = flattenMiscRegNsBanked(op1, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatOp1); + bool hypTrap = mcrrMrrc15TrapToHyp(miscReg, Cpsr, Scr, Hstr, Hcr, imm); + bool canRead = canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // if we're in non secure PL1 mode then we can trap regargless of whether + // the register is accessable, in other modes we trap if only if the register + // IS accessable. + if (!canRead & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCRR_MRRC); + } + Dest = bits(MiscNsBankedOp164, 63, 32); + Dest2 = bits(MiscNsBankedOp164, 31, 0); + ''' + mrrc15Iop = InstObjParams("mrrc", "Mrrc15", "MrrcOp", + { "code": mrrc15code, + "predicate_test": predicateTest }, []) + header_output += MrrcOpDeclare.subst(mrrc15Iop) + decoder_output += MrrcOpConstructor.subst(mrrc15Iop) + exec_output += PredOpExecute.subst(mrrc15Iop) + + + mcrr15code = ''' + int preFlatDest = flattenMiscRegNsBanked(dest, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatDest); + bool hypTrap = mcrrMrrc15TrapToHyp(miscReg, Cpsr, Scr, Hstr, Hcr, imm); + bool canWrite = canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // if we're in non secure PL1 mode then we can trap regargless of whether + // the register is accessable, in other modes we trap if only if the register + // IS accessable. + if (!canWrite & !(hypTrap & !inUserMode(Cpsr) & !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCRR_MRRC); + } + MiscNsBankedDest64 = ((uint64_t) Op1 << 32) | Op2; + ''' + mcrr15Iop = InstObjParams("mcrr", "Mcrr15", "McrrOp", + { "code": mcrr15code, + "predicate_test": predicateTest }, []) + header_output += McrrOpDeclare.subst(mcrr15Iop) + decoder_output += McrrOpConstructor.subst(mcrr15Iop) + exec_output += PredOpExecute.subst(mcrr15Iop) + enterxCode = ''' NextThumb = true; @@ -775,35 +995,53 @@ let {{ exec_output += PredOpExecute.subst(clrexIop) isbCode = ''' + // If the barrier is due to a CP15 access check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15ISB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } fault = new FlushPipe; ''' - isbIop = InstObjParams("isb", "Isb", "PredOp", + isbIop = InstObjParams("isb", "Isb", "ImmOp", {"code": isbCode, "predicate_test": predicateTest}, ['IsSerializeAfter']) - header_output += BasicDeclare.subst(isbIop) - decoder_output += BasicConstructor.subst(isbIop) + header_output += ImmOpDeclare.subst(isbIop) + decoder_output += ImmOpConstructor.subst(isbIop) exec_output += PredOpExecute.subst(isbIop) dsbCode = ''' + // If the barrier is due to a CP15 access check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15DSB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } fault = new FlushPipe; ''' - dsbIop = InstObjParams("dsb", "Dsb", "PredOp", + dsbIop = InstObjParams("dsb", "Dsb", "ImmOp", {"code": dsbCode, "predicate_test": predicateTest}, ['IsMemBarrier', 'IsSerializeAfter']) - header_output += BasicDeclare.subst(dsbIop) - decoder_output += BasicConstructor.subst(dsbIop) + header_output += ImmOpDeclare.subst(dsbIop) + decoder_output += ImmOpConstructor.subst(dsbIop) exec_output += PredOpExecute.subst(dsbIop) dmbCode = ''' + // If the barrier is due to a CP15 access check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15DMB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } ''' - dmbIop = InstObjParams("dmb", "Dmb", "PredOp", + dmbIop = InstObjParams("dmb", "Dmb", "ImmOp", {"code": dmbCode, "predicate_test": predicateTest}, ['IsMemBarrier']) - header_output += BasicDeclare.subst(dmbIop) - decoder_output += BasicConstructor.subst(dmbIop) + header_output += ImmOpDeclare.subst(dmbIop) + decoder_output += ImmOpConstructor.subst(dmbIop) exec_output += PredOpExecute.subst(dmbIop) dbgCode = ''' diff --git a/src/arch/arm/isa/insts/misc64.isa b/src/arch/arm/isa/insts/misc64.isa new file mode 100644 index 000000000..6ebbcc2ba --- /dev/null +++ b/src/arch/arm/isa/insts/misc64.isa @@ -0,0 +1,147 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + svcCode = ''' + fault = new SupervisorCall(machInst, bits(machInst, 20, 5)); + ''' + + svcIop = InstObjParams("svc", "Svc64", "ArmStaticInst", + svcCode, ["IsSyscall", "IsNonSpeculative", + "IsSerializeAfter"]) + header_output = BasicDeclare.subst(svcIop) + decoder_output = BasicConstructor64.subst(svcIop) + exec_output = BasicExecute.subst(svcIop) + + # @todo: extend to take into account Virtualization. + smcCode = ''' + SCR scr = Scr64; + CPSR cpsr = Cpsr; + + if (!ArmSystem::haveSecurity(xc->tcBase()) || inUserMode(cpsr) || scr.smd) { + fault = disabledFault(); + } else { + fault = new SecureMonitorCall(machInst); + } + ''' + + smcIop = InstObjParams("smc", "Smc64", "ArmStaticInst", + smcCode, ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(smcIop) + decoder_output += BasicConstructor64.subst(smcIop) + exec_output += BasicExecute.subst(smcIop) + + def subst(templateBase, iop): + global header_output, decoder_output, exec_output + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + bfmMaskCode = ''' + uint64_t bitMask; + int diff = imm2 - imm1; + if (imm1 <= imm2) { + bitMask = mask(diff + 1); + } else { + bitMask = mask(imm2 + 1); + bitMask = (bitMask >> imm1) | (bitMask << (intWidth - imm1)); + diff += intWidth; + } + uint64_t topBits M5_VAR_USED = ~mask(diff+1); + uint64_t result = (Op164 >> imm1) | (Op164 << (intWidth - imm1)); + result &= bitMask; + ''' + + bfmCode = bfmMaskCode + 'Dest64 = result | (Dest64 & ~bitMask);' + bfmIop = InstObjParams("bfm", "Bfm64", "RegRegImmImmOp64", bfmCode); + subst("RegRegImmImmOp64", bfmIop) + + ubfmCode = bfmMaskCode + 'Dest64 = result;' + ubfmIop = InstObjParams("ubfm", "Ubfm64", "RegRegImmImmOp64", ubfmCode); + subst("RegRegImmImmOp64", ubfmIop) + + sbfmCode = bfmMaskCode + \ + 'Dest64 = result | (bits(Op164, imm2) ? topBits : 0);' + sbfmIop = InstObjParams("sbfm", "Sbfm64", "RegRegImmImmOp64", sbfmCode); + subst("RegRegImmImmOp64", sbfmIop) + + extrCode = ''' + if (imm == 0) { + Dest64 = Op264; + } else { + Dest64 = (Op164 << (intWidth - imm)) | (Op264 >> imm); + } + ''' + extrIop = InstObjParams("extr", "Extr64", "RegRegRegImmOp64", extrCode); + subst("RegRegRegImmOp64", extrIop); + + unknownCode = ''' + return new UndefinedInstruction(machInst, true); + ''' + unknown64Iop = InstObjParams("unknown", "Unknown64", "UnknownOp64", + unknownCode) + header_output += BasicDeclare.subst(unknown64Iop) + decoder_output += BasicConstructor64.subst(unknown64Iop) + exec_output += BasicExecute.subst(unknown64Iop) + + isbIop = InstObjParams("isb", "Isb64", "ArmStaticInst", + "fault = new FlushPipe;", ['IsSerializeAfter']) + header_output += BasicDeclare.subst(isbIop) + decoder_output += BasicConstructor64.subst(isbIop) + exec_output += BasicExecute.subst(isbIop) + + dsbIop = InstObjParams("dsb", "Dsb64", "ArmStaticInst", + "fault = new FlushPipe;", + ['IsMemBarrier', 'IsSerializeAfter']) + header_output += BasicDeclare.subst(dsbIop) + decoder_output += BasicConstructor64.subst(dsbIop) + exec_output += BasicExecute.subst(dsbIop) + + dmbIop = InstObjParams("dmb", "Dmb64", "ArmStaticInst", "", + ['IsMemBarrier']) + header_output += BasicDeclare.subst(dmbIop) + decoder_output += BasicConstructor64.subst(dmbIop) + exec_output += BasicExecute.subst(dmbIop) + + clrexIop = InstObjParams("clrex", "Clrex64", "ArmStaticInst", + "LLSCLock = 0;") + header_output += BasicDeclare.subst(clrexIop) + decoder_output += BasicConstructor64.subst(clrexIop) + exec_output += BasicExecute.subst(clrexIop) +}}; diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa index 876bb3bb7..ca5c3038c 100644 --- a/src/arch/arm/isa/insts/neon.isa +++ b/src/arch/arm/isa/insts/neon.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -94,8 +94,8 @@ output header {{ template <template <typename T> class Base> StaticInstPtr decodeNeonUThreeUSReg(unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1, IntRegIndex op2) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, IntRegIndex op2) { switch (size) { case 0: @@ -112,8 +112,8 @@ output header {{ template <template <typename T> class Base> StaticInstPtr decodeNeonSThreeUSReg(unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1, IntRegIndex op2) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, IntRegIndex op2) { switch (size) { case 0: @@ -129,6 +129,38 @@ output header {{ template <template <typename T> class Base> StaticInstPtr + decodeNeonSThreeHAndWReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2) + { + switch (size) { + case 1: + return new Base<int16_t>(machInst, dest, op1, op2); + case 2: + return new Base<int32_t>(machInst, dest, op1, op2); + default: + return new Unknown(machInst); + } + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonSThreeImmHAndWReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + switch (size) { + case 1: + return new Base<int16_t>(machInst, dest, op1, op2, imm); + case 2: + return new Base<int32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } + + template <template <typename T> class Base> + StaticInstPtr decodeNeonUSThreeUSReg(bool notSigned, unsigned size, ExtMachInst machInst, IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) @@ -177,6 +209,38 @@ output header {{ template <template <typename T> class BaseD, template <typename T> class BaseQ> StaticInstPtr + decodeNeonSThreeXReg(bool q, unsigned size, + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, IntRegIndex op2) + { + if (q) { + return decodeNeonSThreeUReg<BaseQ>( + size, machInst, dest, op1, op2); + } else { + return decodeNeonSThreeUSReg<BaseD>( + size, machInst, dest, op1, op2); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUThreeXReg(bool q, unsigned size, + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, IntRegIndex op2) + { + if (q) { + return decodeNeonUThreeUReg<BaseQ>( + size, machInst, dest, op1, op2); + } else { + return decodeNeonUThreeUSReg<BaseD>( + size, machInst, dest, op1, op2); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr decodeNeonUSThreeSReg(bool q, bool notSigned, unsigned size, ExtMachInst machInst, IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) @@ -241,6 +305,124 @@ output header {{ template <template <typename T> class BaseD, template <typename T> class BaseQ> StaticInstPtr + decodeNeonUThreeFpReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) + { + if (q) { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1, op2); + else + return new BaseQ<uint32_t>(machInst, dest, op1, op2); + } else { + if (size) + return new Unknown(machInst); + else + return new BaseD<uint32_t>(machInst, dest, op1, op2); + } + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUThreeScFpReg(bool size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, IntRegIndex op2) + { + if (size) + return new Base<uint64_t>(machInst, dest, op1, op2); + else + return new Base<uint32_t>(machInst, dest, op1, op2); + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUThreeImmScFpReg(bool size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + if (size) + return new Base<uint64_t>(machInst, dest, op1, op2, imm); + else + return new Base<uint32_t>(machInst, dest, op1, op2, imm); + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + if (q) { + switch (size) { + case 1: + return new BaseQ<uint16_t>(machInst, dest, op1, op2, imm); + case 2: + return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 1: + return new BaseD<uint16_t>(machInst, dest, op1, op2, imm); + case 2: + return new BaseD<uint32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonSThreeImmHAndWReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + if (q) { + switch (size) { + case 1: + return new BaseQ<int16_t>(machInst, dest, op1, op2, imm); + case 2: + return new BaseQ<int32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 1: + return new BaseD<int16_t>(machInst, dest, op1, op2, imm); + case 2: + return new BaseD<int32_t>(machInst, dest, op1, op2, imm); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUThreeImmFpReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, + IntRegIndex op2, uint64_t imm) + { + if (q) { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1, op2, imm); + else + return new BaseQ<uint32_t>(machInst, dest, op1, op2, imm); + } else { + if (size) + return new Unknown(machInst); + else + return new BaseD<uint32_t>(machInst, dest, op1, op2, imm); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr decodeNeonUTwoShiftReg(bool q, unsigned size, ExtMachInst machInst, IntRegIndex dest, IntRegIndex op1, uint64_t imm) @@ -345,6 +527,46 @@ output header {{ } } + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUTwoShiftUReg(unsigned size, + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, uint64_t imm) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1, imm); + case 1: + return new Base<uint16_t>(machInst, dest, op1, imm); + case 2: + return new Base<uint32_t>(machInst, dest, op1, imm); + case 3: + return new Base<uint64_t>(machInst, dest, op1, imm); + default: + return new Unknown(machInst); + } + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonSTwoShiftUReg(unsigned size, + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1, uint64_t imm) + { + switch (size) { + case 0: + return new Base<int8_t>(machInst, dest, op1, imm); + case 1: + return new Base<int16_t>(machInst, dest, op1, imm); + case 2: + return new Base<int32_t>(machInst, dest, op1, imm); + case 3: + return new Base<int64_t>(machInst, dest, op1, imm); + default: + return new Unknown(machInst); + } + } + template <template <typename T> class BaseD, template <typename T> class BaseQ> StaticInstPtr @@ -411,6 +633,66 @@ output header {{ } } + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, uint64_t imm) + { + if (q) { + return decodeNeonUTwoShiftUReg<BaseQ>( + size, machInst, dest, op1, imm); + } else { + return decodeNeonUTwoShiftUSReg<BaseD>( + size, machInst, dest, op1, imm); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonSTwoShiftXReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, uint64_t imm) + { + if (q) { + return decodeNeonSTwoShiftUReg<BaseQ>( + size, machInst, dest, op1, imm); + } else { + return decodeNeonSTwoShiftUSReg<BaseD>( + size, machInst, dest, op1, imm); + } + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUTwoShiftUFpReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, uint64_t imm) + { + if (size) + return new Base<uint64_t>(machInst, dest, op1, imm); + else + return new Base<uint32_t>(machInst, dest, op1, imm); + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoShiftFpReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1, uint64_t imm) + { + if (q) { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1, imm); + else + return new BaseQ<uint32_t>(machInst, dest, op1, imm); + } else { + if (size) + return new Unknown(machInst); + else + return new BaseD<uint32_t>(machInst, dest, op1, imm); + } + } + template <template <typename T> class Base> StaticInstPtr decodeNeonUTwoMiscUSReg(unsigned size, @@ -451,8 +733,8 @@ output header {{ template <typename T> class BaseQ> StaticInstPtr decodeNeonUTwoMiscSReg(bool q, unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1) { if (q) { return decodeNeonUTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); @@ -465,8 +747,8 @@ output header {{ template <typename T> class BaseQ> StaticInstPtr decodeNeonSTwoMiscSReg(bool q, unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1) { if (q) { return decodeNeonSTwoMiscUSReg<BaseQ>(size, machInst, dest, op1); @@ -498,8 +780,8 @@ output header {{ template <template <typename T> class Base> StaticInstPtr decodeNeonSTwoMiscUReg(unsigned size, - ExtMachInst machInst, IntRegIndex dest, - IntRegIndex op1) + ExtMachInst machInst, IntRegIndex dest, + IntRegIndex op1) { switch (size) { case 0: @@ -559,6 +841,221 @@ output header {{ } } + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + return decodeNeonUTwoMiscUReg<BaseQ>(size, machInst, dest, op1); + } else { + return decodeNeonUTwoMiscUSReg<BaseD>(size, machInst, dest, op1); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonSTwoMiscXReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + return decodeNeonSTwoMiscUReg<BaseQ>(size, machInst, dest, op1); + } else { + return decodeNeonSTwoMiscUSReg<BaseD>(size, machInst, dest, op1); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoMiscFpReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1); + else + return new BaseQ<uint32_t>(machInst, dest, op1); + } else { + if (size) + return new Unknown(machInst); + else + return new BaseD<uint32_t>(machInst, dest, op1); + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUTwoMiscPwiseScFpReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (size) + return new BaseQ<uint64_t>(machInst, dest, op1); + else + return new BaseD<uint32_t>(machInst, dest, op1); + } + + template <template <typename T> class Base> + StaticInstPtr + decodeNeonUTwoMiscScFpReg(unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (size) + return new Base<uint64_t>(machInst, dest, op1); + else + return new Base<uint32_t>(machInst, dest, op1); + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<uint16_t>(machInst, dest, op1); + case 0x2: + return new BaseQ<uint32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<uint16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ, + template <typename T> class BaseBQ> + StaticInstPtr + decodeNeonUAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<uint16_t>(machInst, dest, op1); + case 0x2: + return new BaseBQ<uint32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<uint16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ> + StaticInstPtr + decodeNeonSAcrossLanesReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<int8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<int16_t>(machInst, dest, op1); + case 0x2: + return new BaseQ<int32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<int8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<int16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ, + template <typename T> class BaseBQ> + StaticInstPtr + decodeNeonUAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<uint16_t>(machInst, dest, op1); + case 0x2: + return new BaseBQ<uint32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<uint8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<uint16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } + + template <template <typename T> class BaseD, + template <typename T> class BaseQ, + template <typename T> class BaseBQ> + StaticInstPtr + decodeNeonSAcrossLanesLongReg(bool q, unsigned size, ExtMachInst machInst, + IntRegIndex dest, IntRegIndex op1) + { + if (q) { + switch (size) { + case 0x0: + return new BaseQ<int8_t>(machInst, dest, op1); + case 0x1: + return new BaseQ<int16_t>(machInst, dest, op1); + case 0x2: + return new BaseBQ<int32_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } else { + switch (size) { + case 0x0: + return new BaseD<int8_t>(machInst, dest, op1); + case 0x1: + return new BaseD<int16_t>(machInst, dest, op1); + default: + return new Unknown(machInst); + } + } + } }}; output exec {{ @@ -872,10 +1369,7 @@ let {{ readDestCode = 'destElem = gtoh(destReg.elements[i]);' eWalkCode += ''' if (imm < 0 && imm >= eCount) { - if (FullSystem) - fault = new UndefinedInstruction; - else - fault = new UndefinedInstruction(false, mnemonic); + fault = new UndefinedInstruction(machInst, false, mnemonic); } else { for (unsigned i = 0; i < eCount; i++) { Element srcElem1 = gtoh(srcReg1.elements[i]); @@ -926,10 +1420,7 @@ let {{ readDestCode = 'destElem = gtoh(destReg.elements[i]);' eWalkCode += ''' if (imm < 0 && imm >= eCount) { - if (FullSystem) - fault = new UndefinedInstruction; - else - fault = new UndefinedInstruction(false, mnemonic); + fault = new UndefinedInstruction(machInst, false, mnemonic); } else { for (unsigned i = 0; i < eCount; i++) { Element srcElem1 = gtoh(srcReg1.elements[i]); @@ -978,10 +1469,7 @@ let {{ readDestCode = 'destReg = destRegs[i];' eWalkCode += ''' if (imm < 0 && imm >= eCount) { - if (FullSystem) - fault = new UndefinedInstruction; - else - fault = new UndefinedInstruction(false, mnemonic); + fault = new UndefinedInstruction(machInst, false, mnemonic); } else { for (unsigned i = 0; i < rCount; i++) { FloatReg srcReg1 = srcRegs1[i]; @@ -2156,7 +2644,7 @@ let {{ bool done; destReg = processNans(fpscr, done, true, srcReg1, srcReg2); if (!done) { - destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMaxS, + destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>, true, true, VfpRoundNearest); } else if (flushToZero(srcReg1, srcReg2)) { fpscr.idc = 1; @@ -2171,7 +2659,7 @@ let {{ bool done; destReg = processNans(fpscr, done, true, srcReg1, srcReg2); if (!done) { - destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMinS, + destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>, true, true, VfpRoundNearest); } else if (flushToZero(srcReg1, srcReg2)) { fpscr.idc = 1; @@ -2234,6 +2722,24 @@ let {{ threeEqualRegInstFp("vmla", "NVmlaDFp", "SimdFloatMultAccOp", ("float",), 2, vmlafpCode, True) threeEqualRegInstFp("vmla", "NVmlaQFp", "SimdFloatMultAccOp", ("float",), 4, vmlafpCode, True) + vfmafpCode = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + destReg = ternaryOp(fpscr, srcReg1, srcReg2, destReg, fpMulAdd<float>, + true, true, VfpRoundNearest); + FpscrExc = fpscr; + ''' + threeEqualRegInstFp("vfma", "NVfmaDFp", "SimdFloatMultAccOp", ("float",), 2, vfmafpCode, True) + threeEqualRegInstFp("vfma", "NVfmaQFp", "SimdFloatMultAccOp", ("float",), 4, vfmafpCode, True) + + vfmsfpCode = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + destReg = ternaryOp(fpscr, -srcReg1, srcReg2, destReg, fpMulAdd<float>, + true, true, VfpRoundNearest); + FpscrExc = fpscr; + ''' + threeEqualRegInstFp("vfms", "NVfmsDFp", "SimdFloatMultAccOp", ("float",), 2, vfmsfpCode, True) + threeEqualRegInstFp("vfms", "NVfmsQFp", "SimdFloatMultAccOp", ("float",), 4, vfmsfpCode, True) + vmlsfpCode = ''' FPSCR fpscr = (FPSCR) FpscrExc; float mid = binaryOp(fpscr, srcReg1, srcReg2, fpMulS, @@ -2765,7 +3271,7 @@ let {{ fpscr.idc = 1; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); - destReg = vfpFpSToFixed(srcElem1, false, false, imm); + destReg = vfpFpToFixed<float>(srcElem1, false, 32, imm); __asm__ __volatile__("" :: "m" (destReg)); finishVfp(fpscr, state, true); FpscrExc = fpscr; @@ -2781,7 +3287,7 @@ let {{ fpscr.idc = 1; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1)); - destReg = vfpFpSToFixed(srcElem1, true, false, imm); + destReg = vfpFpToFixed<float>(srcElem1, true, 32, imm); __asm__ __volatile__("" :: "m" (destReg)); finishVfp(fpscr, state, true); FpscrExc = fpscr; @@ -2795,7 +3301,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); - destElem = vfpUFixedToFpS(true, true, srcReg1, false, imm); + destElem = vfpUFixedToFpS(true, true, srcReg1, 32, imm); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); FpscrExc = fpscr; @@ -2809,7 +3315,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(VfpRoundNearest); __asm__ __volatile__("" : "=m" (srcReg1) : "m" (srcReg1)); - destElem = vfpSFixedToFpS(true, true, srcReg1, false, imm); + destElem = vfpSFixedToFpS(true, true, srcReg1, 32, imm); __asm__ __volatile__("" :: "m" (destElem)); finishVfp(fpscr, state, true); FpscrExc = fpscr; @@ -3296,10 +3802,7 @@ let {{ } else { index -= eCount; if (index >= eCount) { - if (FullSystem) - fault = new UndefinedInstruction; - else - fault = new UndefinedInstruction(false, mnemonic); + fault = new UndefinedInstruction(machInst, false, mnemonic); } else { destReg.elements[i] = srcReg2.elements[index]; } diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa new file mode 100644 index 000000000..e065761f4 --- /dev/null +++ b/src/arch/arm/isa/insts/neon64.isa @@ -0,0 +1,3355 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Giacomo Gabrielli +// Mbou Eyole + +let {{ + + header_output = "" + exec_output = "" + + # FP types (FP operations always work with unsigned representations) + floatTypes = ("uint32_t", "uint64_t") + smallFloatTypes = ("uint32_t",) + + def threeEqualRegInstX(name, Name, opClass, types, rCount, op, + readDest=False, pairwise=False, scalar=False, + byElem=False): + assert (not pairwise) or ((not byElem) and (not scalar)) + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, destReg; + ''' + if byElem: + # 2nd register operand has to be read fully + eWalkCode += ''' + FullRegVect srcReg2; + ''' + else: + eWalkCode += ''' + RegVect srcReg2; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + if byElem: + # 2nd operand has to be read fully + for reg in range(rCount, 4): + eWalkCode += ''' + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + if pairwise: + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + Element srcElem1 = gtoh(2 * i < eCount ? + srcReg1.elements[2 * i] : + srcReg2.elements[2 * i - eCount]); + Element srcElem2 = gtoh(2 * i < eCount ? + srcReg1.elements[2 * i + 1] : + srcReg2.elements[2 * i + 1 - eCount]); + Element destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode } + else: + scalarCheck = ''' + if (i != 0) { + destReg.elements[i] = 0; + continue; + } + ''' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + %(scalarCheck)s + Element srcElem1 = gtoh(srcReg1.elements[i]); + Element srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); + Element destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode, + "scalarCheck" : scalarCheck if scalar else "", + "src2Index" : "imm" if byElem else "i" } + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegImmOp" if byElem else "DataX2RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + if byElem: + header_output += NeonX2RegImmOpDeclare.subst(iop) + else: + header_output += NeonX2RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def threeUnequalRegInstX(name, Name, opClass, types, op, + bigSrc1, bigSrc2, bigDest, readDest, scalar=False, + byElem=False, hi=False): + assert not (scalar and hi) + global header_output, exec_output + src1Cnt = src2Cnt = destCnt = 2 + src1Prefix = src2Prefix = destPrefix = '' + if bigSrc1: + src1Cnt = 4 + src1Prefix = 'Big' + if bigSrc2: + src2Cnt = 4 + src2Prefix = 'Big' + if bigDest: + destCnt = 4 + destPrefix = 'Big' + if byElem: + src2Prefix = 'Full' + eWalkCode = simd64EnabledCheckCode + ''' + %sRegVect srcReg1; + %sRegVect srcReg2; + %sRegVect destReg; + ''' % (src1Prefix, src2Prefix, destPrefix) + srcReg1 = 0 + if hi and not bigSrc1: # long/widening operations + srcReg1 = 2 + for reg in range(src1Cnt): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(srcReg1)d_uw); + ''' % { "reg" : reg, "srcReg1" : srcReg1 } + srcReg1 += 1 + srcReg2 = 0 + if (not byElem) and (hi and not bigSrc2): # long/widening operations + srcReg2 = 2 + for reg in range(src2Cnt): + eWalkCode += ''' + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(srcReg2)d_uw); + ''' % { "reg" : reg, "srcReg2" : srcReg2 } + srcReg2 += 1 + if byElem: + # 2nd operand has to be read fully + for reg in range(src2Cnt, 4): + eWalkCode += ''' + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + for reg in range(destCnt): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + scalarCheck = ''' + if (i != 0) { + destReg.elements[i] = 0; + continue; + } + ''' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + %(scalarCheck)s + %(src1Prefix)sElement srcElem1 = gtoh(srcReg1.elements[i]); + %(src1Prefix)sElement srcElem2 = gtoh(srcReg2.elements[%(src2Index)s]); + %(destPrefix)sElement destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode, + "src1Prefix" : src1Prefix, "src2Prefix" : src2Prefix, + "destPrefix" : destPrefix, + "scalarCheck" : scalarCheck if scalar else "", + "src2Index" : "imm" if byElem else "i" } + destReg = 0 + if hi and not bigDest: + # narrowing operations + destReg = 2 + for reg in range(destCnt): + eWalkCode += ''' + AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg, "destReg": destReg } + destReg += 1 + if destCnt < 4 and not hi: # zero upper half + for reg in range(destCnt, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegImmOp" if byElem else "DataX2RegOp", + { "code": eWalkCode, + "r_count": 2, + "op_class": opClass }, []) + if byElem: + header_output += NeonX2RegImmOpDeclare.subst(iop) + else: + header_output += NeonX2RegOpDeclare.subst(iop) + exec_output += NeonXUnequalRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def threeRegNarrowInstX(name, Name, opClass, types, op, readDest=False, + scalar=False, byElem=False, hi=False): + assert not byElem + threeUnequalRegInstX(name, Name, opClass, types, op, + True, True, False, readDest, scalar, byElem, hi) + + def threeRegLongInstX(name, Name, opClass, types, op, readDest=False, + scalar=False, byElem=False, hi=False): + threeUnequalRegInstX(name, Name, opClass, types, op, + False, False, True, readDest, scalar, byElem, hi) + + def threeRegWideInstX(name, Name, opClass, types, op, readDest=False, + scalar=False, byElem=False, hi=False): + assert not byElem + threeUnequalRegInstX(name, Name, opClass, types, op, + True, False, True, readDest, scalar, byElem, hi) + + def twoEqualRegInstX(name, Name, opClass, types, rCount, op, + readDest=False, scalar=False, byElem=False, + hasImm=False, isDup=False): + global header_output, exec_output + assert (not isDup) or byElem + if byElem: + hasImm = True + if isDup: + eWalkCode = simd64EnabledCheckCode + ''' + FullRegVect srcReg1; + RegVect destReg; + ''' + else: + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, destReg; + ''' + for reg in range(4 if isDup else rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + scalarCheck = ''' + if (i != 0) { + destReg.elements[i] = 0; + continue; + } + ''' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + %(scalarCheck)s + unsigned j = i; + Element srcElem1 = gtoh(srcReg1.elements[%(src1Index)s]); + Element destElem; + %(readDest)s + %(op)s + destReg.elements[j] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode, + "scalarCheck" : scalarCheck if scalar else "", + "src1Index" : "imm" if byElem else "i" } + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegImmOp" if hasImm else "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + if hasImm: + header_output += NeonX1RegImmOpDeclare.subst(iop) + else: + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegLongInstX(name, Name, opClass, types, op, readDest=False, + hi=False, hasImm=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1; + BigRegVect destReg; + ''' + destReg = 0 if not hi else 2 + for reg in range(2): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(destReg)d_uw); + ''' % { "reg" : reg, "destReg": destReg } + destReg += 1 + destReg = 0 if not hi else 2 + if readDest: + for reg in range(4): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + destReg += 1 + readDestCode = '' + if readDest: + readDestCode = 'destReg = gtoh(destReg.elements[i]);' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + Element srcElem1 = gtoh(srcReg1.elements[i]); + BigElement destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode } + for reg in range(4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegImmOp" if hasImm else "DataX1RegOp", + { "code": eWalkCode, + "r_count": 2, + "op_class": opClass }, []) + if hasImm: + header_output += NeonX1RegImmOpDeclare.subst(iop) + else: + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXUnequalRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegNarrowInstX(name, Name, opClass, types, op, readDest=False, + scalar=False, hi=False, hasImm=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + BigRegVect srcReg1; + RegVect destReg; + ''' + for reg in range(4): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + for reg in range(2): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + else: + eWalkCode += ''' + destReg.elements[0] = 0; + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + scalarCheck = ''' + if (i != 0) { + destReg.elements[i] = 0; + continue; + } + ''' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + %(scalarCheck)s + BigElement srcElem1 = gtoh(srcReg1.elements[i]); + Element destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode, + "scalarCheck" : scalarCheck if scalar else "" } + destReg = 0 if not hi else 2 + for reg in range(2): + eWalkCode += ''' + AA64FpDestP%(destReg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg, "destReg": destReg } + destReg += 1 + if not hi: + for reg in range(2, 4): # zero upper half + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegImmOp" if hasImm else "DataX1RegOp", + { "code": eWalkCode, + "r_count": 2, + "op_class": opClass }, []) + if hasImm: + header_output += NeonX1RegImmOpDeclare.subst(iop) + else: + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXUnequalRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def threeRegScrambleInstX(name, Name, opClass, types, rCount, op): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, srcReg2, destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += op + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX2RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def insFromVecElemInstX(name, Name, opClass, types, rCount): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + FullRegVect srcReg1; + RegVect destReg; + ''' + for reg in range(4): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + for reg in range(rCount): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += ''' + Element srcElem1 = gtoh(srcReg1.elements[imm2]); + Element destElem = srcElem1; + destReg.elements[imm1] = htog(destElem); + ''' + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1Reg2ImmOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1Reg2ImmOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegPairwiseScInstX(name, Name, opClass, types, rCount, op): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += ''' + Element srcElem1 = gtoh(srcReg1.elements[0]); + Element srcElem2 = gtoh(srcReg1.elements[1]); + Element destElem; + %(op)s + destReg.elements[0] = htog(destElem); + ''' % { "op" : op } + destCnt = rCount / 2 + for reg in range(destCnt): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + for reg in range(destCnt, 4): # zero upper half + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegAcrossInstX(name, Name, opClass, types, rCount, op, + doubleDest=False, long=False): + global header_output, exec_output + destPrefix = "Big" if long else "" + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1; + %sRegVect destReg; + ''' % destPrefix + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += ''' + destReg.regs[0] = 0; + %(destPrefix)sElement destElem = 0; + for (unsigned i = 0; i < eCount; i++) { + Element srcElem1 = gtoh(srcReg1.elements[i]); + if (i == 0) { + destElem = srcElem1; + } else { + %(op)s + } + } + destReg.elements[0] = htog(destElem); + ''' % { "op" : op, "destPrefix" : destPrefix } + destCnt = 2 if doubleDest else 1 + for reg in range(destCnt): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + for reg in range(destCnt, 4): # zero upper half + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegOpDeclare.subst(iop) + if long: + exec_output += NeonXUnequalRegOpExecute.subst(iop) + else: + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def twoRegCondenseInstX(name, Name, opClass, types, rCount, op, + readDest=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcRegs; + BigRegVect destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcRegs.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + if readDest: + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + eWalkCode += ''' + for (unsigned i = 0; i < eCount / 2; i++) { + Element srcElem1 = gtoh(srcRegs.elements[2 * i]); + Element srcElem2 = gtoh(srcRegs.elements[2 * i + 1]); + BigElement destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode } + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXUnequalRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def oneRegImmInstX(name, Name, opClass, types, rCount, op, readDest=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect destReg; + ''' + if readDest: + for reg in range(rCount): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + readDestCode = '' + if readDest: + readDestCode = 'destElem = gtoh(destReg.elements[i]);' + eWalkCode += ''' + for (unsigned i = 0; i < eCount; i++) { + Element destElem; + %(readDest)s + %(op)s + destReg.elements[i] = htog(destElem); + } + ''' % { "op" : op, "readDest" : readDestCode } + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataXImmOnlyOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegImmOnlyOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def dupGprInstX(name, Name, opClass, types, rCount, gprSpec): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect destReg; + for (unsigned i = 0; i < eCount; i++) { + destReg.elements[i] = htog((Element) %sOp1); + } + ''' % gprSpec + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def extInstX(name, Name, opClass, types, rCount, op): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect srcReg1, srcReg2, destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + srcReg1.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += op + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegImmOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX2RegImmOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def insFromGprInstX(name, Name, opClass, types, rCount, gprSpec): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + RegVect destReg; + ''' + for reg in range(rCount): + eWalkCode += ''' + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + eWalkCode += ''' + destReg.elements[imm] = htog((Element) %sOp1); + ''' % gprSpec + for reg in range(rCount): + eWalkCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX1RegImmOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegImmOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def insToGprInstX(name, Name, opClass, types, rCount, gprSpec, + signExt=False): + global header_output, exec_output + eWalkCode = simd64EnabledCheckCode + ''' + FullRegVect srcReg; + ''' + for reg in range(4): + eWalkCode += ''' + srcReg.regs[%(reg)d] = htog(AA64FpOp1P%(reg)d_uw); + ''' % { "reg" : reg } + if signExt: + eWalkCode += ''' + %sDest = sext<sizeof(Element) * 8>(srcReg.elements[imm]); + ''' % gprSpec + else: + eWalkCode += ''' + %sDest = srcReg.elements[imm]; + ''' % gprSpec + iop = InstObjParams(name, Name, + "DataX1RegImmOp", + { "code": eWalkCode, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX1RegImmOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + def tbxTblInstX(name, Name, opClass, types, length, isTbl, rCount): + global header_output, decoder_output, exec_output + code = simd64EnabledCheckCode + ''' + union + { + uint8_t bytes[64]; + FloatRegBits regs[16]; + } table; + + union + { + uint8_t bytes[%(rCount)d * 4]; + FloatRegBits regs[%(rCount)d]; + } destReg, srcReg2; + + const unsigned length = %(length)d; + const bool isTbl = %(isTbl)s; + ''' % { "rCount" : rCount, "length" : length, "isTbl" : isTbl } + for reg in range(rCount): + code += ''' + srcReg2.regs[%(reg)d] = htog(AA64FpOp2P%(reg)d_uw); + destReg.regs[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { "reg" : reg } + for reg in range(16): + if reg < length * 4: + code += ''' + table.regs[%(reg)d] = htog(AA64FpOp1P%(p)dV%(v)dS_uw); + ''' % { "reg" : reg, "p" : reg % 4, "v" : reg / 4 } + else: + code += ''' + table.regs[%(reg)d] = 0; + ''' % { "reg" : reg } + code += ''' + for (unsigned i = 0; i < sizeof(destReg); i++) { + uint8_t index = srcReg2.bytes[i]; + if (index < 16 * length) { + destReg.bytes[i] = table.bytes[index]; + } else { + if (isTbl) + destReg.bytes[i] = 0; + // else destReg.bytes[i] unchanged + } + } + ''' + for reg in range(rCount): + code += ''' + AA64FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]); + ''' % { "reg" : reg } + if rCount < 4: # zero upper half + for reg in range(rCount, 4): + code += ''' + AA64FpDestP%(reg)d_uw = 0; + ''' % { "reg" : reg } + iop = InstObjParams(name, Name, + "DataX2RegOp", + { "code": code, + "r_count": rCount, + "op_class": opClass }, []) + header_output += NeonX2RegOpDeclare.subst(iop) + exec_output += NeonXEqualRegOpExecute.subst(iop) + for type in types: + substDict = { "targs" : type, + "class_name" : Name } + exec_output += NeonXExecDeclare.subst(substDict) + + # ABS + absCode = ''' + if (srcElem1 < 0) { + destElem = -srcElem1; + } else { + destElem = srcElem1; + } + ''' + twoEqualRegInstX("abs", "AbsDX", "SimdAluOp", signedTypes, 2, absCode) + twoEqualRegInstX("abs", "AbsQX", "SimdAluOp", signedTypes, 4, absCode) + # ADD + addCode = "destElem = srcElem1 + srcElem2;" + threeEqualRegInstX("add", "AddDX", "SimdAddOp", unsignedTypes, 2, addCode) + threeEqualRegInstX("add", "AddQX", "SimdAddOp", unsignedTypes, 4, addCode) + # ADDHN, ADDHN2 + addhnCode = ''' + destElem = ((BigElement)srcElem1 + (BigElement)srcElem2) >> + (sizeof(Element) * 8); + ''' + threeRegNarrowInstX("addhn", "AddhnX", "SimdAddOp", smallUnsignedTypes, + addhnCode) + threeRegNarrowInstX("addhn2", "Addhn2X", "SimdAddOp", smallUnsignedTypes, + addhnCode, hi=True) + # ADDP (scalar) + twoRegPairwiseScInstX("addp", "AddpScQX", "SimdAddOp", ("uint64_t",), 4, + addCode) + # ADDP (vector) + threeEqualRegInstX("addp", "AddpDX", "SimdAddOp", smallUnsignedTypes, 2, + addCode, pairwise=True) + threeEqualRegInstX("addp", "AddpQX", "SimdAddOp", unsignedTypes, 4, + addCode, pairwise=True) + # ADDV + # Note: SimdAddOp can be a bit optimistic here + addAcrossCode = "destElem += srcElem1;" + twoRegAcrossInstX("addv", "AddvDX", "SimdAddOp", ("uint8_t", "uint16_t"), + 2, addAcrossCode) + twoRegAcrossInstX("addv", "AddvQX", "SimdAddOp", smallUnsignedTypes, 4, + addAcrossCode) + # AND + andCode = "destElem = srcElem1 & srcElem2;" + threeEqualRegInstX("and", "AndDX", "SimdAluOp", ("uint64_t",), 2, andCode) + threeEqualRegInstX("and", "AndQX", "SimdAluOp", ("uint64_t",), 4, andCode) + # BIC (immediate) + bicImmCode = "destElem &= ~imm;" + oneRegImmInstX("bic", "BicImmDX", "SimdAluOp", ("uint64_t",), 2, + bicImmCode, True) + oneRegImmInstX("bic", "BicImmQX", "SimdAluOp", ("uint64_t",), 4, + bicImmCode, True) + # BIC (register) + bicCode = "destElem = srcElem1 & ~srcElem2;" + threeEqualRegInstX("bic", "BicDX", "SimdAluOp", ("uint64_t",), 2, bicCode) + threeEqualRegInstX("bic", "BicQX", "SimdAluOp", ("uint64_t",), 4, bicCode) + # BIF + bifCode = "destElem = (destElem & srcElem2) | (srcElem1 & ~srcElem2);" + threeEqualRegInstX("bif", "BifDX", "SimdAluOp", ("uint64_t",), 2, bifCode, + True) + threeEqualRegInstX("bif", "BifQX", "SimdAluOp", ("uint64_t",), 4, bifCode, + True) + # BIT + bitCode = "destElem = (srcElem1 & srcElem2) | (destElem & ~srcElem2);" + threeEqualRegInstX("bit", "BitDX", "SimdAluOp", ("uint64_t",), 2, bitCode, + True) + threeEqualRegInstX("bit", "BitQX", "SimdAluOp", ("uint64_t",), 4, bitCode, + True) + # BSL + bslCode = "destElem = (srcElem1 & destElem) | (srcElem2 & ~destElem);" + threeEqualRegInstX("bsl", "BslDX", "SimdAluOp", ("uint64_t",), 2, bslCode, + True) + threeEqualRegInstX("bsl", "BslQX", "SimdAluOp", ("uint64_t",), 4, bslCode, + True) + # CLS + clsCode = ''' + unsigned count = 0; + if (srcElem1 < 0) { + srcElem1 <<= 1; + while (srcElem1 < 0 && count < sizeof(Element) * 8 - 1) { + count++; + srcElem1 <<= 1; + } + } else { + srcElem1 <<= 1; + while (srcElem1 >= 0 && count < sizeof(Element) * 8 - 1) { + count++; + srcElem1 <<= 1; + } + } + destElem = count; + ''' + twoEqualRegInstX("cls", "ClsDX", "SimdAluOp", smallSignedTypes, 2, clsCode) + twoEqualRegInstX("cls", "ClsQX", "SimdAluOp", smallSignedTypes, 4, clsCode) + # CLZ + clzCode = ''' + unsigned count = 0; + while (srcElem1 >= 0 && count < sizeof(Element) * 8) { + count++; + srcElem1 <<= 1; + } + destElem = count; + ''' + twoEqualRegInstX("clz", "ClzDX", "SimdAluOp", smallSignedTypes, 2, clzCode) + twoEqualRegInstX("clz", "ClzQX", "SimdAluOp", smallSignedTypes, 4, clzCode) + # CMEQ (register) + cmeqCode = "destElem = (srcElem1 == srcElem2) ? (Element)(-1) : 0;" + threeEqualRegInstX("cmeq", "CmeqDX", "SimdCmpOp", unsignedTypes, 2, + cmeqCode) + threeEqualRegInstX("cmeq", "CmeqQX", "SimdCmpOp", unsignedTypes, 4, + cmeqCode) + # CMEQ (zero) + cmeqZeroCode = "destElem = (srcElem1 == 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmeq", "CmeqZeroDX", "SimdCmpOp", signedTypes, 2, + cmeqZeroCode) + twoEqualRegInstX("cmeq", "CmeqZeroQX", "SimdCmpOp", signedTypes, 4, + cmeqZeroCode) + # CMGE (register) + cmgeCode = "destElem = (srcElem1 >= srcElem2) ? (Element)(-1) : 0;" + threeEqualRegInstX("cmge", "CmgeDX", "SimdCmpOp", signedTypes, 2, cmgeCode) + threeEqualRegInstX("cmge", "CmgeQX", "SimdCmpOp", signedTypes, 4, cmgeCode) + # CMGE (zero) + cmgeZeroCode = "destElem = (srcElem1 >= 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmge", "CmgeZeroDX", "SimdCmpOp", signedTypes, 2, + cmgeZeroCode) + twoEqualRegInstX("cmge", "CmgeZeroQX", "SimdCmpOp", signedTypes, 4, + cmgeZeroCode) + # CMGT (register) + cmgtCode = "destElem = (srcElem1 > srcElem2) ? (Element)(-1) : 0;" + threeEqualRegInstX("cmgt", "CmgtDX", "SimdCmpOp", signedTypes, 2, cmgtCode) + threeEqualRegInstX("cmgt", "CmgtQX", "SimdCmpOp", signedTypes, 4, cmgtCode) + # CMGT (zero) + cmgtZeroCode = "destElem = (srcElem1 > 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmgt", "CmgtZeroDX", "SimdCmpOp", signedTypes, 2, + cmgtZeroCode) + twoEqualRegInstX("cmgt", "CmgtZeroQX", "SimdCmpOp", signedTypes, 4, + cmgtZeroCode) + # CMHI (register) + threeEqualRegInstX("cmhi", "CmhiDX", "SimdCmpOp", unsignedTypes, 2, + cmgtCode) + threeEqualRegInstX("cmhi", "CmhiQX", "SimdCmpOp", unsignedTypes, 4, + cmgtCode) + # CMHS (register) + threeEqualRegInstX("cmhs", "CmhsDX", "SimdCmpOp", unsignedTypes, 2, + cmgeCode) + threeEqualRegInstX("cmhs", "CmhsQX", "SimdCmpOp", unsignedTypes, 4, + cmgeCode) + # CMLE (zero) + cmleZeroCode = "destElem = (srcElem1 <= 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmle", "CmleZeroDX", "SimdCmpOp", signedTypes, 2, + cmleZeroCode) + twoEqualRegInstX("cmle", "CmleZeroQX", "SimdCmpOp", signedTypes, 4, + cmleZeroCode) + # CMLT (zero) + cmltZeroCode = "destElem = (srcElem1 < 0) ? (Element)(-1) : 0;" + twoEqualRegInstX("cmlt", "CmltZeroDX", "SimdCmpOp", signedTypes, 2, + cmltZeroCode) + twoEqualRegInstX("cmlt", "CmltZeroQX", "SimdCmpOp", signedTypes, 4, + cmltZeroCode) + # CMTST (register) + tstCode = "destElem = (srcElem1 & srcElem2) ? (Element)(-1) : 0;" + threeEqualRegInstX("cmtst", "CmtstDX", "SimdAluOp", unsignedTypes, 2, + tstCode) + threeEqualRegInstX("cmtst", "CmtstQX", "SimdAluOp", unsignedTypes, 4, + tstCode) + # CNT + cntCode = ''' + unsigned count = 0; + while (srcElem1 && count < sizeof(Element) * 8) { + count += srcElem1 & 0x1; + srcElem1 >>= 1; + } + destElem = count; + ''' + twoEqualRegInstX("cnt", "CntDX", "SimdAluOp", ("uint8_t",), 2, cntCode) + twoEqualRegInstX("cnt", "CntQX", "SimdAluOp", ("uint8_t",), 4, cntCode) + # DUP (element) + dupCode = "destElem = srcElem1;" + twoEqualRegInstX("dup", "DupElemDX", "SimdMiscOp", smallUnsignedTypes, 2, + dupCode, isDup=True, byElem=True) + twoEqualRegInstX("dup", "DupElemQX", "SimdMiscOp", unsignedTypes, 4, + dupCode, isDup=True, byElem=True) + twoEqualRegInstX("dup", "DupElemScX", "SimdMiscOp", unsignedTypes, 4, + dupCode, isDup=True, byElem=True, scalar=True) + # DUP (general register) + dupGprInstX("dup", "DupGprWDX", "SimdMiscOp", smallUnsignedTypes, 2, 'W') + dupGprInstX("dup", "DupGprWQX", "SimdMiscOp", smallUnsignedTypes, 4, 'W') + dupGprInstX("dup", "DupGprXQX", "SimdMiscOp", ("uint64_t",), 4, 'X') + # EOR + eorCode = "destElem = srcElem1 ^ srcElem2;" + threeEqualRegInstX("eor", "EorDX", "SimdAluOp", ("uint64_t",), 2, eorCode) + threeEqualRegInstX("eor", "EorQX", "SimdAluOp", ("uint64_t",), 4, eorCode) + # EXT + extCode = ''' + for (unsigned i = 0; i < eCount; i++) { + unsigned index = i + imm; + if (index < eCount) { + destReg.elements[i] = srcReg1.elements[index]; + } else { + index -= eCount; + if (index >= eCount) { + fault = new UndefinedInstruction(machInst, false, mnemonic); + } else { + destReg.elements[i] = srcReg2.elements[index]; + } + } + } + ''' + extInstX("Ext", "ExtDX", "SimdMiscOp", ("uint8_t",), 2, extCode) + extInstX("Ext", "ExtQX", "SimdMiscOp", ("uint8_t",), 4, extCode) + # FABD + fpOp = ''' + FPSCR fpscr = (FPSCR) FpscrExc; + destElem = %s; + FpscrExc = fpscr; + ''' + fabdCode = fpOp % "fplibAbs<Element>(fplibSub(srcElem1, srcElem2, fpscr))" + threeEqualRegInstX("fabd", "FabdDX", "SimdFloatAddOp", smallFloatTypes, 2, + fabdCode) + threeEqualRegInstX("fabd", "FabdQX", "SimdFloatAddOp", floatTypes, 4, + fabdCode) + threeEqualRegInstX("fabd", "FabdScX", "SimdFloatAddOp", floatTypes, 4, + fabdCode, scalar=True) + # FABS + fabsCode = fpOp % "fplibAbs<Element>(srcElem1)" + twoEqualRegInstX("Abs", "FabsDX", "SimdFloatAluOp", smallFloatTypes, 2, + fabsCode) + twoEqualRegInstX("Abs", "FabsQX", "SimdFloatAluOp", floatTypes, 4, + fabsCode) + # FACGE + fpCmpAbsOp = fpOp % ("fplibCompare%s<Element>(fplibAbs<Element>(srcElem1)," + " fplibAbs<Element>(srcElem2), fpscr) ? -1 : 0") + facgeCode = fpCmpAbsOp % "GE" + threeEqualRegInstX("facge", "FacgeDX", "SimdFloatCmpOp", smallFloatTypes, + 2, facgeCode) + threeEqualRegInstX("facge", "FacgeQX", "SimdFloatCmpOp", floatTypes, 4, + facgeCode) + threeEqualRegInstX("facge", "FacgeScX", "SimdFloatCmpOp", floatTypes, 4, + facgeCode, scalar=True) + # FACGT + facgtCode = fpCmpAbsOp % "GT" + threeEqualRegInstX("facgt", "FacgtDX", "SimdFloatCmpOp", smallFloatTypes, + 2, facgtCode) + threeEqualRegInstX("facgt", "FacgtQX", "SimdFloatCmpOp", floatTypes, 4, + facgtCode) + threeEqualRegInstX("facgt", "FacgtScX", "SimdFloatCmpOp", floatTypes, 4, + facgtCode, scalar=True) + # FADD + fpBinOp = fpOp % "fplib%s<Element>(srcElem1, srcElem2, fpscr)" + faddCode = fpBinOp % "Add" + threeEqualRegInstX("fadd", "FaddDX", "SimdFloatAddOp", smallFloatTypes, 2, + faddCode) + threeEqualRegInstX("fadd", "FaddQX", "SimdFloatAddOp", floatTypes, 4, + faddCode) + # FADDP (scalar) + twoRegPairwiseScInstX("faddp", "FaddpScDX", "SimdFloatAddOp", + ("uint32_t",), 2, faddCode) + twoRegPairwiseScInstX("faddp", "FaddpScQX", "SimdFloatAddOp", + ("uint64_t",), 4, faddCode) + # FADDP (vector) + threeEqualRegInstX("faddp", "FaddpDX", "SimdFloatAddOp", smallFloatTypes, + 2, faddCode, pairwise=True) + threeEqualRegInstX("faddp", "FaddpQX", "SimdFloatAddOp", floatTypes, 4, + faddCode, pairwise=True) + # FCMEQ (register) + fpCmpOp = fpOp % ("fplibCompare%s<Element>(srcElem1, srcElem2, fpscr) ?" + " -1 : 0") + fcmeqCode = fpCmpOp % "EQ" + threeEqualRegInstX("fcmeq", "FcmeqDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmeqCode) + threeEqualRegInstX("fcmeq", "FcmeqQX", "SimdFloatCmpOp", floatTypes, 4, + fcmeqCode) + threeEqualRegInstX("fcmeq", "FcmeqScX", "SimdFloatCmpOp", floatTypes, 4, + fcmeqCode, scalar=True) + # FCMEQ (zero) + fpCmpZeroOp = fpOp % "fplibCompare%s<Element>(srcElem1, 0, fpscr) ? -1 : 0" + fcmeqZeroCode = fpCmpZeroOp % "EQ" + twoEqualRegInstX("fcmeq", "FcmeqZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmeqZeroCode) + twoEqualRegInstX("fcmeq", "FcmeqZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmeqZeroCode) + twoEqualRegInstX("fcmeq", "FcmeqZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmeqZeroCode, scalar=True) + # FCMGE (register) + fcmgeCode = fpCmpOp % "GE" + threeEqualRegInstX("fcmge", "FcmgeDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmgeCode) + threeEqualRegInstX("fcmge", "FcmgeQX", "SimdFloatCmpOp", floatTypes, 4, + fcmgeCode) + threeEqualRegInstX("fcmge", "FcmgeScX", "SimdFloatCmpOp", floatTypes, 4, + fcmgeCode, scalar=True) + # FCMGE (zero) + fcmgeZeroCode = fpCmpZeroOp % "GE" + twoEqualRegInstX("fcmge", "FcmgeZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmgeZeroCode) + twoEqualRegInstX("fcmge", "FcmgeZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmgeZeroCode) + twoEqualRegInstX("fcmge", "FcmgeZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmgeZeroCode, scalar=True) + # FCMGT (register) + fcmgtCode = fpCmpOp % "GT" + threeEqualRegInstX("fcmgt", "FcmgtDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmgtCode) + threeEqualRegInstX("fcmgt", "FcmgtQX", "SimdFloatCmpOp", floatTypes, 4, + fcmgtCode) + threeEqualRegInstX("fcmgt", "FcmgtScX", "SimdFloatCmpOp", floatTypes, 4, + fcmgtCode, scalar=True) + # FCMGT (zero) + fcmgtZeroCode = fpCmpZeroOp % "GT" + twoEqualRegInstX("fcmgt", "FcmgtZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmgtZeroCode) + twoEqualRegInstX("fcmgt", "FcmgtZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmgtZeroCode) + twoEqualRegInstX("fcmgt", "FcmgtZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmgtZeroCode, scalar=True) + # FCMLE (zero) + fpCmpRevZeroOp = fpOp % ("fplibCompare%s<Element>(0, srcElem1, fpscr) ?" + " -1 : 0") + fcmleZeroCode = fpCmpRevZeroOp % "GE" + twoEqualRegInstX("fcmle", "FcmleZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmleZeroCode) + twoEqualRegInstX("fcmle", "FcmleZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmleZeroCode) + twoEqualRegInstX("fcmle", "FcmleZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmleZeroCode, scalar=True) + # FCMLT (zero) + fcmltZeroCode = fpCmpRevZeroOp % "GT" + twoEqualRegInstX("fcmlt", "FcmltZeroDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fcmltZeroCode) + twoEqualRegInstX("fcmlt", "FcmltZeroQX", "SimdFloatCmpOp", floatTypes, 4, + fcmltZeroCode) + twoEqualRegInstX("fcmlt", "FcmltZeroScX", "SimdFloatCmpOp", floatTypes, 4, + fcmltZeroCode, scalar=True) + # FCVTAS + fcvtCode = fpOp % ("fplibFPToFixed<Element, Element>(" + "srcElem1, %s, %s, %s, fpscr)") + fcvtasCode = fcvtCode % ("0", "false", "FPRounding_TIEAWAY") + twoEqualRegInstX("fcvtas", "FcvtasDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtasCode) + twoEqualRegInstX("fcvtas", "FcvtasQX", "SimdCvtOp", floatTypes, 4, + fcvtasCode) + twoEqualRegInstX("fcvtas", "FcvtasScX", "SimdCvtOp", floatTypes, 4, + fcvtasCode, scalar=True) + # FCVTAU + fcvtauCode = fcvtCode % ("0", "true", "FPRounding_TIEAWAY") + twoEqualRegInstX("fcvtau", "FcvtauDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtauCode) + twoEqualRegInstX("fcvtau", "FcvtauQX", "SimdCvtOp", floatTypes, 4, + fcvtauCode) + twoEqualRegInstX("fcvtau", "FcvtauScX", "SimdCvtOp", floatTypes, 4, + fcvtauCode, scalar=True) + # FCVTL, FCVTL2 + fcvtlCode = fpOp % ("fplibConvert<Element, BigElement>(" + "srcElem1, FPCRRounding(fpscr), fpscr)") + twoRegLongInstX("fcvtl", "FcvtlX", "SimdCvtOp", ("uint16_t", "uint32_t"), + fcvtlCode) + twoRegLongInstX("fcvtl", "Fcvtl2X", "SimdCvtOp", ("uint16_t", "uint32_t"), + fcvtlCode, hi=True) + # FCVTMS + fcvtmsCode = fcvtCode % ("0", "false", "FPRounding_NEGINF") + twoEqualRegInstX("fcvtms", "FcvtmsDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtmsCode) + twoEqualRegInstX("fcvtms", "FcvtmsQX", "SimdCvtOp", floatTypes, 4, + fcvtmsCode) + twoEqualRegInstX("fcvtms", "FcvtmsScX", "SimdCvtOp", floatTypes, 4, + fcvtmsCode, scalar=True) + # FCVTMU + fcvtmuCode = fcvtCode % ("0", "true", "FPRounding_NEGINF") + twoEqualRegInstX("fcvtmu", "FcvtmuDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtmuCode) + twoEqualRegInstX("fcvtmu", "FcvtmuQX", "SimdCvtOp", floatTypes, 4, + fcvtmuCode) + twoEqualRegInstX("fcvtmu", "FcvtmuScX", "SimdCvtOp", floatTypes, 4, + fcvtmuCode, scalar=True) + # FCVTN, FCVTN2 + fcvtnCode = fpOp % ("fplibConvert<BigElement, Element>(" + "srcElem1, FPCRRounding(fpscr), fpscr)") + twoRegNarrowInstX("fcvtn", "FcvtnX", "SimdCvtOp", + ("uint16_t", "uint32_t"), fcvtnCode) + twoRegNarrowInstX("fcvtn", "Fcvtn2X", "SimdCvtOp", + ("uint16_t", "uint32_t"), fcvtnCode, hi=True) + # FCVTNS + fcvtnsCode = fcvtCode % ("0", "false", "FPRounding_TIEEVEN") + twoEqualRegInstX("fcvtns", "FcvtnsDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtnsCode) + twoEqualRegInstX("fcvtns", "FcvtnsQX", "SimdCvtOp", floatTypes, 4, + fcvtnsCode) + twoEqualRegInstX("fcvtns", "FcvtnsScX", "SimdCvtOp", floatTypes, 4, + fcvtnsCode, scalar=True) + # FCVTNU + fcvtnuCode = fcvtCode % ("0", "true", "FPRounding_TIEEVEN") + twoEqualRegInstX("fcvtnu", "FcvtnuDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtnuCode) + twoEqualRegInstX("fcvtnu", "FcvtnuQX", "SimdCvtOp", floatTypes, 4, + fcvtnuCode) + twoEqualRegInstX("fcvtnu", "FcvtnuScX", "SimdCvtOp", floatTypes, 4, + fcvtnuCode, scalar=True) + # FCVTPS + fcvtpsCode = fcvtCode % ("0", "false", "FPRounding_POSINF") + twoEqualRegInstX("fcvtps", "FcvtpsDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtpsCode) + twoEqualRegInstX("fcvtps", "FcvtpsQX", "SimdCvtOp", floatTypes, 4, + fcvtpsCode) + twoEqualRegInstX("fcvtps", "FcvtpsScX", "SimdCvtOp", floatTypes, 4, + fcvtpsCode, scalar=True) + # FCVTPU + fcvtpuCode = fcvtCode % ("0", "true", "FPRounding_POSINF") + twoEqualRegInstX("fcvtpu", "FcvtpuDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtpuCode) + twoEqualRegInstX("fcvtpu", "FcvtpuQX", "SimdCvtOp", floatTypes, 4, + fcvtpuCode) + twoEqualRegInstX("fcvtpu", "FcvtpuScX", "SimdCvtOp", floatTypes, 4, + fcvtpuCode, scalar=True) + # FCVTXN, FCVTXN2 + fcvtxnCode = fpOp % ("fplibConvert<BigElement, Element>(" + "srcElem1, FPRounding_ODD, fpscr)") + twoRegNarrowInstX("fcvtxn", "FcvtxnX", "SimdCvtOp", smallFloatTypes, + fcvtxnCode) + twoRegNarrowInstX("fcvtxn", "Fcvtxn2X", "SimdCvtOp", smallFloatTypes, + fcvtxnCode, hi=True) + twoRegNarrowInstX("fcvtxn", "FcvtxnScX", "SimdCvtOp", smallFloatTypes, + fcvtxnCode, scalar=True) + # FCVTZS (fixed-point) + fcvtzsCode = fcvtCode % ("imm", "false", "FPRounding_ZERO") + twoEqualRegInstX("fcvtzs", "FcvtzsFixedDX", "SimdCvtOp", smallFloatTypes, + 2, fcvtzsCode, hasImm=True) + twoEqualRegInstX("fcvtzs", "FcvtzsFixedQX", "SimdCvtOp", floatTypes, 4, + fcvtzsCode, hasImm=True) + twoEqualRegInstX("fcvtzs", "FcvtzsFixedScX", "SimdCvtOp", floatTypes, 4, + fcvtzsCode, hasImm=True, scalar=True) + # FCVTZS (integer) + fcvtzsIntCode = fcvtCode % ("0", "false", "FPRounding_ZERO") + twoEqualRegInstX("fcvtzs", "FcvtzsIntDX", "SimdCvtOp", smallFloatTypes, + 2, fcvtzsIntCode) + twoEqualRegInstX("fcvtzs", "FcvtzsIntQX", "SimdCvtOp", floatTypes, 4, + fcvtzsIntCode) + twoEqualRegInstX("fcvtzs", "FcvtzsIntScX", "SimdCvtOp", floatTypes, 4, + fcvtzsIntCode, scalar=True) + # FCVTZU (fixed-point) + fcvtzuCode = fcvtCode % ("imm", "true", "FPRounding_ZERO") + twoEqualRegInstX("fcvtzu", "FcvtzuFixedDX", "SimdCvtOp", smallFloatTypes, + 2, fcvtzuCode, hasImm=True) + twoEqualRegInstX("fcvtzu", "FcvtzuFixedQX", "SimdCvtOp", floatTypes, 4, + fcvtzuCode, hasImm=True) + twoEqualRegInstX("fcvtzu", "FcvtzuFixedScX", "SimdCvtOp", floatTypes, 4, + fcvtzuCode, hasImm=True, scalar=True) + # FCVTZU (integer) + fcvtzuIntCode = fcvtCode % ("0", "true", "FPRounding_ZERO") + twoEqualRegInstX("fcvtzu", "FcvtzuIntDX", "SimdCvtOp", smallFloatTypes, 2, + fcvtzuIntCode) + twoEqualRegInstX("fcvtzu", "FcvtzuIntQX", "SimdCvtOp", floatTypes, 4, + fcvtzuIntCode) + twoEqualRegInstX("fcvtzu", "FcvtzuIntScX", "SimdCvtOp", floatTypes, 4, + fcvtzuIntCode, scalar=True) + # FDIV + fdivCode = fpBinOp % "Div" + threeEqualRegInstX("fdiv", "FdivDX", "SimdFloatDivOp", smallFloatTypes, 2, + fdivCode) + threeEqualRegInstX("fdiv", "FdivQX", "SimdFloatDivOp", floatTypes, 4, + fdivCode) + # FMAX + fmaxCode = fpBinOp % "Max" + threeEqualRegInstX("fmax", "FmaxDX", "SimdFloatCmpOp", smallFloatTypes, 2, + fmaxCode) + threeEqualRegInstX("fmax", "FmaxQX", "SimdFloatCmpOp", floatTypes, 4, + fmaxCode) + # FMAXNM + fmaxnmCode = fpBinOp % "MaxNum" + threeEqualRegInstX("fmaxnm", "FmaxnmDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fmaxnmCode) + threeEqualRegInstX("fmaxnm", "FmaxnmQX", "SimdFloatCmpOp", floatTypes, 4, + fmaxnmCode) + # FMAXNMP (scalar) + twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScDX", "SimdFloatCmpOp", + ("uint32_t",), 2, fmaxnmCode) + twoRegPairwiseScInstX("fmaxnmp", "FmaxnmpScQX", "SimdFloatCmpOp", + ("uint64_t",), 4, fmaxnmCode) + # FMAXNMP (vector) + threeEqualRegInstX("fmaxnmp", "FmaxnmpDX", "SimdFloatCmpOp", + smallFloatTypes, 2, fmaxnmCode, pairwise=True) + threeEqualRegInstX("fmaxnmp", "FmaxnmpQX", "SimdFloatCmpOp", floatTypes, 4, + fmaxnmCode, pairwise=True) + # FMAXNMV + # Note: SimdFloatCmpOp can be a bit optimistic here + fpAcrossOp = fpOp % "fplib%s<Element>(destElem, srcElem1, fpscr)" + fmaxnmAcrossCode = fpAcrossOp % "MaxNum" + twoRegAcrossInstX("fmaxnmv", "FmaxnmvQX", "SimdFloatCmpOp", ("uint32_t",), + 4, fmaxnmAcrossCode) + # FMAXP (scalar) + twoRegPairwiseScInstX("fmaxp", "FmaxpScDX", "SimdFloatCmpOp", + ("uint32_t",), 2, fmaxCode) + twoRegPairwiseScInstX("fmaxp", "FmaxpScQX", "SimdFloatCmpOp", + ("uint64_t",), 4, fmaxCode) + # FMAXP (vector) + threeEqualRegInstX("fmaxp", "FmaxpDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fmaxCode, pairwise=True) + threeEqualRegInstX("fmaxp", "FmaxpQX", "SimdFloatCmpOp", floatTypes, 4, + fmaxCode, pairwise=True) + # FMAXV + # Note: SimdFloatCmpOp can be a bit optimistic here + fmaxAcrossCode = fpAcrossOp % "Max" + twoRegAcrossInstX("fmaxv", "FmaxvQX", "SimdFloatCmpOp", ("uint32_t",), 4, + fmaxAcrossCode) + # FMIN + fminCode = fpBinOp % "Min" + threeEqualRegInstX("fmin", "FminDX", "SimdFloatCmpOp", smallFloatTypes, 2, + fminCode) + threeEqualRegInstX("fmin", "FminQX", "SimdFloatCmpOp", floatTypes, 4, + fminCode) + # FMINNM + fminnmCode = fpBinOp % "MinNum" + threeEqualRegInstX("fminnm", "FminnmDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fminnmCode) + threeEqualRegInstX("fminnm", "FminnmQX", "SimdFloatCmpOp", floatTypes, 4, + fminnmCode) + # FMINNMP (scalar) + twoRegPairwiseScInstX("fminnmp", "FminnmpScDX", "SimdFloatCmpOp", + ("uint32_t",), 2, fminnmCode) + twoRegPairwiseScInstX("fminnmp", "FminnmpScQX", "SimdFloatCmpOp", + ("uint64_t",), 4, fminnmCode) + # FMINNMP (vector) + threeEqualRegInstX("fminnmp", "FminnmpDX", "SimdFloatCmpOp", + smallFloatTypes, 2, fminnmCode, pairwise=True) + threeEqualRegInstX("fminnmp", "FminnmpQX", "SimdFloatCmpOp", floatTypes, 4, + fminnmCode, pairwise=True) + # FMINNMV + # Note: SimdFloatCmpOp can be a bit optimistic here + fminnmAcrossCode = fpAcrossOp % "MinNum" + twoRegAcrossInstX("fminnmv", "FminnmvQX", "SimdFloatCmpOp", ("uint32_t",), + 4, fminnmAcrossCode) + # FMINP (scalar) + twoRegPairwiseScInstX("fminp", "FminpScDX", "SimdFloatCmpOp", + ("uint32_t",), 2, fminCode) + twoRegPairwiseScInstX("fminp", "FminpScQX", "SimdFloatCmpOp", + ("uint64_t",), 4, fminCode) + # FMINP (vector) + threeEqualRegInstX("fminp", "FminpDX", "SimdFloatCmpOp", smallFloatTypes, + 2, fminCode, pairwise=True) + threeEqualRegInstX("fminp", "FminpQX", "SimdFloatCmpOp", floatTypes, 4, + fminCode, pairwise=True) + # FMINV + # Note: SimdFloatCmpOp can be a bit optimistic here + fminAcrossCode = fpAcrossOp % "Min" + twoRegAcrossInstX("fminv", "FminvQX", "SimdFloatCmpOp", ("uint32_t",), 4, + fminAcrossCode) + # FMLA (by element) + fmlaCode = fpOp % ("fplibMulAdd<Element>(" + "destElem, srcElem1, srcElem2, fpscr)") + threeEqualRegInstX("fmla", "FmlaElemDX", "SimdFloatMultAccOp", + smallFloatTypes, 2, fmlaCode, True, byElem=True) + threeEqualRegInstX("fmla", "FmlaElemQX", "SimdFloatMultAccOp", floatTypes, + 4, fmlaCode, True, byElem=True) + threeEqualRegInstX("fmla", "FmlaElemScX", "SimdFloatMultAccOp", floatTypes, + 4, fmlaCode, True, byElem=True, scalar=True) + # FMLA (vector) + threeEqualRegInstX("fmla", "FmlaDX", "SimdFloatMultAccOp", smallFloatTypes, + 2, fmlaCode, True) + threeEqualRegInstX("fmla", "FmlaQX", "SimdFloatMultAccOp", floatTypes, 4, + fmlaCode, True) + # FMLS (by element) + fmlsCode = fpOp % ("fplibMulAdd<Element>(destElem," + " fplibNeg<Element>(srcElem1), srcElem2, fpscr)") + threeEqualRegInstX("fmls", "FmlsElemDX", "SimdFloatMultAccOp", + smallFloatTypes, 2, fmlsCode, True, byElem=True) + threeEqualRegInstX("fmls", "FmlsElemQX", "SimdFloatMultAccOp", floatTypes, + 4, fmlsCode, True, byElem=True) + threeEqualRegInstX("fmls", "FmlsElemScX", "SimdFloatMultAccOp", floatTypes, + 4, fmlsCode, True, byElem=True, scalar=True) + # FMLS (vector) + threeEqualRegInstX("fmls", "FmlsDX", "SimdFloatMultAccOp", smallFloatTypes, + 2, fmlsCode, True) + threeEqualRegInstX("fmls", "FmlsQX", "SimdFloatMultAccOp", floatTypes, 4, + fmlsCode, True) + # FMOV + fmovCode = 'destElem = imm;' + oneRegImmInstX("fmov", "FmovDX", "SimdMiscOp", smallFloatTypes, 2, + fmovCode) + oneRegImmInstX("fmov", "FmovQX", "SimdMiscOp", floatTypes, 4, fmovCode) + # FMUL (by element) + fmulCode = fpBinOp % "Mul" + threeEqualRegInstX("fmul", "FmulElemDX", "SimdFloatMultOp", + smallFloatTypes, 2, fmulCode, byElem=True) + threeEqualRegInstX("fmul", "FmulElemQX", "SimdFloatMultOp", floatTypes, 4, + fmulCode, byElem=True) + threeEqualRegInstX("fmul", "FmulElemScX", "SimdFloatMultOp", floatTypes, 4, + fmulCode, byElem=True, scalar=True) + # FMUL (vector) + threeEqualRegInstX("fmul", "FmulDX", "SimdFloatMultOp", smallFloatTypes, 2, + fmulCode) + threeEqualRegInstX("fmul", "FmulQX", "SimdFloatMultOp", floatTypes, 4, + fmulCode) + # FMULX + fmulxCode = fpBinOp % "MulX" + threeEqualRegInstX("fmulx", "FmulxDX", "SimdFloatMultOp", smallFloatTypes, + 2, fmulxCode) + threeEqualRegInstX("fmulx", "FmulxQX", "SimdFloatMultOp", floatTypes, 4, + fmulxCode) + threeEqualRegInstX("fmulx", "FmulxScX", "SimdFloatMultOp", floatTypes, 4, + fmulxCode, scalar=True) + # FMULX (by element) + threeEqualRegInstX("fmulx", "FmulxElemDX", "SimdFloatMultOp", + smallFloatTypes, 2, fmulxCode, byElem=True) + threeEqualRegInstX("fmulx", "FmulxElemQX", "SimdFloatMultOp", floatTypes, + 4, fmulxCode, byElem=True) + threeEqualRegInstX("fmulx", "FmulxElemScX", "SimdFloatMultOp", floatTypes, + 4, fmulxCode, byElem=True, scalar=True) + # FNEG + fnegCode = fpOp % "fplibNeg<Element>(srcElem1)" + twoEqualRegInstX("Neg", "FnegDX", "SimdFloatAluOp", smallFloatTypes, 2, + fnegCode) + twoEqualRegInstX("Neg", "FnegQX", "SimdFloatAluOp", floatTypes, 4, + fnegCode) + # FRECPE + frecpeCode = fpOp % "fplibRecipEstimate<Element>(srcElem1, fpscr)" + twoEqualRegInstX("frecpe", "FrecpeDX", "SimdFloatMultAccOp", + smallFloatTypes, 2, frecpeCode) + twoEqualRegInstX("frecpe", "FrecpeQX", "SimdFloatMultAccOp", floatTypes, 4, + frecpeCode) + twoEqualRegInstX("frecpe", "FrecpeScX", "SimdFloatMultAccOp", floatTypes, + 4, frecpeCode, scalar=True) + # FRECPS + frecpsCode = fpBinOp % "RecipStepFused" + threeEqualRegInstX("frecps", "FrecpsDX", "SimdFloatMultAccOp", + smallFloatTypes, 2, frecpsCode) + threeEqualRegInstX("frecps", "FrecpsQX", "SimdFloatMultAccOp", floatTypes, + 4, frecpsCode) + threeEqualRegInstX("frecps", "FrecpsScX", "SimdFloatMultAccOp", floatTypes, + 4, frecpsCode, scalar=True) + # FRECPX + frecpxCode = fpOp % "fplibRecpX<Element>(srcElem1, fpscr)" + twoEqualRegInstX("frecpx", "FrecpxX", "SimdFloatMultAccOp", floatTypes, 4, + frecpxCode, scalar=True) + # FRINTA + frintCode = fpOp % "fplibRoundInt<Element>(srcElem1, %s, %s, fpscr)" + frintaCode = frintCode % ("FPRounding_TIEAWAY", "false") + twoEqualRegInstX("frinta", "FrintaDX", "SimdCvtOp", smallFloatTypes, 2, + frintaCode) + twoEqualRegInstX("frinta", "FrintaQX", "SimdCvtOp", floatTypes, 4, + frintaCode) + # FRINTI + frintiCode = frintCode % ("FPCRRounding(fpscr)", "false") + twoEqualRegInstX("frinti", "FrintiDX", "SimdCvtOp", smallFloatTypes, 2, + frintiCode) + twoEqualRegInstX("frinti", "FrintiQX", "SimdCvtOp", floatTypes, 4, + frintiCode) + # FRINTM + frintmCode = frintCode % ("FPRounding_NEGINF", "false") + twoEqualRegInstX("frintm", "FrintmDX", "SimdCvtOp", smallFloatTypes, 2, + frintmCode) + twoEqualRegInstX("frintm", "FrintmQX", "SimdCvtOp", floatTypes, 4, + frintmCode) + # FRINTN + frintnCode = frintCode % ("FPRounding_TIEEVEN", "false") + twoEqualRegInstX("frintn", "FrintnDX", "SimdCvtOp", smallFloatTypes, 2, + frintnCode) + twoEqualRegInstX("frintn", "FrintnQX", "SimdCvtOp", floatTypes, 4, + frintnCode) + # FRINTP + frintpCode = frintCode % ("FPRounding_POSINF", "false") + twoEqualRegInstX("frintp", "FrintpDX", "SimdCvtOp", smallFloatTypes, 2, + frintpCode) + twoEqualRegInstX("frintp", "FrintpQX", "SimdCvtOp", floatTypes, 4, + frintpCode) + # FRINTX + frintxCode = frintCode % ("FPCRRounding(fpscr)", "true") + twoEqualRegInstX("frintx", "FrintxDX", "SimdCvtOp", smallFloatTypes, 2, + frintxCode) + twoEqualRegInstX("frintx", "FrintxQX", "SimdCvtOp", floatTypes, 4, + frintxCode) + # FRINTZ + frintzCode = frintCode % ("FPRounding_ZERO", "false") + twoEqualRegInstX("frintz", "FrintzDX", "SimdCvtOp", smallFloatTypes, 2, + frintzCode) + twoEqualRegInstX("frintz", "FrintzQX", "SimdCvtOp", floatTypes, 4, + frintzCode) + # FRSQRTE + frsqrteCode = fpOp % "fplibRSqrtEstimate<Element>(srcElem1, fpscr)" + twoEqualRegInstX("frsqrte", "FrsqrteDX", "SimdFloatSqrtOp", + smallFloatTypes, 2, frsqrteCode) + twoEqualRegInstX("frsqrte", "FrsqrteQX", "SimdFloatSqrtOp", floatTypes, 4, + frsqrteCode) + twoEqualRegInstX("frsqrte", "FrsqrteScX", "SimdFloatSqrtOp", floatTypes, 4, + frsqrteCode, scalar=True) + # FRSQRTS + frsqrtsCode = fpBinOp % "RSqrtStepFused" + threeEqualRegInstX("frsqrts", "FrsqrtsDX", "SimdFloatMiscOp", + smallFloatTypes, 2, frsqrtsCode) + threeEqualRegInstX("frsqrts", "FrsqrtsQX", "SimdFloatMiscOp", floatTypes, + 4, frsqrtsCode) + threeEqualRegInstX("frsqrts", "FrsqrtsScX", "SimdFloatMiscOp", floatTypes, + 4, frsqrtsCode, scalar=True) + # FSQRT + fsqrtCode = fpOp % "fplibSqrt<Element>(srcElem1, fpscr)" + twoEqualRegInstX("fsqrt", "FsqrtDX", "SimdFloatSqrtOp", smallFloatTypes, 2, + fsqrtCode) + twoEqualRegInstX("fsqrt", "FsqrtQX", "SimdFloatSqrtOp", floatTypes, 4, + fsqrtCode) + # FSUB + fsubCode = fpBinOp % "Sub" + threeEqualRegInstX("fsub", "FsubDX", "SimdFloatAddOp", smallFloatTypes, 2, + fsubCode) + threeEqualRegInstX("fsub", "FsubQX", "SimdFloatAddOp", floatTypes, 4, + fsubCode) + # INS (element) + insFromVecElemInstX("ins", "InsElemX", "SimdMiscOp", unsignedTypes, 4) + # INS (general register) + insFromGprInstX("ins", "InsGprWX", "SimdMiscOp", smallUnsignedTypes, 4, + 'W') + insFromGprInstX("ins", "InsGprXX", "SimdMiscOp", unsignedTypes, 4, 'X') + # MLA (by element) + mlaCode = "destElem += srcElem1 * srcElem2;" + threeEqualRegInstX("mla", "MlaElemDX", "SimdMultAccOp", + ("uint16_t", "uint32_t"), 2, mlaCode, True, byElem=True) + threeEqualRegInstX("mla", "MlaElemQX", "SimdMultAccOp", + ("uint16_t", "uint32_t"), 4, mlaCode, True, byElem=True) + # MLA (vector) + threeEqualRegInstX("mla", "MlaDX", "SimdMultAccOp", smallUnsignedTypes, 2, + mlaCode, True) + threeEqualRegInstX("mla", "MlaQX", "SimdMultAccOp", smallUnsignedTypes, 4, + mlaCode, True) + # MLS (by element) + mlsCode = "destElem -= srcElem1 * srcElem2;" + threeEqualRegInstX("mls", "MlsElemDX", "SimdMultAccOp", + ("uint16_t", "uint32_t"), 2, mlsCode, True, byElem=True) + threeEqualRegInstX("mls", "MlsElemQX", "SimdMultAccOp", + ("uint16_t", "uint32_t"), 4, mlsCode, True, byElem=True) + # MLS (vector) + threeEqualRegInstX("mls", "MlsDX", "SimdMultAccOp", smallUnsignedTypes, 2, + mlsCode, True) + threeEqualRegInstX("mls", "MlsQX", "SimdMultAccOp", smallUnsignedTypes, 4, + mlsCode, True) + # MOV (element) -> alias to INS (element) + # MOV (from general) -> alias to INS (general register) + # MOV (scalar) -> alias to DUP (element) + # MOV (to general) -> alias to UMOV + # MOV (vector) -> alias to ORR (register) + # MOVI + movImmCode = "destElem = imm;" + oneRegImmInstX("movi", "MoviDX", "SimdMiscOp", ("uint64_t",), 2, + movImmCode) + oneRegImmInstX("movi", "MoviQX", "SimdMiscOp", ("uint64_t",), 4, + movImmCode) + # MUL (by element) + mulCode = "destElem = srcElem1 * srcElem2;" + threeEqualRegInstX("mul", "MulElemDX", "SimdMultOp", + ("uint16_t", "uint32_t"), 2, mulCode, byElem=True) + threeEqualRegInstX("mul", "MulElemQX", "SimdMultOp", + ("uint16_t", "uint32_t"), 4, mulCode, byElem=True) + # MUL (vector) + threeEqualRegInstX("mul", "MulDX", "SimdMultOp", smallUnsignedTypes, 2, + mulCode) + threeEqualRegInstX("mul", "MulQX", "SimdMultOp", smallUnsignedTypes, 4, + mulCode) + # MVN + mvnCode = "destElem = ~srcElem1;" + twoEqualRegInstX("mvn", "MvnDX", "SimdAluOp", ("uint64_t",), 2, mvnCode) + twoEqualRegInstX("mvn", "MvnQX", "SimdAluOp", ("uint64_t",), 4, mvnCode) + # MVNI + mvniCode = "destElem = ~imm;" + oneRegImmInstX("mvni", "MvniDX", "SimdAluOp", ("uint64_t",), 2, mvniCode) + oneRegImmInstX("mvni", "MvniQX", "SimdAluOp", ("uint64_t",), 4, mvniCode) + # NEG + negCode = "destElem = -srcElem1;" + twoEqualRegInstX("neg", "NegDX", "SimdAluOp", signedTypes, 2, negCode) + twoEqualRegInstX("neg", "NegQX", "SimdAluOp", signedTypes, 4, negCode) + # NOT -> alias to MVN + # ORN + ornCode = "destElem = srcElem1 | ~srcElem2;" + threeEqualRegInstX("orn", "OrnDX", "SimdAluOp", ("uint64_t",), 2, ornCode) + threeEqualRegInstX("orn", "OrnQX", "SimdAluOp", ("uint64_t",), 4, ornCode) + # ORR (immediate) + orrImmCode = "destElem |= imm;" + oneRegImmInstX("orr", "OrrImmDX", "SimdAluOp", ("uint64_t",), 2, + orrImmCode, True) + oneRegImmInstX("orr", "OrrImmQX", "SimdAluOp", ("uint64_t",), 4, + orrImmCode, True) + # ORR (register) + orrCode = "destElem = srcElem1 | srcElem2;" + threeEqualRegInstX("orr", "OrrDX", "SimdAluOp", ("uint64_t",), 2, orrCode) + threeEqualRegInstX("orr", "OrrQX", "SimdAluOp", ("uint64_t",), 4, orrCode) + # PMUL + pmulCode = ''' + destElem = 0; + for (unsigned j = 0; j < sizeof(Element) * 8; j++) { + if (bits(srcElem2, j)) + destElem ^= srcElem1 << j; + } + ''' + threeEqualRegInstX("pmul", "PmulDX", "SimdMultOp", ("uint8_t",), 2, + pmulCode) + threeEqualRegInstX("pmul", "PmulQX", "SimdMultOp", ("uint8_t",), 4, + pmulCode) + # PMULL, PMULL2 + # Note: 64-bit PMULL is not available (Crypto. Extension) + pmullCode = ''' + destElem = 0; + for (unsigned j = 0; j < sizeof(Element) * 8; j++) { + if (bits(srcElem2, j)) + destElem ^= (BigElement)srcElem1 << j; + } + ''' + threeRegLongInstX("pmull", "PmullX", "SimdMultOp", ("uint8_t",), pmullCode) + threeRegLongInstX("pmull", "Pmull2X", "SimdMultOp", ("uint8_t",), + pmullCode, hi=True) + # RADDHN, RADDHN2 + raddhnCode = ''' + destElem = ((BigElement)srcElem1 + (BigElement)srcElem2 + + ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> + (sizeof(Element) * 8); + ''' + threeRegNarrowInstX("raddhn", "RaddhnX", "SimdAddOp", smallUnsignedTypes, + raddhnCode) + threeRegNarrowInstX("raddhn2", "Raddhn2X", "SimdAddOp", smallUnsignedTypes, + raddhnCode, hi=True) + # RBIT + rbitCode = ''' + destElem = 0; + Element temp = srcElem1; + for (int i = 0; i < 8 * sizeof(Element); i++) { + destElem = destElem | ((temp & 0x1) << + (8 * sizeof(Element) - 1 - i)); + temp >>= 1; + } + ''' + twoEqualRegInstX("rbit", "RbitDX", "SimdAluOp", ("uint8_t",), 2, rbitCode) + twoEqualRegInstX("rbit", "RbitQX", "SimdAluOp", ("uint8_t",), 4, rbitCode) + # REV16 + rev16Code = ''' + destElem = srcElem1; + unsigned groupSize = ((1 << 1) / sizeof(Element)); + unsigned reverseMask = (groupSize - 1); + j = i ^ reverseMask; + ''' + twoEqualRegInstX("rev16", "Rev16DX", "SimdAluOp", ("uint8_t",), 2, + rev16Code) + twoEqualRegInstX("rev16", "Rev16QX", "SimdAluOp", ("uint8_t",), 4, + rev16Code) + # REV32 + rev32Code = ''' + destElem = srcElem1; + unsigned groupSize = ((1 << 2) / sizeof(Element)); + unsigned reverseMask = (groupSize - 1); + j = i ^ reverseMask; + ''' + twoEqualRegInstX("rev32", "Rev32DX", "SimdAluOp", ("uint8_t", "uint16_t"), + 2, rev32Code) + twoEqualRegInstX("rev32", "Rev32QX", "SimdAluOp", ("uint8_t", "uint16_t"), + 4, rev32Code) + # REV64 + rev64Code = ''' + destElem = srcElem1; + unsigned groupSize = ((1 << 3) / sizeof(Element)); + unsigned reverseMask = (groupSize - 1); + j = i ^ reverseMask; + ''' + twoEqualRegInstX("rev64", "Rev64DX", "SimdAluOp", smallUnsignedTypes, 2, + rev64Code) + twoEqualRegInstX("rev64", "Rev64QX", "SimdAluOp", smallUnsignedTypes, 4, + rev64Code) + # RSHRN, RSHRN2 + rshrnCode = ''' + if (imm > sizeof(srcElem1) * 8) { + destElem = 0; + } else if (imm) { + Element rBit = bits(srcElem1, imm - 1); + destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; + } else { + destElem = srcElem1; + } + ''' + twoRegNarrowInstX("rshrn", "RshrnX", "SimdShiftOp", smallUnsignedTypes, + rshrnCode, hasImm=True) + twoRegNarrowInstX("rshrn2", "Rshrn2X", "SimdShiftOp", smallUnsignedTypes, + rshrnCode, hasImm=True, hi=True) + # RSUBHN, RSUBHN2 + rsubhnCode = ''' + destElem = ((BigElement)srcElem1 - (BigElement)srcElem2 + + ((BigElement)1 << (sizeof(Element) * 8 - 1))) >> + (sizeof(Element) * 8); + ''' + threeRegNarrowInstX("rsubhn", "RsubhnX", "SimdAddOp", smallTypes, + rsubhnCode) + threeRegNarrowInstX("rsubhn2", "Rsubhn2X", "SimdAddOp", smallTypes, + rsubhnCode, hi=True) + # SABA + abaCode = ''' + destElem += (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) : + (srcElem2 - srcElem1); + ''' + threeEqualRegInstX("saba", "SabaDX", "SimdAddAccOp", smallSignedTypes, 2, + abaCode, True) + threeEqualRegInstX("saba", "SabaQX", "SimdAddAccOp", smallSignedTypes, 4, + abaCode, True) + # SABAL, SABAL2 + abalCode = ''' + destElem += (srcElem1 > srcElem2) ? + ((BigElement)srcElem1 - (BigElement)srcElem2) : + ((BigElement)srcElem2 - (BigElement)srcElem1); + ''' + threeRegLongInstX("sabal", "SabalX", "SimdAddAccOp", smallSignedTypes, + abalCode, True) + threeRegLongInstX("sabal2", "Sabal2X", "SimdAddAccOp", smallSignedTypes, + abalCode, True, hi=True) + # SABD + abdCode = ''' + destElem = (srcElem1 > srcElem2) ? (srcElem1 - srcElem2) : + (srcElem2 - srcElem1); + ''' + threeEqualRegInstX("sabd", "SabdDX", "SimdAddOp", smallSignedTypes, 2, + abdCode) + threeEqualRegInstX("sabd", "SabdQX", "SimdAddOp", smallSignedTypes, 4, + abdCode) + # SABDL, SABDL2 + abdlCode = ''' + destElem = (srcElem1 > srcElem2) ? + ((BigElement)srcElem1 - (BigElement)srcElem2) : + ((BigElement)srcElem2 - (BigElement)srcElem1); + ''' + threeRegLongInstX("sabdl", "SabdlX", "SimdAddAccOp", smallSignedTypes, + abdlCode, True) + threeRegLongInstX("sabdl2", "Sabdl2X", "SimdAddAccOp", smallSignedTypes, + abdlCode, True, hi=True) + # SADALP + adalpCode = "destElem += (BigElement)srcElem1 + (BigElement)srcElem2;" + twoRegCondenseInstX("sadalp", "SadalpDX", "SimdAddOp", smallSignedTypes, 2, + adalpCode, True) + twoRegCondenseInstX("sadalp", "SadalpQX", "SimdAddOp", smallSignedTypes, 4, + adalpCode, True) + # SADDL, SADDL2 + addlwCode = "destElem = (BigElement)srcElem1 + (BigElement)srcElem2;" + threeRegLongInstX("saddl", "SaddlX", "SimdAddAccOp", smallSignedTypes, + addlwCode) + threeRegLongInstX("saddl2", "Saddl2X", "SimdAddAccOp", smallSignedTypes, + addlwCode, hi=True) + # SADDLP + twoRegCondenseInstX("saddlp", "SaddlpDX", "SimdAddOp", smallSignedTypes, 2, + addlwCode) + twoRegCondenseInstX("saddlp", "SaddlpQX", "SimdAddOp", smallSignedTypes, 4, + addlwCode) + # SADDLV + # Note: SimdAddOp can be a bit optimistic here + addAcrossLongCode = "destElem += (BigElement)srcElem1;" + twoRegAcrossInstX("saddlv", "SaddlvDX", "SimdAddOp", ("int8_t", "int16_t"), + 2, addAcrossLongCode, long=True) + twoRegAcrossInstX("saddlv", "SaddlvQX", "SimdAddOp", ("int8_t", "int16_t"), + 4, addAcrossLongCode, long=True) + twoRegAcrossInstX("saddlv", "SaddlvBQX", "SimdAddOp", ("int32_t",), 4, + addAcrossLongCode, doubleDest=True, long=True) + # SADDW, SADDW2 + threeRegWideInstX("saddw", "SaddwX", "SimdAddAccOp", smallSignedTypes, + addlwCode) + threeRegWideInstX("saddw2", "Saddw2X", "SimdAddAccOp", smallSignedTypes, + addlwCode, hi=True) + # SCVTF (fixed-point) + scvtfFixedCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, imm," + " false, FPCRRounding(fpscr), fpscr)") + twoEqualRegInstX("scvtf", "ScvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2, + scvtfFixedCode % 32, hasImm=True) + twoEqualRegInstX("scvtf", "ScvtfFixedSQX", "SimdCvtOp", smallFloatTypes, 4, + scvtfFixedCode % 32, hasImm=True) + twoEqualRegInstX("scvtf", "ScvtfFixedDQX", "SimdCvtOp", ("uint64_t",), 4, + scvtfFixedCode % 64, hasImm=True) + twoEqualRegInstX("scvtf", "ScvtfFixedScSX", "SimdCvtOp", smallFloatTypes, + 4, scvtfFixedCode % 32, hasImm=True, scalar=True) + twoEqualRegInstX("scvtf", "ScvtfFixedScDX", "SimdCvtOp", ("uint64_t",), 4, + scvtfFixedCode % 64, hasImm=True, scalar=True) + # SCVTF (integer) + scvtfIntCode = fpOp % ("fplibFixedToFP<Element>((int%d_t) srcElem1, 0," + " false, FPCRRounding(fpscr), fpscr)") + twoEqualRegInstX("scvtf", "ScvtfIntDX", "SimdCvtOp", smallFloatTypes, 2, + scvtfIntCode % 32) + twoEqualRegInstX("scvtf", "ScvtfIntSQX", "SimdCvtOp", smallFloatTypes, 4, + scvtfIntCode % 32) + twoEqualRegInstX("scvtf", "ScvtfIntDQX", "SimdCvtOp", ("uint64_t",), 4, + scvtfIntCode % 64) + twoEqualRegInstX("scvtf", "ScvtfIntScSX", "SimdCvtOp", smallFloatTypes, 4, + scvtfIntCode % 32, scalar=True) + twoEqualRegInstX("scvtf", "ScvtfIntScDX", "SimdCvtOp", ("uint64_t",), 4, + scvtfIntCode % 64, scalar=True) + # SHADD + haddCode = ''' + Element carryBit = + (((unsigned)srcElem1 & 0x1) + + ((unsigned)srcElem2 & 0x1)) >> 1; + // Use division instead of a shift to ensure the sign extension works + // right. The compiler will figure out if it can be a shift. Mask the + // inputs so they get truncated correctly. + destElem = (((srcElem1 & ~(Element)1) / 2) + + ((srcElem2 & ~(Element)1) / 2)) + carryBit; + ''' + threeEqualRegInstX("shadd", "ShaddDX", "SimdAddOp", smallSignedTypes, 2, + haddCode) + threeEqualRegInstX("shadd", "ShaddQX", "SimdAddOp", smallSignedTypes, 4, + haddCode) + # SHL + shlCode = ''' + if (imm >= sizeof(Element) * 8) + destElem = (srcElem1 << (sizeof(Element) * 8 - 1)) << 1; + else + destElem = srcElem1 << imm; + ''' + twoEqualRegInstX("shl", "ShlDX", "SimdShiftOp", unsignedTypes, 2, shlCode, + hasImm=True) + twoEqualRegInstX("shl", "ShlQX", "SimdShiftOp", unsignedTypes, 4, shlCode, + hasImm=True) + # SHLL, SHLL2 + shllCode = "destElem = ((BigElement)srcElem1) << (sizeof(Element) * 8);" + twoRegLongInstX("shll", "ShllX", "SimdShiftOp", smallTypes, shllCode) + twoRegLongInstX("shll", "Shll2X", "SimdShiftOp", smallTypes, shllCode, + hi=True) + # SHRN, SHRN2 + shrnCode = ''' + if (imm >= sizeof(srcElem1) * 8) { + destElem = 0; + } else { + destElem = srcElem1 >> imm; + } + ''' + twoRegNarrowInstX("shrn", "ShrnX", "SimdShiftOp", smallUnsignedTypes, + shrnCode, hasImm=True) + twoRegNarrowInstX("shrn2", "Shrn2X", "SimdShiftOp", smallUnsignedTypes, + shrnCode, hasImm=True, hi=True) + # SHSUB + hsubCode = ''' + Element borrowBit = + (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1; + // Use division instead of a shift to ensure the sign extension works + // right. The compiler will figure out if it can be a shift. Mask the + // inputs so they get truncated correctly. + destElem = (((srcElem1 & ~(Element)1) / 2) - + ((srcElem2 & ~(Element)1) / 2)) - borrowBit; + ''' + threeEqualRegInstX("shsub", "ShsubDX", "SimdAddOp", smallSignedTypes, 2, + hsubCode) + threeEqualRegInstX("shsub", "ShsubQX", "SimdAddOp", smallSignedTypes, 4, + hsubCode) + # SLI + sliCode = ''' + if (imm >= sizeof(Element) * 8) + destElem = destElem; + else + destElem = (srcElem1 << imm) | (destElem & mask(imm)); + ''' + twoEqualRegInstX("sli", "SliDX", "SimdShiftOp", unsignedTypes, 2, sliCode, + True, hasImm=True) + twoEqualRegInstX("sli", "SliQX", "SimdShiftOp", unsignedTypes, 4, sliCode, + True, hasImm=True) + # SMAX + maxCode = "destElem = (srcElem1 > srcElem2) ? srcElem1 : srcElem2;" + threeEqualRegInstX("smax", "SmaxDX", "SimdCmpOp", smallSignedTypes, 2, + maxCode) + threeEqualRegInstX("smax", "SmaxQX", "SimdCmpOp", smallSignedTypes, 4, + maxCode) + # SMAXP + threeEqualRegInstX("smaxp", "SmaxpDX", "SimdCmpOp", smallSignedTypes, 2, + maxCode, pairwise=True) + threeEqualRegInstX("smaxp", "SmaxpQX", "SimdCmpOp", smallSignedTypes, 4, + maxCode, pairwise=True) + # SMAXV + maxAcrossCode = ''' + if (i == 0 || srcElem1 > destElem) + destElem = srcElem1; + ''' + twoRegAcrossInstX("smaxv", "SmaxvDX", "SimdCmpOp", ("int8_t", "int16_t"), + 2, maxAcrossCode) + twoRegAcrossInstX("smaxv", "SmaxvQX", "SimdCmpOp", smallSignedTypes, 4, + maxAcrossCode) + # SMIN + minCode = "destElem = (srcElem1 < srcElem2) ? srcElem1 : srcElem2;" + threeEqualRegInstX("smin", "SminDX", "SimdCmpOp", smallSignedTypes, 2, + minCode) + threeEqualRegInstX("smin", "SminQX", "SimdCmpOp", smallSignedTypes, 4, + minCode) + # SMINP + threeEqualRegInstX("sminp", "SminpDX", "SimdCmpOp", smallSignedTypes, 2, + minCode, pairwise=True) + threeEqualRegInstX("sminp", "SminpQX", "SimdCmpOp", smallSignedTypes, 4, + minCode, pairwise=True) + # SMINV + minAcrossCode = ''' + if (i == 0 || srcElem1 < destElem) + destElem = srcElem1; + ''' + twoRegAcrossInstX("sminv", "SminvDX", "SimdCmpOp", ("int8_t", "int16_t"), + 2, minAcrossCode) + twoRegAcrossInstX("sminv", "SminvQX", "SimdCmpOp", smallSignedTypes, 4, + minAcrossCode) + # SMLAL, SMLAL2 (by element) + mlalCode = "destElem += (BigElement)srcElem1 * (BigElement)srcElem2;" + threeRegLongInstX("smlal", "SmlalElemX", "SimdMultAccOp", + ("int16_t", "int32_t"), mlalCode, True, byElem=True) + threeRegLongInstX("smlal", "SmlalElem2X", "SimdMultAccOp", + ("int16_t", "int32_t"), mlalCode, True, byElem=True, + hi=True) + # SMLAL, SMLAL2 (vector) + threeRegLongInstX("smlal", "SmlalX", "SimdMultAccOp", smallSignedTypes, + mlalCode, True) + threeRegLongInstX("smlal", "Smlal2X", "SimdMultAccOp", smallSignedTypes, + mlalCode, True, hi=True) + # SMLSL, SMLSL2 (by element) + mlslCode = "destElem -= (BigElement)srcElem1 * (BigElement)srcElem2;" + threeRegLongInstX("smlsl", "SmlslElemX", "SimdMultAccOp", smallSignedTypes, + mlslCode, True, byElem=True) + threeRegLongInstX("smlsl", "SmlslElem2X", "SimdMultAccOp", + smallSignedTypes, mlslCode, True, byElem=True, hi=True) + # SMLSL, SMLSL2 (vector) + threeRegLongInstX("smlsl", "SmlslX", "SimdMultAccOp", smallSignedTypes, + mlslCode, True) + threeRegLongInstX("smlsl", "Smlsl2X", "SimdMultAccOp", smallSignedTypes, + mlslCode, True, hi=True) + # SMOV + insToGprInstX("smov", "SmovWX", "SimdMiscOp", ("int8_t", "int16_t"), 4, + 'W', True) + insToGprInstX("smov", "SmovXX", "SimdMiscOp", smallSignedTypes, 4, 'X', + True) + # SMULL, SMULL2 (by element) + mullCode = "destElem = (BigElement)srcElem1 * (BigElement)srcElem2;" + threeRegLongInstX("smull", "SmullElemX", "SimdMultOp", smallSignedTypes, + mullCode, byElem=True) + threeRegLongInstX("smull", "SmullElem2X", "SimdMultOp", smallSignedTypes, + mullCode, byElem=True, hi=True) + # SMULL, SMULL2 (vector) + threeRegLongInstX("smull", "SmullX", "SimdMultOp", smallSignedTypes, + mullCode) + threeRegLongInstX("smull", "Smull2X", "SimdMultOp", smallSignedTypes, + mullCode, hi=True) + # SQABS + sqabsCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { + fpscr.qc = 1; + destElem = ~srcElem1; + } else if (srcElem1 < 0) { + destElem = -srcElem1; + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("sqabs", "SqabsDX", "SimdAluOp", smallSignedTypes, 2, + sqabsCode) + twoEqualRegInstX("sqabs", "SqabsQX", "SimdAluOp", signedTypes, 4, + sqabsCode) + twoEqualRegInstX("sqabs", "SqabsScX", "SimdAluOp", signedTypes, 4, + sqabsCode, scalar=True) + # SQADD + sqaddCode = ''' + destElem = srcElem1 + srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + bool negDest = (destElem < 0); + bool negSrc1 = (srcElem1 < 0); + bool negSrc2 = (srcElem2 < 0); + if ((negDest != negSrc1) && (negSrc1 == negSrc2)) { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + if (negDest) + destElem -= 1; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqadd", "SqaddDX", "SimdAddOp", smallSignedTypes, 2, + sqaddCode) + threeEqualRegInstX("sqadd", "SqaddQX", "SimdAddOp", signedTypes, 4, + sqaddCode) + threeEqualRegInstX("sqadd", "SqaddScX", "SimdAddOp", signedTypes, 4, + sqaddCode, scalar=True) + # SQDMLAL, SQDMLAL2 (by element) + qdmlalCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); + Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); + Element halfNeg = maxNeg / 2; + if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || + (srcElem1 == halfNeg && srcElem2 == maxNeg) || + (srcElem1 == maxNeg && srcElem2 == halfNeg)) { + midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); + fpscr.qc = 1; + } + bool negPreDest = ltz(destElem); + destElem += midElem; + bool negDest = ltz(destElem); + bool negMid = ltz(midElem); + if (negPreDest == negMid && negMid != negDest) { + destElem = mask(sizeof(BigElement) * 8 - 1); + if (negPreDest) + destElem = ~destElem; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeRegLongInstX("sqdmlal", "SqdmlalElemX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, byElem=True) + threeRegLongInstX("sqdmlal", "SqdmlalElem2X", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, byElem=True, + hi=True) + threeRegLongInstX("sqdmlal", "SqdmlalElemScX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, byElem=True, + scalar=True) + # SQDMLAL, SQDMLAL2 (vector) + threeRegLongInstX("sqdmlal", "SqdmlalX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True) + threeRegLongInstX("sqdmlal", "Sqdmlal2X", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, hi=True) + threeRegLongInstX("sqdmlal", "SqdmlalScX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlalCode, True, scalar=True) + # SQDMLSL, SQDMLSL2 (by element) + qdmlslCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + BigElement midElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); + Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); + Element halfNeg = maxNeg / 2; + if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || + (srcElem1 == halfNeg && srcElem2 == maxNeg) || + (srcElem1 == maxNeg && srcElem2 == halfNeg)) { + midElem = ~((BigElement)maxNeg << (sizeof(Element) * 8)); + fpscr.qc = 1; + } + bool negPreDest = ltz(destElem); + destElem -= midElem; + bool negDest = ltz(destElem); + bool posMid = ltz((BigElement)-midElem); + if (negPreDest == posMid && posMid != negDest) { + destElem = mask(sizeof(BigElement) * 8 - 1); + if (negPreDest) + destElem = ~destElem; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeRegLongInstX("sqdmlsl", "SqdmlslElemX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, byElem=True) + threeRegLongInstX("sqdmlsl", "SqdmlslElem2X", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, byElem=True, + hi=True) + threeRegLongInstX("sqdmlsl", "SqdmlslElemScX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, byElem=True, + scalar=True) + # SQDMLSL, SQDMLSL2 (vector) + threeRegLongInstX("sqdmlsl", "SqdmlslX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True) + threeRegLongInstX("sqdmlsl", "Sqdmlsl2X", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, hi=True) + threeRegLongInstX("sqdmlsl", "SqdmlslScX", "SimdMultAccOp", + ("int16_t", "int32_t"), qdmlslCode, True, scalar=True) + # SQDMULH (by element) + sqdmulhCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2) >> + (sizeof(Element) * 8); + if (srcElem1 == srcElem2 && + srcElem1 == (Element)((Element)1 << + (sizeof(Element) * 8 - 1))) { + destElem = ~srcElem1; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqdmulh", "SqdmulhElemDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqdmulhCode, byElem=True) + threeEqualRegInstX("sqdmulh", "SqdmulhElemQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True) + threeEqualRegInstX("sqdmulh", "SqdmulhElemScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqdmulhCode, byElem=True, + scalar=True) + # SQDMULH (vector) + threeEqualRegInstX("sqdmulh", "SqdmulhDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqdmulhCode) + threeEqualRegInstX("sqdmulh", "SqdmulhQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqdmulhCode) + threeEqualRegInstX("sqdmulh", "SqdmulhScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqdmulhCode, scalar=True) + # SQDMULL, SQDMULL2 (by element) + qdmullCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2); + if (srcElem1 == srcElem2 && + srcElem1 == (Element)((Element)1 << + (Element)(sizeof(Element) * 8 - 1))) { + destElem = ~((BigElement)srcElem1 << (sizeof(Element) * 8)); + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeRegLongInstX("sqdmull", "SqdmullElemX", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, byElem=True) + threeRegLongInstX("sqdmull", "SqdmullElem2X", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, byElem=True, + hi=True) + threeRegLongInstX("sqdmull", "SqdmullElemScX", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, byElem=True, + scalar=True) + # SQDMULL, SQDMULL2 (vector) + threeRegLongInstX("sqdmull", "SqdmullX", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True) + threeRegLongInstX("sqdmull", "Sqdmull2X", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, hi=True) + threeRegLongInstX("sqdmull", "SqdmullScX", "SimdMultOp", + ("int16_t", "int32_t"), qdmullCode, True, scalar=True) + # SQNEG + sqnegCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (srcElem1 == (Element)((Element)1 << (sizeof(Element) * 8 - 1))) { + fpscr.qc = 1; + destElem = ~srcElem1; + } else { + destElem = -srcElem1; + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("sqneg", "SqnegDX", "SimdAluOp", smallSignedTypes, 2, + sqnegCode) + twoEqualRegInstX("sqneg", "SqnegQX", "SimdAluOp", signedTypes, 4, + sqnegCode) + twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4, + sqnegCode, scalar=True) + # SQRDMULH (by element) + sqrdmulhCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 + + ((int64_t)1 << (sizeof(Element) * 8 - 1))) >> + (sizeof(Element) * 8); + Element maxNeg = (Element)1 << (sizeof(Element) * 8 - 1); + Element halfNeg = maxNeg / 2; + if ((srcElem1 == maxNeg && srcElem2 == maxNeg) || + (srcElem1 == halfNeg && srcElem2 == maxNeg) || + (srcElem1 == maxNeg && srcElem2 == halfNeg)) { + if (destElem < 0) { + destElem = mask(sizeof(Element) * 8 - 1); + } else { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + } + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqrdmulh", "SqrdmulhElemDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqrdmulhCode, byElem=True) + threeEqualRegInstX("sqrdmulh", "SqrdmulhElemQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True) + threeEqualRegInstX("sqrdmulh", "SqrdmulhElemScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmulhCode, byElem=True, + scalar=True) + # SQRDMULH (vector) + threeEqualRegInstX("sqrdmulh", "SqrdmulhDX", "SimdMultOp", + ("int16_t", "int32_t"), 2, sqrdmulhCode) + threeEqualRegInstX("sqrdmulh", "SqrdmulhQX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmulhCode) + threeEqualRegInstX("sqrdmulh", "SqrdmulhScX", "SimdMultOp", + ("int16_t", "int32_t"), 4, sqrdmulhCode, scalar=True) + # SQRSHL + sqrshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + Element rBit = 0; + if (shiftAmt <= sizeof(Element) * 8) + rBit = bits(srcElem1, shiftAmt - 1); + if (shiftAmt > sizeof(Element) * 8 && srcElem1 < 0) + rBit = 1; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + // Make sure the right shift sign extended when it should. + if (srcElem1 < 0 && destElem >= 0) { + destElem |= -((Element)1 << (sizeof(Element) * 8 - + 1 - shiftAmt)); + } + destElem += rBit; + } else if (shiftAmt > 0) { + bool sat = false; + if (shiftAmt >= sizeof(Element) * 8) { + if (srcElem1 != 0) + sat = true; + else + destElem = 0; + } else { + if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - 1 - shiftAmt) != + ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) { + sat = true; + } else { + destElem = srcElem1 << shiftAmt; + } + } + if (sat) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqrshl", "SqrshlDX", "SimdCmpOp", smallSignedTypes, 2, + sqrshlCode) + threeEqualRegInstX("sqrshl", "SqrshlQX", "SimdCmpOp", signedTypes, 4, + sqrshlCode) + threeEqualRegInstX("sqrshl", "SqrshlScX", "SimdCmpOp", signedTypes, 4, + sqrshlCode, scalar=True) + # SQRSHRN, SQRSHRN2 + sqrshrnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0 && srcElem1 != -1) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = (srcElem1 >> (imm - 1)); + uint64_t rBit = mid & 0x1; + mid >>= 1; + mid |= -(mid & ((BigElement)1 << + (sizeof(BigElement) * 8 - 1 - imm))); + mid += rBit; + if (mid != (Element)mid) { + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + if (srcElem1 != (Element)srcElem1) { + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + fpscr.qc = 1; + } else { + destElem = srcElem1; + } + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqrshrn", "SqrshrnX", "SimdShiftOp", smallSignedTypes, + sqrshrnCode, hasImm=True) + twoRegNarrowInstX("sqrshrn2", "Sqrshrn2X", "SimdShiftOp", smallSignedTypes, + sqrshrnCode, hasImm=True, hi=True) + twoRegNarrowInstX("sqrshrn", "SqrshrnScX", "SimdShiftOp", smallSignedTypes, + sqrshrnCode, hasImm=True, scalar=True) + # SQRSHRUN, SQRSHRUN2 + sqrshrunCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = (srcElem1 >> (imm - 1)); + uint64_t rBit = mid & 0x1; + mid >>= 1; + mid |= -(mid & ((BigElement)1 << + (sizeof(BigElement) * 8 - 1 - imm))); + mid += rBit; + if (bits(mid, sizeof(BigElement) * 8 - 1, + sizeof(Element) * 8) != 0) { + if (srcElem1 < 0) { + destElem = 0; + } else { + destElem = mask(sizeof(Element) * 8); + } + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + if (srcElem1 < 0) { + fpscr.qc = 1; + destElem = 0; + } else { + destElem = srcElem1; + } + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqrshrun", "SqrshrunX", "SimdShiftOp", smallSignedTypes, + sqrshrunCode, hasImm=True) + twoRegNarrowInstX("sqrshrun", "Sqrshrun2X", "SimdShiftOp", + smallSignedTypes, sqrshrunCode, hasImm=True, hi=True) + twoRegNarrowInstX("sqrshrun", "SqrshrunScX", "SimdShiftOp", + smallSignedTypes, sqrshrunCode, hasImm=True, scalar=True) + # SQSHL (immediate) + sqshlImmCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm >= sizeof(Element) * 8) { + if (srcElem1 != 0) { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + if (srcElem1 > 0) + destElem = ~destElem; + fpscr.qc = 1; + } else { + destElem = 0; + } + } else if (imm) { + destElem = (srcElem1 << imm); + uint64_t topBits = bits((uint64_t)srcElem1, + sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - 1 - imm); + if (topBits != 0 && topBits != mask(imm + 1)) { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + if (srcElem1 > 0) + destElem = ~destElem; + fpscr.qc = 1; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("sqshl", "SqshlImmDX", "SimdAluOp", smallSignedTypes, 2, + sqshlImmCode, hasImm=True) + twoEqualRegInstX("sqshl", "SqshlImmQX", "SimdAluOp", signedTypes, 4, + sqshlImmCode, hasImm=True) + twoEqualRegInstX("sqshl", "SqshlImmScX", "SimdAluOp", signedTypes, 4, + sqshlImmCode, hasImm=True, scalar=True) + # SQSHL (register) + sqshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + // Make sure the right shift sign extended when it should. + if (srcElem1 < 0 && destElem >= 0) { + destElem |= -((Element)1 << (sizeof(Element) * 8 - + 1 - shiftAmt)); + } + } else if (shiftAmt > 0) { + bool sat = false; + if (shiftAmt >= sizeof(Element) * 8) { + if (srcElem1 != 0) + sat = true; + else + destElem = 0; + } else { + if (bits((uint64_t) srcElem1, sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - 1 - shiftAmt) != + ((srcElem1 < 0) ? mask(shiftAmt + 1) : 0)) { + sat = true; + } else { + destElem = srcElem1 << shiftAmt; + } + } + if (sat) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqshl", "SqshlDX", "SimdAluOp", smallSignedTypes, 2, + sqshlCode) + threeEqualRegInstX("sqshl", "SqshlQX", "SimdAluOp", signedTypes, 4, + sqshlCode) + threeEqualRegInstX("sqshl", "SqshlScX", "SimdAluOp", signedTypes, 4, + sqshlCode, scalar=True) + # SQSHLU + sqshluCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm >= sizeof(Element) * 8) { + if (srcElem1 < 0) { + destElem = 0; + fpscr.qc = 1; + } else if (srcElem1 > 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = 0; + } + } else if (imm) { + destElem = (srcElem1 << imm); + uint64_t topBits = bits((uint64_t)srcElem1, + sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - imm); + if (srcElem1 < 0) { + destElem = 0; + fpscr.qc = 1; + } else if (topBits != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } + } else { + if (srcElem1 < 0) { + fpscr.qc = 1; + destElem = 0; + } else { + destElem = srcElem1; + } + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("sqshlu", "SqshluDX", "SimdAluOp", smallSignedTypes, 2, + sqshluCode, hasImm=True) + twoEqualRegInstX("sqshlu", "SqshluQX", "SimdAluOp", signedTypes, 4, + sqshluCode, hasImm=True) + twoEqualRegInstX("sqshlu", "SqshluScX", "SimdAluOp", signedTypes, 4, + sqshluCode, hasImm=True, scalar=True) + # SQSHRN, SQSHRN2 + sqshrnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0 && srcElem1 != -1) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); + mid |= -(mid & ((BigElement)1 << + (sizeof(BigElement) * 8 - 1 - imm))); + if (mid != (Element)mid) { + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqshrn", "SqshrnX", "SimdShiftOp", smallSignedTypes, + sqshrnCode, hasImm=True) + twoRegNarrowInstX("sqshrn2", "Sqshrn2X", "SimdShiftOp", smallSignedTypes, + sqshrnCode, hasImm=True, hi=True) + twoRegNarrowInstX("sqshrn", "SqshrnScX", "SimdShiftOp", smallSignedTypes, + sqshrnCode, hasImm=True, scalar=True) + # SQSHRUN, SQSHRUN2 + sqshrunCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); + if (bits(mid, sizeof(BigElement) * 8 - 1, + sizeof(Element) * 8) != 0) { + if (srcElem1 < 0) { + destElem = 0; + } else { + destElem = mask(sizeof(Element) * 8); + } + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqshrun", "SqshrunX", "SimdShiftOp", smallSignedTypes, + sqshrunCode, hasImm=True) + twoRegNarrowInstX("sqshrun", "Sqshrun2X", "SimdShiftOp", smallSignedTypes, + sqshrunCode, hasImm=True, hi=True) + twoRegNarrowInstX("sqshrun", "SqshrunScX", "SimdShiftOp", smallSignedTypes, + sqshrunCode, hasImm=True, scalar=True) + # SQSUB + sqsubCode = ''' + destElem = srcElem1 - srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + bool negDest = (destElem < 0); + bool negSrc1 = (srcElem1 < 0); + bool posSrc2 = (srcElem2 >= 0); + if ((negDest != negSrc1) && (negSrc1 == posSrc2)) { + destElem = (Element)1 << (sizeof(Element) * 8 - 1); + if (negDest) + destElem -= 1; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("sqsub", "SqsubDX", "SimdAddOp", smallSignedTypes, 2, + sqsubCode) + threeEqualRegInstX("sqsub", "SqsubQX", "SimdAddOp", signedTypes, 4, + sqsubCode) + threeEqualRegInstX("sqsub", "SqsubScX", "SimdAddOp", signedTypes, 4, + sqsubCode, scalar=True) + # SQXTN, SQXTN2 + sqxtnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = srcElem1; + if ((BigElement)destElem != srcElem1) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8 - 1); + if (srcElem1 < 0) + destElem = ~destElem; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqxtn", "SqxtnX", "SimdMiscOp", smallSignedTypes, + sqxtnCode) + twoRegNarrowInstX("sqxtn", "Sqxtn2X", "SimdMiscOp", smallSignedTypes, + sqxtnCode, hi=True) + twoRegNarrowInstX("sqxtn", "SqxtnScX", "SimdMiscOp", smallSignedTypes, + sqxtnCode, scalar=True) + # SQXTUN, SQXTUN2 + sqxtunCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = srcElem1; + if (srcElem1 < 0 || + ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8); + if (srcElem1 < 0) + destElem = ~destElem; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("sqxtun", "SqxtunX", "SimdMiscOp", smallSignedTypes, + sqxtunCode) + twoRegNarrowInstX("sqxtun", "Sqxtun2X", "SimdMiscOp", smallSignedTypes, + sqxtunCode, hi=True) + twoRegNarrowInstX("sqxtun", "SqxtunScX", "SimdMiscOp", smallSignedTypes, + sqxtunCode, scalar=True) + # SRHADD + rhaddCode = ''' + Element carryBit = + (((unsigned)srcElem1 & 0x1) + + ((unsigned)srcElem2 & 0x1) + 1) >> 1; + // Use division instead of a shift to ensure the sign extension works + // right. The compiler will figure out if it can be a shift. Mask the + // inputs so they get truncated correctly. + destElem = (((srcElem1 & ~(Element)1) / 2) + + ((srcElem2 & ~(Element)1) / 2)) + carryBit; + ''' + threeEqualRegInstX("srhadd", "SrhaddDX", "SimdAddOp", smallSignedTypes, 2, + rhaddCode) + threeEqualRegInstX("srhadd", "SrhaddQX", "SimdAddOp", smallSignedTypes, 4, + rhaddCode) + # SRI + sriCode = ''' + if (imm >= sizeof(Element) * 8) + destElem = destElem; + else + destElem = (srcElem1 >> imm) | + (destElem & ~mask(sizeof(Element) * 8 - imm)); + ''' + twoEqualRegInstX("sri", "SriDX", "SimdShiftOp", unsignedTypes, 2, sriCode, + True, hasImm=True) + twoEqualRegInstX("sri", "SriQX", "SimdShiftOp", unsignedTypes, 4, sriCode, + True, hasImm=True) + # SRSHL + rshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + Element rBit = 0; + if (shiftAmt <= sizeof(Element) * 8) + rBit = bits(srcElem1, shiftAmt - 1); + if (shiftAmt > sizeof(Element) * 8 && ltz(srcElem1)) + rBit = 1; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + // Make sure the right shift sign extended when it should. + if (ltz(srcElem1) && !ltz(destElem)) { + destElem |= -((Element)1 << (sizeof(Element) * 8 - + 1 - shiftAmt)); + } + destElem += rBit; + } else if (shiftAmt > 0) { + if (shiftAmt >= sizeof(Element) * 8) { + destElem = 0; + } else { + destElem = srcElem1 << shiftAmt; + } + } else { + destElem = srcElem1; + } + ''' + threeEqualRegInstX("srshl", "SrshlDX", "SimdShiftOp", signedTypes, 2, + rshlCode) + threeEqualRegInstX("srshl", "SrshlQX", "SimdShiftOp", signedTypes, 4, + rshlCode) + # SRSHR + rshrCode = ''' + if (imm > sizeof(srcElem1) * 8) { + destElem = 0; + } else if (imm) { + Element rBit = bits(srcElem1, imm - 1); + destElem = ((srcElem1 >> (imm - 1)) >> 1) + rBit; + } else { + destElem = srcElem1; + } + ''' + twoEqualRegInstX("srshr", "SrshrDX", "SimdShiftOp", signedTypes, 2, + rshrCode, hasImm=True) + twoEqualRegInstX("srshr", "SrshrQX", "SimdShiftOp", signedTypes, 4, + rshrCode, hasImm=True) + # SRSRA + rsraCode = ''' + if (imm > sizeof(srcElem1) * 8) { + destElem += 0; + } else if (imm) { + Element rBit = bits(srcElem1, imm - 1); + destElem += ((srcElem1 >> (imm - 1)) >> 1) + rBit; + } else { + destElem += srcElem1; + } + ''' + twoEqualRegInstX("srsra", "SrsraDX", "SimdShiftOp", signedTypes, 2, + rsraCode, True, hasImm=True) + twoEqualRegInstX("srsra", "SrsraQX", "SimdShiftOp", signedTypes, 4, + rsraCode, True, hasImm=True) + # SSHL + shlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + // Make sure the right shift sign extended when it should. + if (ltz(srcElem1) && !ltz(destElem)) { + destElem |= -((Element)1 << (sizeof(Element) * 8 - + 1 - shiftAmt)); + } + } else { + if (shiftAmt >= sizeof(Element) * 8) { + destElem = 0; + } else { + destElem = srcElem1 << shiftAmt; + } + } + ''' + threeEqualRegInstX("sshl", "SshlDX", "SimdShiftOp", signedTypes, 2, + shlCode) + threeEqualRegInstX("sshl", "SshlQX", "SimdShiftOp", signedTypes, 4, + shlCode) + # SSHLL, SSHLL2 + shllCode = ''' + if (imm >= sizeof(destElem) * 8) { + destElem = 0; + } else { + destElem = (BigElement)srcElem1 << imm; + } + ''' + twoRegLongInstX("sshll", "SshllX", "SimdShiftOp", smallSignedTypes, + shllCode, hasImm=True) + twoRegLongInstX("sshll", "Sshll2X", "SimdShiftOp", smallSignedTypes, + shllCode, hasImm=True, hi=True) + # SSHR + shrCode = ''' + if (imm >= sizeof(srcElem1) * 8) { + if (ltz(srcElem1)) + destElem = -1; + else + destElem = 0; + } else { + destElem = srcElem1 >> imm; + } + ''' + twoEqualRegInstX("sshr", "SshrDX", "SimdShiftOp", signedTypes, 2, shrCode, + hasImm=True) + twoEqualRegInstX("sshr", "SshrQX", "SimdShiftOp", signedTypes, 4, shrCode, + hasImm=True) + # SSRA + sraCode = ''' + Element mid;; + if (imm >= sizeof(srcElem1) * 8) { + mid = ltz(srcElem1) ? -1 : 0; + } else { + mid = srcElem1 >> imm; + if (ltz(srcElem1) && !ltz(mid)) { + mid |= -(mid & ((Element)1 << + (sizeof(Element) * 8 - 1 - imm))); + } + } + destElem += mid; + ''' + twoEqualRegInstX("ssra", "SsraDX", "SimdShiftOp", signedTypes, 2, sraCode, + True, hasImm=True) + twoEqualRegInstX("ssra", "SsraQX", "SimdShiftOp", signedTypes, 4, sraCode, + True, hasImm=True) + # SSUBL + sublwCode = "destElem = (BigElement)srcElem1 - (BigElement)srcElem2;" + threeRegLongInstX("ssubl", "SsublX", "SimdAddOp", smallSignedTypes, + sublwCode) + threeRegLongInstX("ssubl2", "Ssubl2X", "SimdAddOp", smallSignedTypes, + sublwCode, hi=True) + # SSUBW + threeRegWideInstX("ssubw", "SsubwX", "SimdAddOp", smallSignedTypes, + sublwCode) + threeRegWideInstX("ssubw2", "Ssubw2X", "SimdAddOp", smallSignedTypes, + sublwCode, hi=True) + # SUB + subCode = "destElem = srcElem1 - srcElem2;" + threeEqualRegInstX("sub", "SubDX", "SimdAddOp", unsignedTypes, 2, subCode) + threeEqualRegInstX("sub", "SubQX", "SimdAddOp", unsignedTypes, 4, subCode) + # SUBHN, SUBHN2 + subhnCode = ''' + destElem = ((BigElement)srcElem1 - (BigElement)srcElem2) >> + (sizeof(Element) * 8); + ''' + threeRegNarrowInstX("subhn", "SubhnX", "SimdAddOp", smallUnsignedTypes, + subhnCode) + threeRegNarrowInstX("subhn2", "Subhn2X", "SimdAddOp", smallUnsignedTypes, + subhnCode, hi=True) + # SUQADD + suqaddCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + Element tmp = destElem + srcElem1; + if (bits(destElem, sizeof(Element) * 8 - 1) == 0) { + if (bits(tmp, sizeof(Element) * 8 - 1) == 1 || + tmp < srcElem1 || tmp < destElem) { + destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1; + fpscr.qc = 1; + } else { + destElem = tmp; + } + } else { + Element absDestElem = (~destElem) + 1; + if (absDestElem < srcElem1) { + // Still check for positive sat., no need to check for negative sat. + if (bits(tmp, sizeof(Element) * 8 - 1) == 1) { + destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1; + fpscr.qc = 1; + } else { + destElem = tmp; + } + } else { + destElem = tmp; + } + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("suqadd", "SuqaddDX", "SimdAddOp", smallUnsignedTypes, 2, + suqaddCode, True) + twoEqualRegInstX("suqadd", "SuqaddQX", "SimdAddOp", unsignedTypes, 4, + suqaddCode, True) + twoEqualRegInstX("suqadd", "SuqaddScX", "SimdAddOp", unsignedTypes, 4, + suqaddCode, True, scalar=True) + # SXTL -> alias to SSHLL + # TBL + tbxTblInstX("tbl", "Tbl1DX", "SimdMiscOp", ("uint8_t",), 1, "true", 2) + tbxTblInstX("tbl", "Tbl1QX", "SimdMiscOp", ("uint8_t",), 1, "true", 4) + tbxTblInstX("tbl", "Tbl2DX", "SimdMiscOp", ("uint8_t",), 2, "true", 2) + tbxTblInstX("tbl", "Tbl2QX", "SimdMiscOp", ("uint8_t",), 2, "true", 4) + tbxTblInstX("tbl", "Tbl3DX", "SimdMiscOp", ("uint8_t",), 3, "true", 2) + tbxTblInstX("tbl", "Tbl3QX", "SimdMiscOp", ("uint8_t",), 3, "true", 4) + tbxTblInstX("tbl", "Tbl4DX", "SimdMiscOp", ("uint8_t",), 4, "true", 2) + tbxTblInstX("tbl", "Tbl4QX", "SimdMiscOp", ("uint8_t",), 4, "true", 4) + # TBX + tbxTblInstX("tbx", "Tbx1DX", "SimdMiscOp", ("uint8_t",), 1, "false", 2) + tbxTblInstX("tbx", "Tbx1QX", "SimdMiscOp", ("uint8_t",), 1, "false", 4) + tbxTblInstX("tbx", "Tbx2DX", "SimdMiscOp", ("uint8_t",), 2, "false", 2) + tbxTblInstX("tbx", "Tbx2QX", "SimdMiscOp", ("uint8_t",), 2, "false", 4) + tbxTblInstX("tbx", "Tbx3DX", "SimdMiscOp", ("uint8_t",), 3, "false", 2) + tbxTblInstX("tbx", "Tbx3QX", "SimdMiscOp", ("uint8_t",), 3, "false", 4) + tbxTblInstX("tbx", "Tbx4DX", "SimdMiscOp", ("uint8_t",), 4, "false", 2) + tbxTblInstX("tbx", "Tbx4QX", "SimdMiscOp", ("uint8_t",), 4, "false", 4) + # TRN1 + trnCode = ''' + unsigned part = %s; + for (unsigned i = 0; i < eCount / 2; i++) { + destReg.elements[2 * i] = srcReg1.elements[2 * i + part]; + destReg.elements[2 * i + 1] = srcReg2.elements[2 * i + part]; + } + ''' + threeRegScrambleInstX("trn1", "Trn1DX", "SimdAluOp", smallUnsignedTypes, 2, + trnCode % "0") + threeRegScrambleInstX("trn1", "Trn1QX", "SimdAluOp", unsignedTypes, 4, + trnCode % "0") + # TRN2 + threeRegScrambleInstX("trn2", "Trn2DX", "SimdAluOp", smallUnsignedTypes, 2, + trnCode % "1") + threeRegScrambleInstX("trn2", "Trn2QX", "SimdAluOp", unsignedTypes, 4, + trnCode % "1") + # UABA + threeEqualRegInstX("uaba", "UabaDX", "SimdAddAccOp", smallUnsignedTypes, 2, + abaCode, True) + threeEqualRegInstX("uaba", "UabaQX", "SimdAddAccOp", smallUnsignedTypes, 4, + abaCode, True) + # UABAL, UABAL2 + threeRegLongInstX("uabal", "UabalX", "SimdAddAccOp", smallUnsignedTypes, + abalCode, True) + threeRegLongInstX("uabal2", "Uabal2X", "SimdAddAccOp", smallUnsignedTypes, + abalCode, True, hi=True) + # UABD + threeEqualRegInstX("uabd", "UabdDX", "SimdAddOp", smallUnsignedTypes, 2, + abdCode) + threeEqualRegInstX("uabd", "UabdQX", "SimdAddOp", smallUnsignedTypes, 4, + abdCode) + # UABDL, UABDL2 + threeRegLongInstX("uabdl", "UabdlX", "SimdAddAccOp", smallUnsignedTypes, + abdlCode, True) + threeRegLongInstX("uabdl2", "Uabdl2X", "SimdAddAccOp", smallUnsignedTypes, + abdlCode, True, hi=True) + # UADALP + twoRegCondenseInstX("uadalp", "UadalpDX", "SimdAddOp", smallUnsignedTypes, + 2, adalpCode, True) + twoRegCondenseInstX("uadalp", "UadalpQX", "SimdAddOp", smallUnsignedTypes, + 4, adalpCode, True) + # UADDL, UADDL2 + threeRegLongInstX("uaddl", "UaddlX", "SimdAddAccOp", smallUnsignedTypes, + addlwCode) + threeRegLongInstX("uaddl2", "Uaddl2X", "SimdAddAccOp", smallUnsignedTypes, + addlwCode, hi=True) + # UADDLP + twoRegCondenseInstX("uaddlp", "UaddlpDX", "SimdAddOp", smallUnsignedTypes, + 2, addlwCode) + twoRegCondenseInstX("uaddlp", "UaddlpQX", "SimdAddOp", smallUnsignedTypes, + 4, addlwCode) + # UADDLV + twoRegAcrossInstX("uaddlv", "UaddlvDX", "SimdAddOp", + ("uint8_t", "uint16_t"), 2, addAcrossLongCode, long=True) + twoRegAcrossInstX("uaddlv", "UaddlvQX", "SimdAddOp", + ("uint8_t", "uint16_t"), 4, addAcrossLongCode, long=True) + twoRegAcrossInstX("uaddlv", "UaddlvBQX", "SimdAddOp", ("uint32_t",), 4, + addAcrossLongCode, doubleDest=True, long=True) + # UADDW + threeRegWideInstX("uaddw", "UaddwX", "SimdAddAccOp", smallUnsignedTypes, + addlwCode) + threeRegWideInstX("uaddw2", "Uaddw2X", "SimdAddAccOp", smallUnsignedTypes, + addlwCode, hi=True) + # UCVTF (fixed-point) + ucvtfFixedCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, imm, true," + " FPCRRounding(fpscr), fpscr)") + twoEqualRegInstX("ucvtf", "UcvtfFixedDX", "SimdCvtOp", smallFloatTypes, 2, + ucvtfFixedCode, hasImm=True) + twoEqualRegInstX("ucvtf", "UcvtfFixedQX", "SimdCvtOp", floatTypes, 4, + ucvtfFixedCode, hasImm=True) + twoEqualRegInstX("ucvtf", "UcvtfFixedScX", "SimdCvtOp", floatTypes, 4, + ucvtfFixedCode, hasImm=True, scalar=True) + # UCVTF (integer) + ucvtfIntCode = fpOp % ("fplibFixedToFP<Element>(srcElem1, 0, true," + " FPCRRounding(fpscr), fpscr)") + twoEqualRegInstX("ucvtf", "UcvtfIntDX", "SimdCvtOp", smallFloatTypes, 2, + ucvtfIntCode) + twoEqualRegInstX("ucvtf", "UcvtfIntQX", "SimdCvtOp", floatTypes, 4, + ucvtfIntCode) + twoEqualRegInstX("ucvtf", "UcvtfIntScX", "SimdCvtOp", floatTypes, 4, + ucvtfIntCode, scalar=True) + # UHADD + threeEqualRegInstX("uhadd", "UhaddDX", "SimdAddOp", smallUnsignedTypes, 2, + haddCode) + threeEqualRegInstX("uhadd", "UhaddQX", "SimdAddOp", smallUnsignedTypes, 4, + haddCode) + # UHSUB + threeEqualRegInstX("uhsub", "UhsubDX", "SimdAddOp", smallUnsignedTypes, 2, + hsubCode) + threeEqualRegInstX("uhsub", "UhsubQX", "SimdAddOp", smallUnsignedTypes, 4, + hsubCode) + # UMAX + threeEqualRegInstX("umax", "UmaxDX", "SimdCmpOp", smallUnsignedTypes, 2, + maxCode) + threeEqualRegInstX("umax", "UmaxQX", "SimdCmpOp", smallUnsignedTypes, 4, + maxCode) + # UMAXP + threeEqualRegInstX("umaxp", "UmaxpDX", "SimdCmpOp", smallUnsignedTypes, 2, + maxCode, pairwise=True) + threeEqualRegInstX("umaxp", "UmaxpQX", "SimdCmpOp", smallUnsignedTypes, 4, + maxCode, pairwise=True) + # UMAXV + twoRegAcrossInstX("umaxv", "UmaxvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), + 2, maxAcrossCode) + twoRegAcrossInstX("umaxv", "UmaxvQX", "SimdCmpOp", smallUnsignedTypes, 4, + maxAcrossCode) + # UMIN + threeEqualRegInstX("umin", "UminDX", "SimdCmpOp", smallUnsignedTypes, 2, + minCode) + threeEqualRegInstX("umin", "UminQX", "SimdCmpOp", smallUnsignedTypes, 4, + minCode) + # UMINP + threeEqualRegInstX("uminp", "UminpDX", "SimdCmpOp", smallUnsignedTypes, 2, + minCode, pairwise=True) + threeEqualRegInstX("uminp", "UminpQX", "SimdCmpOp", smallUnsignedTypes, 4, + minCode, pairwise=True) + # UMINV + twoRegAcrossInstX("uminv", "UminvDX", "SimdCmpOp", ("uint8_t", "uint16_t"), + 2, minAcrossCode) + twoRegAcrossInstX("uminv", "UminvQX", "SimdCmpOp", smallUnsignedTypes, 4, + minAcrossCode) + # UMLAL (by element) + threeRegLongInstX("umlal", "UmlalElemX", "SimdMultAccOp", + smallUnsignedTypes, mlalCode, True, byElem=True) + threeRegLongInstX("umlal", "UmlalElem2X", "SimdMultAccOp", + smallUnsignedTypes, mlalCode, True, byElem=True, hi=True) + # UMLAL (vector) + threeRegLongInstX("umlal", "UmlalX", "SimdMultAccOp", smallUnsignedTypes, + mlalCode, True) + threeRegLongInstX("umlal", "Umlal2X", "SimdMultAccOp", smallUnsignedTypes, + mlalCode, True, hi=True) + # UMLSL (by element) + threeRegLongInstX("umlsl", "UmlslElemX", "SimdMultAccOp", + smallUnsignedTypes, mlslCode, True, byElem=True) + threeRegLongInstX("umlsl", "UmlslElem2X", "SimdMultAccOp", + smallUnsignedTypes, mlslCode, True, byElem=True, hi=True) + # UMLSL (vector) + threeRegLongInstX("umlsl", "UmlslX", "SimdMultAccOp", smallUnsignedTypes, + mlslCode, True) + threeRegLongInstX("umlsl", "Umlsl2X", "SimdMultAccOp", smallUnsignedTypes, + mlslCode, True, hi=True) + # UMOV + insToGprInstX("umov", "UmovWX", "SimdMiscOp", smallUnsignedTypes, 4, 'W') + insToGprInstX("umov", "UmovXX", "SimdMiscOp", ("uint64_t",), 4, 'X') + # UMULL, UMULL2 (by element) + threeRegLongInstX("umull", "UmullElemX", "SimdMultOp", smallUnsignedTypes, + mullCode, byElem=True) + threeRegLongInstX("umull", "UmullElem2X", "SimdMultOp", smallUnsignedTypes, + mullCode, byElem=True, hi=True) + # UMULL, UMULL2 (vector) + threeRegLongInstX("umull", "UmullX", "SimdMultOp", smallUnsignedTypes, + mullCode) + threeRegLongInstX("umull", "Umull2X", "SimdMultOp", smallUnsignedTypes, + mullCode, hi=True) + # UQADD + uqaddCode = ''' + destElem = srcElem1 + srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (destElem < srcElem1 || destElem < srcElem2) { + destElem = (Element)(-1); + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("uqadd", "UqaddDX", "SimdAddOp", smallUnsignedTypes, 2, + uqaddCode) + threeEqualRegInstX("uqadd", "UqaddQX", "SimdAddOp", unsignedTypes, 4, + uqaddCode) + threeEqualRegInstX("uqadd", "UqaddScX", "SimdAddOp", unsignedTypes, 4, + uqaddCode, scalar=True) + # UQRSHL + uqrshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + Element rBit = 0; + if (shiftAmt <= sizeof(Element) * 8) + rBit = bits(srcElem1, shiftAmt - 1); + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + destElem += rBit; + } else { + if (shiftAmt >= sizeof(Element) * 8) { + if (srcElem1 != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = 0; + } + } else { + if (bits(srcElem1, sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - shiftAmt)) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = srcElem1 << shiftAmt; + } + } + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("uqrshl", "UqrshlDX", "SimdCmpOp", smallUnsignedTypes, + 2, uqrshlCode) + threeEqualRegInstX("uqrshl", "UqrshlQX", "SimdCmpOp", unsignedTypes, 4, + uqrshlCode) + threeEqualRegInstX("uqrshl", "UqrshlScX", "SimdCmpOp", unsignedTypes, 4, + uqrshlCode, scalar=True) + # UQRSHRN + uqrshrnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = (srcElem1 >> (imm - 1)); + uint64_t rBit = mid & 0x1; + mid >>= 1; + mid += rBit; + if (mid != (Element)mid) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + if (srcElem1 != (Element)srcElem1) { + destElem = mask(sizeof(Element) * 8 - 1); + fpscr.qc = 1; + } else { + destElem = srcElem1; + } + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("uqrshrn", "UqrshrnX", "SimdShiftOp", smallUnsignedTypes, + uqrshrnCode, hasImm=True) + twoRegNarrowInstX("uqrshrn2", "Uqrshrn2X", "SimdShiftOp", + smallUnsignedTypes, uqrshrnCode, hasImm=True, hi=True) + twoRegNarrowInstX("uqrshrn", "UqrshrnScX", "SimdShiftOp", + smallUnsignedTypes, uqrshrnCode, hasImm=True, + scalar=True) + # UQSHL (immediate) + uqshlImmCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm >= sizeof(Element) * 8) { + if (srcElem1 != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = 0; + } + } else if (imm) { + destElem = (srcElem1 << imm); + uint64_t topBits = bits((uint64_t)srcElem1, + sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - imm); + if (topBits != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("uqshl", "UqshlImmDX", "SimdAluOp", smallUnsignedTypes, 2, + uqshlImmCode, hasImm=True) + twoEqualRegInstX("uqshl", "UqshlImmQX", "SimdAluOp", unsignedTypes, 4, + uqshlImmCode, hasImm=True) + twoEqualRegInstX("uqshl", "UqshlImmScX", "SimdAluOp", unsignedTypes, 4, + uqshlImmCode, hasImm=True, scalar=True) + # UQSHL (register) + uqshlCode = ''' + int16_t shiftAmt = (int8_t)srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (shiftAmt < 0) { + shiftAmt = -shiftAmt; + if (shiftAmt >= sizeof(Element) * 8) { + shiftAmt = sizeof(Element) * 8 - 1; + destElem = 0; + } else { + destElem = (srcElem1 >> shiftAmt); + } + } else if (shiftAmt > 0) { + if (shiftAmt >= sizeof(Element) * 8) { + if (srcElem1 != 0) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = 0; + } + } else { + if (bits(srcElem1, sizeof(Element) * 8 - 1, + sizeof(Element) * 8 - shiftAmt)) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = srcElem1 << shiftAmt; + } + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("uqshl", "UqshlDX", "SimdAluOp", smallUnsignedTypes, 2, + uqshlCode) + threeEqualRegInstX("uqshl", "UqshlQX", "SimdAluOp", unsignedTypes, 4, + uqshlCode) + threeEqualRegInstX("uqshl", "UqshlScX", "SimdAluOp", unsignedTypes, 4, + uqshlCode, scalar=True) + # UQSHRN, UQSHRN2 + uqshrnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + if (imm > sizeof(srcElem1) * 8) { + if (srcElem1 != 0) + fpscr.qc = 1; + destElem = 0; + } else if (imm) { + BigElement mid = ((srcElem1 >> (imm - 1)) >> 1); + if (mid != (Element)mid) { + destElem = mask(sizeof(Element) * 8); + fpscr.qc = 1; + } else { + destElem = mid; + } + } else { + destElem = srcElem1; + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("uqshrn", "UqshrnX", "SimdShiftOp", smallUnsignedTypes, + uqshrnCode, hasImm=True) + twoRegNarrowInstX("uqshrn2", "Uqshrn2X", "SimdShiftOp", smallUnsignedTypes, + uqshrnCode, hasImm=True, hi=True) + twoRegNarrowInstX("uqshrn", "UqshrnScX", "SimdShiftOp", smallUnsignedTypes, + uqshrnCode, hasImm=True, scalar=True) + # UQSUB + uqsubCode = ''' + destElem = srcElem1 - srcElem2; + FPSCR fpscr = (FPSCR) FpscrQc; + if (destElem > srcElem1) { + destElem = 0; + fpscr.qc = 1; + } + FpscrQc = fpscr; + ''' + threeEqualRegInstX("uqsub", "UqsubDX", "SimdAddOp", smallUnsignedTypes, 2, + uqsubCode) + threeEqualRegInstX("uqsub", "UqsubQX", "SimdAddOp", unsignedTypes, 4, + uqsubCode) + threeEqualRegInstX("uqsub", "UqsubScX", "SimdAddOp", unsignedTypes, 4, + uqsubCode, scalar=True) + # UQXTN + uqxtnCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + destElem = srcElem1; + if ((BigElement)destElem != srcElem1) { + fpscr.qc = 1; + destElem = mask(sizeof(Element) * 8); + } + FpscrQc = fpscr; + ''' + twoRegNarrowInstX("uqxtn", "UqxtnX", "SimdMiscOp", smallUnsignedTypes, + uqxtnCode) + twoRegNarrowInstX("uqxtn", "Uqxtn2X", "SimdMiscOp", smallUnsignedTypes, + uqxtnCode, hi=True) + twoRegNarrowInstX("uqxtn", "UqxtnScX", "SimdMiscOp", smallUnsignedTypes, + uqxtnCode, scalar=True) + # URECPE + urecpeCode = "destElem = unsignedRecipEstimate(srcElem1);" + twoEqualRegInstX("urecpe", "UrecpeDX", "SimdMultAccOp", ("uint32_t",), 2, + urecpeCode) + twoEqualRegInstX("urecpe", "UrecpeQX", "SimdMultAccOp", ("uint32_t",), 4, + urecpeCode) + # URHADD + threeEqualRegInstX("urhadd", "UrhaddDX", "SimdAddOp", smallUnsignedTypes, + 2, rhaddCode) + threeEqualRegInstX("urhadd", "UrhaddQX", "SimdAddOp", smallUnsignedTypes, + 4, rhaddCode) + # URSHL + threeEqualRegInstX("urshl", "UrshlDX", "SimdShiftOp", unsignedTypes, 2, + rshlCode) + threeEqualRegInstX("urshl", "UrshlQX", "SimdShiftOp", unsignedTypes, 4, + rshlCode) + # URSHR + twoEqualRegInstX("urshr", "UrshrDX", "SimdShiftOp", unsignedTypes, 2, + rshrCode, hasImm=True) + twoEqualRegInstX("urshr", "UrshrQX", "SimdShiftOp", unsignedTypes, 4, + rshrCode, hasImm=True) + # URSQRTE + ursqrteCode = "destElem = unsignedRSqrtEstimate(srcElem1);" + twoEqualRegInstX("ursqrte", "UrsqrteDX", "SimdSqrtOp", ("uint32_t",), 2, + ursqrteCode) + twoEqualRegInstX("ursqrte", "UrsqrteQX", "SimdSqrtOp", ("uint32_t",), 4, + ursqrteCode) + # URSRA + twoEqualRegInstX("ursra", "UrsraDX", "SimdShiftOp", unsignedTypes, 2, + rsraCode, True, hasImm=True) + twoEqualRegInstX("ursra", "UrsraQX", "SimdShiftOp", unsignedTypes, 4, + rsraCode, True, hasImm=True) + # USHL + threeEqualRegInstX("ushl", "UshlDX", "SimdShiftOp", unsignedTypes, 2, + shlCode) + threeEqualRegInstX("ushl", "UshlQX", "SimdShiftOp", unsignedTypes, 4, + shlCode) + # USHLL, USHLL2 + twoRegLongInstX("ushll", "UshllX", "SimdShiftOp", smallUnsignedTypes, + shllCode, hasImm=True) + twoRegLongInstX("ushll", "Ushll2X", "SimdShiftOp", smallUnsignedTypes, + shllCode, hi=True, hasImm=True) + # USHR + twoEqualRegInstX("ushr", "UshrDX", "SimdShiftOp", unsignedTypes, 2, + shrCode, hasImm=True) + twoEqualRegInstX("ushr", "UshrQX", "SimdShiftOp", unsignedTypes, 4, + shrCode, hasImm=True) + # USQADD + usqaddCode = ''' + FPSCR fpscr = (FPSCR) FpscrQc; + Element tmp = destElem + srcElem1; + if (bits(srcElem1, sizeof(Element) * 8 - 1) == 0) { + if (tmp < srcElem1 || tmp < destElem) { + destElem = (Element)(-1); + fpscr.qc = 1; + } else { + destElem = tmp; + } + } else { + Element absSrcElem1 = (~srcElem1) + 1; + if (absSrcElem1 > destElem) { + destElem = 0; + fpscr.qc = 1; + } else { + destElem = tmp; + } + } + FpscrQc = fpscr; + ''' + twoEqualRegInstX("usqadd", "UsqaddDX", "SimdAddOp", smallUnsignedTypes, 2, + usqaddCode, True) + twoEqualRegInstX("usqadd", "UsqaddQX", "SimdAddOp", unsignedTypes, 4, + usqaddCode, True) + twoEqualRegInstX("usqadd", "UsqaddScX", "SimdAddOp", unsignedTypes, 4, + usqaddCode, True, scalar=True) + # USRA + twoEqualRegInstX("usra", "UsraDX", "SimdShiftOp", unsignedTypes, 2, + sraCode, True, hasImm=True) + twoEqualRegInstX("usra", "UsraQX", "SimdShiftOp", unsignedTypes, 4, + sraCode, True, hasImm=True) + # USUBL + threeRegLongInstX("usubl", "UsublX", "SimdAddOp", smallUnsignedTypes, + sublwCode) + threeRegLongInstX("usubl2", "Usubl2X", "SimdAddOp", smallUnsignedTypes, + sublwCode, hi=True) + # USUBW + threeRegWideInstX("usubw", "UsubwX", "SimdAddOp", smallUnsignedTypes, + sublwCode) + threeRegWideInstX("usubw2", "Usubw2X", "SimdAddOp", smallUnsignedTypes, + sublwCode, hi=True) + # UXTL -> alias to USHLL + # UZP1 + uzpCode = ''' + unsigned part = %s; + for (unsigned i = 0; i < eCount / 2; i++) { + destReg.elements[i] = srcReg1.elements[2 * i + part]; + destReg.elements[eCount / 2 + i] = srcReg2.elements[2 * i + part]; + } + ''' + threeRegScrambleInstX("Uzp1", "Uzp1DX", "SimdAluOp", smallUnsignedTypes, 2, + uzpCode % "0") + threeRegScrambleInstX("Uzp1", "Uzp1QX", "SimdAluOp", unsignedTypes, 4, + uzpCode % "0") + # UZP2 + threeRegScrambleInstX("Uzp2", "Uzp2DX", "SimdAluOp", smallUnsignedTypes, 2, + uzpCode % "1") + threeRegScrambleInstX("Uzp2", "Uzp2QX", "SimdAluOp", unsignedTypes, 4, + uzpCode % "1") + # XTN, XTN2 + xtnCode = "destElem = srcElem1;" + twoRegNarrowInstX("Xtn", "XtnX", "SimdMiscOp", smallUnsignedTypes, xtnCode) + twoRegNarrowInstX("Xtn", "Xtn2X", "SimdMiscOp", smallUnsignedTypes, + xtnCode, hi=True) + # ZIP1 + zipCode = ''' + unsigned base = %s; + for (unsigned i = 0; i < eCount / 2; i++) { + destReg.elements[2 * i] = srcReg1.elements[base + i]; + destReg.elements[2 * i + 1] = srcReg2.elements[base + i]; + } + ''' + threeRegScrambleInstX("zip1", "Zip1DX", "SimdAluOp", smallUnsignedTypes, 2, + zipCode % "0") + threeRegScrambleInstX("zip1", "Zip1QX", "SimdAluOp", unsignedTypes, 4, + zipCode % "0") + # ZIP2 + threeRegScrambleInstX("zip2", "Zip2DX", "SimdAluOp", smallUnsignedTypes, 2, + zipCode % "eCount / 2") + threeRegScrambleInstX("zip2", "Zip2QX", "SimdAluOp", unsignedTypes, 4, + zipCode % "eCount / 2") + +}}; diff --git a/src/arch/arm/isa/insts/neon64_mem.isa b/src/arch/arm/isa/insts/neon64_mem.isa new file mode 100644 index 000000000..32a37f87e --- /dev/null +++ b/src/arch/arm/isa/insts/neon64_mem.isa @@ -0,0 +1,471 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Mbou Eyole +// Giacomo Gabrielli + +let {{ + + header_output = '' + decoder_output = '' + exec_output = '' + + def mkMemAccMicroOp(name): + global header_output, decoder_output, exec_output + SPAlignmentCheckCodeNeon = ''' + if (baseIsSP && bits(XURa, 3, 0) && + SPAlignmentCheckEnabled(xc->tcBase())) { + return new SPAlignmentFault(); + } + ''' + eaCode = SPAlignmentCheckCodeNeon + ''' + EA = XURa + imm; + ''' + memDecl = ''' + const int MaxNumBytes = 16; + union MemUnion { + uint8_t bytes[MaxNumBytes]; + uint32_t floatRegBits[MaxNumBytes / 4]; + }; + ''' + + # Do endian conversion for all the elements + convCode = ''' + VReg x = {0, 0}; + + x.lo = (((XReg) memUnion.floatRegBits[1]) << 32) | + (XReg) memUnion.floatRegBits[0]; + x.hi = (((XReg) memUnion.floatRegBits[3]) << 32) | + (XReg) memUnion.floatRegBits[2]; + + const unsigned eCount = 16 / (1 << eSize); + + if (isBigEndian64(xc->tcBase())) { + for (unsigned i = 0; i < eCount; i++) { + switch (eSize) { + case 0x3: // 64-bit + writeVecElem(&x, (XReg) gtobe( + (uint64_t) readVecElem(x, i, eSize)), i, eSize); + break; + case 0x2: // 32-bit + writeVecElem(&x, (XReg) gtobe( + (uint32_t) readVecElem(x, i, eSize)), i, eSize); + break; + case 0x1: // 16-bit + writeVecElem(&x, (XReg) gtobe( + (uint16_t) readVecElem(x, i, eSize)), i, eSize); + break; + default: // 8-bit + break; // Nothing to do here + } + } + } else { + for (unsigned i = 0; i < eCount; i++) { + switch (eSize) { + case 0x3: // 64-bit + writeVecElem(&x, (XReg) gtole( + (uint64_t) readVecElem(x, i, eSize)), i, eSize); + break; + case 0x2: // 32-bit + writeVecElem(&x, (XReg) gtole( + (uint32_t) readVecElem(x, i, eSize)), i, eSize); + break; + case 0x1: // 16-bit + writeVecElem(&x, (XReg) gtole( + (uint16_t) readVecElem(x, i, eSize)), i, eSize); + break; + default: // 8-bit + break; // Nothing to do here + } + } + } + + memUnion.floatRegBits[0] = (uint32_t) x.lo; + memUnion.floatRegBits[1] = (uint32_t) (x.lo >> 32); + memUnion.floatRegBits[2] = (uint32_t) x.hi; + memUnion.floatRegBits[3] = (uint32_t) (x.hi >> 32); + ''' + + # Offload everything into registers + regSetCode = '' + for reg in range(4): + regSetCode += ''' + AA64FpDestP%(reg)d_uw = gtoh(memUnion.floatRegBits[%(reg)d]); + ''' % { 'reg' : reg } + + # Pull everything in from registers + regGetCode = '' + for reg in range(4): + regGetCode += ''' + memUnion.floatRegBits[%(reg)d] = htog(AA64FpDestP%(reg)d_uw); + ''' % { 'reg' : reg } + + loadMemAccCode = convCode + regSetCode + storeMemAccCode = regGetCode + convCode + + loadIop = InstObjParams(name + 'ld', + 'MicroNeonLoad64', + 'MicroNeonMemOp', + { 'mem_decl' : memDecl, + 'memacc_code' : loadMemAccCode, + 'ea_code' : simd64EnabledCheckCode + eaCode, + }, + [ 'IsMicroop', 'IsMemRef', 'IsLoad' ]) + storeIop = InstObjParams(name + 'st', + 'MicroNeonStore64', + 'MicroNeonMemOp', + { 'mem_decl' : memDecl, + 'memacc_code' : storeMemAccCode, + 'ea_code' : simd64EnabledCheckCode + eaCode, + }, + [ 'IsMicroop', 'IsMemRef', 'IsStore' ]) + + exec_output += NeonLoadExecute64.subst(loadIop) + \ + NeonLoadInitiateAcc64.subst(loadIop) + \ + NeonLoadCompleteAcc64.subst(loadIop) + \ + NeonStoreExecute64.subst(storeIop) + \ + NeonStoreInitiateAcc64.subst(storeIop) + \ + NeonStoreCompleteAcc64.subst(storeIop) + header_output += MicroNeonMemDeclare64.subst(loadIop) + \ + MicroNeonMemDeclare64.subst(storeIop) + + def mkMarshalMicroOp(name, Name): + global header_output, decoder_output, exec_output + + getInputCodeOp1L = '' + for v in range(4): + for p in range(4): + getInputCodeOp1L += ''' + writeVecElem(&input[%(v)d], (XReg) AA64FpOp1P%(p)dV%(v)d_uw, + %(p)d, 0x2); + ''' % { 'v' : v, 'p' : p } + + getInputCodeOp1S = '' + for v in range(4): + for p in range(4): + getInputCodeOp1S += ''' + writeVecElem(&input[%(v)d], (XReg) AA64FpOp1P%(p)dV%(v)dS_uw, + %(p)d, 0x2); + ''' % { 'v' : v, 'p' : p } + + if name == 'deint_neon_uop': + + eCode = ''' + VReg input[4]; // input data from scratch area + VReg output[2]; // output data to arch. SIMD regs + VReg temp; + temp.lo = 0; + temp.hi = 0; + ''' + for p in range(4): + eCode += ''' + writeVecElem(&temp, (XReg) AA64FpDestP%(p)dV1L_uw, %(p)d, 0x2); + ''' % { 'p' : p } + eCode += getInputCodeOp1L + + # Note that numRegs is not always the same as numStructElems; in + # particular, for LD1/ST1, numStructElems is 1 but numRegs can be + # 1, 2, 3 or 4 + + eCode += ''' + output[0].lo = 0; + output[0].hi = 0; + output[1].lo = 0; + output[1].hi = 0; + + int eCount = dataSize / (8 << eSize); + int eSizeBytes = 1 << eSize; // element size in bytes + int numBytes = step * dataSize / 4; + int totNumBytes = numRegs * dataSize / 8; + + int structElemNo, pos, a, b; + XReg data; + + for (int r = 0; r < 2; ++r) { + for (int i = 0; i < eCount; ++i) { + if (numBytes < totNumBytes) { + structElemNo = r + (step * 2); + if (numStructElems == 1) { + pos = (eSizeBytes * i) + + (eCount * structElemNo * eSizeBytes); + } else { + pos = (numStructElems * eSizeBytes * i) + + (structElemNo * eSizeBytes); + } + a = pos / 16; + b = (pos % 16) / eSizeBytes; + data = (XReg) readVecElem(input[a], (XReg) b, + eSize); + writeVecElem(&output[r], data, i, eSize); + numBytes += eSizeBytes; + } + } + } + ''' + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV0L_uw = (uint32_t) readVecElem(output[0], + %(p)d, 0x2); + ''' % { 'p' : p } + eCode += ''' + if ((numRegs % 2 == 0) || (numRegs == 3 && step == 0)) { + ''' + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV1L_uw = (uint32_t) readVecElem( + output[1], %(p)d, 0x2); + ''' % { 'p' : p } + eCode += ''' + } else { + ''' + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV1L_uw = (uint32_t) readVecElem(temp, + %(p)d, 0x2); + ''' % { 'p' : p } + eCode += ''' + } + ''' + + iop = InstObjParams(name, Name, 'MicroNeonMixOp64', + { 'code' : eCode }, ['IsMicroop']) + header_output += MicroNeonMixDeclare64.subst(iop) + exec_output += MicroNeonMixExecute64.subst(iop) + + elif name == 'int_neon_uop': + + eCode = ''' + VReg input[4]; // input data from arch. SIMD regs + VReg output[2]; // output data to scratch area + ''' + + eCode += getInputCodeOp1S + + # Note that numRegs is not always the same as numStructElems; in + # particular, for LD1/ST1, numStructElems is 1 but numRegs can be + # 1, 2, 3 or 4 + + eCode += ''' + int eCount = dataSize / (8 << eSize); + int eSizeBytes = 1 << eSize; + int totNumBytes = numRegs * dataSize / 8; + int numOutputElems = 128 / (8 << eSize); + int stepOffset = step * 32; + + for (int i = 0; i < 2; ++i) { + output[i].lo = 0; + output[i].hi = 0; + } + + int r = 0, k = 0, i, j; + XReg data; + + for (int pos = stepOffset; pos < 32 + stepOffset; + pos += eSizeBytes) { + if (pos < totNumBytes) { + if (numStructElems == 1) { + i = (pos / eSizeBytes) % eCount; + j = pos / (eCount * eSizeBytes); + } else { + i = pos / (numStructElems * eSizeBytes); + j = (pos % (numStructElems * eSizeBytes)) / + eSizeBytes; + } + data = (XReg) readVecElem(input[j], (XReg) i, eSize); + writeVecElem(&output[r], data, k, eSize); + k++; + if (k == numOutputElems){ + k = 0; + ++r; + } + } + } + ''' + for v in range(2): + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV%(v)d_uw = (uint32_t) readVecElem( + output[%(v)d], %(p)d, 0x2); + ''' % { 'v': v, 'p': p} + + iop = InstObjParams(name, Name, 'MicroNeonMixOp64', + { 'code' : eCode }, ['IsMicroop']) + header_output += MicroNeonMixDeclare64.subst(iop) + exec_output += MicroNeonMixExecute64.subst(iop) + + elif name == 'unpack_neon_uop': + + eCode = ''' + VReg input[4]; //input data from scratch area + VReg output[2]; //output data to arch. SIMD regs + ''' + + eCode += getInputCodeOp1L + + # Fill output regs with register data initially. Note that + # elements in output register outside indexed lanes are left + # untouched + for v in range(2): + for p in range(4): + eCode += ''' + writeVecElem(&output[%(v)d], (XReg) AA64FpDestP%(p)dV%(v)dL_uw, + %(p)d, 0x2); + ''' % { 'v': v, 'p': p} + eCode += ''' + int eCount = dataSize / (8 << eSize); + int eCount128 = 128 / (8 << eSize); + int eSizeBytes = 1 << eSize; + int totNumBytes = numStructElems * eSizeBytes; + int numInputElems = eCount128; + int stepOffset = step * 2 * eSizeBytes; + int stepLimit = 2 * eSizeBytes; + + int r = 0, i, j; + XReg data; + + for (int pos = stepOffset; pos < stepLimit + stepOffset; + pos += eSizeBytes) { + if (pos < totNumBytes) { + r = pos / eSizeBytes; + j = r / numInputElems; + i = r % numInputElems; + data = (XReg) readVecElem(input[j], (XReg) i, eSize); + + if (replicate) { + for (int i = 0; i < eCount128; ++i) { + if (i < eCount) { + writeVecElem(&output[r % 2], data, i, + eSize); + } else { // zero extend if necessary + writeVecElem(&output[r % 2], (XReg) 0, i, + eSize); + } + } + } else { + writeVecElem(&output[r % 2], data, lane, eSize); + } + } + } + ''' + for v in range(2): + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV%(v)dL_uw = (uint32_t) readVecElem( + output[%(v)d], %(p)d, 0x2); + ''' % { 'v' : v, 'p' : p } + + iop = InstObjParams(name, Name, 'MicroNeonMixLaneOp64', + { 'code' : eCode }, ['IsMicroop']) + header_output += MicroNeonMixLaneDeclare64.subst(iop) + exec_output += MicroNeonMixExecute64.subst(iop) + + elif name == 'pack_neon_uop': + + eCode = ''' + VReg input[4]; // input data from arch. SIMD regs + VReg output[2]; // output data to scratch area + ''' + + eCode += getInputCodeOp1S + + eCode += ''' + int eSizeBytes = 1 << eSize; + int numOutputElems = 128 / (8 << eSize); + int totNumBytes = numStructElems * eSizeBytes; + int stepOffset = step * 32; + int stepLimit = 32; + + int r = 0, i, j; + XReg data; + + for (int i = 0; i < 2; ++i) { + output[i].lo = 0; + output[i].hi = 0; + } + + for (int pos = stepOffset; pos < stepLimit + stepOffset; + pos += eSizeBytes) { + if (pos < totNumBytes) { + r = pos / 16; + j = pos / eSizeBytes; + i = (pos / eSizeBytes) % numOutputElems; + data = (XReg) readVecElem(input[j], lane, eSize); + writeVecElem(&output[r % 2], data, i, eSize); + } + } + ''' + + for v in range(2): + for p in range(4): + eCode += ''' + AA64FpDestP%(p)dV%(v)d_uw = (uint32_t) readVecElem( + output[%(v)d], %(p)d, 0x2); + ''' % { 'v' : v, 'p' : p } + + iop = InstObjParams(name, Name, 'MicroNeonMixLaneOp64', + { 'code' : eCode }, ['IsMicroop']) + header_output += MicroNeonMixLaneDeclare64.subst(iop) + exec_output += MicroNeonMixExecute64.subst(iop) + + # Generate instructions + mkMemAccMicroOp('mem_neon_uop') + mkMarshalMicroOp('deint_neon_uop', 'MicroDeintNeon64') + mkMarshalMicroOp('int_neon_uop', 'MicroIntNeon64') + mkMarshalMicroOp('unpack_neon_uop', 'MicroUnpackNeon64') + mkMarshalMicroOp('pack_neon_uop', 'MicroPackNeon64') + +}}; + +let {{ + + iop = InstObjParams('vldmult64', 'VldMult64', 'VldMultOp64', '', []) + header_output += VMemMultDeclare64.subst(iop) + decoder_output += VMemMultConstructor64.subst(iop) + + iop = InstObjParams('vstmult64', 'VstMult64', 'VstMultOp64', '', []) + header_output += VMemMultDeclare64.subst(iop) + decoder_output += VMemMultConstructor64.subst(iop) + + iop = InstObjParams('vldsingle64', 'VldSingle64', 'VldSingleOp64', '', []) + header_output += VMemSingleDeclare64.subst(iop) + decoder_output += VMemSingleConstructor64.subst(iop) + + iop = InstObjParams('vstsingle64', 'VstSingle64', 'VstSingleOp64', '', []) + header_output += VMemSingleDeclare64.subst(iop) + decoder_output += VMemSingleConstructor64.subst(iop) + +}}; diff --git a/src/arch/arm/isa/insts/str.isa b/src/arch/arm/isa/insts/str.isa index 80846053b..3f595692a 100644 --- a/src/arch/arm/isa/insts/str.isa +++ b/src/arch/arm/isa/insts/str.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -38,6 +38,7 @@ // Authors: Gabe Black let {{ + import math header_output = "" decoder_output = "" @@ -77,7 +78,9 @@ let {{ (newHeader, newDecoder, newExec) = self.fillTemplates(self.name, self.Name, codeBlobs, - self.memFlags, self.instFlags, base, wbDecl) + self.memFlags, self.instFlags, + base, wbDecl, None, False, + self.size, self.sign) header_output += newHeader decoder_output += newDecoder @@ -171,7 +174,7 @@ let {{ self.size, self.sign, self.user) # Add memory request flags where necessary - self.memFlags.append("%d" % (self.size - 1)) + self.memFlags.append("%d" % int(math.log(self.size, 2))) if self.user: self.memFlags.append("ArmISA::TLB::UserMode") diff --git a/src/arch/arm/isa/insts/str64.isa b/src/arch/arm/isa/insts/str64.isa new file mode 100644 index 000000000..c15dca16e --- /dev/null +++ b/src/arch/arm/isa/insts/str64.isa @@ -0,0 +1,372 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + class StoreInst64(LoadStoreInst): + execBase = 'Store64' + micro = False + + def __init__(self, mnem, Name, size=4, user=False, flavor="normal", + top = False): + super(StoreInst64, self).__init__() + + self.name = mnem + self.Name = Name + self.size = size + self.user = user + self.flavor = flavor + self.top = top + + self.memFlags = ["ArmISA::TLB::MustBeOne"] + self.instFlags = [] + self.codeBlobs = { "postacc_code" : "" } + + # Add memory request flags where necessary + if self.user: + self.memFlags.append("ArmISA::TLB::UserMode") + + if self.flavor in ("relexp", "exp"): + # For exclusive pair ops alignment check is based on total size + self.memFlags.append("%d" % int(math.log(self.size, 2) + 1)) + elif not (self.size == 16 and self.top): + # Only the first microop should perform alignment checking. + self.memFlags.append("%d" % int(math.log(self.size, 2))) + + if self.flavor not in ("release", "relex", "exclusive", + "relexp", "exp"): + self.memFlags.append("ArmISA::TLB::AllowUnaligned") + + if self.micro: + self.instFlags.append("IsMicroop") + + if self.flavor in ("release", "relex", "relexp"): + self.instFlags.extend(["IsMemBarrier", + "IsWriteBarrier", + "IsReadBarrier"]) + if self.flavor in ("relex", "exclusive", "exp", "relexp"): + self.instFlags.append("IsStoreConditional") + self.memFlags.append("Request::LLSC") + + def emitHelper(self, base = 'Memory64', wbDecl = None): + global header_output, decoder_output, exec_output + + # If this is a microop itself, don't allow anything that would + # require further microcoding. + if self.micro: + assert not wbDecl + + fa_code = None + if not self.micro and self.flavor in ("normal", "release"): + fa_code = ''' + fault->annotate(ArmFault::SAS, %s); + fault->annotate(ArmFault::SSE, false); + fault->annotate(ArmFault::SRT, dest); + fault->annotate(ArmFault::SF, %s); + fault->annotate(ArmFault::AR, %s); + ''' % ("0" if self.size == 1 else + "1" if self.size == 2 else + "2" if self.size == 4 else "3", + "true" if self.size == 8 else "false", + "true" if self.flavor == "release" else "false") + + (newHeader, newDecoder, newExec) = \ + self.fillTemplates(self.name, self.Name, self.codeBlobs, + self.memFlags, self.instFlags, + base, wbDecl, faCode=fa_code) + + header_output += newHeader + decoder_output += newDecoder + exec_output += newExec + + def buildEACode(self): + # Address computation + eaCode = "" + if self.flavor == "fp": + eaCode += vfp64EnabledCheckCode + + eaCode += SPAlignmentCheckCode + "EA = XBase" + if self.size == 16: + if self.top: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 0 : 8)" + else: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 8 : 0)" + if not self.post: + eaCode += self.offset + eaCode += ";" + + self.codeBlobs["ea_code"] = eaCode + + + class StoreImmInst64(StoreInst64): + def __init__(self, *args, **kargs): + super(StoreImmInst64, self).__init__(*args, **kargs) + self.offset = "+ imm" + + self.wbDecl = "MicroAddXiUop(machInst, base, base, imm);" + + class StoreRegInst64(StoreInst64): + def __init__(self, *args, **kargs): + super(StoreRegInst64, self).__init__(*args, **kargs) + self.offset = "+ extendReg64(XOffset, type, shiftAmt, 64)" + + self.wbDecl = \ + "MicroAddXERegUop(machInst, base, base, " + \ + " offset, type, shiftAmt);" + + class StoreRawRegInst64(StoreInst64): + def __init__(self, *args, **kargs): + super(StoreRawRegInst64, self).__init__(*args, **kargs) + self.offset = "" + + class StoreSingle64(StoreInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor == "fp": + if self.size in (1, 2, 4): + accCode = ''' + Mem%(suffix)s = + cSwap(AA64FpDestP0%(suffix)s, isBigEndian64(xc->tcBase())); + ''' + elif self.size == 8 or (self.size == 16 and not self.top): + accCode = ''' + uint64_t data = AA64FpDestP1_uw; + data = (data << 32) | AA64FpDestP0_uw; + Mem%(suffix)s = cSwap(data, isBigEndian64(xc->tcBase())); + ''' + elif self.size == 16 and self.top: + accCode = ''' + uint64_t data = AA64FpDestP3_uw; + data = (data << 32) | AA64FpDestP2_uw; + Mem%(suffix)s = cSwap(data, isBigEndian64(xc->tcBase())); + ''' + else: + accCode = \ + 'Mem%(suffix)s = cSwap(XDest%(suffix)s, isBigEndian64(xc->tcBase()));' + if self.size == 16: + accCode = accCode % \ + { "suffix" : buildMemSuffix(False, 8) } + else: + accCode = accCode % \ + { "suffix" : buildMemSuffix(False, self.size) } + + self.codeBlobs["memacc_code"] = accCode + + if self.flavor in ("relex", "exclusive"): + self.instFlags.append("IsStoreConditional") + self.memFlags.append("Request::LLSC") + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class StoreDouble64(StoreInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor == "fp": + accCode = ''' + uint64_t data = AA64FpDest2P0_uw; + data = (data << 32) | AA64FpDestP0_uw; + Mem_ud = cSwap(data, isBigEndian64(xc->tcBase())); + ''' + else: + if self.size == 4: + accCode = ''' + uint64_t data = XDest2_uw; + data = (data << 32) | XDest_uw; + Mem_ud = cSwap(data, isBigEndian64(xc->tcBase())); + ''' + elif self.size == 8: + accCode = ''' + // This temporary needs to be here so that the parser + // will correctly identify this instruction as a store. + Twin64_t temp; + temp.a = XDest_ud; + temp.b = XDest2_ud; + Mem_tud = temp; + ''' + self.codeBlobs["memacc_code"] = accCode + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class StoreImm64(StoreImmInst64, StoreSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryImm64' + writeback = False + post = False + + class StorePre64(StoreImmInst64, StoreSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPreIndex64' + writeback = True + post = False + + class StorePost64(StoreImmInst64, StoreSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPostIndex64' + writeback = True + post = True + + class StoreReg64(StoreRegInst64, StoreSingle64): + decConstBase = 'LoadStoreReg64' + base = 'ArmISA::MemoryReg64' + writeback = False + post = False + + class StoreRaw64(StoreRawRegInst64, StoreSingle64): + decConstBase = 'LoadStoreRaw64' + base = 'ArmISA::MemoryRaw64' + writeback = False + post = False + + class StoreEx64(StoreRawRegInst64, StoreSingle64): + decConstBase = 'LoadStoreEx64' + base = 'ArmISA::MemoryEx64' + writeback = False + post = False + execBase = 'StoreEx64' + def __init__(self, *args, **kargs): + super(StoreEx64, self).__init__(*args, **kargs) + self.codeBlobs["postacc_code"] = "XResult = !writeResult;" + + def buildStores64(mnem, NameBase, size, flavor="normal"): + StoreImm64(mnem, NameBase + "_IMM", size, flavor=flavor).emit() + StorePre64(mnem, NameBase + "_PRE", size, flavor=flavor).emit() + StorePost64(mnem, NameBase + "_POST", size, flavor=flavor).emit() + StoreReg64(mnem, NameBase + "_REG", size, flavor=flavor).emit() + + buildStores64("strb", "STRB64", 1) + buildStores64("strh", "STRH64", 2) + buildStores64("str", "STRW64", 4) + buildStores64("str", "STRX64", 8) + buildStores64("str", "STRBFP64", 1, flavor="fp") + buildStores64("str", "STRHFP64", 2, flavor="fp") + buildStores64("str", "STRSFP64", 4, flavor="fp") + buildStores64("str", "STRDFP64", 8, flavor="fp") + + StoreImm64("sturb", "STURB64_IMM", 1).emit() + StoreImm64("sturh", "STURH64_IMM", 2).emit() + StoreImm64("stur", "STURW64_IMM", 4).emit() + StoreImm64("stur", "STURX64_IMM", 8).emit() + StoreImm64("stur", "STURBFP64_IMM", 1, flavor="fp").emit() + StoreImm64("stur", "STURHFP64_IMM", 2, flavor="fp").emit() + StoreImm64("stur", "STURSFP64_IMM", 4, flavor="fp").emit() + StoreImm64("stur", "STURDFP64_IMM", 8, flavor="fp").emit() + + StoreImm64("sttrb", "STTRB64_IMM", 1, user=True).emit() + StoreImm64("sttrh", "STTRH64_IMM", 2, user=True).emit() + StoreImm64("sttr", "STTRW64_IMM", 4, user=True).emit() + StoreImm64("sttr", "STTRX64_IMM", 8, user=True).emit() + + StoreRaw64("stlr", "STLRX64", 8, flavor="release").emit() + StoreRaw64("stlr", "STLRW64", 4, flavor="release").emit() + StoreRaw64("stlrh", "STLRH64", 2, flavor="release").emit() + StoreRaw64("stlrb", "STLRB64", 1, flavor="release").emit() + + StoreEx64("stlxr", "STLXRX64", 8, flavor="relex").emit() + StoreEx64("stlxr", "STLXRW64", 4, flavor="relex").emit() + StoreEx64("stlxrh", "STLXRH64", 2, flavor="relex").emit() + StoreEx64("stlxrb", "STLXRB64", 1, flavor="relex").emit() + + StoreEx64("stxr", "STXRX64", 8, flavor="exclusive").emit() + StoreEx64("stxr", "STXRW64", 4, flavor="exclusive").emit() + StoreEx64("stxrh", "STXRH64", 2, flavor="exclusive").emit() + StoreEx64("stxrb", "STXRB64", 1, flavor="exclusive").emit() + + class StoreImmU64(StoreImm64): + decConstBase = 'LoadStoreImmU64' + micro = True + + class StoreImmDU64(StoreImmInst64, StoreDouble64): + decConstBase = 'LoadStoreImmDU64' + base = 'ArmISA::MemoryDImm64' + micro = True + post = False + writeback = False + + class StoreImmDEx64(StoreImmInst64, StoreDouble64): + execBase = 'StoreEx64' + decConstBase = 'StoreImmDEx64' + base = 'ArmISA::MemoryDImmEx64' + micro = False + post = False + writeback = False + def __init__(self, *args, **kargs): + super(StoreImmDEx64, self).__init__(*args, **kargs) + self.codeBlobs["postacc_code"] = "XResult = !writeResult;" + + class StoreRegU64(StoreReg64): + decConstBase = 'LoadStoreRegU64' + micro = True + + StoreImmDEx64("stlxp", "STLXPW64", 4, flavor="relexp").emit() + StoreImmDEx64("stlxp", "STLXPX64", 8, flavor="relexp").emit() + StoreImmDEx64("stxp", "STXPW64", 4, flavor="exp").emit() + StoreImmDEx64("stxp", "STXPX64", 8, flavor="exp").emit() + + StoreImmU64("strxi_uop", "MicroStrXImmUop", 8).emit() + StoreRegU64("strxr_uop", "MicroStrXRegUop", 8).emit() + StoreImmU64("strfpxi_uop", "MicroStrFpXImmUop", 8, flavor="fp").emit() + StoreRegU64("strfpxr_uop", "MicroStrFpXRegUop", 8, flavor="fp").emit() + StoreImmU64("strqbfpxi_uop", "MicroStrQBFpXImmUop", + 16, flavor="fp", top=False).emit() + StoreRegU64("strqbfpxr_uop", "MicroStrQBFpXRegUop", + 16, flavor="fp", top=False).emit() + StoreImmU64("strqtfpxi_uop", "MicroStrQTFpXImmUop", + 16, flavor="fp", top=True).emit() + StoreRegU64("strqtfpxr_uop", "MicroStrQTFpXRegUop", + 16, flavor="fp", top=True).emit() + StoreImmDU64("strdxi_uop", "MicroStrDXImmUop", 4).emit() + StoreImmDU64("strdfpxi_uop", "MicroStrDFpXImmUop", 4, flavor="fp").emit() + +}}; diff --git a/src/arch/arm/isa/insts/swap.isa b/src/arch/arm/isa/insts/swap.isa index b42a1c4b2..f2ceed28e 100644 --- a/src/arch/arm/isa/insts/swap.isa +++ b/src/arch/arm/isa/insts/swap.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -73,10 +73,7 @@ let {{ swpPreAccCode = ''' if (!((SCTLR)Sctlr).sw) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + return new UndefinedInstruction(machInst, false, mnemonic); } ''' diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa index 64deef044..7a1213377 100644 --- a/src/arch/arm/isa/operands.isa +++ b/src/arch/arm/isa/operands.isa @@ -1,5 +1,5 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -80,6 +80,31 @@ let {{ xc->%(func)s(this, %(op_idx)s, %(final_val)s); } ''' + aarch64Read = ''' + ((xc->%(func)s(this, %(op_idx)s)) & mask(intWidth)) + ''' + aarch64Write = ''' + xc->%(func)s(this, %(op_idx)s, (%(final_val)s) & mask(intWidth)) + ''' + aarchX64Read = ''' + ((xc->%(func)s(this, %(op_idx)s)) & mask(aarch64 ? 64 : 32)) + ''' + aarchX64Write = ''' + xc->%(func)s(this, %(op_idx)s, (%(final_val)s) & mask(aarch64 ? 64 : 32)) + ''' + aarchW64Read = ''' + ((xc->%(func)s(this, %(op_idx)s)) & mask(32)) + ''' + aarchW64Write = ''' + xc->%(func)s(this, %(op_idx)s, (%(final_val)s) & mask(32)) + ''' + cntrlNsBankedWrite = ''' + xc->setMiscReg(flattenMiscRegNsBanked(dest, xc->tcBase()), %(final_val)s) + ''' + + cntrlNsBankedRead = ''' + xc->readMiscReg(flattenMiscRegNsBanked(op1, xc->tcBase())) + ''' #PCState operands need to have a sorting index (the number at the end) #less than all the integer registers which might update the PC. That way @@ -99,6 +124,18 @@ let {{ return ('IntReg', 'uw', idx, 'IsInteger', srtNormal, maybePCRead, maybePCWrite) + def intReg64(idx): + return ('IntReg', 'ud', idx, 'IsInteger', srtNormal, + aarch64Read, aarch64Write) + + def intRegX64(idx, id = srtNormal): + return ('IntReg', 'ud', idx, 'IsInteger', id, + aarchX64Read, aarchX64Write) + + def intRegW64(idx, id = srtNormal): + return ('IntReg', 'ud', idx, 'IsInteger', id, + aarchW64Read, aarchW64Write) + def intRegNPC(idx): return ('IntReg', 'uw', idx, 'IsInteger', srtNormal) @@ -120,26 +157,49 @@ let {{ def cntrlReg(idx, id = srtNormal, type = 'uw'): return ('ControlReg', type, idx, None, id) + def cntrlNsBankedReg(idx, id = srtNormal, type = 'uw'): + return ('ControlReg', type, idx, (None, None, 'IsControl'), id, cntrlNsBankedRead, cntrlNsBankedWrite) + + def cntrlNsBankedReg64(idx, id = srtNormal, type = 'ud'): + return ('ControlReg', type, idx, (None, None, 'IsControl'), id, cntrlNsBankedRead, cntrlNsBankedWrite) + def cntrlRegNC(idx, id = srtNormal, type = 'uw'): return ('ControlReg', type, idx, None, id) def pcStateReg(idx, id): - return ('PCState', 'uw', idx, (None, None, 'IsControl'), id) + return ('PCState', 'ud', idx, (None, None, 'IsControl'), id) }}; def operands {{ #Abstracted integer reg operands 'Dest': intReg('dest'), + 'Dest64': intReg64('dest'), + 'XDest': intRegX64('dest'), + 'WDest': intRegW64('dest'), 'IWDest': intRegIWPC('dest'), 'AIWDest': intRegAIWPC('dest'), 'Dest2': intReg('dest2'), + 'XDest2': intRegX64('dest2'), + 'FDest2': floatReg('dest2'), 'Result': intReg('result'), + 'XResult': intRegX64('result'), + 'XBase': intRegX64('base', id = srtBase), 'Base': intRegAPC('base', id = srtBase), + 'XOffset': intRegX64('offset'), 'Index': intReg('index'), 'Shift': intReg('shift'), 'Op1': intReg('op1'), 'Op2': intReg('op2'), 'Op3': intReg('op3'), + 'Op164': intReg64('op1'), + 'Op264': intReg64('op2'), + 'Op364': intReg64('op3'), + 'XOp1': intRegX64('op1'), + 'XOp2': intRegX64('op2'), + 'XOp3': intRegX64('op3'), + 'WOp1': intRegW64('op1'), + 'WOp2': intRegW64('op2'), + 'WOp3': intRegW64('op3'), 'Reg0': intReg('reg0'), 'Reg1': intReg('reg1'), 'Reg2': intReg('reg2'), @@ -147,13 +207,19 @@ def operands {{ #Fixed index integer reg operands 'SpMode': intRegNPC('intRegInMode((OperatingMode)regMode, INTREG_SP)'), + 'DecodedBankedIntReg': intRegNPC('decodeMrsMsrBankedIntRegIndex(byteMask, r)'), 'LR': intRegNPC('INTREG_LR'), + 'XLR': intRegX64('INTREG_X30'), 'R7': intRegNPC('7'), # First four arguments are passed in registers 'R0': intRegNPC('0'), 'R1': intRegNPC('1'), 'R2': intRegNPC('2'), 'R3': intRegNPC('3'), + 'X0': intRegX64('0'), + 'X1': intRegX64('1'), + 'X2': intRegX64('2'), + 'X3': intRegX64('3'), #Pseudo integer condition code registers 'CondCodesNZ': intRegCC('INTREG_CONDCODES_NZ'), @@ -230,9 +296,95 @@ def operands {{ 'FpOp2P2': floatReg('(op2 + 2)'), 'FpOp2P3': floatReg('(op2 + 3)'), + # Create AArch64 unpacked view of the FP registers + 'AA64FpOp1P0': floatReg('((op1 * 4) + 0)'), + 'AA64FpOp1P1': floatReg('((op1 * 4) + 1)'), + 'AA64FpOp1P2': floatReg('((op1 * 4) + 2)'), + 'AA64FpOp1P3': floatReg('((op1 * 4) + 3)'), + 'AA64FpOp2P0': floatReg('((op2 * 4) + 0)'), + 'AA64FpOp2P1': floatReg('((op2 * 4) + 1)'), + 'AA64FpOp2P2': floatReg('((op2 * 4) + 2)'), + 'AA64FpOp2P3': floatReg('((op2 * 4) + 3)'), + 'AA64FpOp3P0': floatReg('((op3 * 4) + 0)'), + 'AA64FpOp3P1': floatReg('((op3 * 4) + 1)'), + 'AA64FpOp3P2': floatReg('((op3 * 4) + 2)'), + 'AA64FpOp3P3': floatReg('((op3 * 4) + 3)'), + 'AA64FpDestP0': floatReg('((dest * 4) + 0)'), + 'AA64FpDestP1': floatReg('((dest * 4) + 1)'), + 'AA64FpDestP2': floatReg('((dest * 4) + 2)'), + 'AA64FpDestP3': floatReg('((dest * 4) + 3)'), + 'AA64FpDest2P0': floatReg('((dest2 * 4) + 0)'), + 'AA64FpDest2P1': floatReg('((dest2 * 4) + 1)'), + 'AA64FpDest2P2': floatReg('((dest2 * 4) + 2)'), + 'AA64FpDest2P3': floatReg('((dest2 * 4) + 3)'), + + 'AA64FpOp1P0V0': floatReg('((((op1+0)) * 4) + 0)'), + 'AA64FpOp1P1V0': floatReg('((((op1+0)) * 4) + 1)'), + 'AA64FpOp1P2V0': floatReg('((((op1+0)) * 4) + 2)'), + 'AA64FpOp1P3V0': floatReg('((((op1+0)) * 4) + 3)'), + + 'AA64FpOp1P0V1': floatReg('((((op1+1)) * 4) + 0)'), + 'AA64FpOp1P1V1': floatReg('((((op1+1)) * 4) + 1)'), + 'AA64FpOp1P2V1': floatReg('((((op1+1)) * 4) + 2)'), + 'AA64FpOp1P3V1': floatReg('((((op1+1)) * 4) + 3)'), + + 'AA64FpOp1P0V2': floatReg('((((op1+2)) * 4) + 0)'), + 'AA64FpOp1P1V2': floatReg('((((op1+2)) * 4) + 1)'), + 'AA64FpOp1P2V2': floatReg('((((op1+2)) * 4) + 2)'), + 'AA64FpOp1P3V2': floatReg('((((op1+2)) * 4) + 3)'), + + 'AA64FpOp1P0V3': floatReg('((((op1+3)) * 4) + 0)'), + 'AA64FpOp1P1V3': floatReg('((((op1+3)) * 4) + 1)'), + 'AA64FpOp1P2V3': floatReg('((((op1+3)) * 4) + 2)'), + 'AA64FpOp1P3V3': floatReg('((((op1+3)) * 4) + 3)'), + + 'AA64FpOp1P0V0S': floatReg('((((op1+0)%32) * 4) + 0)'), + 'AA64FpOp1P1V0S': floatReg('((((op1+0)%32) * 4) + 1)'), + 'AA64FpOp1P2V0S': floatReg('((((op1+0)%32) * 4) + 2)'), + 'AA64FpOp1P3V0S': floatReg('((((op1+0)%32) * 4) + 3)'), + + 'AA64FpOp1P0V1S': floatReg('((((op1+1)%32) * 4) + 0)'), + 'AA64FpOp1P1V1S': floatReg('((((op1+1)%32) * 4) + 1)'), + 'AA64FpOp1P2V1S': floatReg('((((op1+1)%32) * 4) + 2)'), + 'AA64FpOp1P3V1S': floatReg('((((op1+1)%32) * 4) + 3)'), + + 'AA64FpOp1P0V2S': floatReg('((((op1+2)%32) * 4) + 0)'), + 'AA64FpOp1P1V2S': floatReg('((((op1+2)%32) * 4) + 1)'), + 'AA64FpOp1P2V2S': floatReg('((((op1+2)%32) * 4) + 2)'), + 'AA64FpOp1P3V2S': floatReg('((((op1+2)%32) * 4) + 3)'), + + 'AA64FpOp1P0V3S': floatReg('((((op1+3)%32) * 4) + 0)'), + 'AA64FpOp1P1V3S': floatReg('((((op1+3)%32) * 4) + 1)'), + 'AA64FpOp1P2V3S': floatReg('((((op1+3)%32) * 4) + 2)'), + 'AA64FpOp1P3V3S': floatReg('((((op1+3)%32) * 4) + 3)'), + + 'AA64FpDestP0V0': floatReg('((((dest+0)) * 4) + 0)'), + 'AA64FpDestP1V0': floatReg('((((dest+0)) * 4) + 1)'), + 'AA64FpDestP2V0': floatReg('((((dest+0)) * 4) + 2)'), + 'AA64FpDestP3V0': floatReg('((((dest+0)) * 4) + 3)'), + + 'AA64FpDestP0V1': floatReg('((((dest+1)) * 4) + 0)'), + 'AA64FpDestP1V1': floatReg('((((dest+1)) * 4) + 1)'), + 'AA64FpDestP2V1': floatReg('((((dest+1)) * 4) + 2)'), + 'AA64FpDestP3V1': floatReg('((((dest+1)) * 4) + 3)'), + + 'AA64FpDestP0V0L': floatReg('((((dest+0)%32) * 4) + 0)'), + 'AA64FpDestP1V0L': floatReg('((((dest+0)%32) * 4) + 1)'), + 'AA64FpDestP2V0L': floatReg('((((dest+0)%32) * 4) + 2)'), + 'AA64FpDestP3V0L': floatReg('((((dest+0)%32) * 4) + 3)'), + + 'AA64FpDestP0V1L': floatReg('((((dest+1)%32) * 4) + 0)'), + 'AA64FpDestP1V1L': floatReg('((((dest+1)%32) * 4) + 1)'), + 'AA64FpDestP2V1L': floatReg('((((dest+1)%32) * 4) + 2)'), + 'AA64FpDestP3V1L': floatReg('((((dest+1)%32) * 4) + 3)'), + #Abstracted control reg operands 'MiscDest': cntrlReg('dest'), 'MiscOp1': cntrlReg('op1'), + 'MiscNsBankedDest': cntrlNsBankedReg('dest'), + 'MiscNsBankedOp1': cntrlNsBankedReg('op1'), + 'MiscNsBankedDest64': cntrlNsBankedReg64('dest'), + 'MiscNsBankedOp164': cntrlNsBankedReg64('op1'), #Fixed index control regs 'Cpsr': cntrlReg('MISCREG_CPSR', srtCpsr), @@ -244,22 +396,41 @@ def operands {{ 'FpscrQc': cntrlRegNC('MISCREG_FPSCR_QC'), 'FpscrExc': cntrlRegNC('MISCREG_FPSCR_EXC'), 'Cpacr': cntrlReg('MISCREG_CPACR'), + 'Cpacr64': cntrlReg('MISCREG_CPACR_EL1'), 'Fpexc': cntrlRegNC('MISCREG_FPEXC'), + 'Nsacr': cntrlReg('MISCREG_NSACR'), + 'ElrHyp': cntrlRegNC('MISCREG_ELR_HYP'), + 'Hcr': cntrlReg('MISCREG_HCR'), + 'Hcr64': cntrlReg('MISCREG_HCR_EL2'), + 'Hdcr': cntrlReg('MISCREG_HDCR'), + 'Hcptr': cntrlReg('MISCREG_HCPTR'), + 'CptrEl264': cntrlReg('MISCREG_CPTR_EL2'), + 'CptrEl364': cntrlReg('MISCREG_CPTR_EL3'), + 'Hstr': cntrlReg('MISCREG_HSTR'), + 'Scr': cntrlReg('MISCREG_SCR'), + 'Scr64': cntrlReg('MISCREG_SCR_EL3'), 'Sctlr': cntrlRegNC('MISCREG_SCTLR'), 'SevMailbox': cntrlRegNC('MISCREG_SEV_MAILBOX'), 'LLSCLock': cntrlRegNC('MISCREG_LOCKFLAG'), + 'Dczid' : cntrlRegNC('MISCREG_DCZID_EL0'), #Register fields for microops 'URa' : intReg('ura'), + 'XURa' : intRegX64('ura'), + 'WURa' : intRegW64('ura'), 'IWRa' : intRegIWPC('ura'), 'Fa' : floatReg('ura'), + 'FaP1' : floatReg('ura + 1'), 'URb' : intReg('urb'), + 'XURb' : intRegX64('urb'), 'URc' : intReg('urc'), + 'XURc' : intRegX64('urc'), #Memory Operand 'Mem': ('Mem', 'uw', None, ('IsMemRef', 'IsLoad', 'IsStore'), srtNormal), #PCState fields + 'RawPC': pcStateReg('pc', srtPC), 'PC': pcStateReg('instPC', srtPC), 'NPC': pcStateReg('instNPC', srtPC), 'pNPC': pcStateReg('instNPC', srtEPC), diff --git a/src/arch/arm/isa/templates/basic.isa b/src/arch/arm/isa/templates/basic.isa index b3878b89a..de4506e05 100644 --- a/src/arch/arm/isa/templates/basic.isa +++ b/src/arch/arm/isa/templates/basic.isa @@ -1,5 +1,17 @@ // -*- mode:c++ -*- +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// // Copyright (c) 2007-2008 The Florida State University // All rights reserved. // @@ -60,6 +72,13 @@ def template BasicConstructor {{ } }}; +def template BasicConstructor64 {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst) : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s) + { + %(constructor)s; + } +}}; + // Basic instruction class execute method template. def template BasicExecute {{ diff --git a/src/arch/arm/isa/templates/branch64.isa b/src/arch/arm/isa/templates/branch64.isa new file mode 100644 index 000000000..84b3e6ae7 --- /dev/null +++ b/src/arch/arm/isa/templates/branch64.isa @@ -0,0 +1,141 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +def template BranchImm64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, int64_t _imm); + %(BasicExecDeclare)s +}; +}}; + +def template BranchImm64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + int64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _imm) + { + %(constructor)s; + } +}}; + +def template BranchImmCond64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, int64_t _imm, + ConditionCode _condCode); + %(BasicExecDeclare)s +}; +}}; + +def template BranchImmCond64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + int64_t _imm, + ConditionCode _condCode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _imm, _condCode) + { + %(constructor)s; + } +}}; + +def template BranchReg64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1); + %(BasicExecDeclare)s +}; +}}; + +def template BranchReg64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _op1) + { + %(constructor)s; + } +}}; + +def template BranchImmReg64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + int64_t imm, IntRegIndex _op1); + %(BasicExecDeclare)s +}; +}}; + +def template BranchImmReg64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + int64_t _imm, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _imm, _op1) + { + %(constructor)s; + } +}}; + +def template BranchImmImmReg64Declare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, int64_t _imm1, int64_t _imm2, + IntRegIndex _op1); + %(BasicExecDeclare)s +}; +}}; + +def template BranchImmImmReg64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + int64_t _imm1, int64_t _imm2, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _imm1, _imm2, _op1) + { + %(constructor)s; + } +}}; diff --git a/src/arch/arm/isa/templates/data64.isa b/src/arch/arm/isa/templates/data64.isa new file mode 100644 index 000000000..b6f7ce8d0 --- /dev/null +++ b/src/arch/arm/isa/templates/data64.isa @@ -0,0 +1,279 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +def template DataXImmDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, uint64_t _imm); + %(BasicExecDeclare)s +}; +}}; + +def template DataXImmConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm) + { + %(constructor)s; + } +}}; + +def template DataXSRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, + int32_t _shiftAmt, ArmShiftType _shiftType); + %(BasicExecDeclare)s +}; +}}; + +def template DataXSRegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + int32_t _shiftAmt, + ArmShiftType _shiftType) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _shiftAmt, _shiftType) + { + %(constructor)s; + } +}}; + +def template DataXERegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, + ArmExtendType _extendType, int32_t _shiftAmt); + %(BasicExecDeclare)s +}; +}}; + +def template DataXERegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + ArmExtendType _extendType, + int32_t _shiftAmt) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _extendType, _shiftAmt) + { + %(constructor)s; + } +}}; + +def template DataX1RegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1); + %(BasicExecDeclare)s +}; +}}; + +def template DataX1RegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, _op1) + { + %(constructor)s; + } +}}; + +def template DataX2RegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2); + %(BasicExecDeclare)s +}; +}}; + +def template DataX2RegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2) + { + %(constructor)s; + } +}}; + +def template DataX2RegImmDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, uint64_t _imm); + %(BasicExecDeclare)s +}; +}}; + +def template DataX2RegImmConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _imm) + { + %(constructor)s; + } +}}; + +def template DataX3RegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, IntRegIndex _op3); + %(BasicExecDeclare)s +}; +}}; + +def template DataX3RegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + IntRegIndex _op3) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _op3) + { + %(constructor)s; + } +}}; + +def template DataXCondCompImmDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, + uint64_t _imm, ConditionCode _condCode, uint8_t _defCc); + %(BasicExecDeclare)s +}; +}}; + +def template DataXCondCompImmConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _op1, + uint64_t _imm, + ConditionCode _condCode, + uint8_t _defCc) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _op1, _imm, _condCode, _defCc) + { + %(constructor)s; + } +}}; + +def template DataXCondCompRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, + IntRegIndex _op2, ConditionCode _condCode, + uint8_t _defCc); + %(BasicExecDeclare)s +}; +}}; + +def template DataXCondCompRegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _op1, + IntRegIndex _op2, + ConditionCode _condCode, + uint8_t _defCc) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _op1, _op2, _condCode, _defCc) + { + %(constructor)s; + } +}}; + +def template DataXCondSelDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode); + %(BasicExecDeclare)s +}; +}}; + +def template DataXCondSelConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + ConditionCode _condCode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _condCode) + { + %(constructor)s; + } +}}; diff --git a/src/arch/arm/isa/templates/macromem.isa b/src/arch/arm/isa/templates/macromem.isa index 195204a95..465090660 100644 --- a/src/arch/arm/isa/templates/macromem.isa +++ b/src/arch/arm/isa/templates/macromem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -338,6 +338,18 @@ def template MicroIntImmConstructor {{ } }}; +def template MicroIntImmXConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + RegIndex _ura, + RegIndex _urb, + int32_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _ura, _urb, _imm) + { + %(constructor)s; + } +}}; + def template MicroIntRegDeclare {{ class %(class_name)s : public %(base_class)s { @@ -349,6 +361,28 @@ def template MicroIntRegDeclare {{ }; }}; +def template MicroIntXERegConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + RegIndex _ura, RegIndex _urb, RegIndex _urc, + ArmExtendType _type, uint32_t _shiftAmt) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _ura, _urb, _urc, _type, _shiftAmt) + { + %(constructor)s; + } +}}; + +def template MicroIntXERegDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, + RegIndex _ura, RegIndex _urb, RegIndex _urc, + ArmExtendType _type, uint32_t _shiftAmt); + %(BasicExecDeclare)s + }; +}}; + def template MicroIntRegConstructor {{ %(class_name)s::%(class_name)s(ExtMachInst machInst, RegIndex _ura, RegIndex _urb, RegIndex _urc, @@ -402,6 +436,96 @@ def template MacroMemConstructor {{ }}; +def template BigFpMemImmDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(const char *mnemonic, ExtMachInst machInst, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); + %(BasicExecPanic)s +}; +}}; + +def template BigFpMemImmConstructor {{ +%(class_name)s::%(class_name)s(const char *mnemonic, ExtMachInst machInst, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm) + : %(base_class)s(mnemonic, machInst, %(op_class)s, load, dest, base, imm) +{ + %(constructor)s; +} +}}; + +def template BigFpMemRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(const char *mnemonic, ExtMachInst machInst, + bool load, IntRegIndex dest, IntRegIndex base, + IntRegIndex offset, ArmExtendType type, int64_t imm); + %(BasicExecPanic)s +}; +}}; + +def template BigFpMemRegConstructor {{ +%(class_name)s::%(class_name)s(const char *mnemonic, ExtMachInst machInst, + bool load, IntRegIndex dest, IntRegIndex base, + IntRegIndex offset, ArmExtendType type, int64_t imm) + : %(base_class)s(mnemonic, machInst, %(op_class)s, load, dest, base, + offset, type, imm) +{ + %(constructor)s; +} +}}; + +def template BigFpMemLitDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(const char *mnemonic, ExtMachInst machInst, + IntRegIndex dest, int64_t imm); + %(BasicExecPanic)s +}; +}}; + +def template BigFpMemLitConstructor {{ +%(class_name)s::%(class_name)s(const char *mnemonic, ExtMachInst machInst, + IntRegIndex dest, int64_t imm) + : %(base_class)s(mnemonic, machInst, %(op_class)s, dest, imm) +{ + %(constructor)s; +} +}}; + +def template PairMemDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(const char *mnemonic, ExtMachInst machInst, + uint32_t size, bool fp, bool load, bool noAlloc, bool signExt, + bool exclusive, bool acrel, uint32_t imm, + AddrMode mode, IntRegIndex rn, IntRegIndex rt, + IntRegIndex rt2); + %(BasicExecPanic)s +}; +}}; + +def template PairMemConstructor {{ +%(class_name)s::%(class_name)s(const char *mnemonic, ExtMachInst machInst, + uint32_t size, bool fp, bool load, bool noAlloc, bool signExt, + bool exclusive, bool acrel, uint32_t imm, AddrMode mode, + IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) + : %(base_class)s(mnemonic, machInst, %(op_class)s, size, + fp, load, noAlloc, signExt, exclusive, acrel, + imm, mode, rn, rt, rt2) +{ + %(constructor)s; +} +}}; + def template VMemMultDeclare {{ class %(class_name)s : public %(base_class)s { diff --git a/src/arch/arm/isa/templates/mem.isa b/src/arch/arm/isa/templates/mem.isa index 871378f3f..7682c277d 100644 --- a/src/arch/arm/isa/templates/mem.isa +++ b/src/arch/arm/isa/templates/mem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -697,6 +697,11 @@ def template LoadStoreImmDeclare {{ %(InitiateAccDeclare)s %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } }; }}; @@ -763,6 +768,11 @@ def template StoreRegDeclare {{ %(InitiateAccDeclare)s %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } }; }}; @@ -808,6 +818,11 @@ def template LoadRegDeclare {{ %(InitiateAccDeclare)s %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } }; }}; @@ -828,6 +843,11 @@ def template LoadImmDeclare {{ %(InitiateAccDeclare)s %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } }; }}; diff --git a/src/arch/arm/isa/templates/mem64.isa b/src/arch/arm/isa/templates/mem64.isa new file mode 100644 index 000000000..87dcba988 --- /dev/null +++ b/src/arch/arm/isa/templates/mem64.isa @@ -0,0 +1,686 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + SPAlignmentCheckCode = ''' + if (baseIsSP && bits(XBase, 3, 0) && + SPAlignmentCheckEnabled(xc->tcBase())) { + return new SPAlignmentFault(); + } + ''' +}}; + +def template Load64Execute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + fault = readMemAtomic(xc, traceData, EA, Mem, memAccessFlags); + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template Store64Execute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = writeMemAtomic(xc, traceData, Mem, EA, + memAccessFlags, NULL); + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template Store64InitiateAcc {{ + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = writeMemTiming(xc, traceData, Mem, EA, memAccessFlags, + NULL); + } + + return fault; + } +}}; + +def template StoreEx64Execute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + uint64_t writeResult = 0; + if (fault == NoFault) { + fault = writeMemAtomic(xc, traceData, Mem, EA, memAccessFlags, + &writeResult); + } + + if (fault == NoFault) { + %(postacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template StoreEx64InitiateAcc {{ + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = writeMemTiming(xc, traceData, Mem, EA, memAccessFlags, + NULL); + } + + return fault; + } +}}; + +def template Load64InitiateAcc {{ + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + fault = readMemTiming(xc, traceData, EA, Mem, memAccessFlags); + } + + return fault; + } +}}; + +def template Load64CompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, + %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + + // ARM instructions will not have a pkt if the predicate is false + getMem(pkt, Mem, traceData); + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template Store64CompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, + %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + return NoFault; + } +}}; + +def template StoreEx64CompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, + %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + + uint64_t writeResult = pkt->req->getExtraData(); + %(postacc_code)s; + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template DCStore64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, IntRegIndex _base, IntRegIndex _dest, uint64_t _imm); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template DCStore64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, IntRegIndex _base, IntRegIndex _dest, uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_base, _dest, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + } +}}; + +def template DCStore64Execute {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = xc->writeMem(NULL, op_size, EA, memAccessFlags, NULL); + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template DCStore64InitiateAcc {{ + Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = xc->writeMem(NULL, op_size, EA, memAccessFlags, NULL); + } + + return fault; + } +}}; + + +def template LoadStoreImm64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreImmU64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm, + bool noAlloc = false, bool exclusive = false, + bool acrel = false); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreImmDU64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _dest2, IntRegIndex _base, + int64_t _imm = 0, bool noAlloc = false, bool exclusive = false, + bool acrel = false); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template StoreImmDEx64Declare {{ + /** + * Static instruction class for "%(mnemonic)s". + */ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _result, IntRegIndex _dest, IntRegIndex _dest2, + IntRegIndex _base, int64_t _imm = 0); + + %(BasicExecDeclare)s + + %(InitiateAccDeclare)s + + %(CompleteAccDeclare)s + }; +}}; + + +def template LoadStoreReg64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _offset, + ArmExtendType _type, uint32_t _shiftAmt); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreRegU64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _offset, + ArmExtendType _type, uint32_t _shiftAmt, + bool noAlloc = false, bool exclusive = false, + bool acrel = false); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreRaw64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _base); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreEx64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + IntRegIndex _base, IntRegIndex _result); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreLit64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, int64_t _imm); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreLitU64Declare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, int64_t _imm, + bool noAlloc = false, bool exclusive = false, + bool acrel = false); + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + + virtual void + annotateFault(ArmFault *fault) { + %(fa_code)s + } + }; +}}; + +def template LoadStoreImm64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_dest, (IntRegIndex)_base, _imm) + { + %(constructor)s; +#if %(use_uops)d + assert(numMicroops >= 2); + uops = new StaticInstPtr[numMicroops]; + uops[0] = new %(acc_name)s(machInst, _dest, _base, _imm); + uops[0]->setDelayedCommit(); + uops[1] = new %(wb_decl)s; + uops[1]->setLastMicroop(); +#endif + } +}}; + +def template LoadStoreImmU64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm, + bool noAlloc, bool exclusive, bool acrel) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _base, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + setExcAcRel(exclusive, acrel); + } +}}; + +def template LoadStoreImmDU64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _dest2, IntRegIndex _base, + int64_t _imm, bool noAlloc, bool exclusive, bool acrel) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _dest2, _base, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + setExcAcRel(exclusive, acrel); + } +}}; + +def template StoreImmDEx64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _result, IntRegIndex _dest, IntRegIndex _dest2, + IntRegIndex _base, int64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _result, _dest, _dest2, _base, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + } +}}; + + +def template LoadStoreReg64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _offset, + ArmExtendType _type, uint32_t _shiftAmt) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _base, _offset, _type, _shiftAmt) + { + %(constructor)s; +#if %(use_uops)d + assert(numMicroops >= 2); + uops = new StaticInstPtr[numMicroops]; + uops[0] = new %(acc_name)s(machInst, _dest, _base, _offset, + _type, _shiftAmt); + uops[0]->setDelayedCommit(); + uops[1] = new %(wb_decl)s; + uops[1]->setLastMicroop(); +#endif + } +}}; + +def template LoadStoreRegU64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _offset, + ArmExtendType _type, uint32_t _shiftAmt, + bool noAlloc, bool exclusive, bool acrel) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _base, _offset, _type, _shiftAmt) + { + %(constructor)s; + assert(!%(use_uops)d); + setExcAcRel(exclusive, acrel); + } +}}; + +def template LoadStoreRaw64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, _base) + { + %(constructor)s; + } +}}; + +def template LoadStoreEx64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _base, IntRegIndex _result) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _base, _result) + { + %(constructor)s; + } +}}; + +def template LoadStoreLit64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, int64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_dest, _imm) + { + %(constructor)s; +#if %(use_uops)d + assert(numMicroops >= 2); + uops = new StaticInstPtr[numMicroops]; + uops[0] = new %(acc_name)s(machInst, _dest, _imm); + uops[0]->setDelayedCommit(); + uops[1] = new %(wb_decl)s; + uops[1]->setLastMicroop(); +#endif + } +}}; + +def template LoadStoreLitU64Constructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, int64_t _imm, + bool noAlloc, bool exclusive, bool acrel) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + (IntRegIndex)_dest, _imm) + { + %(constructor)s; + assert(!%(use_uops)d); + setExcAcRel(exclusive, acrel); + } +}}; diff --git a/src/arch/arm/isa/templates/misc.isa b/src/arch/arm/isa/templates/misc.isa index 212897aa0..36db5b6c2 100644 --- a/src/arch/arm/isa/templates/misc.isa +++ b/src/arch/arm/isa/templates/misc.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -62,6 +62,69 @@ def template MrsConstructor {{ } }}; +def template MrsBankedRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + uint8_t byteMask; + bool r; + + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _dest, + uint8_t _sysM, bool _r); + %(BasicExecDeclare)s +}; +}}; + +def template MrsBankedRegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + uint8_t _sysM, + bool _r) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest), + byteMask(_sysM), r(_r) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + +def template MsrBankedRegDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + bool r; + + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, + uint8_t _sysM, bool _r); + %(BasicExecDeclare)s +}; +}}; + +def template MsrBankedRegConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _op1, + uint8_t _sysM, + bool _r) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _op1, _sysM), + r(_r) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + def template MsrRegDeclare {{ class %(class_name)s : public %(base_class)s { @@ -114,6 +177,66 @@ def template MsrImmConstructor {{ } }}; +def template MrrcOpDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, + IntRegIndex _dest, IntRegIndex _dest2, uint32_t imm); + %(BasicExecDeclare)s +}; +}}; + +def template MrrcOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex op1, + IntRegIndex dest, + IntRegIndex dest2, + uint32_t imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, op1, dest, + dest2, imm) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + +def template McrrOpDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _dest, uint32_t imm); + %(BasicExecDeclare)s +}; +}}; + +def template McrrOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex op1, + IntRegIndex op2, + IntRegIndex dest, + uint32_t imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, op1, op2, + dest, imm) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + def template ImmOpDeclare {{ class %(class_name)s : public %(base_class)s { @@ -310,6 +433,35 @@ def template RegRegImmOpConstructor {{ } }}; +def template RegImmImmOpDeclare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, uint64_t _imm1, uint64_t _imm2); + %(BasicExecDeclare)s +}; +}}; + +def template RegImmImmOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + uint64_t _imm1, + uint64_t _imm2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm1, _imm2) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + def template RegRegImmImmOpDeclare {{ class %(class_name)s : public %(base_class)s { diff --git a/src/arch/arm/isa/templates/misc64.isa b/src/arch/arm/isa/templates/misc64.isa new file mode 100644 index 000000000..09d3d4470 --- /dev/null +++ b/src/arch/arm/isa/templates/misc64.isa @@ -0,0 +1,91 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +def template RegRegImmImmOp64Declare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, + uint64_t _imm1, uint64_t _imm2); + %(BasicExecDeclare)s +}; +}}; + +def template RegRegImmImmOp64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + uint64_t _imm1, + uint64_t _imm2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm1, _imm2) + { + %(constructor)s; + } +}}; + +def template RegRegRegImmOp64Declare {{ +class %(class_name)s : public %(base_class)s +{ + protected: + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, uint64_t _imm); + %(BasicExecDeclare)s +}; +}}; + +def template RegRegRegImmOp64Constructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _imm) + { + %(constructor)s; + } +}}; + diff --git a/src/arch/arm/isa/templates/neon.isa b/src/arch/arm/isa/templates/neon.isa index 573d245b8..ffa6b53d4 100644 --- a/src/arch/arm/isa/templates/neon.isa +++ b/src/arch/arm/isa/templates/neon.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -39,8 +39,26 @@ let {{ simdEnabledCheckCode = ''' - if (!neonEnabled(Cpacr, Cpsr, Fpexc)) - return disabledFault(); + { + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq, Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase(), Fpexc, true)) + {return disabledFault();} + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } + } ''' }}; diff --git a/src/arch/arm/isa/templates/neon64.isa b/src/arch/arm/isa/templates/neon64.isa new file mode 100644 index 000000000..d20e4e653 --- /dev/null +++ b/src/arch/arm/isa/templates/neon64.isa @@ -0,0 +1,527 @@ +// -*- mode: c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Mbou Eyole +// Giacomo Gabrielli + +let {{ + simd64EnabledCheckCode = vfp64EnabledCheckCode +}}; + +def template NeonX2RegOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX2RegImmOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _imm) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX1RegOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX1RegImmOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX1Reg2ImmOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm1, + uint64_t _imm2) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm1, _imm2) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonX1RegImmOnlyOpDeclare {{ +template <class _Element> +class %(class_name)s : public %(base_class)s +{ + protected: + typedef _Element Element; + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, uint64_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm) + { + %(constructor)s; + } + + %(BasicExecDeclare)s +}; +}}; + +def template NeonXExecDeclare {{ + template + Fault %(class_name)s<%(targs)s>::execute( + %(CPU_exec_context)s *, Trace::InstRecord *) const; +}}; + +def template NeonXEqualRegOpExecute {{ + template <class Element> + Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + %(op_decl)s; + %(op_rd)s; + + const unsigned rCount = %(r_count)d; + const unsigned eCount = rCount * sizeof(FloatRegBits) / sizeof(Element); + const unsigned eCountFull = 4 * sizeof(FloatRegBits) / sizeof(Element); + + union RegVect { + FloatRegBits regs[rCount]; + Element elements[eCount]; + }; + + union FullRegVect { + FloatRegBits regs[4]; + Element elements[eCountFull]; + }; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; + +def template NeonXUnequalRegOpExecute {{ + template <class Element> + Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + typedef typename bigger_type_t<Element>::type BigElement; + Fault fault = NoFault; + %(op_decl)s; + %(op_rd)s; + + const unsigned rCount = %(r_count)d; + const unsigned eCount = rCount * sizeof(FloatRegBits) / sizeof(Element); + const unsigned eCountFull = 4 * sizeof(FloatRegBits) / sizeof(Element); + + union RegVect { + FloatRegBits regs[rCount]; + Element elements[eCount]; + BigElement bigElements[eCount / 2]; + }; + + union BigRegVect { + FloatRegBits regs[2 * rCount]; + BigElement elements[eCount]; + }; + + union FullRegVect { + FloatRegBits regs[4]; + Element elements[eCountFull]; + }; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; + +def template MicroNeonMemDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + protected: + // True if the base register is SP (used for SP alignment checking) + bool baseIsSP; + // Access size in bytes + uint8_t accSize; + // Vector element size (0 -> 8-bit, 1 -> 16-bit, 2 -> 32-bit, + // 3 -> 64-bit) + uint8_t eSize; + + public: + %(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _ura, + uint32_t _imm, unsigned extraMemFlags, bool _baseIsSP, + uint8_t _accSize, uint8_t _eSize) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, + _ura, _imm), + baseIsSP(_baseIsSP), accSize(_accSize), eSize(_eSize) + { + memAccessFlags |= extraMemFlags; + %(constructor)s; + } + + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + }; +}}; + +def template NeonLoadExecute64 {{ + Fault %(class_name)s::execute( + %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(mem_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemUnion memUnion; + uint8_t *dataPtr = memUnion.bytes; + + if (fault == NoFault) { + fault = xc->readMem(EA, dataPtr, accSize, memAccessFlags); + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template NeonLoadInitiateAcc64 {{ + Fault %(class_name)s::initiateAcc( + %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(mem_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemUnion memUnion; + uint8_t *dataPtr = memUnion.bytes; + + if (fault == NoFault) { + fault = xc->readMem(EA, dataPtr, accSize, memAccessFlags); + } + + return fault; + } +}}; + +def template NeonLoadCompleteAcc64 {{ + Fault %(class_name)s::completeAcc( + PacketPtr pkt, %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + + %(mem_decl)s; + %(op_decl)s; + %(op_rd)s; + + MemUnion &memUnion = *(MemUnion *)pkt->getPtr<uint8_t>(); + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template NeonStoreExecute64 {{ + Fault %(class_name)s::execute( + %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(mem_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemUnion memUnion; + uint8_t *dataPtr = memUnion.bytes; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = xc->writeMem(dataPtr, accSize, EA, memAccessFlags, + NULL); + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template NeonStoreInitiateAcc64 {{ + Fault %(class_name)s::initiateAcc( + %(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(mem_decl)s; + %(op_rd)s; + %(ea_code)s; + + MemUnion memUnion; + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = xc->writeMem(memUnion.bytes, accSize, EA, memAccessFlags, + NULL); + } + + return fault; + } +}}; + +def template NeonStoreCompleteAcc64 {{ + Fault %(class_name)s::completeAcc( + PacketPtr pkt, %(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + return NoFault; + } +}}; + +def template VMemMultDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + public: + // Constructor + %(class_name)s(ExtMachInst machInst, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t numRegs, bool wb); + %(BasicExecPanic)s + }; +}}; + +def template VMemSingleDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + public: + // Constructor + %(class_name)s(ExtMachInst machInst, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t index, bool wb, + bool replicate = false); + %(BasicExecPanic)s + }; +}}; + +def template VMemMultConstructor64 {{ + %(class_name)s::%(class_name)s( + ExtMachInst machInst, RegIndex rn, RegIndex vd, RegIndex rm, + uint8_t _eSize, uint8_t _dataSize, uint8_t _numStructElems, + uint8_t _numRegs, bool _wb) : + %(base_class)s( + "%(mnemonic)s", machInst, %(op_class)s, rn, vd, rm, + _eSize, _dataSize, _numStructElems, _numRegs, _wb) + { + %(constructor)s; + } +}}; + +def template VMemSingleConstructor64 {{ + %(class_name)s::%(class_name)s( + ExtMachInst machInst, RegIndex rn, RegIndex vd, RegIndex rm, + uint8_t _eSize, uint8_t _dataSize, uint8_t _numStructElems, + uint8_t _index, bool _wb, bool _replicate) : + %(base_class)s( + "%(mnemonic)s", machInst, %(op_class)s, rn, vd, rm, + _eSize, _dataSize, _numStructElems, _index, _wb, + _replicate) + { + %(constructor)s; + } +}}; + +def template MicroNeonMixDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _op1, + uint8_t _eSize, uint8_t _dataSize, + uint8_t _numStructElems, uint8_t _numRegs, + uint8_t _step) : + %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _eSize, _dataSize, _numStructElems, + _numRegs, _step) + { + %(constructor)s; + } + + %(BasicExecDeclare)s + }; +}}; + +def template MicroNeonMixLaneDeclare64 {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _op1, + uint8_t _eSize, uint8_t _dataSize, + uint8_t _numStructElems, uint8_t _lane, uint8_t _step, + bool _replicate = false) : + %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _eSize, _dataSize, _numStructElems, + _lane, _step, _replicate) + { + %(constructor)s; + } + + %(BasicExecDeclare)s + }; +}}; + +def template MicroNeonMixExecute64 {{ + Fault %(class_name)s::execute(%(CPU_exec_context)s *xc, + Trace::InstRecord *traceData) const + { + Fault fault = NoFault; + uint64_t resTemp = 0; + resTemp = resTemp; + %(op_decl)s; + %(op_rd)s; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; diff --git a/src/arch/arm/isa/templates/templates.isa b/src/arch/arm/isa/templates/templates.isa index 148139225..2263cdff4 100644 --- a/src/arch/arm/isa/templates/templates.isa +++ b/src/arch/arm/isa/templates/templates.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -40,26 +40,37 @@ //Basic instruction templates ##include "basic.isa" +//Templates for AArch64 bit data instructions. +##include "data64.isa" + //Templates for predicated instructions ##include "pred.isa" //Templates for memory instructions ##include "mem.isa" +//Templates for AArch64 memory instructions +##include "mem64.isa" + //Miscellaneous instructions that don't fit elsewhere ##include "misc.isa" +##include "misc64.isa" //Templates for microcoded memory instructions ##include "macromem.isa" //Templates for branches ##include "branch.isa" +##include "branch64.isa" //Templates for multiplies ##include "mult.isa" //Templates for VFP instructions ##include "vfp.isa" +##include "vfp64.isa" //Templates for Neon instructions ##include "neon.isa" + +##include "neon64.isa" diff --git a/src/arch/arm/isa/templates/vfp.isa b/src/arch/arm/isa/templates/vfp.isa index 90dd751ff..176b6604c 100644 --- a/src/arch/arm/isa/templates/vfp.isa +++ b/src/arch/arm/isa/templates/vfp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -39,32 +39,117 @@ let {{ vfpEnabledCheckCode = ''' - if (!vfpEnabled(Cpacr, Cpsr, Fpexc)) - return disabledFault(); + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq,Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase(), Fpexc)) + {return disabledFault();} + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } + ''' + + vfp64EnabledCheckCode = ''' + CPSR cpsrEnCheck = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsrEnCheck.el; + if (!vfpNeon64Enabled(Cpacr64, el)) + return new SupervisorTrap(machInst, 0x1E00000, + EC_TRAPPED_SIMD_FP); + + if (ArmSystem::haveVirtualization(xc->tcBase()) && el <= EL2) { + HCPTR cptrEnCheck = xc->tcBase()->readMiscReg(MISCREG_CPTR_EL2); + if (cptrEnCheck.tfp) + return new HypervisorTrap(machInst, 0x1E00000, + EC_TRAPPED_SIMD_FP); + } + + if (ArmSystem::haveSecurity(xc->tcBase())) { + HCPTR cptrEnCheck = xc->tcBase()->readMiscReg(MISCREG_CPTR_EL3); + if (cptrEnCheck.tfp) + return new SecureMonitorTrap(machInst, 0x1E00000, + EC_TRAPPED_SIMD_FP); + } ''' vmsrEnabledCheckCode = ''' - if (!vfpEnabled(Cpacr, Cpsr)) + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq,Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase())) if (dest != (int)MISCREG_FPEXC && dest != (int)MISCREG_FPSID) - return disabledFault(); + {return disabledFault();} if (!inPrivilegedMode(Cpsr)) if (dest != (int)MISCREG_FPSCR) return disabledFault(); - + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } ''' vmrsEnabledCheckCode = ''' - if (!vfpEnabled(Cpacr, Cpsr)) + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq,Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase())) if (op1 != (int)MISCREG_FPEXC && op1 != (int)MISCREG_FPSID && op1 != (int)MISCREG_MVFR0 && op1 != (int)MISCREG_MVFR1) - return disabledFault(); + {return disabledFault();} if (!inPrivilegedMode(Cpsr)) if (op1 != (int)MISCREG_FPSCR) return disabledFault(); + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } ''' vmrsApsrEnabledCheckCode = ''' - if (!vfpEnabled(Cpacr, Cpsr)) - return disabledFault(); + uint32_t issEnCheck; + bool trapEnCheck; + uint32_t seq; + if (!vfpNeonEnabled(seq,Hcptr, Nsacr, Cpacr, Cpsr, issEnCheck, + trapEnCheck, xc->tcBase())) + {return disabledFault();} + if (trapEnCheck) { + CPSR cpsrEnCheck = Cpsr; + if (cpsrEnCheck.mode == MODE_HYP) { + return new UndefinedInstruction(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } else { + if (!inSecureState(Scr, Cpsr)) { + return new HypervisorTrap(machInst, issEnCheck, + EC_TRAPPED_HCPTR); + } + } + } ''' }}; diff --git a/src/arch/arm/isa/templates/vfp64.isa b/src/arch/arm/isa/templates/vfp64.isa new file mode 100644 index 000000000..518cedaae --- /dev/null +++ b/src/arch/arm/isa/templates/vfp64.isa @@ -0,0 +1,140 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2012 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Thomas Grocutt + +def template AA64FpRegRegOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegRegOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegImmOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, uint64_t _imm, VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _imm, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegRegImmOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + uint64_t _imm, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _imm, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegRegRegOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; + +def template AA64FpRegRegRegRegOpDeclare {{ +class %(class_name)s : public %(base_class)s +{ + public: + // Constructor + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop); + %(BasicExecDeclare)s +}; +}}; + +def template AA64FpRegRegRegRegOpConstructor {{ + inline %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, + IntRegIndex _op1, + IntRegIndex _op2, + IntRegIndex _op3, + VfpMicroMode mode) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2, _op3, mode) + { + %(constructor)s; + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } +}}; diff --git a/src/arch/arm/isa_traits.hh b/src/arch/arm/isa_traits.hh index 742ca2037..506c5009c 100644 --- a/src/arch/arm/isa_traits.hh +++ b/src/arch/arm/isa_traits.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -95,6 +95,9 @@ namespace ArmISA const Addr PAddrImplMask = (ULL(1) << PABits) - 1; + // Max. physical address range in bits supported by the architecture + const unsigned MaxPhysAddrRange = 48; + // return a no-op instruction... used for instruction fetch faults const ExtMachInst NoopMachInst = 0x01E320F000ULL; @@ -124,6 +127,8 @@ namespace ArmISA INT_IRQ, INT_FIQ, INT_SEV, // Special interrupt for recieving SEV's + INT_VIRT_IRQ, + INT_VIRT_FIQ, NumInterruptTypes }; } // namespace ArmISA diff --git a/src/arch/arm/linux/linux.cc b/src/arch/arm/linux/linux.cc index 1e3a1e725..62519d38b 100644 --- a/src/arch/arm/linux/linux.cc +++ b/src/arch/arm/linux/linux.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2003-2005 The Regents of The University of Michigan * Copyright (c) 2007-2008 The Florida State University * All rights reserved. @@ -34,55 +46,108 @@ #include "arch/arm/linux/linux.hh" // open(2) flags translation table -OpenFlagTransTable ArmLinux::openFlagTable[] = { +OpenFlagTransTable ArmLinux32::openFlagTable[] = { +#ifdef _MSC_VER + { ArmLinux32::TGT_O_RDONLY, _O_RDONLY }, + { ArmLinux32::TGT_O_WRONLY, _O_WRONLY }, + { ArmLinux32::TGT_O_RDWR, _O_RDWR }, + { ArmLinux32::TGT_O_APPEND, _O_APPEND }, + { ArmLinux32::TGT_O_CREAT, _O_CREAT }, + { ArmLinux32::TGT_O_TRUNC, _O_TRUNC }, + { ArmLinux32::TGT_O_EXCL, _O_EXCL }, +#ifdef _O_NONBLOCK + { ArmLinux32::TGT_O_NONBLOCK, _O_NONBLOCK }, +#endif +#ifdef _O_NOCTTY + { ArmLinux32::TGT_O_NOCTTY, _O_NOCTTY }, +#endif +#ifdef _O_SYNC + { ArmLinux32::TGT_O_SYNC, _O_SYNC }, +#endif +#else /* !_MSC_VER */ + { ArmLinux32::TGT_O_RDONLY, O_RDONLY }, + { ArmLinux32::TGT_O_WRONLY, O_WRONLY }, + { ArmLinux32::TGT_O_RDWR, O_RDWR }, + { ArmLinux32::TGT_O_CREAT, O_CREAT }, + { ArmLinux32::TGT_O_EXCL, O_EXCL }, + { ArmLinux32::TGT_O_NOCTTY, O_NOCTTY }, + { ArmLinux32::TGT_O_TRUNC, O_TRUNC }, + { ArmLinux32::TGT_O_APPEND, O_APPEND }, + { ArmLinux32::TGT_O_NONBLOCK, O_NONBLOCK }, +#ifdef O_SYNC + { ArmLinux32::TGT_O_SYNC, O_SYNC }, +#endif +#ifdef FASYNC + { ArmLinux32::TGT_FASYNC, FASYNC }, +#endif +#ifdef O_DIRECT + { ArmLinux32::TGT_O_DIRECT, O_DIRECT }, +#endif +#ifdef O_LARGEFILE + { ArmLinux32::TGT_O_LARGEFILE, O_LARGEFILE }, +#endif +#ifdef O_DIRECTORY + { ArmLinux32::TGT_O_DIRECTORY, O_DIRECTORY }, +#endif +#ifdef O_NOFOLLOW + { ArmLinux32::TGT_O_NOFOLLOW, O_NOFOLLOW }, +#endif +#endif /* _MSC_VER */ +}; + +const int ArmLinux32::NUM_OPEN_FLAGS = sizeof(ArmLinux32::openFlagTable) / + sizeof(ArmLinux32::openFlagTable[0]); + +// open(2) flags translation table +OpenFlagTransTable ArmLinux64::openFlagTable[] = { #ifdef _MSC_VER - { ArmLinux::TGT_O_RDONLY, _O_RDONLY }, - { ArmLinux::TGT_O_WRONLY, _O_WRONLY }, - { ArmLinux::TGT_O_RDWR, _O_RDWR }, - { ArmLinux::TGT_O_APPEND, _O_APPEND }, - { ArmLinux::TGT_O_CREAT, _O_CREAT }, - { ArmLinux::TGT_O_TRUNC, _O_TRUNC }, - { ArmLinux::TGT_O_EXCL, _O_EXCL }, + { ArmLinux64::TGT_O_RDONLY, _O_RDONLY }, + { ArmLinux64::TGT_O_WRONLY, _O_WRONLY }, + { ArmLinux64::TGT_O_RDWR, _O_RDWR }, + { ArmLinux64::TGT_O_APPEND, _O_APPEND }, + { ArmLinux64::TGT_O_CREAT, _O_CREAT }, + { ArmLinux64::TGT_O_TRUNC, _O_TRUNC }, + { ArmLinux64::TGT_O_EXCL, _O_EXCL }, #ifdef _O_NONBLOCK - { ArmLinux::TGT_O_NONBLOCK, _O_NONBLOCK }, + { ArmLinux64::TGT_O_NONBLOCK, _O_NONBLOCK }, #endif #ifdef _O_NOCTTY - { ArmLinux::TGT_O_NOCTTY, _O_NOCTTY }, + { ArmLinux64::TGT_O_NOCTTY, _O_NOCTTY }, #endif #ifdef _O_SYNC - { ArmLinux::TGT_O_SYNC, _O_SYNC }, + { ArmLinux64::TGT_O_SYNC, _O_SYNC }, #endif #else /* !_MSC_VER */ - { ArmLinux::TGT_O_RDONLY, O_RDONLY }, - { ArmLinux::TGT_O_WRONLY, O_WRONLY }, - { ArmLinux::TGT_O_RDWR, O_RDWR }, - { ArmLinux::TGT_O_CREAT, O_CREAT }, - { ArmLinux::TGT_O_EXCL, O_EXCL }, - { ArmLinux::TGT_O_NOCTTY, O_NOCTTY }, - { ArmLinux::TGT_O_TRUNC, O_TRUNC }, - { ArmLinux::TGT_O_APPEND, O_APPEND }, - { ArmLinux::TGT_O_NONBLOCK, O_NONBLOCK }, + { ArmLinux64::TGT_O_RDONLY, O_RDONLY }, + { ArmLinux64::TGT_O_WRONLY, O_WRONLY }, + { ArmLinux64::TGT_O_RDWR, O_RDWR }, + { ArmLinux64::TGT_O_CREAT, O_CREAT }, + { ArmLinux64::TGT_O_EXCL, O_EXCL }, + { ArmLinux64::TGT_O_NOCTTY, O_NOCTTY }, + { ArmLinux64::TGT_O_TRUNC, O_TRUNC }, + { ArmLinux64::TGT_O_APPEND, O_APPEND }, + { ArmLinux64::TGT_O_NONBLOCK, O_NONBLOCK }, #ifdef O_SYNC - { ArmLinux::TGT_O_SYNC, O_SYNC }, + { ArmLinux64::TGT_O_SYNC, O_SYNC }, #endif #ifdef FASYNC - { ArmLinux::TGT_FASYNC, FASYNC }, + { ArmLinux64::TGT_FASYNC, FASYNC }, #endif #ifdef O_DIRECT - { ArmLinux::TGT_O_DIRECT, O_DIRECT }, + { ArmLinux64::TGT_O_DIRECT, O_DIRECT }, #endif #ifdef O_LARGEFILE - { ArmLinux::TGT_O_LARGEFILE, O_LARGEFILE }, + { ArmLinux64::TGT_O_LARGEFILE, O_LARGEFILE }, #endif #ifdef O_DIRECTORY - { ArmLinux::TGT_O_DIRECTORY, O_DIRECTORY }, + { ArmLinux64::TGT_O_DIRECTORY, O_DIRECTORY }, #endif #ifdef O_NOFOLLOW - { ArmLinux::TGT_O_NOFOLLOW, O_NOFOLLOW }, + { ArmLinux64::TGT_O_NOFOLLOW, O_NOFOLLOW }, #endif #endif /* _MSC_VER */ }; -const int ArmLinux::NUM_OPEN_FLAGS = - (sizeof(ArmLinux::openFlagTable)/sizeof(ArmLinux::openFlagTable[0])); +const int ArmLinux64::NUM_OPEN_FLAGS = sizeof(ArmLinux64::openFlagTable) / + sizeof(ArmLinux64::openFlagTable[0]); diff --git a/src/arch/arm/linux/linux.hh b/src/arch/arm/linux/linux.hh index 5a3e68a78..fbf5d2185 100644 --- a/src/arch/arm/linux/linux.hh +++ b/src/arch/arm/linux/linux.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2011-2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -47,7 +47,7 @@ #include "kern/linux/linux.hh" -class ArmLinux : public Linux +class ArmLinux32 : public Linux { public: @@ -123,8 +123,10 @@ class ArmLinux : public Linux uint16_t st_uid; uint16_t st_gid; uint32_t st_rdev; + uint32_t __pad1; uint32_t st_size; uint32_t st_blksize; + uint32_t __pad2; uint32_t st_blocks; uint32_t st_atimeX; uint32_t st_atime_nsec; @@ -198,8 +200,192 @@ class ArmLinux : public Linux int32_t tms_cutime; //!< user time of children int32_t tms_cstime; //!< system time of children }; +}; + +class ArmLinux64 : public Linux +{ + public: + + /// This table maps the target open() flags to the corresponding + /// host open() flags. + static OpenFlagTransTable openFlagTable[]; + + /// Number of entries in openFlagTable[]. + static const int NUM_OPEN_FLAGS; + + //@{ + /// Basic ARM Linux types + typedef uint64_t size_t; + typedef uint64_t off_t; + typedef int64_t time_t; + typedef int64_t clock_t; + //@} + + //@{ + /// open(2) flag values. + static const int TGT_O_RDONLY = 00000000; //!< O_RDONLY + static const int TGT_O_WRONLY = 00000001; //!< O_WRONLY + static const int TGT_O_RDWR = 00000002; //!< O_RDWR + static const int TGT_O_CREAT = 00000100; //!< O_CREAT + static const int TGT_O_EXCL = 00000200; //!< O_EXCL + static const int TGT_O_NOCTTY = 00000400; //!< O_NOCTTY + static const int TGT_O_TRUNC = 00001000; //!< O_TRUNC + static const int TGT_O_APPEND = 00002000; //!< O_APPEND + static const int TGT_O_NONBLOCK = 00004000; //!< O_NONBLOCK + static const int TGT_O_SYNC = 00010000; //!< O_SYNC + static const int TGT_FASYNC = 00020000; //!< FASYNC + static const int TGT_O_DIRECT = 00040000; //!< O_DIRECT + static const int TGT_O_LARGEFILE = 00100000; //!< O_LARGEFILE + static const int TGT_O_DIRECTORY = 00200000; //!< O_DIRECTORY + static const int TGT_O_NOFOLLOW = 00400000; //!< O_NOFOLLOW + static const int TGT_O_NOATIME = 01000000; //!< O_NOATIME + static const int TGT_O_CLOEXEC = 02000000; //!< O_NOATIME + //@} + /// For mmap(). + static const unsigned TGT_MAP_ANONYMOUS = 0x20; + static const unsigned TGT_MAP_FIXED = 0x10; + //@{ + /// For getrusage(). + static const int TGT_RUSAGE_SELF = 0; + static const int TGT_RUSAGE_CHILDREN = -1; + static const int TGT_RUSAGE_BOTH = -2; + //@} + + //@{ + /// ioctl() command codes. + static const unsigned TIOCGETP_ = 0x5401; + static const unsigned TIOCSETP_ = 0x80067409; + static const unsigned TIOCSETN_ = 0x8006740a; + static const unsigned TIOCSETC_ = 0x80067411; + static const unsigned TIOCGETC_ = 0x40067412; + static const unsigned FIONREAD_ = 0x4004667f; + static const unsigned TIOCISATTY_ = 0x2000745e; + static const unsigned TIOCGETS_ = 0x402c7413; + static const unsigned TIOCGETA_ = 0x5405; + static const unsigned TCSETAW_ = 0x5407; // 2.6.15 kernel + //@} + + /// For table(). + static const int TBL_SYSINFO = 12; + + /// Resource enumeration for getrlimit(). + enum rlimit_resources { + TGT_RLIMIT_CPU = 0, + TGT_RLIMIT_FSIZE = 1, + TGT_RLIMIT_DATA = 2, + TGT_RLIMIT_STACK = 3, + TGT_RLIMIT_CORE = 4, + TGT_RLIMIT_RSS = 5, + TGT_RLIMIT_NPROC = 6, + TGT_RLIMIT_NOFILE = 7, + TGT_RLIMIT_MEMLOCK = 8, + TGT_RLIMIT_AS = 9, + TGT_RLIMIT_LOCKS = 10 + }; + + /// Limit struct for getrlimit/setrlimit. + struct rlimit { + uint64_t rlim_cur; //!< soft limit + uint64_t rlim_max; //!< hard limit + }; + + /// For gettimeofday(). + struct timeval { + int64_t tv_sec; //!< seconds + int64_t tv_usec; //!< microseconds + }; + + // For writev/readv + struct tgt_iovec { + uint64_t iov_base; // void * + uint64_t iov_len; + }; + + typedef struct { + uint64_t st_dev; + uint64_t st_ino; + uint64_t st_nlink; + uint32_t st_mode; + uint32_t st_uid; + uint32_t st_gid; + uint32_t __pad0; + uint64_t st_rdev; + uint64_t st_size; + uint64_t st_blksize; + uint64_t st_blocks; + uint64_t st_atimeX; + uint64_t st_atime_nsec; + uint64_t st_mtimeX; + uint64_t st_mtime_nsec; + uint64_t st_ctimeX; + uint64_t st_ctime_nsec; + } tgt_stat; + + typedef struct { + uint64_t st_dev; + uint64_t st_ino; + uint32_t st_mode; + uint32_t st_nlink; + uint32_t st_uid; + uint32_t st_gid; + uint32_t __pad0; + uint64_t st_rdev; + uint64_t st_size; + uint64_t st_blksize; + uint64_t st_blocks; + uint64_t st_atimeX; + uint64_t st_atime_nsec; + uint64_t st_mtimeX; + uint64_t st_mtime_nsec; + uint64_t st_ctimeX; + uint64_t st_ctime_nsec; + } tgt_stat64; + + typedef struct { + int64_t uptime; /* Seconds since boot */ + uint64_t loads[3]; /* 1, 5, and 15 minute load averages */ + uint64_t totalram; /* Total usable main memory size */ + uint64_t freeram; /* Available memory size */ + uint64_t sharedram; /* Amount of shared memory */ + uint64_t bufferram; /* Memory used by buffers */ + uint64_t totalswap; /* Total swap space size */ + uint64_t freeswap; /* swap space still available */ + uint16_t procs; /* Number of current processes */ + uint16_t pad; + uint64_t totalhigh; /* Total high memory size */ + uint64_t freehigh; /* Available high memory size */ + uint32_t mem_unit; /* Memory unit size in bytes */ + } tgt_sysinfo; + + /// For getrusage(). + struct rusage { + struct timeval ru_utime; //!< user time used + struct timeval ru_stime; //!< system time used + int64_t ru_maxrss; //!< max rss + int64_t ru_ixrss; //!< integral shared memory size + int64_t ru_idrss; //!< integral unshared data " + int64_t ru_isrss; //!< integral unshared stack " + int64_t ru_minflt; //!< page reclaims - total vmfaults + int64_t ru_majflt; //!< page faults + int64_t ru_nswap; //!< swaps + int64_t ru_inblock; //!< block input operations + int64_t ru_oublock; //!< block output operations + int64_t ru_msgsnd; //!< messages sent + int64_t ru_msgrcv; //!< messages received + int64_t ru_nsignals; //!< signals received + int64_t ru_nvcsw; //!< voluntary context switches + int64_t ru_nivcsw; //!< involuntary " + }; + + /// For times(). + struct tms { + int64_t tms_utime; //!< user time + int64_t tms_stime; //!< system time + int64_t tms_cutime; //!< user time of children + int64_t tms_cstime; //!< system time of children + }; }; #endif diff --git a/src/arch/arm/linux/process.cc b/src/arch/arm/linux/process.cc index 169565a04..e34a813ea 100644 --- a/src/arch/arm/linux/process.cc +++ b/src/arch/arm/linux/process.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -41,6 +41,7 @@ * Authors: Korey Sewell * Stephen Hines * Ali Saidi + * Giacomo Gabrielli */ #include "arch/arm/linux/linux.hh" @@ -58,8 +59,8 @@ using namespace ArmISA; /// Target uname() handler. static SyscallReturn -unameFunc(SyscallDesc *desc, int callnum, LiveProcess *process, - ThreadContext *tc) +unameFunc32(SyscallDesc *desc, int callnum, LiveProcess *process, + ThreadContext *tc) { int index = 0; TypedBufferArg<Linux::utsname> name(process->getSyscallArg(tc, index)); @@ -74,13 +75,56 @@ unameFunc(SyscallDesc *desc, int callnum, LiveProcess *process, return 0; } -SyscallDesc ArmLinuxProcess::syscallDescs[] = { +/// Target uname() handler. +static SyscallReturn +unameFunc64(SyscallDesc *desc, int callnum, LiveProcess *process, + ThreadContext *tc) +{ + int index = 0; + TypedBufferArg<Linux::utsname> name(process->getSyscallArg(tc, index)); + + strcpy(name->sysname, "Linux"); + strcpy(name->nodename, "gem5"); + strcpy(name->release, "3.7.0+"); + strcpy(name->version, "#1 SMP Sat Dec 1 00:00:00 GMT 2012"); + strcpy(name->machine, "armv8l"); + + name.copyOut(tc->getMemProxy()); + return 0; +} + +/// Target set_tls() handler. +static SyscallReturn +setTLSFunc32(SyscallDesc *desc, int callnum, LiveProcess *process, + ThreadContext *tc) +{ + int index = 0; + uint32_t tlsPtr = process->getSyscallArg(tc, index); + + tc->getMemProxy().writeBlob(ArmLinuxProcess32::commPage + 0x0ff0, + (uint8_t *)&tlsPtr, sizeof(tlsPtr)); + tc->setMiscReg(MISCREG_TPIDRURO,tlsPtr); + return 0; +} + +static SyscallReturn +setTLSFunc64(SyscallDesc *desc, int callnum, LiveProcess *process, + ThreadContext *tc) +{ + int index = 0; + uint32_t tlsPtr = process->getSyscallArg(tc, index); + + tc->setMiscReg(MISCREG_TPIDRRO_EL0, tlsPtr); + return 0; +} + +static SyscallDesc syscallDescs32[] = { /* 0 */ SyscallDesc("syscall", unimplementedFunc), /* 1 */ SyscallDesc("exit", exitFunc), /* 2 */ SyscallDesc("fork", unimplementedFunc), /* 3 */ SyscallDesc("read", readFunc), /* 4 */ SyscallDesc("write", writeFunc), - /* 5 */ SyscallDesc("open", openFunc<ArmLinux>), + /* 5 */ SyscallDesc("open", openFunc<ArmLinux32>), /* 6 */ SyscallDesc("close", closeFunc), /* 7 */ SyscallDesc("unused#7", unimplementedFunc), /* 8 */ SyscallDesc("creat", unimplementedFunc), @@ -88,9 +132,9 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 10 */ SyscallDesc("unlink", unlinkFunc), /* 11 */ SyscallDesc("execve", unimplementedFunc), /* 12 */ SyscallDesc("chdir", unimplementedFunc), - /* 13 */ SyscallDesc("time", timeFunc<ArmLinux>), + /* 13 */ SyscallDesc("time", timeFunc<ArmLinux32>), /* 14 */ SyscallDesc("mknod", unimplementedFunc), - /* 15 */ SyscallDesc("chmod", chmodFunc<ArmLinux>), + /* 15 */ SyscallDesc("chmod", chmodFunc<ArmLinux32>), /* 16 */ SyscallDesc("lchown", chownFunc), /* 17 */ SyscallDesc("unused#17", unimplementedFunc), /* 18 */ SyscallDesc("unused#18", unimplementedFunc), @@ -118,7 +162,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 40 */ SyscallDesc("rmdir", unimplementedFunc), /* 41 */ SyscallDesc("dup", dupFunc), /* 42 */ SyscallDesc("pipe", pipePseudoFunc), - /* 43 */ SyscallDesc("times", timesFunc<ArmLinux>), + /* 43 */ SyscallDesc("times", timesFunc<ArmLinux32>), /* 44 */ SyscallDesc("unused#44", unimplementedFunc), /* 45 */ SyscallDesc("brk", brkFunc), /* 46 */ SyscallDesc("setgid", unimplementedFunc), @@ -129,7 +173,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 51 */ SyscallDesc("acct", unimplementedFunc), /* 52 */ SyscallDesc("umount2", unimplementedFunc), /* 53 */ SyscallDesc("unused#53", unimplementedFunc), - /* 54 */ SyscallDesc("ioctl", ioctlFunc<ArmLinux>), + /* 54 */ SyscallDesc("ioctl", ioctlFunc<ArmLinux32>), /* 55 */ SyscallDesc("fcntl", fcntlFunc), /* 56 */ SyscallDesc("unused#56", unimplementedFunc), /* 57 */ SyscallDesc("setpgid", unimplementedFunc), @@ -151,9 +195,9 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 73 */ SyscallDesc("sigpending", unimplementedFunc), /* 74 */ SyscallDesc("sethostname", ignoreFunc), /* 75 */ SyscallDesc("setrlimit", ignoreFunc), - /* 76 */ SyscallDesc("getrlimit", getrlimitFunc<ArmLinux>), - /* 77 */ SyscallDesc("getrusage", getrusageFunc<ArmLinux>), - /* 78 */ SyscallDesc("gettimeofday", gettimeofdayFunc<ArmLinux>), + /* 76 */ SyscallDesc("getrlimit", getrlimitFunc<ArmLinux32>), + /* 77 */ SyscallDesc("getrusage", getrusageFunc<ArmLinux32>), + /* 78 */ SyscallDesc("gettimeofday", gettimeofdayFunc<ArmLinux32>), /* 79 */ SyscallDesc("settimeofday", unimplementedFunc), /* 80 */ SyscallDesc("getgroups", unimplementedFunc), /* 81 */ SyscallDesc("setgroups", unimplementedFunc), @@ -165,7 +209,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 87 */ SyscallDesc("swapon", unimplementedFunc), /* 88 */ SyscallDesc("reboot", unimplementedFunc), /* 89 */ SyscallDesc("readdir", unimplementedFunc), - /* 90 */ SyscallDesc("mmap", mmapFunc<ArmLinux>), + /* 90 */ SyscallDesc("mmap", mmapFunc<ArmLinux32>), /* 91 */ SyscallDesc("munmap", munmapFunc), /* 92 */ SyscallDesc("truncate", truncateFunc), /* 93 */ SyscallDesc("ftruncate", ftruncateFunc), @@ -181,9 +225,9 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 103 */ SyscallDesc("syslog", unimplementedFunc), /* 104 */ SyscallDesc("setitimer", unimplementedFunc), /* 105 */ SyscallDesc("getitimer", unimplementedFunc), - /* 106 */ SyscallDesc("stat", statFunc<ArmLinux>), + /* 106 */ SyscallDesc("stat", statFunc<ArmLinux32>), /* 107 */ SyscallDesc("lstat", unimplementedFunc), - /* 108 */ SyscallDesc("fstat", fstatFunc<ArmLinux>), + /* 108 */ SyscallDesc("fstat", fstatFunc<ArmLinux32>), /* 109 */ SyscallDesc("unused#109", unimplementedFunc), /* 110 */ SyscallDesc("unused#101", unimplementedFunc), /* 111 */ SyscallDesc("vhangup", unimplementedFunc), @@ -191,17 +235,17 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 113 */ SyscallDesc("syscall", unimplementedFunc), /* 114 */ SyscallDesc("wait4", unimplementedFunc), /* 115 */ SyscallDesc("swapoff", unimplementedFunc), - /* 116 */ SyscallDesc("sysinfo", sysinfoFunc<ArmLinux>), + /* 116 */ SyscallDesc("sysinfo", sysinfoFunc<ArmLinux32>), /* 117 */ SyscallDesc("ipc", unimplementedFunc), /* 118 */ SyscallDesc("fsync", unimplementedFunc), /* 119 */ SyscallDesc("sigreturn", unimplementedFunc), /* 120 */ SyscallDesc("clone", cloneFunc), /* 121 */ SyscallDesc("setdomainname", unimplementedFunc), - /* 122 */ SyscallDesc("uname", unameFunc), + /* 122 */ SyscallDesc("uname", unameFunc32), /* 123 */ SyscallDesc("unused#123", unimplementedFunc), /* 124 */ SyscallDesc("adjtimex", unimplementedFunc), /* 125 */ SyscallDesc("mprotect", ignoreFunc), - /* 126 */ SyscallDesc("sigprocmask", unimplementedFunc), + /* 126 */ SyscallDesc("sigprocmask", ignoreWarnOnceFunc), /* 127 */ SyscallDesc("unused#127", unimplementedFunc), /* 128 */ SyscallDesc("init_module", unimplementedFunc), /* 129 */ SyscallDesc("delete_module", unimplementedFunc), @@ -221,7 +265,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 143 */ SyscallDesc("flock", unimplementedFunc), /* 144 */ SyscallDesc("msync", unimplementedFunc), /* 145 */ SyscallDesc("readv", unimplementedFunc), - /* 146 */ SyscallDesc("writev", writevFunc<ArmLinux>), + /* 146 */ SyscallDesc("writev", writevFunc<ArmLinux32>), /* 147 */ SyscallDesc("getsid", unimplementedFunc), /* 148 */ SyscallDesc("fdatasync", unimplementedFunc), /* 149 */ SyscallDesc("sysctl", unimplementedFunc), @@ -238,7 +282,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 160 */ SyscallDesc("sched_get_priority_min", unimplementedFunc), /* 161 */ SyscallDesc("sched_rr_get_interval", unimplementedFunc), /* 162 */ SyscallDesc("nanosleep", ignoreWarnOnceFunc), - /* 163 */ SyscallDesc("mremap", mremapFunc<ArmLinux>), // ARM-specific + /* 163 */ SyscallDesc("mremap", mremapFunc<ArmLinux32>), // ARM-specific /* 164 */ SyscallDesc("setresuid", unimplementedFunc), /* 165 */ SyscallDesc("getresuid", unimplementedFunc), /* 166 */ SyscallDesc("unused#166", unimplementedFunc), @@ -266,13 +310,13 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 188 */ SyscallDesc("unused#188", unimplementedFunc), /* 189 */ SyscallDesc("unused#189", unimplementedFunc), /* 190 */ SyscallDesc("vfork", unimplementedFunc), - /* 191 */ SyscallDesc("getrlimit", getrlimitFunc<ArmLinux>), - /* 192 */ SyscallDesc("mmap2", mmapFunc<ArmLinux>), + /* 191 */ SyscallDesc("getrlimit", getrlimitFunc<ArmLinux32>), + /* 192 */ SyscallDesc("mmap2", mmapFunc<ArmLinux32>), /* 193 */ SyscallDesc("truncate64", unimplementedFunc), /* 194 */ SyscallDesc("ftruncate64", ftruncate64Func), - /* 195 */ SyscallDesc("stat64", stat64Func<ArmLinux>), - /* 196 */ SyscallDesc("lstat64", lstat64Func<ArmLinux>), - /* 197 */ SyscallDesc("fstat64", fstat64Func<ArmLinux>), + /* 195 */ SyscallDesc("stat64", stat64Func<ArmLinux32>), + /* 196 */ SyscallDesc("lstat64", lstat64Func<ArmLinux32>), + /* 197 */ SyscallDesc("fstat64", fstat64Func<ArmLinux32>), /* 198 */ SyscallDesc("lchown", unimplementedFunc), /* 199 */ SyscallDesc("getuid", getuidFunc), /* 200 */ SyscallDesc("getgid", getgidFunc), @@ -319,7 +363,7 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 241 */ SyscallDesc("sched_setaffinity", unimplementedFunc), /* 242 */ SyscallDesc("sched_getaffinity", unimplementedFunc), /* 243 */ SyscallDesc("io_setup", unimplementedFunc), - /* 244 */ SyscallDesc("io_destory", unimplementedFunc), + /* 244 */ SyscallDesc("io_destroy", unimplementedFunc), /* 245 */ SyscallDesc("io_getevents", unimplementedFunc), /* 246 */ SyscallDesc("io_submit", unimplementedFunc), /* 247 */ SyscallDesc("io_cancel", unimplementedFunc), @@ -441,68 +485,1187 @@ SyscallDesc ArmLinuxProcess::syscallDescs[] = { /* 363 */ SyscallDesc("sys_rt_tgsigqueueinfo", unimplementedFunc), /* 364 */ SyscallDesc("sys_perf_event_open", unimplementedFunc), /* 365 */ SyscallDesc("sys_recvmmsg", unimplementedFunc), - }; -/// Target set_tls() handler. -static SyscallReturn -setTLSFunc(SyscallDesc *desc, int callnum, LiveProcess *process, - ThreadContext *tc) -{ - int index = 0; - uint32_t tlsPtr = process->getSyscallArg(tc, index); - - tc->getMemProxy().writeBlob(ArmLinuxProcess::commPage + 0x0ff0, - (uint8_t *)&tlsPtr, sizeof(tlsPtr)); - tc->setMiscReg(MISCREG_TPIDRURO,tlsPtr); - return 0; -} +static SyscallDesc syscallDescs64[] = { + /* 0 */ SyscallDesc("io_setup", unimplementedFunc), + /* 1 */ SyscallDesc("io_destroy", unimplementedFunc), + /* 2 */ SyscallDesc("io_submit", unimplementedFunc), + /* 3 */ SyscallDesc("io_cancel", unimplementedFunc), + /* 4 */ SyscallDesc("io_getevents", unimplementedFunc), + /* 5 */ SyscallDesc("setxattr", unimplementedFunc), + /* 6 */ SyscallDesc("lsetxattr", unimplementedFunc), + /* 7 */ SyscallDesc("fsetxattr", unimplementedFunc), + /* 8 */ SyscallDesc("getxattr", unimplementedFunc), + /* 9 */ SyscallDesc("lgetxattr", unimplementedFunc), + /* 10 */ SyscallDesc("fgetxattr", unimplementedFunc), + /* 11 */ SyscallDesc("listxattr", unimplementedFunc), + /* 12 */ SyscallDesc("llistxattr", unimplementedFunc), + /* 13 */ SyscallDesc("flistxattr", unimplementedFunc), + /* 14 */ SyscallDesc("removexattr", unimplementedFunc), + /* 15 */ SyscallDesc("lremovexattr", unimplementedFunc), + /* 16 */ SyscallDesc("fremovexattr", unimplementedFunc), + /* 17 */ SyscallDesc("getcwd", getcwdFunc), + /* 18 */ SyscallDesc("lookup_dcookie", unimplementedFunc), + /* 19 */ SyscallDesc("eventfd2", unimplementedFunc), + /* 20 */ SyscallDesc("epoll_create1", unimplementedFunc), + /* 21 */ SyscallDesc("epoll_ctl", unimplementedFunc), + /* 22 */ SyscallDesc("epoll_pwait", unimplementedFunc), + /* 23 */ SyscallDesc("dup", dupFunc), + /* 24 */ SyscallDesc("dup3", unimplementedFunc), + /* 25 */ SyscallDesc("fcntl64", fcntl64Func), + /* 26 */ SyscallDesc("inotify_init1", unimplementedFunc), + /* 27 */ SyscallDesc("inotify_add_watch", unimplementedFunc), + /* 28 */ SyscallDesc("inotify_rm_watch", unimplementedFunc), + /* 29 */ SyscallDesc("ioctl", ioctlFunc<ArmLinux64>), + /* 30 */ SyscallDesc("ioprio_set", unimplementedFunc), + /* 31 */ SyscallDesc("ioprio_get", unimplementedFunc), + /* 32 */ SyscallDesc("flock", unimplementedFunc), + /* 33 */ SyscallDesc("mknodat", unimplementedFunc), + /* 34 */ SyscallDesc("mkdirat", unimplementedFunc), + /* 35 */ SyscallDesc("unlinkat", unimplementedFunc), + /* 36 */ SyscallDesc("symlinkat", unimplementedFunc), + /* 37 */ SyscallDesc("linkat", unimplementedFunc), + /* 38 */ SyscallDesc("renameat", unimplementedFunc), + /* 39 */ SyscallDesc("umount2", unimplementedFunc), + /* 40 */ SyscallDesc("mount", unimplementedFunc), + /* 41 */ SyscallDesc("pivot_root", unimplementedFunc), + /* 42 */ SyscallDesc("nfsservctl", unimplementedFunc), + /* 43 */ SyscallDesc("statfs64", unimplementedFunc), + /* 44 */ SyscallDesc("fstatfs64", unimplementedFunc), + /* 45 */ SyscallDesc("truncate64", unimplementedFunc), + /* 46 */ SyscallDesc("ftruncate64", ftruncate64Func), + /* 47 */ SyscallDesc("fallocate", unimplementedFunc), + /* 48 */ SyscallDesc("faccessat", unimplementedFunc), + /* 49 */ SyscallDesc("chdir", unimplementedFunc), + /* 50 */ SyscallDesc("fchdir", unimplementedFunc), + /* 51 */ SyscallDesc("chroot", unimplementedFunc), + /* 52 */ SyscallDesc("fchmod", unimplementedFunc), + /* 53 */ SyscallDesc("fchmodat", unimplementedFunc), + /* 54 */ SyscallDesc("fchownat", unimplementedFunc), + /* 55 */ SyscallDesc("fchown", unimplementedFunc), + /* 56 */ SyscallDesc("openat", openatFunc<ArmLinux64>), + /* 57 */ SyscallDesc("close", closeFunc), + /* 58 */ SyscallDesc("vhangup", unimplementedFunc), + /* 59 */ SyscallDesc("pipe2", unimplementedFunc), + /* 60 */ SyscallDesc("quotactl", unimplementedFunc), + /* 61 */ SyscallDesc("getdents64", unimplementedFunc), + /* 62 */ SyscallDesc("llseek", lseekFunc), + /* 63 */ SyscallDesc("read", readFunc), + /* 64 */ SyscallDesc("write", writeFunc), + /* 65 */ SyscallDesc("readv", unimplementedFunc), + /* 66 */ SyscallDesc("writev", writevFunc<ArmLinux64>), + /* 67 */ SyscallDesc("pread64", unimplementedFunc), + /* 68 */ SyscallDesc("pwrite64", unimplementedFunc), + /* 69 */ SyscallDesc("preadv", unimplementedFunc), + /* 70 */ SyscallDesc("pwritev", unimplementedFunc), + /* 71 */ SyscallDesc("sendfile64", unimplementedFunc), + /* 72 */ SyscallDesc("pselect6", unimplementedFunc), + /* 73 */ SyscallDesc("ppoll", unimplementedFunc), + /* 74 */ SyscallDesc("signalfd4", unimplementedFunc), + /* 75 */ SyscallDesc("vmsplice", unimplementedFunc), + /* 76 */ SyscallDesc("splice", unimplementedFunc), + /* 77 */ SyscallDesc("tee", unimplementedFunc), + /* 78 */ SyscallDesc("readlinkat", unimplementedFunc), + /* 79 */ SyscallDesc("fstatat64", fstatat64Func<ArmLinux64>), + /* 80 */ SyscallDesc("fstat64", fstat64Func<ArmLinux64>), + /* 81 */ SyscallDesc("sync", unimplementedFunc), + /* 82 */ SyscallDesc("fsync", unimplementedFunc), + /* 83 */ SyscallDesc("fdatasync", unimplementedFunc), + /* 84 */ SyscallDesc("sync_file_range", unimplementedFunc), + /* 85 */ SyscallDesc("timerfd_create", unimplementedFunc), + /* 86 */ SyscallDesc("timerfd_settime", unimplementedFunc), + /* 87 */ SyscallDesc("timerfd_gettime", unimplementedFunc), + /* 88 */ SyscallDesc("utimensat", unimplementedFunc), + /* 89 */ SyscallDesc("acct", unimplementedFunc), + /* 90 */ SyscallDesc("capget", unimplementedFunc), + /* 91 */ SyscallDesc("capset", unimplementedFunc), + /* 92 */ SyscallDesc("personality", unimplementedFunc), + /* 93 */ SyscallDesc("exit", exitFunc), + /* 94 */ SyscallDesc("exit_group", exitGroupFunc), + /* 95 */ SyscallDesc("waitid", unimplementedFunc), + /* 96 */ SyscallDesc("set_tid_address", unimplementedFunc), + /* 97 */ SyscallDesc("unshare", unimplementedFunc), + /* 98 */ SyscallDesc("futex", unimplementedFunc), + /* 99 */ SyscallDesc("set_robust_list", unimplementedFunc), + /* 100 */ SyscallDesc("get_robust_list", unimplementedFunc), + /* 101 */ SyscallDesc("nanosleep", ignoreWarnOnceFunc), + /* 102 */ SyscallDesc("getitimer", unimplementedFunc), + /* 103 */ SyscallDesc("setitimer", unimplementedFunc), + /* 104 */ SyscallDesc("kexec_load", unimplementedFunc), + /* 105 */ SyscallDesc("init_module", unimplementedFunc), + /* 106 */ SyscallDesc("delete_module", unimplementedFunc), + /* 107 */ SyscallDesc("timer_create", unimplementedFunc), + /* 108 */ SyscallDesc("timer_gettime", unimplementedFunc), + /* 109 */ SyscallDesc("timer_getoverrun", unimplementedFunc), + /* 110 */ SyscallDesc("timer_settime", unimplementedFunc), + /* 111 */ SyscallDesc("timer_delete", unimplementedFunc), + /* 112 */ SyscallDesc("clock_settime", unimplementedFunc), + /* 113 */ SyscallDesc("clock_gettime", unimplementedFunc), + /* 114 */ SyscallDesc("clock_getres", unimplementedFunc), + /* 115 */ SyscallDesc("clock_nanosleep", unimplementedFunc), + /* 116 */ SyscallDesc("syslog", unimplementedFunc), + /* 117 */ SyscallDesc("ptrace", unimplementedFunc), + /* 118 */ SyscallDesc("sched_setparam", unimplementedFunc), + /* 119 */ SyscallDesc("sched_setscheduler", unimplementedFunc), + /* 120 */ SyscallDesc("sched_getscheduler", unimplementedFunc), + /* 121 */ SyscallDesc("sched_getparam", unimplementedFunc), + /* 122 */ SyscallDesc("sched_setaffinity", unimplementedFunc), + /* 123 */ SyscallDesc("sched_getaffinity", unimplementedFunc), + /* 124 */ SyscallDesc("sched_yield", unimplementedFunc), + /* 125 */ SyscallDesc("sched_get_priority_max", unimplementedFunc), + /* 126 */ SyscallDesc("sched_get_priority_min", unimplementedFunc), + /* 127 */ SyscallDesc("sched_rr_get_interval", unimplementedFunc), + /* 128 */ SyscallDesc("restart_syscall", unimplementedFunc), + /* 129 */ SyscallDesc("kill", ignoreFunc), + /* 130 */ SyscallDesc("tkill", unimplementedFunc), + /* 131 */ SyscallDesc("tgkill", unimplementedFunc), + /* 132 */ SyscallDesc("sigaltstack", unimplementedFunc), + /* 133 */ SyscallDesc("rt_sigsuspend", unimplementedFunc), + /* 134 */ SyscallDesc("rt_sigaction", ignoreFunc), + /* 135 */ SyscallDesc("rt_sigprocmask", ignoreWarnOnceFunc), + /* 136 */ SyscallDesc("rt_sigpending", unimplementedFunc), + /* 137 */ SyscallDesc("rt_sigtimedwait", unimplementedFunc), + /* 138 */ SyscallDesc("rt_sigqueueinfo", ignoreFunc), + /* 139 */ SyscallDesc("rt_sigreturn", unimplementedFunc), + /* 140 */ SyscallDesc("setpriority", unimplementedFunc), + /* 141 */ SyscallDesc("getpriority", unimplementedFunc), + /* 142 */ SyscallDesc("reboot", unimplementedFunc), + /* 143 */ SyscallDesc("setregid", unimplementedFunc), + /* 144 */ SyscallDesc("setgid", unimplementedFunc), + /* 145 */ SyscallDesc("setreuid", unimplementedFunc), + /* 146 */ SyscallDesc("setuid", unimplementedFunc), + /* 147 */ SyscallDesc("setresuid", unimplementedFunc), + /* 148 */ SyscallDesc("getresuid", unimplementedFunc), + /* 149 */ SyscallDesc("setresgid", unimplementedFunc), + /* 150 */ SyscallDesc("getresgid", unimplementedFunc), + /* 151 */ SyscallDesc("setfsuid", unimplementedFunc), + /* 152 */ SyscallDesc("setfsgid", unimplementedFunc), + /* 153 */ SyscallDesc("times", timesFunc<ArmLinux64>), + /* 154 */ SyscallDesc("setpgid", unimplementedFunc), + /* 155 */ SyscallDesc("getpgid", unimplementedFunc), + /* 156 */ SyscallDesc("getsid", unimplementedFunc), + /* 157 */ SyscallDesc("setsid", unimplementedFunc), + /* 158 */ SyscallDesc("getgroups", unimplementedFunc), + /* 159 */ SyscallDesc("setgroups", unimplementedFunc), + /* 160 */ SyscallDesc("uname", unameFunc64), + /* 161 */ SyscallDesc("sethostname", ignoreFunc), + /* 162 */ SyscallDesc("setdomainname", unimplementedFunc), + /* 163 */ SyscallDesc("getrlimit", getrlimitFunc<ArmLinux64>), + /* 164 */ SyscallDesc("setrlimit", ignoreFunc), + /* 165 */ SyscallDesc("getrusage", getrusageFunc<ArmLinux64>), + /* 166 */ SyscallDesc("umask", unimplementedFunc), + /* 167 */ SyscallDesc("prctl", unimplementedFunc), + /* 168 */ SyscallDesc("getcpu", unimplementedFunc), + /* 169 */ SyscallDesc("gettimeofday", gettimeofdayFunc<ArmLinux64>), + /* 170 */ SyscallDesc("settimeofday", unimplementedFunc), + /* 171 */ SyscallDesc("adjtimex", unimplementedFunc), + /* 172 */ SyscallDesc("getpid", getpidFunc), + /* 173 */ SyscallDesc("getppid", getppidFunc), + /* 174 */ SyscallDesc("getuid", getuidFunc), + /* 175 */ SyscallDesc("geteuid", geteuidFunc), + /* 176 */ SyscallDesc("getgid", getgidFunc), + /* 177 */ SyscallDesc("getegid", getegidFunc), + /* 178 */ SyscallDesc("gettid", unimplementedFunc), + /* 179 */ SyscallDesc("sysinfo", sysinfoFunc<ArmLinux64>), + /* 180 */ SyscallDesc("mq_open", unimplementedFunc), + /* 181 */ SyscallDesc("mq_unlink", unimplementedFunc), + /* 182 */ SyscallDesc("mq_timedsend", unimplementedFunc), + /* 183 */ SyscallDesc("mq_timedreceive", unimplementedFunc), + /* 184 */ SyscallDesc("mq_notify", unimplementedFunc), + /* 185 */ SyscallDesc("mq_getsetattr", unimplementedFunc), + /* 186 */ SyscallDesc("msgget", unimplementedFunc), + /* 187 */ SyscallDesc("msgctl", unimplementedFunc), + /* 188 */ SyscallDesc("msgrcv", unimplementedFunc), + /* 189 */ SyscallDesc("msgsnd", unimplementedFunc), + /* 190 */ SyscallDesc("semget", unimplementedFunc), + /* 191 */ SyscallDesc("semctl", unimplementedFunc), + /* 192 */ SyscallDesc("semtimedop", unimplementedFunc), + /* 193 */ SyscallDesc("semop", unimplementedFunc), + /* 194 */ SyscallDesc("shmget", unimplementedFunc), + /* 195 */ SyscallDesc("shmctl", unimplementedFunc), + /* 196 */ SyscallDesc("shmat", unimplementedFunc), + /* 197 */ SyscallDesc("shmdt", unimplementedFunc), + /* 198 */ SyscallDesc("socket", unimplementedFunc), + /* 199 */ SyscallDesc("socketpair", unimplementedFunc), + /* 200 */ SyscallDesc("bind", unimplementedFunc), + /* 201 */ SyscallDesc("listen", unimplementedFunc), + /* 202 */ SyscallDesc("accept", unimplementedFunc), + /* 203 */ SyscallDesc("connect", unimplementedFunc), + /* 204 */ SyscallDesc("getsockname", unimplementedFunc), + /* 205 */ SyscallDesc("getpeername", unimplementedFunc), + /* 206 */ SyscallDesc("sendto", unimplementedFunc), + /* 207 */ SyscallDesc("recvfrom", unimplementedFunc), + /* 208 */ SyscallDesc("setsockopt", unimplementedFunc), + /* 209 */ SyscallDesc("getsockopt", unimplementedFunc), + /* 210 */ SyscallDesc("shutdown", unimplementedFunc), + /* 211 */ SyscallDesc("sendmsg", unimplementedFunc), + /* 212 */ SyscallDesc("recvmsg", unimplementedFunc), + /* 213 */ SyscallDesc("readahead", unimplementedFunc), + /* 214 */ SyscallDesc("brk", brkFunc), + /* 215 */ SyscallDesc("munmap", munmapFunc), + /* 216 */ SyscallDesc("mremap", mremapFunc<ArmLinux64>), + /* 217 */ SyscallDesc("add_key", unimplementedFunc), + /* 218 */ SyscallDesc("request_key", unimplementedFunc), + /* 219 */ SyscallDesc("keyctl", unimplementedFunc), + /* 220 */ SyscallDesc("clone", unimplementedFunc), + /* 221 */ SyscallDesc("execve", unimplementedFunc), + /* 222 */ SyscallDesc("mmap2", mmapFunc<ArmLinux64>), + /* 223 */ SyscallDesc("fadvise64_64", unimplementedFunc), + /* 224 */ SyscallDesc("swapon", unimplementedFunc), + /* 225 */ SyscallDesc("swapoff", unimplementedFunc), + /* 226 */ SyscallDesc("mprotect", ignoreFunc), + /* 227 */ SyscallDesc("msync", unimplementedFunc), + /* 228 */ SyscallDesc("mlock", unimplementedFunc), + /* 229 */ SyscallDesc("munlock", unimplementedFunc), + /* 230 */ SyscallDesc("mlockall", unimplementedFunc), + /* 231 */ SyscallDesc("munlockall", unimplementedFunc), + /* 232 */ SyscallDesc("mincore", unimplementedFunc), + /* 233 */ SyscallDesc("madvise", unimplementedFunc), + /* 234 */ SyscallDesc("remap_file_pages", unimplementedFunc), + /* 235 */ SyscallDesc("mbind", unimplementedFunc), + /* 236 */ SyscallDesc("get_mempolicy", unimplementedFunc), + /* 237 */ SyscallDesc("set_mempolicy", unimplementedFunc), + /* 238 */ SyscallDesc("migrate_pages", unimplementedFunc), + /* 239 */ SyscallDesc("move_pages", unimplementedFunc), + /* 240 */ SyscallDesc("rt_tgsigqueueinfo", unimplementedFunc), + /* 241 */ SyscallDesc("perf_event_open", unimplementedFunc), + /* 242 */ SyscallDesc("accept4", unimplementedFunc), + /* 243 */ SyscallDesc("recvmmsg", unimplementedFunc), + /* 244 */ SyscallDesc("unused#244", unimplementedFunc), + /* 245 */ SyscallDesc("unused#245", unimplementedFunc), + /* 246 */ SyscallDesc("unused#246", unimplementedFunc), + /* 247 */ SyscallDesc("unused#247", unimplementedFunc), + /* 248 */ SyscallDesc("unused#248", unimplementedFunc), + /* 249 */ SyscallDesc("unused#249", unimplementedFunc), + /* 250 */ SyscallDesc("unused#250", unimplementedFunc), + /* 251 */ SyscallDesc("unused#251", unimplementedFunc), + /* 252 */ SyscallDesc("unused#252", unimplementedFunc), + /* 253 */ SyscallDesc("unused#253", unimplementedFunc), + /* 254 */ SyscallDesc("unused#254", unimplementedFunc), + /* 255 */ SyscallDesc("unused#255", unimplementedFunc), + /* 256 */ SyscallDesc("unused#256", unimplementedFunc), + /* 257 */ SyscallDesc("unused#257", unimplementedFunc), + /* 258 */ SyscallDesc("unused#258", unimplementedFunc), + /* 259 */ SyscallDesc("unused#259", unimplementedFunc), + /* 260 */ SyscallDesc("wait4", unimplementedFunc), + /* 261 */ SyscallDesc("prlimit64", unimplementedFunc), + /* 262 */ SyscallDesc("fanotify_init", unimplementedFunc), + /* 263 */ SyscallDesc("fanotify_mark", unimplementedFunc), + /* 264 */ SyscallDesc("name_to_handle_at", unimplementedFunc), + /* 265 */ SyscallDesc("open_by_handle_at", unimplementedFunc), + /* 266 */ SyscallDesc("clock_adjtime", unimplementedFunc), + /* 267 */ SyscallDesc("syncfs", unimplementedFunc), + /* 268 */ SyscallDesc("setns", unimplementedFunc), + /* 269 */ SyscallDesc("sendmmsg", unimplementedFunc), + /* 270 */ SyscallDesc("process_vm_readv", unimplementedFunc), + /* 271 */ SyscallDesc("process_vm_writev", unimplementedFunc), + /* 272 */ SyscallDesc("unused#272", unimplementedFunc), + /* 273 */ SyscallDesc("unused#273", unimplementedFunc), + /* 274 */ SyscallDesc("unused#274", unimplementedFunc), + /* 275 */ SyscallDesc("unused#275", unimplementedFunc), + /* 276 */ SyscallDesc("unused#276", unimplementedFunc), + /* 277 */ SyscallDesc("unused#277", unimplementedFunc), + /* 278 */ SyscallDesc("unused#278", unimplementedFunc), + /* 279 */ SyscallDesc("unused#279", unimplementedFunc), + /* 280 */ SyscallDesc("unused#280", unimplementedFunc), + /* 281 */ SyscallDesc("unused#281", unimplementedFunc), + /* 282 */ SyscallDesc("unused#282", unimplementedFunc), + /* 283 */ SyscallDesc("unused#283", unimplementedFunc), + /* 284 */ SyscallDesc("unused#284", unimplementedFunc), + /* 285 */ SyscallDesc("unused#285", unimplementedFunc), + /* 286 */ SyscallDesc("unused#286", unimplementedFunc), + /* 287 */ SyscallDesc("unused#287", unimplementedFunc), + /* 288 */ SyscallDesc("unused#288", unimplementedFunc), + /* 289 */ SyscallDesc("unused#289", unimplementedFunc), + /* 290 */ SyscallDesc("unused#290", unimplementedFunc), + /* 291 */ SyscallDesc("unused#291", unimplementedFunc), + /* 292 */ SyscallDesc("unused#292", unimplementedFunc), + /* 293 */ SyscallDesc("unused#293", unimplementedFunc), + /* 294 */ SyscallDesc("unused#294", unimplementedFunc), + /* 295 */ SyscallDesc("unused#295", unimplementedFunc), + /* 296 */ SyscallDesc("unused#296", unimplementedFunc), + /* 297 */ SyscallDesc("unused#297", unimplementedFunc), + /* 298 */ SyscallDesc("unused#298", unimplementedFunc), + /* 299 */ SyscallDesc("unused#299", unimplementedFunc), + /* 300 */ SyscallDesc("unused#300", unimplementedFunc), + /* 301 */ SyscallDesc("unused#301", unimplementedFunc), + /* 302 */ SyscallDesc("unused#302", unimplementedFunc), + /* 303 */ SyscallDesc("unused#303", unimplementedFunc), + /* 304 */ SyscallDesc("unused#304", unimplementedFunc), + /* 305 */ SyscallDesc("unused#305", unimplementedFunc), + /* 306 */ SyscallDesc("unused#306", unimplementedFunc), + /* 307 */ SyscallDesc("unused#307", unimplementedFunc), + /* 308 */ SyscallDesc("unused#308", unimplementedFunc), + /* 309 */ SyscallDesc("unused#309", unimplementedFunc), + /* 310 */ SyscallDesc("unused#310", unimplementedFunc), + /* 311 */ SyscallDesc("unused#311", unimplementedFunc), + /* 312 */ SyscallDesc("unused#312", unimplementedFunc), + /* 313 */ SyscallDesc("unused#313", unimplementedFunc), + /* 314 */ SyscallDesc("unused#314", unimplementedFunc), + /* 315 */ SyscallDesc("unused#315", unimplementedFunc), + /* 316 */ SyscallDesc("unused#316", unimplementedFunc), + /* 317 */ SyscallDesc("unused#317", unimplementedFunc), + /* 318 */ SyscallDesc("unused#318", unimplementedFunc), + /* 319 */ SyscallDesc("unused#319", unimplementedFunc), + /* 320 */ SyscallDesc("unused#320", unimplementedFunc), + /* 321 */ SyscallDesc("unused#321", unimplementedFunc), + /* 322 */ SyscallDesc("unused#322", unimplementedFunc), + /* 323 */ SyscallDesc("unused#323", unimplementedFunc), + /* 324 */ SyscallDesc("unused#324", unimplementedFunc), + /* 325 */ SyscallDesc("unused#325", unimplementedFunc), + /* 326 */ SyscallDesc("unused#326", unimplementedFunc), + /* 327 */ SyscallDesc("unused#327", unimplementedFunc), + /* 328 */ SyscallDesc("unused#328", unimplementedFunc), + /* 329 */ SyscallDesc("unused#329", unimplementedFunc), + /* 330 */ SyscallDesc("unused#330", unimplementedFunc), + /* 331 */ SyscallDesc("unused#331", unimplementedFunc), + /* 332 */ SyscallDesc("unused#332", unimplementedFunc), + /* 333 */ SyscallDesc("unused#333", unimplementedFunc), + /* 334 */ SyscallDesc("unused#334", unimplementedFunc), + /* 335 */ SyscallDesc("unused#335", unimplementedFunc), + /* 336 */ SyscallDesc("unused#336", unimplementedFunc), + /* 337 */ SyscallDesc("unused#337", unimplementedFunc), + /* 338 */ SyscallDesc("unused#338", unimplementedFunc), + /* 339 */ SyscallDesc("unused#339", unimplementedFunc), + /* 340 */ SyscallDesc("unused#340", unimplementedFunc), + /* 341 */ SyscallDesc("unused#341", unimplementedFunc), + /* 342 */ SyscallDesc("unused#342", unimplementedFunc), + /* 343 */ SyscallDesc("unused#343", unimplementedFunc), + /* 344 */ SyscallDesc("unused#344", unimplementedFunc), + /* 345 */ SyscallDesc("unused#345", unimplementedFunc), + /* 346 */ SyscallDesc("unused#346", unimplementedFunc), + /* 347 */ SyscallDesc("unused#347", unimplementedFunc), + /* 348 */ SyscallDesc("unused#348", unimplementedFunc), + /* 349 */ SyscallDesc("unused#349", unimplementedFunc), + /* 350 */ SyscallDesc("unused#350", unimplementedFunc), + /* 351 */ SyscallDesc("unused#351", unimplementedFunc), + /* 352 */ SyscallDesc("unused#352", unimplementedFunc), + /* 353 */ SyscallDesc("unused#353", unimplementedFunc), + /* 354 */ SyscallDesc("unused#354", unimplementedFunc), + /* 355 */ SyscallDesc("unused#355", unimplementedFunc), + /* 356 */ SyscallDesc("unused#356", unimplementedFunc), + /* 357 */ SyscallDesc("unused#357", unimplementedFunc), + /* 358 */ SyscallDesc("unused#358", unimplementedFunc), + /* 359 */ SyscallDesc("unused#359", unimplementedFunc), + /* 360 */ SyscallDesc("unused#360", unimplementedFunc), + /* 361 */ SyscallDesc("unused#361", unimplementedFunc), + /* 362 */ SyscallDesc("unused#362", unimplementedFunc), + /* 363 */ SyscallDesc("unused#363", unimplementedFunc), + /* 364 */ SyscallDesc("unused#364", unimplementedFunc), + /* 365 */ SyscallDesc("unused#365", unimplementedFunc), + /* 366 */ SyscallDesc("unused#366", unimplementedFunc), + /* 367 */ SyscallDesc("unused#367", unimplementedFunc), + /* 368 */ SyscallDesc("unused#368", unimplementedFunc), + /* 369 */ SyscallDesc("unused#369", unimplementedFunc), + /* 370 */ SyscallDesc("unused#370", unimplementedFunc), + /* 371 */ SyscallDesc("unused#371", unimplementedFunc), + /* 372 */ SyscallDesc("unused#372", unimplementedFunc), + /* 373 */ SyscallDesc("unused#373", unimplementedFunc), + /* 374 */ SyscallDesc("unused#374", unimplementedFunc), + /* 375 */ SyscallDesc("unused#375", unimplementedFunc), + /* 376 */ SyscallDesc("unused#376", unimplementedFunc), + /* 377 */ SyscallDesc("unused#377", unimplementedFunc), + /* 378 */ SyscallDesc("unused#378", unimplementedFunc), + /* 379 */ SyscallDesc("unused#379", unimplementedFunc), + /* 380 */ SyscallDesc("unused#380", unimplementedFunc), + /* 381 */ SyscallDesc("unused#381", unimplementedFunc), + /* 382 */ SyscallDesc("unused#382", unimplementedFunc), + /* 383 */ SyscallDesc("unused#383", unimplementedFunc), + /* 384 */ SyscallDesc("unused#384", unimplementedFunc), + /* 385 */ SyscallDesc("unused#385", unimplementedFunc), + /* 386 */ SyscallDesc("unused#386", unimplementedFunc), + /* 387 */ SyscallDesc("unused#387", unimplementedFunc), + /* 388 */ SyscallDesc("unused#388", unimplementedFunc), + /* 389 */ SyscallDesc("unused#389", unimplementedFunc), + /* 390 */ SyscallDesc("unused#390", unimplementedFunc), + /* 391 */ SyscallDesc("unused#391", unimplementedFunc), + /* 392 */ SyscallDesc("unused#392", unimplementedFunc), + /* 393 */ SyscallDesc("unused#393", unimplementedFunc), + /* 394 */ SyscallDesc("unused#394", unimplementedFunc), + /* 395 */ SyscallDesc("unused#395", unimplementedFunc), + /* 396 */ SyscallDesc("unused#396", unimplementedFunc), + /* 397 */ SyscallDesc("unused#397", unimplementedFunc), + /* 398 */ SyscallDesc("unused#398", unimplementedFunc), + /* 399 */ SyscallDesc("unused#399", unimplementedFunc), + /* 400 */ SyscallDesc("unused#400", unimplementedFunc), + /* 401 */ SyscallDesc("unused#401", unimplementedFunc), + /* 402 */ SyscallDesc("unused#402", unimplementedFunc), + /* 403 */ SyscallDesc("unused#403", unimplementedFunc), + /* 404 */ SyscallDesc("unused#404", unimplementedFunc), + /* 405 */ SyscallDesc("unused#405", unimplementedFunc), + /* 406 */ SyscallDesc("unused#406", unimplementedFunc), + /* 407 */ SyscallDesc("unused#407", unimplementedFunc), + /* 408 */ SyscallDesc("unused#408", unimplementedFunc), + /* 409 */ SyscallDesc("unused#409", unimplementedFunc), + /* 410 */ SyscallDesc("unused#410", unimplementedFunc), + /* 411 */ SyscallDesc("unused#411", unimplementedFunc), + /* 412 */ SyscallDesc("unused#412", unimplementedFunc), + /* 413 */ SyscallDesc("unused#413", unimplementedFunc), + /* 414 */ SyscallDesc("unused#414", unimplementedFunc), + /* 415 */ SyscallDesc("unused#415", unimplementedFunc), + /* 416 */ SyscallDesc("unused#416", unimplementedFunc), + /* 417 */ SyscallDesc("unused#417", unimplementedFunc), + /* 418 */ SyscallDesc("unused#418", unimplementedFunc), + /* 419 */ SyscallDesc("unused#419", unimplementedFunc), + /* 420 */ SyscallDesc("unused#420", unimplementedFunc), + /* 421 */ SyscallDesc("unused#421", unimplementedFunc), + /* 422 */ SyscallDesc("unused#422", unimplementedFunc), + /* 423 */ SyscallDesc("unused#423", unimplementedFunc), + /* 424 */ SyscallDesc("unused#424", unimplementedFunc), + /* 425 */ SyscallDesc("unused#425", unimplementedFunc), + /* 426 */ SyscallDesc("unused#426", unimplementedFunc), + /* 427 */ SyscallDesc("unused#427", unimplementedFunc), + /* 428 */ SyscallDesc("unused#428", unimplementedFunc), + /* 429 */ SyscallDesc("unused#429", unimplementedFunc), + /* 430 */ SyscallDesc("unused#430", unimplementedFunc), + /* 431 */ SyscallDesc("unused#431", unimplementedFunc), + /* 432 */ SyscallDesc("unused#432", unimplementedFunc), + /* 433 */ SyscallDesc("unused#433", unimplementedFunc), + /* 434 */ SyscallDesc("unused#434", unimplementedFunc), + /* 435 */ SyscallDesc("unused#435", unimplementedFunc), + /* 436 */ SyscallDesc("unused#436", unimplementedFunc), + /* 437 */ SyscallDesc("unused#437", unimplementedFunc), + /* 438 */ SyscallDesc("unused#438", unimplementedFunc), + /* 439 */ SyscallDesc("unused#439", unimplementedFunc), + /* 440 */ SyscallDesc("unused#440", unimplementedFunc), + /* 441 */ SyscallDesc("unused#441", unimplementedFunc), + /* 442 */ SyscallDesc("unused#442", unimplementedFunc), + /* 443 */ SyscallDesc("unused#443", unimplementedFunc), + /* 444 */ SyscallDesc("unused#444", unimplementedFunc), + /* 445 */ SyscallDesc("unused#445", unimplementedFunc), + /* 446 */ SyscallDesc("unused#446", unimplementedFunc), + /* 447 */ SyscallDesc("unused#447", unimplementedFunc), + /* 448 */ SyscallDesc("unused#448", unimplementedFunc), + /* 449 */ SyscallDesc("unused#449", unimplementedFunc), + /* 450 */ SyscallDesc("unused#450", unimplementedFunc), + /* 451 */ SyscallDesc("unused#451", unimplementedFunc), + /* 452 */ SyscallDesc("unused#452", unimplementedFunc), + /* 453 */ SyscallDesc("unused#453", unimplementedFunc), + /* 454 */ SyscallDesc("unused#454", unimplementedFunc), + /* 455 */ SyscallDesc("unused#455", unimplementedFunc), + /* 456 */ SyscallDesc("unused#456", unimplementedFunc), + /* 457 */ SyscallDesc("unused#457", unimplementedFunc), + /* 458 */ SyscallDesc("unused#458", unimplementedFunc), + /* 459 */ SyscallDesc("unused#459", unimplementedFunc), + /* 460 */ SyscallDesc("unused#460", unimplementedFunc), + /* 461 */ SyscallDesc("unused#461", unimplementedFunc), + /* 462 */ SyscallDesc("unused#462", unimplementedFunc), + /* 463 */ SyscallDesc("unused#463", unimplementedFunc), + /* 464 */ SyscallDesc("unused#464", unimplementedFunc), + /* 465 */ SyscallDesc("unused#465", unimplementedFunc), + /* 466 */ SyscallDesc("unused#466", unimplementedFunc), + /* 467 */ SyscallDesc("unused#467", unimplementedFunc), + /* 468 */ SyscallDesc("unused#468", unimplementedFunc), + /* 469 */ SyscallDesc("unused#469", unimplementedFunc), + /* 470 */ SyscallDesc("unused#470", unimplementedFunc), + /* 471 */ SyscallDesc("unused#471", unimplementedFunc), + /* 472 */ SyscallDesc("unused#472", unimplementedFunc), + /* 473 */ SyscallDesc("unused#473", unimplementedFunc), + /* 474 */ SyscallDesc("unused#474", unimplementedFunc), + /* 475 */ SyscallDesc("unused#475", unimplementedFunc), + /* 476 */ SyscallDesc("unused#476", unimplementedFunc), + /* 477 */ SyscallDesc("unused#477", unimplementedFunc), + /* 478 */ SyscallDesc("unused#478", unimplementedFunc), + /* 479 */ SyscallDesc("unused#479", unimplementedFunc), + /* 480 */ SyscallDesc("unused#480", unimplementedFunc), + /* 481 */ SyscallDesc("unused#481", unimplementedFunc), + /* 482 */ SyscallDesc("unused#482", unimplementedFunc), + /* 483 */ SyscallDesc("unused#483", unimplementedFunc), + /* 484 */ SyscallDesc("unused#484", unimplementedFunc), + /* 485 */ SyscallDesc("unused#485", unimplementedFunc), + /* 486 */ SyscallDesc("unused#486", unimplementedFunc), + /* 487 */ SyscallDesc("unused#487", unimplementedFunc), + /* 488 */ SyscallDesc("unused#488", unimplementedFunc), + /* 489 */ SyscallDesc("unused#489", unimplementedFunc), + /* 490 */ SyscallDesc("unused#490", unimplementedFunc), + /* 491 */ SyscallDesc("unused#491", unimplementedFunc), + /* 492 */ SyscallDesc("unused#492", unimplementedFunc), + /* 493 */ SyscallDesc("unused#493", unimplementedFunc), + /* 494 */ SyscallDesc("unused#494", unimplementedFunc), + /* 495 */ SyscallDesc("unused#495", unimplementedFunc), + /* 496 */ SyscallDesc("unused#496", unimplementedFunc), + /* 497 */ SyscallDesc("unused#497", unimplementedFunc), + /* 498 */ SyscallDesc("unused#498", unimplementedFunc), + /* 499 */ SyscallDesc("unused#499", unimplementedFunc), + /* 500 */ SyscallDesc("unused#500", unimplementedFunc), + /* 501 */ SyscallDesc("unused#501", unimplementedFunc), + /* 502 */ SyscallDesc("unused#502", unimplementedFunc), + /* 503 */ SyscallDesc("unused#503", unimplementedFunc), + /* 504 */ SyscallDesc("unused#504", unimplementedFunc), + /* 505 */ SyscallDesc("unused#505", unimplementedFunc), + /* 506 */ SyscallDesc("unused#506", unimplementedFunc), + /* 507 */ SyscallDesc("unused#507", unimplementedFunc), + /* 508 */ SyscallDesc("unused#508", unimplementedFunc), + /* 509 */ SyscallDesc("unused#509", unimplementedFunc), + /* 510 */ SyscallDesc("unused#510", unimplementedFunc), + /* 511 */ SyscallDesc("unused#511", unimplementedFunc), + /* 512 */ SyscallDesc("unused#512", unimplementedFunc), + /* 513 */ SyscallDesc("unused#513", unimplementedFunc), + /* 514 */ SyscallDesc("unused#514", unimplementedFunc), + /* 515 */ SyscallDesc("unused#515", unimplementedFunc), + /* 516 */ SyscallDesc("unused#516", unimplementedFunc), + /* 517 */ SyscallDesc("unused#517", unimplementedFunc), + /* 518 */ SyscallDesc("unused#518", unimplementedFunc), + /* 519 */ SyscallDesc("unused#519", unimplementedFunc), + /* 520 */ SyscallDesc("unused#520", unimplementedFunc), + /* 521 */ SyscallDesc("unused#521", unimplementedFunc), + /* 522 */ SyscallDesc("unused#522", unimplementedFunc), + /* 523 */ SyscallDesc("unused#523", unimplementedFunc), + /* 524 */ SyscallDesc("unused#524", unimplementedFunc), + /* 525 */ SyscallDesc("unused#525", unimplementedFunc), + /* 526 */ SyscallDesc("unused#526", unimplementedFunc), + /* 527 */ SyscallDesc("unused#527", unimplementedFunc), + /* 528 */ SyscallDesc("unused#528", unimplementedFunc), + /* 529 */ SyscallDesc("unused#529", unimplementedFunc), + /* 530 */ SyscallDesc("unused#530", unimplementedFunc), + /* 531 */ SyscallDesc("unused#531", unimplementedFunc), + /* 532 */ SyscallDesc("unused#532", unimplementedFunc), + /* 533 */ SyscallDesc("unused#533", unimplementedFunc), + /* 534 */ SyscallDesc("unused#534", unimplementedFunc), + /* 535 */ SyscallDesc("unused#535", unimplementedFunc), + /* 536 */ SyscallDesc("unused#536", unimplementedFunc), + /* 537 */ SyscallDesc("unused#537", unimplementedFunc), + /* 538 */ SyscallDesc("unused#538", unimplementedFunc), + /* 539 */ SyscallDesc("unused#539", unimplementedFunc), + /* 540 */ SyscallDesc("unused#540", unimplementedFunc), + /* 541 */ SyscallDesc("unused#541", unimplementedFunc), + /* 542 */ SyscallDesc("unused#542", unimplementedFunc), + /* 543 */ SyscallDesc("unused#543", unimplementedFunc), + /* 544 */ SyscallDesc("unused#544", unimplementedFunc), + /* 545 */ SyscallDesc("unused#545", unimplementedFunc), + /* 546 */ SyscallDesc("unused#546", unimplementedFunc), + /* 547 */ SyscallDesc("unused#547", unimplementedFunc), + /* 548 */ SyscallDesc("unused#548", unimplementedFunc), + /* 549 */ SyscallDesc("unused#549", unimplementedFunc), + /* 550 */ SyscallDesc("unused#550", unimplementedFunc), + /* 551 */ SyscallDesc("unused#551", unimplementedFunc), + /* 552 */ SyscallDesc("unused#552", unimplementedFunc), + /* 553 */ SyscallDesc("unused#553", unimplementedFunc), + /* 554 */ SyscallDesc("unused#554", unimplementedFunc), + /* 555 */ SyscallDesc("unused#555", unimplementedFunc), + /* 556 */ SyscallDesc("unused#556", unimplementedFunc), + /* 557 */ SyscallDesc("unused#557", unimplementedFunc), + /* 558 */ SyscallDesc("unused#558", unimplementedFunc), + /* 559 */ SyscallDesc("unused#559", unimplementedFunc), + /* 560 */ SyscallDesc("unused#560", unimplementedFunc), + /* 561 */ SyscallDesc("unused#561", unimplementedFunc), + /* 562 */ SyscallDesc("unused#562", unimplementedFunc), + /* 563 */ SyscallDesc("unused#563", unimplementedFunc), + /* 564 */ SyscallDesc("unused#564", unimplementedFunc), + /* 565 */ SyscallDesc("unused#565", unimplementedFunc), + /* 566 */ SyscallDesc("unused#566", unimplementedFunc), + /* 567 */ SyscallDesc("unused#567", unimplementedFunc), + /* 568 */ SyscallDesc("unused#568", unimplementedFunc), + /* 569 */ SyscallDesc("unused#569", unimplementedFunc), + /* 570 */ SyscallDesc("unused#570", unimplementedFunc), + /* 571 */ SyscallDesc("unused#571", unimplementedFunc), + /* 572 */ SyscallDesc("unused#572", unimplementedFunc), + /* 573 */ SyscallDesc("unused#573", unimplementedFunc), + /* 574 */ SyscallDesc("unused#574", unimplementedFunc), + /* 575 */ SyscallDesc("unused#575", unimplementedFunc), + /* 576 */ SyscallDesc("unused#576", unimplementedFunc), + /* 577 */ SyscallDesc("unused#577", unimplementedFunc), + /* 578 */ SyscallDesc("unused#578", unimplementedFunc), + /* 579 */ SyscallDesc("unused#579", unimplementedFunc), + /* 580 */ SyscallDesc("unused#580", unimplementedFunc), + /* 581 */ SyscallDesc("unused#581", unimplementedFunc), + /* 582 */ SyscallDesc("unused#582", unimplementedFunc), + /* 583 */ SyscallDesc("unused#583", unimplementedFunc), + /* 584 */ SyscallDesc("unused#584", unimplementedFunc), + /* 585 */ SyscallDesc("unused#585", unimplementedFunc), + /* 586 */ SyscallDesc("unused#586", unimplementedFunc), + /* 587 */ SyscallDesc("unused#587", unimplementedFunc), + /* 588 */ SyscallDesc("unused#588", unimplementedFunc), + /* 589 */ SyscallDesc("unused#589", unimplementedFunc), + /* 590 */ SyscallDesc("unused#590", unimplementedFunc), + /* 591 */ SyscallDesc("unused#591", unimplementedFunc), + /* 592 */ SyscallDesc("unused#592", unimplementedFunc), + /* 593 */ SyscallDesc("unused#593", unimplementedFunc), + /* 594 */ SyscallDesc("unused#594", unimplementedFunc), + /* 595 */ SyscallDesc("unused#595", unimplementedFunc), + /* 596 */ SyscallDesc("unused#596", unimplementedFunc), + /* 597 */ SyscallDesc("unused#597", unimplementedFunc), + /* 598 */ SyscallDesc("unused#598", unimplementedFunc), + /* 599 */ SyscallDesc("unused#599", unimplementedFunc), + /* 600 */ SyscallDesc("unused#600", unimplementedFunc), + /* 601 */ SyscallDesc("unused#601", unimplementedFunc), + /* 602 */ SyscallDesc("unused#602", unimplementedFunc), + /* 603 */ SyscallDesc("unused#603", unimplementedFunc), + /* 604 */ SyscallDesc("unused#604", unimplementedFunc), + /* 605 */ SyscallDesc("unused#605", unimplementedFunc), + /* 606 */ SyscallDesc("unused#606", unimplementedFunc), + /* 607 */ SyscallDesc("unused#607", unimplementedFunc), + /* 608 */ SyscallDesc("unused#608", unimplementedFunc), + /* 609 */ SyscallDesc("unused#609", unimplementedFunc), + /* 610 */ SyscallDesc("unused#610", unimplementedFunc), + /* 611 */ SyscallDesc("unused#611", unimplementedFunc), + /* 612 */ SyscallDesc("unused#612", unimplementedFunc), + /* 613 */ SyscallDesc("unused#613", unimplementedFunc), + /* 614 */ SyscallDesc("unused#614", unimplementedFunc), + /* 615 */ SyscallDesc("unused#615", unimplementedFunc), + /* 616 */ SyscallDesc("unused#616", unimplementedFunc), + /* 617 */ SyscallDesc("unused#617", unimplementedFunc), + /* 618 */ SyscallDesc("unused#618", unimplementedFunc), + /* 619 */ SyscallDesc("unused#619", unimplementedFunc), + /* 620 */ SyscallDesc("unused#620", unimplementedFunc), + /* 621 */ SyscallDesc("unused#621", unimplementedFunc), + /* 622 */ SyscallDesc("unused#622", unimplementedFunc), + /* 623 */ SyscallDesc("unused#623", unimplementedFunc), + /* 624 */ SyscallDesc("unused#624", unimplementedFunc), + /* 625 */ SyscallDesc("unused#625", unimplementedFunc), + /* 626 */ SyscallDesc("unused#626", unimplementedFunc), + /* 627 */ SyscallDesc("unused#627", unimplementedFunc), + /* 628 */ SyscallDesc("unused#628", unimplementedFunc), + /* 629 */ SyscallDesc("unused#629", unimplementedFunc), + /* 630 */ SyscallDesc("unused#630", unimplementedFunc), + /* 631 */ SyscallDesc("unused#631", unimplementedFunc), + /* 632 */ SyscallDesc("unused#632", unimplementedFunc), + /* 633 */ SyscallDesc("unused#633", unimplementedFunc), + /* 634 */ SyscallDesc("unused#634", unimplementedFunc), + /* 635 */ SyscallDesc("unused#635", unimplementedFunc), + /* 636 */ SyscallDesc("unused#636", unimplementedFunc), + /* 637 */ SyscallDesc("unused#637", unimplementedFunc), + /* 638 */ SyscallDesc("unused#638", unimplementedFunc), + /* 639 */ SyscallDesc("unused#639", unimplementedFunc), + /* 640 */ SyscallDesc("unused#640", unimplementedFunc), + /* 641 */ SyscallDesc("unused#641", unimplementedFunc), + /* 642 */ SyscallDesc("unused#642", unimplementedFunc), + /* 643 */ SyscallDesc("unused#643", unimplementedFunc), + /* 644 */ SyscallDesc("unused#644", unimplementedFunc), + /* 645 */ SyscallDesc("unused#645", unimplementedFunc), + /* 646 */ SyscallDesc("unused#646", unimplementedFunc), + /* 647 */ SyscallDesc("unused#647", unimplementedFunc), + /* 648 */ SyscallDesc("unused#648", unimplementedFunc), + /* 649 */ SyscallDesc("unused#649", unimplementedFunc), + /* 650 */ SyscallDesc("unused#650", unimplementedFunc), + /* 651 */ SyscallDesc("unused#651", unimplementedFunc), + /* 652 */ SyscallDesc("unused#652", unimplementedFunc), + /* 653 */ SyscallDesc("unused#653", unimplementedFunc), + /* 654 */ SyscallDesc("unused#654", unimplementedFunc), + /* 655 */ SyscallDesc("unused#655", unimplementedFunc), + /* 656 */ SyscallDesc("unused#656", unimplementedFunc), + /* 657 */ SyscallDesc("unused#657", unimplementedFunc), + /* 658 */ SyscallDesc("unused#658", unimplementedFunc), + /* 659 */ SyscallDesc("unused#659", unimplementedFunc), + /* 660 */ SyscallDesc("unused#660", unimplementedFunc), + /* 661 */ SyscallDesc("unused#661", unimplementedFunc), + /* 662 */ SyscallDesc("unused#662", unimplementedFunc), + /* 663 */ SyscallDesc("unused#663", unimplementedFunc), + /* 664 */ SyscallDesc("unused#664", unimplementedFunc), + /* 665 */ SyscallDesc("unused#665", unimplementedFunc), + /* 666 */ SyscallDesc("unused#666", unimplementedFunc), + /* 667 */ SyscallDesc("unused#667", unimplementedFunc), + /* 668 */ SyscallDesc("unused#668", unimplementedFunc), + /* 669 */ SyscallDesc("unused#669", unimplementedFunc), + /* 670 */ SyscallDesc("unused#670", unimplementedFunc), + /* 671 */ SyscallDesc("unused#671", unimplementedFunc), + /* 672 */ SyscallDesc("unused#672", unimplementedFunc), + /* 673 */ SyscallDesc("unused#673", unimplementedFunc), + /* 674 */ SyscallDesc("unused#674", unimplementedFunc), + /* 675 */ SyscallDesc("unused#675", unimplementedFunc), + /* 676 */ SyscallDesc("unused#676", unimplementedFunc), + /* 677 */ SyscallDesc("unused#677", unimplementedFunc), + /* 678 */ SyscallDesc("unused#678", unimplementedFunc), + /* 679 */ SyscallDesc("unused#679", unimplementedFunc), + /* 680 */ SyscallDesc("unused#680", unimplementedFunc), + /* 681 */ SyscallDesc("unused#681", unimplementedFunc), + /* 682 */ SyscallDesc("unused#682", unimplementedFunc), + /* 683 */ SyscallDesc("unused#683", unimplementedFunc), + /* 684 */ SyscallDesc("unused#684", unimplementedFunc), + /* 685 */ SyscallDesc("unused#685", unimplementedFunc), + /* 686 */ SyscallDesc("unused#686", unimplementedFunc), + /* 687 */ SyscallDesc("unused#687", unimplementedFunc), + /* 688 */ SyscallDesc("unused#688", unimplementedFunc), + /* 689 */ SyscallDesc("unused#689", unimplementedFunc), + /* 690 */ SyscallDesc("unused#690", unimplementedFunc), + /* 691 */ SyscallDesc("unused#691", unimplementedFunc), + /* 692 */ SyscallDesc("unused#692", unimplementedFunc), + /* 693 */ SyscallDesc("unused#693", unimplementedFunc), + /* 694 */ SyscallDesc("unused#694", unimplementedFunc), + /* 695 */ SyscallDesc("unused#695", unimplementedFunc), + /* 696 */ SyscallDesc("unused#696", unimplementedFunc), + /* 697 */ SyscallDesc("unused#697", unimplementedFunc), + /* 698 */ SyscallDesc("unused#698", unimplementedFunc), + /* 699 */ SyscallDesc("unused#699", unimplementedFunc), + /* 700 */ SyscallDesc("unused#700", unimplementedFunc), + /* 701 */ SyscallDesc("unused#701", unimplementedFunc), + /* 702 */ SyscallDesc("unused#702", unimplementedFunc), + /* 703 */ SyscallDesc("unused#703", unimplementedFunc), + /* 704 */ SyscallDesc("unused#704", unimplementedFunc), + /* 705 */ SyscallDesc("unused#705", unimplementedFunc), + /* 706 */ SyscallDesc("unused#706", unimplementedFunc), + /* 707 */ SyscallDesc("unused#707", unimplementedFunc), + /* 708 */ SyscallDesc("unused#708", unimplementedFunc), + /* 709 */ SyscallDesc("unused#709", unimplementedFunc), + /* 710 */ SyscallDesc("unused#710", unimplementedFunc), + /* 711 */ SyscallDesc("unused#711", unimplementedFunc), + /* 712 */ SyscallDesc("unused#712", unimplementedFunc), + /* 713 */ SyscallDesc("unused#713", unimplementedFunc), + /* 714 */ SyscallDesc("unused#714", unimplementedFunc), + /* 715 */ SyscallDesc("unused#715", unimplementedFunc), + /* 716 */ SyscallDesc("unused#716", unimplementedFunc), + /* 717 */ SyscallDesc("unused#717", unimplementedFunc), + /* 718 */ SyscallDesc("unused#718", unimplementedFunc), + /* 719 */ SyscallDesc("unused#719", unimplementedFunc), + /* 720 */ SyscallDesc("unused#720", unimplementedFunc), + /* 721 */ SyscallDesc("unused#721", unimplementedFunc), + /* 722 */ SyscallDesc("unused#722", unimplementedFunc), + /* 723 */ SyscallDesc("unused#723", unimplementedFunc), + /* 724 */ SyscallDesc("unused#724", unimplementedFunc), + /* 725 */ SyscallDesc("unused#725", unimplementedFunc), + /* 726 */ SyscallDesc("unused#726", unimplementedFunc), + /* 727 */ SyscallDesc("unused#727", unimplementedFunc), + /* 728 */ SyscallDesc("unused#728", unimplementedFunc), + /* 729 */ SyscallDesc("unused#729", unimplementedFunc), + /* 730 */ SyscallDesc("unused#730", unimplementedFunc), + /* 731 */ SyscallDesc("unused#731", unimplementedFunc), + /* 732 */ SyscallDesc("unused#732", unimplementedFunc), + /* 733 */ SyscallDesc("unused#733", unimplementedFunc), + /* 734 */ SyscallDesc("unused#734", unimplementedFunc), + /* 735 */ SyscallDesc("unused#735", unimplementedFunc), + /* 736 */ SyscallDesc("unused#736", unimplementedFunc), + /* 737 */ SyscallDesc("unused#737", unimplementedFunc), + /* 738 */ SyscallDesc("unused#738", unimplementedFunc), + /* 739 */ SyscallDesc("unused#739", unimplementedFunc), + /* 740 */ SyscallDesc("unused#740", unimplementedFunc), + /* 741 */ SyscallDesc("unused#741", unimplementedFunc), + /* 742 */ SyscallDesc("unused#742", unimplementedFunc), + /* 743 */ SyscallDesc("unused#743", unimplementedFunc), + /* 744 */ SyscallDesc("unused#744", unimplementedFunc), + /* 745 */ SyscallDesc("unused#745", unimplementedFunc), + /* 746 */ SyscallDesc("unused#746", unimplementedFunc), + /* 747 */ SyscallDesc("unused#747", unimplementedFunc), + /* 748 */ SyscallDesc("unused#748", unimplementedFunc), + /* 749 */ SyscallDesc("unused#749", unimplementedFunc), + /* 750 */ SyscallDesc("unused#750", unimplementedFunc), + /* 751 */ SyscallDesc("unused#751", unimplementedFunc), + /* 752 */ SyscallDesc("unused#752", unimplementedFunc), + /* 753 */ SyscallDesc("unused#753", unimplementedFunc), + /* 754 */ SyscallDesc("unused#754", unimplementedFunc), + /* 755 */ SyscallDesc("unused#755", unimplementedFunc), + /* 756 */ SyscallDesc("unused#756", unimplementedFunc), + /* 757 */ SyscallDesc("unused#757", unimplementedFunc), + /* 758 */ SyscallDesc("unused#758", unimplementedFunc), + /* 759 */ SyscallDesc("unused#759", unimplementedFunc), + /* 760 */ SyscallDesc("unused#760", unimplementedFunc), + /* 761 */ SyscallDesc("unused#761", unimplementedFunc), + /* 762 */ SyscallDesc("unused#762", unimplementedFunc), + /* 763 */ SyscallDesc("unused#763", unimplementedFunc), + /* 764 */ SyscallDesc("unused#764", unimplementedFunc), + /* 765 */ SyscallDesc("unused#765", unimplementedFunc), + /* 766 */ SyscallDesc("unused#766", unimplementedFunc), + /* 767 */ SyscallDesc("unused#767", unimplementedFunc), + /* 768 */ SyscallDesc("unused#768", unimplementedFunc), + /* 769 */ SyscallDesc("unused#769", unimplementedFunc), + /* 770 */ SyscallDesc("unused#770", unimplementedFunc), + /* 771 */ SyscallDesc("unused#771", unimplementedFunc), + /* 772 */ SyscallDesc("unused#772", unimplementedFunc), + /* 773 */ SyscallDesc("unused#773", unimplementedFunc), + /* 774 */ SyscallDesc("unused#774", unimplementedFunc), + /* 775 */ SyscallDesc("unused#775", unimplementedFunc), + /* 776 */ SyscallDesc("unused#776", unimplementedFunc), + /* 777 */ SyscallDesc("unused#777", unimplementedFunc), + /* 778 */ SyscallDesc("unused#778", unimplementedFunc), + /* 779 */ SyscallDesc("unused#779", unimplementedFunc), + /* 780 */ SyscallDesc("unused#780", unimplementedFunc), + /* 781 */ SyscallDesc("unused#781", unimplementedFunc), + /* 782 */ SyscallDesc("unused#782", unimplementedFunc), + /* 783 */ SyscallDesc("unused#783", unimplementedFunc), + /* 784 */ SyscallDesc("unused#784", unimplementedFunc), + /* 785 */ SyscallDesc("unused#785", unimplementedFunc), + /* 786 */ SyscallDesc("unused#786", unimplementedFunc), + /* 787 */ SyscallDesc("unused#787", unimplementedFunc), + /* 788 */ SyscallDesc("unused#788", unimplementedFunc), + /* 789 */ SyscallDesc("unused#789", unimplementedFunc), + /* 790 */ SyscallDesc("unused#790", unimplementedFunc), + /* 791 */ SyscallDesc("unused#791", unimplementedFunc), + /* 792 */ SyscallDesc("unused#792", unimplementedFunc), + /* 793 */ SyscallDesc("unused#793", unimplementedFunc), + /* 794 */ SyscallDesc("unused#794", unimplementedFunc), + /* 795 */ SyscallDesc("unused#795", unimplementedFunc), + /* 796 */ SyscallDesc("unused#796", unimplementedFunc), + /* 797 */ SyscallDesc("unused#797", unimplementedFunc), + /* 798 */ SyscallDesc("unused#798", unimplementedFunc), + /* 799 */ SyscallDesc("unused#799", unimplementedFunc), + /* 800 */ SyscallDesc("unused#800", unimplementedFunc), + /* 801 */ SyscallDesc("unused#801", unimplementedFunc), + /* 802 */ SyscallDesc("unused#802", unimplementedFunc), + /* 803 */ SyscallDesc("unused#803", unimplementedFunc), + /* 804 */ SyscallDesc("unused#804", unimplementedFunc), + /* 805 */ SyscallDesc("unused#805", unimplementedFunc), + /* 806 */ SyscallDesc("unused#806", unimplementedFunc), + /* 807 */ SyscallDesc("unused#807", unimplementedFunc), + /* 808 */ SyscallDesc("unused#808", unimplementedFunc), + /* 809 */ SyscallDesc("unused#809", unimplementedFunc), + /* 810 */ SyscallDesc("unused#810", unimplementedFunc), + /* 811 */ SyscallDesc("unused#811", unimplementedFunc), + /* 812 */ SyscallDesc("unused#812", unimplementedFunc), + /* 813 */ SyscallDesc("unused#813", unimplementedFunc), + /* 814 */ SyscallDesc("unused#814", unimplementedFunc), + /* 815 */ SyscallDesc("unused#815", unimplementedFunc), + /* 816 */ SyscallDesc("unused#816", unimplementedFunc), + /* 817 */ SyscallDesc("unused#817", unimplementedFunc), + /* 818 */ SyscallDesc("unused#818", unimplementedFunc), + /* 819 */ SyscallDesc("unused#819", unimplementedFunc), + /* 820 */ SyscallDesc("unused#820", unimplementedFunc), + /* 821 */ SyscallDesc("unused#821", unimplementedFunc), + /* 822 */ SyscallDesc("unused#822", unimplementedFunc), + /* 823 */ SyscallDesc("unused#823", unimplementedFunc), + /* 824 */ SyscallDesc("unused#824", unimplementedFunc), + /* 825 */ SyscallDesc("unused#825", unimplementedFunc), + /* 826 */ SyscallDesc("unused#826", unimplementedFunc), + /* 827 */ SyscallDesc("unused#827", unimplementedFunc), + /* 828 */ SyscallDesc("unused#828", unimplementedFunc), + /* 829 */ SyscallDesc("unused#829", unimplementedFunc), + /* 830 */ SyscallDesc("unused#830", unimplementedFunc), + /* 831 */ SyscallDesc("unused#831", unimplementedFunc), + /* 832 */ SyscallDesc("unused#832", unimplementedFunc), + /* 833 */ SyscallDesc("unused#833", unimplementedFunc), + /* 834 */ SyscallDesc("unused#834", unimplementedFunc), + /* 835 */ SyscallDesc("unused#835", unimplementedFunc), + /* 836 */ SyscallDesc("unused#836", unimplementedFunc), + /* 837 */ SyscallDesc("unused#837", unimplementedFunc), + /* 838 */ SyscallDesc("unused#838", unimplementedFunc), + /* 839 */ SyscallDesc("unused#839", unimplementedFunc), + /* 840 */ SyscallDesc("unused#840", unimplementedFunc), + /* 841 */ SyscallDesc("unused#841", unimplementedFunc), + /* 842 */ SyscallDesc("unused#842", unimplementedFunc), + /* 843 */ SyscallDesc("unused#843", unimplementedFunc), + /* 844 */ SyscallDesc("unused#844", unimplementedFunc), + /* 845 */ SyscallDesc("unused#845", unimplementedFunc), + /* 846 */ SyscallDesc("unused#846", unimplementedFunc), + /* 847 */ SyscallDesc("unused#847", unimplementedFunc), + /* 848 */ SyscallDesc("unused#848", unimplementedFunc), + /* 849 */ SyscallDesc("unused#849", unimplementedFunc), + /* 850 */ SyscallDesc("unused#850", unimplementedFunc), + /* 851 */ SyscallDesc("unused#851", unimplementedFunc), + /* 852 */ SyscallDesc("unused#852", unimplementedFunc), + /* 853 */ SyscallDesc("unused#853", unimplementedFunc), + /* 854 */ SyscallDesc("unused#854", unimplementedFunc), + /* 855 */ SyscallDesc("unused#855", unimplementedFunc), + /* 856 */ SyscallDesc("unused#856", unimplementedFunc), + /* 857 */ SyscallDesc("unused#857", unimplementedFunc), + /* 858 */ SyscallDesc("unused#858", unimplementedFunc), + /* 859 */ SyscallDesc("unused#859", unimplementedFunc), + /* 860 */ SyscallDesc("unused#860", unimplementedFunc), + /* 861 */ SyscallDesc("unused#861", unimplementedFunc), + /* 862 */ SyscallDesc("unused#862", unimplementedFunc), + /* 863 */ SyscallDesc("unused#863", unimplementedFunc), + /* 864 */ SyscallDesc("unused#864", unimplementedFunc), + /* 865 */ SyscallDesc("unused#865", unimplementedFunc), + /* 866 */ SyscallDesc("unused#866", unimplementedFunc), + /* 867 */ SyscallDesc("unused#867", unimplementedFunc), + /* 868 */ SyscallDesc("unused#868", unimplementedFunc), + /* 869 */ SyscallDesc("unused#869", unimplementedFunc), + /* 870 */ SyscallDesc("unused#870", unimplementedFunc), + /* 871 */ SyscallDesc("unused#871", unimplementedFunc), + /* 872 */ SyscallDesc("unused#872", unimplementedFunc), + /* 873 */ SyscallDesc("unused#873", unimplementedFunc), + /* 874 */ SyscallDesc("unused#874", unimplementedFunc), + /* 875 */ SyscallDesc("unused#875", unimplementedFunc), + /* 876 */ SyscallDesc("unused#876", unimplementedFunc), + /* 877 */ SyscallDesc("unused#877", unimplementedFunc), + /* 878 */ SyscallDesc("unused#878", unimplementedFunc), + /* 879 */ SyscallDesc("unused#879", unimplementedFunc), + /* 880 */ SyscallDesc("unused#880", unimplementedFunc), + /* 881 */ SyscallDesc("unused#881", unimplementedFunc), + /* 882 */ SyscallDesc("unused#882", unimplementedFunc), + /* 883 */ SyscallDesc("unused#883", unimplementedFunc), + /* 884 */ SyscallDesc("unused#884", unimplementedFunc), + /* 885 */ SyscallDesc("unused#885", unimplementedFunc), + /* 886 */ SyscallDesc("unused#886", unimplementedFunc), + /* 887 */ SyscallDesc("unused#887", unimplementedFunc), + /* 888 */ SyscallDesc("unused#888", unimplementedFunc), + /* 889 */ SyscallDesc("unused#889", unimplementedFunc), + /* 890 */ SyscallDesc("unused#890", unimplementedFunc), + /* 891 */ SyscallDesc("unused#891", unimplementedFunc), + /* 892 */ SyscallDesc("unused#892", unimplementedFunc), + /* 893 */ SyscallDesc("unused#893", unimplementedFunc), + /* 894 */ SyscallDesc("unused#894", unimplementedFunc), + /* 895 */ SyscallDesc("unused#895", unimplementedFunc), + /* 896 */ SyscallDesc("unused#896", unimplementedFunc), + /* 897 */ SyscallDesc("unused#897", unimplementedFunc), + /* 898 */ SyscallDesc("unused#898", unimplementedFunc), + /* 899 */ SyscallDesc("unused#899", unimplementedFunc), + /* 900 */ SyscallDesc("unused#900", unimplementedFunc), + /* 901 */ SyscallDesc("unused#901", unimplementedFunc), + /* 902 */ SyscallDesc("unused#902", unimplementedFunc), + /* 903 */ SyscallDesc("unused#903", unimplementedFunc), + /* 904 */ SyscallDesc("unused#904", unimplementedFunc), + /* 905 */ SyscallDesc("unused#905", unimplementedFunc), + /* 906 */ SyscallDesc("unused#906", unimplementedFunc), + /* 907 */ SyscallDesc("unused#907", unimplementedFunc), + /* 908 */ SyscallDesc("unused#908", unimplementedFunc), + /* 909 */ SyscallDesc("unused#909", unimplementedFunc), + /* 910 */ SyscallDesc("unused#910", unimplementedFunc), + /* 911 */ SyscallDesc("unused#911", unimplementedFunc), + /* 912 */ SyscallDesc("unused#912", unimplementedFunc), + /* 913 */ SyscallDesc("unused#913", unimplementedFunc), + /* 914 */ SyscallDesc("unused#914", unimplementedFunc), + /* 915 */ SyscallDesc("unused#915", unimplementedFunc), + /* 916 */ SyscallDesc("unused#916", unimplementedFunc), + /* 917 */ SyscallDesc("unused#917", unimplementedFunc), + /* 918 */ SyscallDesc("unused#918", unimplementedFunc), + /* 919 */ SyscallDesc("unused#919", unimplementedFunc), + /* 920 */ SyscallDesc("unused#920", unimplementedFunc), + /* 921 */ SyscallDesc("unused#921", unimplementedFunc), + /* 922 */ SyscallDesc("unused#922", unimplementedFunc), + /* 923 */ SyscallDesc("unused#923", unimplementedFunc), + /* 924 */ SyscallDesc("unused#924", unimplementedFunc), + /* 925 */ SyscallDesc("unused#925", unimplementedFunc), + /* 926 */ SyscallDesc("unused#926", unimplementedFunc), + /* 927 */ SyscallDesc("unused#927", unimplementedFunc), + /* 928 */ SyscallDesc("unused#928", unimplementedFunc), + /* 929 */ SyscallDesc("unused#929", unimplementedFunc), + /* 930 */ SyscallDesc("unused#930", unimplementedFunc), + /* 931 */ SyscallDesc("unused#931", unimplementedFunc), + /* 932 */ SyscallDesc("unused#932", unimplementedFunc), + /* 933 */ SyscallDesc("unused#933", unimplementedFunc), + /* 934 */ SyscallDesc("unused#934", unimplementedFunc), + /* 935 */ SyscallDesc("unused#935", unimplementedFunc), + /* 936 */ SyscallDesc("unused#936", unimplementedFunc), + /* 937 */ SyscallDesc("unused#937", unimplementedFunc), + /* 938 */ SyscallDesc("unused#938", unimplementedFunc), + /* 939 */ SyscallDesc("unused#939", unimplementedFunc), + /* 940 */ SyscallDesc("unused#940", unimplementedFunc), + /* 941 */ SyscallDesc("unused#941", unimplementedFunc), + /* 942 */ SyscallDesc("unused#942", unimplementedFunc), + /* 943 */ SyscallDesc("unused#943", unimplementedFunc), + /* 944 */ SyscallDesc("unused#944", unimplementedFunc), + /* 945 */ SyscallDesc("unused#945", unimplementedFunc), + /* 946 */ SyscallDesc("unused#946", unimplementedFunc), + /* 947 */ SyscallDesc("unused#947", unimplementedFunc), + /* 948 */ SyscallDesc("unused#948", unimplementedFunc), + /* 949 */ SyscallDesc("unused#949", unimplementedFunc), + /* 950 */ SyscallDesc("unused#950", unimplementedFunc), + /* 951 */ SyscallDesc("unused#951", unimplementedFunc), + /* 952 */ SyscallDesc("unused#952", unimplementedFunc), + /* 953 */ SyscallDesc("unused#953", unimplementedFunc), + /* 954 */ SyscallDesc("unused#954", unimplementedFunc), + /* 955 */ SyscallDesc("unused#955", unimplementedFunc), + /* 956 */ SyscallDesc("unused#956", unimplementedFunc), + /* 957 */ SyscallDesc("unused#957", unimplementedFunc), + /* 958 */ SyscallDesc("unused#958", unimplementedFunc), + /* 959 */ SyscallDesc("unused#959", unimplementedFunc), + /* 960 */ SyscallDesc("unused#960", unimplementedFunc), + /* 961 */ SyscallDesc("unused#961", unimplementedFunc), + /* 962 */ SyscallDesc("unused#962", unimplementedFunc), + /* 963 */ SyscallDesc("unused#963", unimplementedFunc), + /* 964 */ SyscallDesc("unused#964", unimplementedFunc), + /* 965 */ SyscallDesc("unused#965", unimplementedFunc), + /* 966 */ SyscallDesc("unused#966", unimplementedFunc), + /* 967 */ SyscallDesc("unused#967", unimplementedFunc), + /* 968 */ SyscallDesc("unused#968", unimplementedFunc), + /* 969 */ SyscallDesc("unused#969", unimplementedFunc), + /* 970 */ SyscallDesc("unused#970", unimplementedFunc), + /* 971 */ SyscallDesc("unused#971", unimplementedFunc), + /* 972 */ SyscallDesc("unused#972", unimplementedFunc), + /* 973 */ SyscallDesc("unused#973", unimplementedFunc), + /* 974 */ SyscallDesc("unused#974", unimplementedFunc), + /* 975 */ SyscallDesc("unused#975", unimplementedFunc), + /* 976 */ SyscallDesc("unused#976", unimplementedFunc), + /* 977 */ SyscallDesc("unused#977", unimplementedFunc), + /* 978 */ SyscallDesc("unused#978", unimplementedFunc), + /* 979 */ SyscallDesc("unused#979", unimplementedFunc), + /* 980 */ SyscallDesc("unused#980", unimplementedFunc), + /* 981 */ SyscallDesc("unused#981", unimplementedFunc), + /* 982 */ SyscallDesc("unused#982", unimplementedFunc), + /* 983 */ SyscallDesc("unused#983", unimplementedFunc), + /* 984 */ SyscallDesc("unused#984", unimplementedFunc), + /* 985 */ SyscallDesc("unused#985", unimplementedFunc), + /* 986 */ SyscallDesc("unused#986", unimplementedFunc), + /* 987 */ SyscallDesc("unused#987", unimplementedFunc), + /* 988 */ SyscallDesc("unused#988", unimplementedFunc), + /* 989 */ SyscallDesc("unused#989", unimplementedFunc), + /* 990 */ SyscallDesc("unused#990", unimplementedFunc), + /* 991 */ SyscallDesc("unused#991", unimplementedFunc), + /* 992 */ SyscallDesc("unused#992", unimplementedFunc), + /* 993 */ SyscallDesc("unused#993", unimplementedFunc), + /* 994 */ SyscallDesc("unused#994", unimplementedFunc), + /* 995 */ SyscallDesc("unused#995", unimplementedFunc), + /* 996 */ SyscallDesc("unused#996", unimplementedFunc), + /* 997 */ SyscallDesc("unused#997", unimplementedFunc), + /* 998 */ SyscallDesc("unused#998", unimplementedFunc), + /* 999 */ SyscallDesc("unused#999", unimplementedFunc), + /* 1000 */ SyscallDesc("unused#1000", unimplementedFunc), + /* 1001 */ SyscallDesc("unused#1001", unimplementedFunc), + /* 1002 */ SyscallDesc("unused#1002", unimplementedFunc), + /* 1003 */ SyscallDesc("unused#1003", unimplementedFunc), + /* 1004 */ SyscallDesc("unused#1004", unimplementedFunc), + /* 1005 */ SyscallDesc("unused#1005", unimplementedFunc), + /* 1006 */ SyscallDesc("unused#1006", unimplementedFunc), + /* 1007 */ SyscallDesc("unused#1007", unimplementedFunc), + /* 1008 */ SyscallDesc("unused#1008", unimplementedFunc), + /* 1009 */ SyscallDesc("unused#1009", unimplementedFunc), + /* 1010 */ SyscallDesc("unused#1010", unimplementedFunc), + /* 1011 */ SyscallDesc("unused#1011", unimplementedFunc), + /* 1012 */ SyscallDesc("unused#1012", unimplementedFunc), + /* 1013 */ SyscallDesc("unused#1013", unimplementedFunc), + /* 1014 */ SyscallDesc("unused#1014", unimplementedFunc), + /* 1015 */ SyscallDesc("unused#1015", unimplementedFunc), + /* 1016 */ SyscallDesc("unused#1016", unimplementedFunc), + /* 1017 */ SyscallDesc("unused#1017", unimplementedFunc), + /* 1018 */ SyscallDesc("unused#1018", unimplementedFunc), + /* 1019 */ SyscallDesc("unused#1019", unimplementedFunc), + /* 1020 */ SyscallDesc("unused#1020", unimplementedFunc), + /* 1021 */ SyscallDesc("unused#1021", unimplementedFunc), + /* 1022 */ SyscallDesc("unused#1022", unimplementedFunc), + /* 1023 */ SyscallDesc("unused#1023", unimplementedFunc), + /* 1024 */ SyscallDesc("open", openFunc<ArmLinux64>), + /* 1025 */ SyscallDesc("link", unimplementedFunc), + /* 1026 */ SyscallDesc("unlink", unlinkFunc), + /* 1027 */ SyscallDesc("mknod", unimplementedFunc), + /* 1028 */ SyscallDesc("chmod", chmodFunc<ArmLinux64>), + /* 1029 */ SyscallDesc("chown", unimplementedFunc), + /* 1030 */ SyscallDesc("mkdir", mkdirFunc), + /* 1031 */ SyscallDesc("rmdir", unimplementedFunc), + /* 1032 */ SyscallDesc("lchown", unimplementedFunc), + /* 1033 */ SyscallDesc("access", unimplementedFunc), + /* 1034 */ SyscallDesc("rename", renameFunc), + /* 1035 */ SyscallDesc("readlink", readlinkFunc), + /* 1036 */ SyscallDesc("symlink", unimplementedFunc), + /* 1037 */ SyscallDesc("utimes", unimplementedFunc), + /* 1038 */ SyscallDesc("stat64", stat64Func<ArmLinux64>), + /* 1039 */ SyscallDesc("lstat64", lstat64Func<ArmLinux64>), + /* 1040 */ SyscallDesc("pipe", pipePseudoFunc), + /* 1041 */ SyscallDesc("dup2", unimplementedFunc), + /* 1042 */ SyscallDesc("epoll_create", unimplementedFunc), + /* 1043 */ SyscallDesc("inotify_init", unimplementedFunc), + /* 1044 */ SyscallDesc("eventfd", unimplementedFunc), + /* 1045 */ SyscallDesc("signalfd", unimplementedFunc), + /* 1046 */ SyscallDesc("sendfile", unimplementedFunc), + /* 1047 */ SyscallDesc("ftruncate", ftruncateFunc), + /* 1048 */ SyscallDesc("truncate", truncateFunc), + /* 1049 */ SyscallDesc("stat", statFunc<ArmLinux64>), + /* 1050 */ SyscallDesc("lstat", unimplementedFunc), + /* 1051 */ SyscallDesc("fstat", fstatFunc<ArmLinux64>), + /* 1052 */ SyscallDesc("fcntl", fcntlFunc), + /* 1053 */ SyscallDesc("fadvise64", unimplementedFunc), + /* 1054 */ SyscallDesc("newfstatat", unimplementedFunc), + /* 1055 */ SyscallDesc("fstatfs", unimplementedFunc), + /* 1056 */ SyscallDesc("statfs", unimplementedFunc), + /* 1057 */ SyscallDesc("lseek", lseekFunc), + /* 1058 */ SyscallDesc("mmap", mmapFunc<ArmLinux64>), + /* 1059 */ SyscallDesc("alarm", unimplementedFunc), + /* 1060 */ SyscallDesc("getpgrp", unimplementedFunc), + /* 1061 */ SyscallDesc("pause", unimplementedFunc), + /* 1062 */ SyscallDesc("time", timeFunc<ArmLinux64>), + /* 1063 */ SyscallDesc("utime", unimplementedFunc), + /* 1064 */ SyscallDesc("creat", unimplementedFunc), + /* 1065 */ SyscallDesc("getdents", unimplementedFunc), + /* 1066 */ SyscallDesc("futimesat", unimplementedFunc), + /* 1067 */ SyscallDesc("select", unimplementedFunc), + /* 1068 */ SyscallDesc("poll", unimplementedFunc), + /* 1069 */ SyscallDesc("epoll_wait", unimplementedFunc), + /* 1070 */ SyscallDesc("ustat", unimplementedFunc), + /* 1071 */ SyscallDesc("vfork", unimplementedFunc), + /* 1072 */ SyscallDesc("oldwait4", unimplementedFunc), + /* 1073 */ SyscallDesc("recv", unimplementedFunc), + /* 1074 */ SyscallDesc("send", unimplementedFunc), + /* 1075 */ SyscallDesc("bdflush", unimplementedFunc), + /* 1076 */ SyscallDesc("umount", unimplementedFunc), + /* 1077 */ SyscallDesc("uselib", unimplementedFunc), + /* 1078 */ SyscallDesc("_sysctl", unimplementedFunc), + /* 1079 */ SyscallDesc("fork", unimplementedFunc) +}; -SyscallDesc ArmLinuxProcess::privSyscallDescs[] = { +static SyscallDesc privSyscallDescs32[] = { /* 1 */ SyscallDesc("breakpoint", unimplementedFunc), /* 2 */ SyscallDesc("cacheflush", unimplementedFunc), /* 3 */ SyscallDesc("usr26", unimplementedFunc), /* 4 */ SyscallDesc("usr32", unimplementedFunc), - /* 5 */ SyscallDesc("set_tls", setTLSFunc) + /* 5 */ SyscallDesc("set_tls", setTLSFunc32) }; -ArmLinuxProcess::ArmLinuxProcess(LiveProcessParams * params, +// Indices 1, 3 and 4 are unallocated. +static SyscallDesc privSyscallDescs64[] = { + /* 1 */ SyscallDesc("unallocated", unimplementedFunc), + /* 2 */ SyscallDesc("cacheflush", unimplementedFunc), + /* 3 */ SyscallDesc("unallocated", unimplementedFunc), + /* 4 */ SyscallDesc("unallocated", unimplementedFunc), + /* 5 */ SyscallDesc("set_tls", setTLSFunc64) +}; + +ArmLinuxProcess32::ArmLinuxProcess32(LiveProcessParams * params, ObjectFile *objFile, ObjectFile::Arch _arch) - : ArmLiveProcess(params, objFile, _arch), - Num_Syscall_Descs(sizeof(syscallDescs) / sizeof(SyscallDesc)), - Num_Priv_Syscall_Descs(sizeof(privSyscallDescs) / sizeof(SyscallDesc)) -{ } + : ArmLiveProcess32(params, objFile, _arch) +{ + SyscallTable table; + + table.descs = syscallDescs32; + table.size = sizeof(syscallDescs32) / sizeof(SyscallDesc); + table.base = 0; + syscallTables.push_back(table); + table.base = 0x900000; + syscallTables.push_back(table); -const Addr ArmLinuxProcess::commPage = 0xffff0000; + table.descs = privSyscallDescs32; + table.size = sizeof(privSyscallDescs32) / sizeof(SyscallDesc); + table.base = 0xf0001; + syscallTables.push_back(table); +} + +ArmLinuxProcess64::ArmLinuxProcess64(LiveProcessParams * params, + ObjectFile *objFile, ObjectFile::Arch _arch) + : ArmLiveProcess64(params, objFile, _arch) +{ + SyscallTable table; + + table.descs = syscallDescs64; + table.size = sizeof(syscallDescs64) / sizeof(SyscallDesc); + table.base = 0; + syscallTables.push_back(table); + table.base = 0x900000; + syscallTables.push_back(table); + + table.descs = privSyscallDescs64; + table.size = sizeof(privSyscallDescs64) / sizeof(SyscallDesc); + table.base = 0x1001; + syscallTables.push_back(table); +} + +const Addr ArmLinuxProcess32::commPage = 0xffff0000; SyscallDesc* -ArmLinuxProcess::getDesc(int callnum) +ArmLinuxProcessBits::getLinuxDesc(int callnum) { // Angel SWI syscalls are unsupported in this release - if (callnum == 0x123456) { + if (callnum == 0x123456) panic("Attempt to execute an ANGEL_SWI system call (newlib-related)"); - } else if ((callnum & 0x00f00000) == 0x00900000 || - (callnum & 0xf0000) == 0xf0000) { - callnum &= 0x000fffff; - if ((callnum & 0x0f0000) == 0xf0000) { - callnum -= 0x0f0001; - if (callnum < 0 || callnum > Num_Priv_Syscall_Descs) - return NULL; - return &privSyscallDescs[callnum]; - } + for (unsigned i = 0; i < syscallTables.size(); i++) { + SyscallDesc *desc = syscallTables[i].getDesc(callnum); + if (desc) + return desc; } - // Linux syscalls have to strip off the 0x00900000 + return NULL; +} - if (callnum < 0 || callnum > Num_Syscall_Descs) +SyscallDesc * +ArmLinuxProcessBits::SyscallTable::getDesc(int callnum) const +{ + int offset = callnum - base; + if (offset < 0 || offset >= size) return NULL; + return &descs[offset]; +} + +SyscallDesc* +ArmLinuxProcess32::getDesc(int callnum) +{ + return getLinuxDesc(callnum); +} - return &syscallDescs[callnum]; +SyscallDesc* +ArmLinuxProcess64::getDesc(int callnum) +{ + return getLinuxDesc(callnum); } void -ArmLinuxProcess::initState() +ArmLinuxProcess32::initState() { - ArmLiveProcess::initState(); + ArmLiveProcess32::initState(); allocateMem(commPage, PageBytes); ThreadContext *tc = system->getThreadContext(contextIds[0]); @@ -546,20 +1709,9 @@ ArmLinuxProcess::initState() tc->getMemProxy().writeBlob(commPage + 0x0fe0, get_tls, sizeof(get_tls)); } -ArmISA::IntReg -ArmLinuxProcess::getSyscallArg(ThreadContext *tc, int &i) -{ - // Linux apparently allows more parameter than the ABI says it should. - // This limit may need to be increased even further. - assert(i < 6); - return tc->readIntReg(ArgumentReg0 + i++); -} - void -ArmLinuxProcess::setSyscallArg(ThreadContext *tc, int i, ArmISA::IntReg val) +ArmLinuxProcess64::initState() { - // Linux apparently allows more parameter than the ABI says it should. - // This limit may need to be increased even further. - assert(i < 6); - tc->setIntReg(ArgumentReg0 + i, val); + ArmLiveProcess64::initState(); + // The 64 bit equivalent of the comm page would be set up here. } diff --git a/src/arch/arm/linux/process.hh b/src/arch/arm/linux/process.hh index 7d3a943ed..670739438 100644 --- a/src/arch/arm/linux/process.hh +++ b/src/arch/arm/linux/process.hh @@ -1,4 +1,16 @@ /* +* Copyright (c) 2011-2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2007-2008 The Florida State University * All rights reserved. * @@ -31,39 +43,54 @@ #ifndef __ARM_LINUX_PROCESS_HH__ #define __ARM_LINUX_PROCESS_HH__ +#include <vector> + #include "arch/arm/process.hh" +class ArmLinuxProcessBits +{ + protected: + SyscallDesc* getLinuxDesc(int callnum); + + struct SyscallTable + { + int base; + SyscallDesc *descs; + int size; + + SyscallDesc *getDesc(int offset) const; + }; + + std::vector<SyscallTable> syscallTables; +}; + /// A process with emulated Arm/Linux syscalls. -class ArmLinuxProcess : public ArmLiveProcess +class ArmLinuxProcess32 : public ArmLiveProcess32, public ArmLinuxProcessBits { public: - ArmLinuxProcess(LiveProcessParams * params, ObjectFile *objFile, - ObjectFile::Arch _arch); - - virtual SyscallDesc* getDesc(int callnum); + ArmLinuxProcess32(LiveProcessParams * params, ObjectFile *objFile, + ObjectFile::Arch _arch); void initState(); - ArmISA::IntReg getSyscallArg(ThreadContext *tc, int &i); /// Explicitly import the otherwise hidden getSyscallArg using ArmLiveProcess::getSyscallArg; - void setSyscallArg(ThreadContext *tc, int i, ArmISA::IntReg val); - - /// The target system's hostname. - static const char *hostname; /// A page to hold "kernel" provided functions. The name might be wrong. static const Addr commPage; - /// Array of syscall descriptors, indexed by call number. - static SyscallDesc syscallDescs[]; - - /// Array of "arm private" syscall descriptors. - static SyscallDesc privSyscallDescs[]; + SyscallDesc* getDesc(int callnum); +}; - const int Num_Syscall_Descs; +/// A process with emulated Arm/Linux syscalls. +class ArmLinuxProcess64 : public ArmLiveProcess64, public ArmLinuxProcessBits +{ + public: + ArmLinuxProcess64(LiveProcessParams * params, ObjectFile *objFile, + ObjectFile::Arch _arch); - const int Num_Priv_Syscall_Descs; + void initState(); + SyscallDesc* getDesc(int callnum); }; #endif // __ARM_LINUX_PROCESS_HH__ diff --git a/src/arch/arm/linux/system.cc b/src/arch/arm/linux/system.cc index bc7fd2cb6..216a65899 100644 --- a/src/arch/arm/linux/system.cc +++ b/src/arch/arm/linux/system.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -63,7 +63,8 @@ using namespace Linux; LinuxArmSystem::LinuxArmSystem(Params *p) : ArmSystem(p), enableContextSwitchStatsDump(p->enable_context_switch_stats_dump), - kernelPanicEvent(NULL), kernelOopsEvent(NULL) + kernelPanicEvent(NULL), kernelOopsEvent(NULL), + bootReleaseAddr(p->boot_release_addr) { if (p->panic_on_panic) { kernelPanicEvent = addKernelFuncEventOrPanic<PanicPCEvent>( @@ -98,22 +99,30 @@ LinuxArmSystem::LinuxArmSystem(Params *p) secDataPtrAddr = 0; secDataAddr = 0; penReleaseAddr = 0; + kernelSymtab->findAddress("__secondary_data", secDataPtrAddr); kernelSymtab->findAddress("secondary_data", secDataAddr); kernelSymtab->findAddress("pen_release", penReleaseAddr); + kernelSymtab->findAddress("secondary_holding_pen_release", pen64ReleaseAddr); secDataPtrAddr &= ~ULL(0x7F); secDataAddr &= ~ULL(0x7F); penReleaseAddr &= ~ULL(0x7F); + pen64ReleaseAddr &= ~ULL(0x7F); + bootReleaseAddr = (bootReleaseAddr & ~ULL(0x7F)) + loadAddrOffset; + } bool LinuxArmSystem::adderBootUncacheable(Addr a) { Addr block = a & ~ULL(0x7F); + if (block == secDataPtrAddr || block == secDataAddr || - block == penReleaseAddr) + block == penReleaseAddr || pen64ReleaseAddr == block || + block == bootReleaseAddr) return true; + return false; } @@ -145,7 +154,8 @@ LinuxArmSystem::initState() if (kernel_has_fdt_support && dtb_file_specified) { // Kernel supports flattened device tree and dtb file specified. // Using Device Tree Blob to describe system configuration. - inform("Loading DTB file: %s\n", params()->dtb_filename); + inform("Loading DTB file: %s at address %#x\n", params()->dtb_filename, + params()->atags_addr + loadAddrOffset); ObjectFile *dtb_file = createObjectFile(params()->dtb_filename, true); if (!dtb_file) { @@ -165,7 +175,7 @@ LinuxArmSystem::initState() "to DTB file: %s\n", params()->dtb_filename); } - dtb_file->setTextBase(params()->atags_addr); + dtb_file->setTextBase(params()->atags_addr + loadAddrOffset); dtb_file->loadSections(physProxy); delete dtb_file; } else { @@ -215,15 +225,17 @@ LinuxArmSystem::initState() DPRINTF(Loader, "Boot atags was %d bytes in total\n", size << 2); DDUMP(Loader, boot_data, size << 2); - physProxy.writeBlob(params()->atags_addr, boot_data, size << 2); + physProxy.writeBlob(params()->atags_addr + loadAddrOffset, boot_data, + size << 2); delete[] boot_data; } + // Kernel boot requirements to set up r0, r1 and r2 in ARMv7 for (int i = 0; i < threadContexts.size(); i++) { threadContexts[i]->setIntReg(0, 0); threadContexts[i]->setIntReg(1, params()->machine_type); - threadContexts[i]->setIntReg(2, params()->atags_addr); + threadContexts[i]->setIntReg(2, params()->atags_addr + loadAddrOffset); } } diff --git a/src/arch/arm/linux/system.hh b/src/arch/arm/linux/system.hh index 008c64429..4ce6ac49e 100644 --- a/src/arch/arm/linux/system.hh +++ b/src/arch/arm/linux/system.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -126,6 +126,8 @@ class LinuxArmSystem : public ArmSystem Addr secDataPtrAddr; Addr secDataAddr; Addr penReleaseAddr; + Addr pen64ReleaseAddr; + Addr bootReleaseAddr; }; class DumpStatsPCEvent : public PCEvent diff --git a/src/arch/arm/locked_mem.hh b/src/arch/arm/locked_mem.hh index f2601f00c..24c78e721 100644 --- a/src/arch/arm/locked_mem.hh +++ b/src/arch/arm/locked_mem.hh @@ -53,6 +53,8 @@ */ #include "arch/arm/miscregs.hh" +#include "arch/arm/isa_traits.hh" +#include "debug/LLSC.hh" #include "mem/packet.hh" #include "mem/request.hh" @@ -62,31 +64,48 @@ template <class XC> inline void handleLockedSnoop(XC *xc, PacketPtr pkt, Addr cacheBlockMask) { + DPRINTF(LLSC,"%s: handleing snoop for address: %#x locked: %d\n", + xc->getCpuPtr()->name(),pkt->getAddr(), + xc->readMiscReg(MISCREG_LOCKFLAG)); if (!xc->readMiscReg(MISCREG_LOCKFLAG)) return; Addr locked_addr = xc->readMiscReg(MISCREG_LOCKADDR) & cacheBlockMask; + // If no caches are attached, the snoop address always needs to be masked Addr snoop_addr = pkt->getAddr() & cacheBlockMask; - if (locked_addr == snoop_addr) + DPRINTF(LLSC,"%s: handleing snoop for address: %#x locked addr: %#x\n", + xc->getCpuPtr()->name(),snoop_addr, locked_addr); + if (locked_addr == snoop_addr) { + DPRINTF(LLSC,"%s: address match, clearing lock and signaling sev\n", + xc->getCpuPtr()->name()); xc->setMiscReg(MISCREG_LOCKFLAG, false); + // Implement ARMv8 WFE/SEV semantics + xc->setMiscReg(MISCREG_SEV_MAILBOX, true); + xc->getCpuPtr()->wakeup(); + } } template <class XC> inline void -handleLockedSnoopHit(XC *xc) +handleLockedRead(XC *xc, Request *req) { + xc->setMiscReg(MISCREG_LOCKADDR, req->getPaddr()); + xc->setMiscReg(MISCREG_LOCKFLAG, true); + DPRINTF(LLSC,"%s: Placing address %#x in monitor\n", xc->getCpuPtr()->name(), + req->getPaddr()); } template <class XC> inline void -handleLockedRead(XC *xc, Request *req) +handleLockedSnoopHit(XC *xc) { - xc->setMiscReg(MISCREG_LOCKADDR, req->getPaddr()); - xc->setMiscReg(MISCREG_LOCKFLAG, true); + DPRINTF(LLSC,"%s: handling snoop lock hit address: %#x\n", + xc->getCpuPtr()->name(), xc->readMiscReg(MISCREG_LOCKADDR)); + xc->setMiscReg(MISCREG_LOCKFLAG, false); + xc->setMiscReg(MISCREG_SEV_MAILBOX, true); } - template <class XC> inline bool handleLockedWrite(XC *xc, Request *req, Addr cacheBlockMask) @@ -94,6 +113,8 @@ handleLockedWrite(XC *xc, Request *req, Addr cacheBlockMask) if (req->isSwap()) return true; + DPRINTF(LLSC,"%s: handling locked write for address %#x in monitor\n", + xc->getCpuPtr()->name(), req->getPaddr()); // Verify that the lock flag is still set and the address // is correct bool lock_flag = xc->readMiscReg(MISCREG_LOCKFLAG); @@ -103,6 +124,8 @@ handleLockedWrite(XC *xc, Request *req, Addr cacheBlockMask) // don't even bother sending to memory system req->setExtraData(0); xc->setMiscReg(MISCREG_LOCKFLAG, false); + DPRINTF(LLSC,"%s: clearing lock flag in handle locked write\n", + xc->getCpuPtr()->name()); // the rest of this code is not architectural; // it's just a debugging aid to help detect // livelock by warning on long sequences of failed diff --git a/src/arch/arm/miscregs.cc b/src/arch/arm/miscregs.cc index 3a64b557a..6fa304938 100644 --- a/src/arch/arm/miscregs.cc +++ b/src/arch/arm/miscregs.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -36,11 +36,13 @@ * * Authors: Gabe Black * Ali Saidi + * Giacomo Gabrielli */ #include "arch/arm/isa.hh" #include "arch/arm/miscregs.hh" #include "base/misc.hh" +#include "cpu/thread_context.hh" namespace ArmISA { @@ -50,23 +52,31 @@ decodeCP14Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) { switch(crn) { case 0: - switch (opc2) { + switch (opc1) { case 0: - switch (crm) { + switch (opc2) { case 0: - return MISCREG_DBGDIDR; - case 1: - return MISCREG_DBGDSCR_INT; - default: - warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", - crn, opc1, crm, opc2); - return NUM_MISCREGS; - } - default: - warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", - crn, opc1, crm, opc2); - return NUM_MISCREGS; + switch (crm) { + case 0: + return MISCREG_DBGDIDR; + case 1: + return MISCREG_DBGDSCRint; + } + break; + } + break; + case 7: + switch (opc2) { + case 0: + switch (crm) { + case 0: + return MISCREG_JIDR; + } + break; + } + break; } + break; case 1: switch (opc1) { case 6: @@ -75,29 +85,1270 @@ decodeCP14Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) switch (opc2) { case 0: return MISCREG_TEEHBR; - default: - warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", - crn, opc1, crm, opc2); - return NUM_MISCREGS; } - default: - warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", - crn, opc1, crm, opc2); - return NUM_MISCREGS; - } - default: - warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", - crn, opc1, crm, opc2); - return NUM_MISCREGS; + break; + } + break; + case 7: + switch (crm) { + case 0: + switch (opc2) { + case 0: + return MISCREG_JOSCR; + } + break; + } + break; } - default: - warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", - crn, opc1, crm, opc2); - return NUM_MISCREGS; + break; + case 2: + switch (opc1) { + case 7: + switch (crm) { + case 0: + switch (opc2) { + case 0: + return MISCREG_JMCR; + } + break; + } + break; + } + break; } - + // If we get here then it must be a register that we haven't implemented + warn("CP14 unimplemented crn[%d], opc1[%d], crm[%d], opc2[%d]", + crn, opc1, crm, opc2); + return MISCREG_CP14_UNIMPL; } +using namespace std; + +bitset<NUM_MISCREG_INFOS> miscRegInfo[NUM_MISCREGS] = { + // MISCREG_CPSR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_FIQ + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_IRQ + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_SVC + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_MON + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_ABT + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_HYP + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_UND + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_ELR_HYP + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPSID + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPSCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MVFR1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MVFR0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPEXC + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + + // Helper registers + // MISCREG_CPSR_MODE + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CPSR_Q + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPSCR_Q + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPSCR_EXC + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_LOCKADDR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_LOCKFLAG + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PRRR_MAIR0 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001101")), + // MISCREG_PRRR_MAIR0_NS + bitset<NUM_MISCREG_INFOS>(string("0000000000000010101")), + // MISCREG_PRRR_MAIR0_S + bitset<NUM_MISCREG_INFOS>(string("0000000000000010101")), + // MISCREG_NMRR_MAIR1 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001101")), + // MISCREG_NMRR_MAIR1_NS + bitset<NUM_MISCREG_INFOS>(string("0000000000000010101")), + // MISCREG_NMRR_MAIR1_S + bitset<NUM_MISCREG_INFOS>(string("0000000000000010101")), + // MISCREG_PMXEVTYPER_PMCCFILTR + bitset<NUM_MISCREG_INFOS>(string("0000000000000000101")), + // MISCREG_SCTLR_RST + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SEV_MAILBOX + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + + // AArch32 CP14 registers + // MISCREG_DBGDIDR + bitset<NUM_MISCREG_INFOS>(string("0101111111111100001")), + // MISCREG_DBGDSCRint + bitset<NUM_MISCREG_INFOS>(string("0101111111111100001")), + // MISCREG_DBGDCCINT + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGDTRTXint + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGDTRRXint + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWFAR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGVCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGDTRRXext + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGDSCRext + bitset<NUM_MISCREG_INFOS>(string("1111111111111100010")), + // MISCREG_DBGDTRTXext + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGOSECCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBVR0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBVR1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBVR2 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBVR3 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBVR4 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBVR5 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBCR0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBCR1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBCR2 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBCR3 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBCR4 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBCR5 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWVR0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWVR1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWVR2 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWVR3 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWCR0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWCR1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWCR2 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGWCR3 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGDRAR + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_DBGBXVR4 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGBXVR5 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGOSLAR + bitset<NUM_MISCREG_INFOS>(string("1010111111111100000")), + // MISCREG_DBGOSLSR + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_DBGOSDLR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGPRCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGDSAR + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_DBGCLAIMSET + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGCLAIMCLR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_DBGAUTHSTATUS + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_DBGDEVID2 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_DBGDEVID1 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_DBGDEVID0 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100000")), + // MISCREG_TEECR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_JIDR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_TEEHBR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_JOSCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_JMCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + + // AArch32 CP15 registers + // MISCREG_MIDR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CTR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_TCMTR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_TLBTR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_MPIDR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_REVIDR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000010")), + // MISCREG_ID_PFR0 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_PFR1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_DFR0 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AFR0 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR0 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR2 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR3 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR0 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR2 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR3 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR4 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR5 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CCSIDR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CLIDR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_AIDR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CSSELR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_CSSELR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_CSSELR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_VPIDR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_VMPIDR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_SCTLR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_SCTLR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_SCTLR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_ACTLR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_ACTLR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_ACTLR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_CPACR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_SCR + bitset<NUM_MISCREG_INFOS>(string("1111001100000000001")), + // MISCREG_SDER + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_NSACR + bitset<NUM_MISCREG_INFOS>(string("1111011101000000001")), + // MISCREG_HSCTLR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HACTLR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HCR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HDCR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HCPTR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HSTR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HACR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000010")), + // MISCREG_TTBR0 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_TTBR0_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_TTBR0_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_TTBR1 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_TTBR1_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_TTBR1_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_TTBCR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_TTBCR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_TTBCR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_HTCR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_VTCR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_DACR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_DACR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_DACR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_DFSR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_DFSR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_DFSR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_IFSR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_IFSR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_IFSR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_ADFSR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001010")), + // MISCREG_ADFSR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010010")), + // MISCREG_ADFSR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010010")), + // MISCREG_AIFSR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001010")), + // MISCREG_AIFSR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010010")), + // MISCREG_AIFSR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010010")), + // MISCREG_HADFSR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HAIFSR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HSR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_DFAR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_DFAR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_DFAR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_IFAR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_IFAR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_IFAR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_HDFAR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HIFAR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HPFAR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_ICIALLUIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_BPIALLIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_PAR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_PAR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_PAR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_ICIALLU + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ICIMVAU + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_CP15ISB + bitset<NUM_MISCREG_INFOS>(string("1010101010101000001")), + // MISCREG_BPIALL + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_BPIMVA + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_DCIMVAC + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_DCISW + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_ATS1CPR + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ATS1CPW + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ATS1CUR + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ATS1CUW + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ATS12NSOPR + bitset<NUM_MISCREG_INFOS>(string("1010101000000000001")), + // MISCREG_ATS12NSOPW + bitset<NUM_MISCREG_INFOS>(string("1010101000000000001")), + // MISCREG_ATS12NSOUR + bitset<NUM_MISCREG_INFOS>(string("1010101000000000001")), + // MISCREG_ATS12NSOUW + bitset<NUM_MISCREG_INFOS>(string("1010101000000000001")), + // MISCREG_DCCMVAC + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_DCCSW + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_CP15DSB + bitset<NUM_MISCREG_INFOS>(string("1010101010101000001")), + // MISCREG_CP15DMB + bitset<NUM_MISCREG_INFOS>(string("1010101010101000001")), + // MISCREG_DCCMVAU + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_DCCIMVAC + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_DCCISW + bitset<NUM_MISCREG_INFOS>(string("1010101010000000010")), + // MISCREG_ATS1HR + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_ATS1HW + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIALLIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIMVAIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIASIDIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIMVAAIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIMVALIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000000")), + // MISCREG_TLBIMVAALIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000000")), + // MISCREG_ITLBIALL + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ITLBIMVA + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_ITLBIASID + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_DTLBIALL + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_DTLBIMVA + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_DTLBIASID + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIALL + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIMVA + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIASID + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIMVAA + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBIMVAL + bitset<NUM_MISCREG_INFOS>(string("1010101010000000000")), + // MISCREG_TLBIMVAAL + bitset<NUM_MISCREG_INFOS>(string("1010101010000000000")), + // MISCREG_TLBIIPAS2IS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000000")), + // MISCREG_TLBIIPAS2LIS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000000")), + // MISCREG_TLBIALLHIS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIMVAHIS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIALLNSNHIS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIMVALHIS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000000")), + // MISCREG_TLBIIPAS2 + bitset<NUM_MISCREG_INFOS>(string("1000100000000000000")), + // MISCREG_TLBIIPAS2L + bitset<NUM_MISCREG_INFOS>(string("1000100000000000000")), + // MISCREG_TLBIALLH + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIMVAH + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIALLNSNH + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBIMVALH + bitset<NUM_MISCREG_INFOS>(string("1000100000000000000")), + // MISCREG_PMCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCNTENSET + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCNTENCLR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMOVSR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMSWINC + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMSELR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCEID0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCEID1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCCNTR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMXEVTYPER + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCCFILTR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMXEVCNTR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMUSERENR + bitset<NUM_MISCREG_INFOS>(string("1111111111010100001")), + // MISCREG_PMINTENSET + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_PMINTENCLR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_PMOVSSET + bitset<NUM_MISCREG_INFOS>(string("1111111111111100000")), + // MISCREG_L2CTLR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_L2ECTLR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_PRRR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_PRRR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_PRRR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_MAIR0 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_MAIR0_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_MAIR0_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_NMRR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_NMRR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_NMRR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_MAIR1 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_MAIR1_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_MAIR1_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_AMAIR0 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_AMAIR0_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_AMAIR0_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_AMAIR1 + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_AMAIR1_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_AMAIR1_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_HMAIR0 + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HMAIR1 + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_HAMAIR0 + bitset<NUM_MISCREG_INFOS>(string("1100110000000000010")), + // MISCREG_HAMAIR1 + bitset<NUM_MISCREG_INFOS>(string("1100110000000000010")), + // MISCREG_VBAR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_VBAR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_VBAR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_MVBAR + bitset<NUM_MISCREG_INFOS>(string("1111001100000000001")), + // MISCREG_RMR + bitset<NUM_MISCREG_INFOS>(string("1111001100000000000")), + // MISCREG_ISR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_HVBAR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_FCSEIDR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000010")), + // MISCREG_CONTEXTIDR + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_CONTEXTIDR_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_CONTEXTIDR_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_TPIDRURW + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_TPIDRURW_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011111110001")), + // MISCREG_TPIDRURW_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_TPIDRURO + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_TPIDRURO_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011010110001")), + // MISCREG_TPIDRURO_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_TPIDRPRW + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_TPIDRPRW_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011000010001")), + // MISCREG_TPIDRPRW_S + bitset<NUM_MISCREG_INFOS>(string("0011001100000010001")), + // MISCREG_HTPIDR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_CNTFRQ + bitset<NUM_MISCREG_INFOS>(string("1111010101010100001")), + // MISCREG_CNTKCTL + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CNTP_TVAL + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_CNTP_TVAL_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011111110001")), + // MISCREG_CNTP_TVAL_S + bitset<NUM_MISCREG_INFOS>(string("0011001100111110000")), + // MISCREG_CNTP_CTL + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_CNTP_CTL_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011111110001")), + // MISCREG_CNTP_CTL_S + bitset<NUM_MISCREG_INFOS>(string("0011001100111110000")), + // MISCREG_CNTV_TVAL + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTV_CTL + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTHCTL + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_CNTHP_TVAL + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_CNTHP_CTL + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_IL1DATA0 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_IL1DATA1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_IL1DATA2 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_IL1DATA3 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_DL1DATA0 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_DL1DATA1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_DL1DATA2 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_DL1DATA3 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_DL1DATA4 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_RAMINDEX + bitset<NUM_MISCREG_INFOS>(string("1010101010000000000")), + // MISCREG_L2ACTLR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_CBAR + bitset<NUM_MISCREG_INFOS>(string("0101010101000000000")), + // MISCREG_HTTBR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_VTTBR + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_CNTPCT + bitset<NUM_MISCREG_INFOS>(string("0101010101010100001")), + // MISCREG_CNTVCT + bitset<NUM_MISCREG_INFOS>(string("0101010101010100001")), + // MISCREG_CNTP_CVAL + bitset<NUM_MISCREG_INFOS>(string("0000000000000001001")), + // MISCREG_CNTP_CVAL_NS + bitset<NUM_MISCREG_INFOS>(string("1100110011111110000")), + // MISCREG_CNTP_CVAL_S + bitset<NUM_MISCREG_INFOS>(string("0011001100111110000")), + // MISCREG_CNTV_CVAL + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTVOFF + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_CNTHP_CVAL + bitset<NUM_MISCREG_INFOS>(string("1100110000000000001")), + // MISCREG_CPUMERRSR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + // MISCREG_L2MERRSR + bitset<NUM_MISCREG_INFOS>(string("1111111111000000000")), + + // AArch64 registers (Op0=2) + // MISCREG_MDCCINT_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_OSDTRRX_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MDSCR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_OSDTRTX_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_OSECCR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBVR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBVR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBVR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBVR3_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBVR4_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBVR5_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBCR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBCR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBCR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBCR3_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBCR4_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGBCR5_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWVR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWVR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWVR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWVR3_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWCR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWCR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWCR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGWCR3_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MDCCSR_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100001")), + // MISCREG_MDDTR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MDDTRTX_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MDDTRRX_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGVCR32_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MDRAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100001")), + // MISCREG_OSLAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1010111111111100001")), + // MISCREG_OSLSR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100001")), + // MISCREG_OSDLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGPRCR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGCLAIMSET_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGCLAIMCLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DBGAUTHSTATUS_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101111111111100001")), + // MISCREG_TEECR32_EL1 + bitset<NUM_MISCREG_INFOS>(string("0000000000000000001")), + // MISCREG_TEEHBR32_EL1 + bitset<NUM_MISCREG_INFOS>(string("0000000000000000001")), + + // AArch64 registers (Op0=1,3) + // MISCREG_MIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_MPIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_REVIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_PFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_PFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_DFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_MMFR3_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR3_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR4_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_ISAR5_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_MVFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_MVFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_MVFR2_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64PFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64PFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64DFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64DFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64AFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64AFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64ISAR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64ISAR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64MMFR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ID_AA64MMFR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CCSIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CLIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_AIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CSSELR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CTR_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101010101010100001")), + // MISCREG_DCZID_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101010101010100001")), + // MISCREG_VPIDR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_VMPIDR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SCTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_ACTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CPACR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_SCTLR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_ACTLR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_HCR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_MDCR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_CPTR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_HSTR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_HACR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SCTLR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_ACTLR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_SCR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_SDER32_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_CPTR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_MDCR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_TTBR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_TTBR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_TCR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_TTBR0_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_TCR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_VTTBR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_VTCR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_TTBR0_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_TCR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_DACR32_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SPSR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_ELR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_SP_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_SPSEL + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CURRENTEL + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_NZCV + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DAIF + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPCR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_FPSR + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DSPSR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_DLR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_SPSR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_ELR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SP_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SPSR_IRQ_AA64 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SPSR_ABT_AA64 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SPSR_UND_AA64 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SPSR_FIQ_AA64 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_SPSR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_ELR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_SP_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_AFSR0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_AFSR1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_ESR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_IFSR32_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_AFSR0_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_AFSR1_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_ESR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_FPEXC32_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_AFSR0_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_AFSR1_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_ESR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_FAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_FAR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_HPFAR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_FAR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_IC_IALLUIS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000011")), + // MISCREG_PAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_IC_IALLU + bitset<NUM_MISCREG_INFOS>(string("1010101010000000011")), + // MISCREG_DC_IVAC_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000011")), + // MISCREG_DC_ISW_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000011")), + // MISCREG_AT_S1E1R_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_AT_S1E1W_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_AT_S1E0R_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_AT_S1E0W_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_DC_CSW_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000011")), + // MISCREG_DC_CISW_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000011")), + // MISCREG_DC_ZVA_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010001000011")), + // MISCREG_IC_IVAU_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010101000001")), + // MISCREG_DC_CVAC_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010101000011")), + // MISCREG_DC_CVAU_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010101000011")), + // MISCREG_DC_CIVAC_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010101000011")), + // MISCREG_AT_S1E2R_Xt + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_AT_S1E2W_Xt + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_AT_S12E1R_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_AT_S12E1W_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_AT_S12E0R_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_AT_S12E0W_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_AT_S1E3R_Xt + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_AT_S1E3W_Xt + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_TLBI_VMALLE1IS + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VAE1IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_ASIDE1IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VAAE1IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VALE1IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VAALE1IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VMALLE1 + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VAE1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_ASIDE1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VAAE1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VALE1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_VAALE1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010101010000000001")), + // MISCREG_TLBI_IPAS2E1IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_IPAS2LE1IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_ALLE2IS + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBI_VAE2IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBI_ALLE1IS + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_VALE2IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBI_VMALLS12E1IS + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_IPAS2E1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_IPAS2LE1_Xt + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_ALLE2 + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBI_VAE2_Xt + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBI_ALLE1 + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_VALE2_Xt + bitset<NUM_MISCREG_INFOS>(string("1000100000000000001")), + // MISCREG_TLBI_VMALLS12E1 + bitset<NUM_MISCREG_INFOS>(string("1010100000000000001")), + // MISCREG_TLBI_ALLE3IS + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_TLBI_VAE3IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_TLBI_VALE3IS_Xt + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_TLBI_ALLE3 + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_TLBI_VAE3_Xt + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_TLBI_VALE3_Xt + bitset<NUM_MISCREG_INFOS>(string("1010000000000000001")), + // MISCREG_PMINTENSET_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_PMINTENCLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_PMCR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCNTENSET_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCNTENCLR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMOVSCLR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMSWINC_EL0 + bitset<NUM_MISCREG_INFOS>(string("1010101010111100001")), + // MISCREG_PMSELR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCEID0_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101010101111100001")), + // MISCREG_PMCEID1_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101010101111100001")), + // MISCREG_PMCCNTR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMXEVTYPER_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMCCFILTR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMXEVCNTR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMUSERENR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111010100001")), + // MISCREG_PMOVSSET_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_MAIR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_AMAIR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_MAIR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_AMAIR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_MAIR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_AMAIR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_L2CTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_L2ECTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_VBAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_RVBAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_ISR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_VBAR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_RVBAR_EL2 + bitset<NUM_MISCREG_INFOS>(string("0101010000000000001")), + // MISCREG_VBAR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_RVBAR_EL3 + bitset<NUM_MISCREG_INFOS>(string("0101000000000000001")), + // MISCREG_RMR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_CONTEXTIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_TPIDR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_TPIDR_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_TPIDRRO_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111010100001")), + // MISCREG_TPIDR_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_TPIDR_EL3 + bitset<NUM_MISCREG_INFOS>(string("1111000000000000001")), + // MISCREG_CNTKCTL_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CNTFRQ_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111010101010100001")), + // MISCREG_CNTPCT_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101010101010100001")), + // MISCREG_CNTVCT_EL0 + bitset<NUM_MISCREG_INFOS>(string("0101010101010100001")), + // MISCREG_CNTP_TVAL_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTP_CTL_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTP_CVAL_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTV_TVAL_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTV_CTL_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTV_CVAL_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVCNTR0_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVCNTR1_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVCNTR2_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVCNTR3_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVCNTR4_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVCNTR5_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVTYPER0_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVTYPER1_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVTYPER2_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVTYPER3_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVTYPER4_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_PMEVTYPER5_EL0 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTVOFF_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_CNTHCTL_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_CNTHP_TVAL_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_CNTHP_CTL_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_CNTHP_CVAL_EL2 + bitset<NUM_MISCREG_INFOS>(string("1111110000000000001")), + // MISCREG_CNTPS_TVAL_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTPS_CTL_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_CNTPS_CVAL_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_IL1DATA0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_IL1DATA1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_IL1DATA2_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_IL1DATA3_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_DL1DATA0_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_DL1DATA1_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_DL1DATA2_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_DL1DATA3_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_DL1DATA4_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_L2ACTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CPUACTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CPUECTLR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CPUMERRSR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_L2MERRSR_EL1 + bitset<NUM_MISCREG_INFOS>(string("1111111111000000001")), + // MISCREG_CBAR_EL1 + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + + // Dummy registers + // MISCREG_NOP + bitset<NUM_MISCREG_INFOS>(string("1111111111111100001")), + // MISCREG_RAZ + bitset<NUM_MISCREG_INFOS>(string("0101010101000000001")), + // MISCREG_CP14_UNIMPL + bitset<NUM_MISCREG_INFOS>(string("0000000000000000010")), + // MISCREG_CP15_UNIMPL + bitset<NUM_MISCREG_INFOS>(string("0000000000000000010")), + // MISCREG_A64_UNIMPL + bitset<NUM_MISCREG_INFOS>(string("0000000000000000010")), + // MISCREG_UNKNOWN + bitset<NUM_MISCREG_INFOS>(string("0000000000000000001")) +}; + MiscRegIndex decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) { @@ -116,6 +1367,8 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_TLBTR; case 5: return MISCREG_MPIDR; + case 6: + return MISCREG_REVIDR; default: return MISCREG_MIDR; } @@ -180,6 +1433,14 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_CSSELR; } break; + case 4: + if (crm == 0) { + if (opc2 == 0) + return MISCREG_VPIDR; + else if (opc2 == 5) + return MISCREG_VMPIDR; + } + break; } break; case 1: @@ -203,6 +1464,26 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_NSACR; } } + } else if (opc1 == 4) { + if (crm == 0) { + if (opc2 == 0) + return MISCREG_HSCTLR; + else if (opc2 == 1) + return MISCREG_HACTLR; + } else if (crm == 1) { + switch (opc2) { + case 0: + return MISCREG_HCR; + case 1: + return MISCREG_HDCR; + case 2: + return MISCREG_HCPTR; + case 3: + return MISCREG_HSTR; + case 7: + return MISCREG_HACR; + } + } } break; case 2: @@ -215,6 +1496,11 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) case 2: return MISCREG_TTBCR; } + } else if (opc1 == 4) { + if (crm == 0 && opc2 == 2) + return MISCREG_HTCR; + else if (crm == 1 && opc2 == 2) + return MISCREG_VTCR; } break; case 3: @@ -237,6 +1523,15 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_AIFSR; } } + } else if (opc1 == 4) { + if (crm == 1) { + if (opc2 == 0) + return MISCREG_HADFSR; + else if (opc2 == 1) + return MISCREG_HAIFSR; + } else if (crm == 2 && opc2 == 0) { + return MISCREG_HSR; + } } break; case 6: @@ -247,6 +1542,15 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) case 2: return MISCREG_IFAR; } + } else if (opc1 == 4 && crm == 0) { + switch (opc2) { + case 0: + return MISCREG_HDFAR; + case 2: + return MISCREG_HIFAR; + case 4: + return MISCREG_HPFAR; + } } break; case 7: @@ -294,21 +1598,21 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) case 8: switch (opc2) { case 0: - return MISCREG_V2PCWPR; + return MISCREG_ATS1CPR; case 1: - return MISCREG_V2PCWPW; + return MISCREG_ATS1CPW; case 2: - return MISCREG_V2PCWUR; + return MISCREG_ATS1CUR; case 3: - return MISCREG_V2PCWUW; + return MISCREG_ATS1CUW; case 4: - return MISCREG_V2POWPR; + return MISCREG_ATS12NSOPR; case 5: - return MISCREG_V2POWPW; + return MISCREG_ATS12NSOPW; case 6: - return MISCREG_V2POWUR; + return MISCREG_ATS12NSOUR; case 7: - return MISCREG_V2POWUW; + return MISCREG_ATS12NSOUW; } break; case 10: @@ -316,7 +1620,7 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) case 1: return MISCREG_DCCMVAC; case 2: - return MISCREG_MCCSW; + return MISCREG_DCCSW; case 4: return MISCREG_CP15DSB; case 5: @@ -341,6 +1645,11 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) } break; } + } else if (opc1 == 4 && crm == 8) { + if (opc2 == 0) + return MISCREG_ATS1HR; + else if (opc2 == 1) + return MISCREG_ATS1HW; } break; case 8: @@ -391,6 +1700,26 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) } break; } + } else if (opc1 == 4) { + if (crm == 3) { + switch (opc2) { + case 0: + return MISCREG_TLBIALLHIS; + case 1: + return MISCREG_TLBIMVAHIS; + case 4: + return MISCREG_TLBIALLNSNHIS; + } + } else if (crm == 7) { + switch (opc2) { + case 0: + return MISCREG_TLBIALLH; + case 1: + return MISCREG_TLBIMVAH; + case 4: + return MISCREG_TLBIALLNSNH; + } + } } break; case 9: @@ -421,7 +1750,8 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) case 0: return MISCREG_PMCCNTR; case 1: - return MISCREG_PMC_OTHER; + // Selector is PMSELR.SEL + return MISCREG_PMXEVTYPER_PMCCFILTR; case 2: return MISCREG_PMXEVCNTR; } @@ -434,6 +1764,8 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_PMINTENSET; case 2: return MISCREG_PMINTENCLR; + case 3: + return MISCREG_PMOVSSET; } break; } @@ -443,28 +1775,45 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) switch (opc2) { case 2: // L2CTLR, L2 Control Register return MISCREG_L2CTLR; - default: - warn("Uknown miscregs: crn:%d crm:%d opc1:%d opc2:%d\n", - crn,crm, opc1,opc2); - break; + case 3: + return MISCREG_L2ECTLR; } break; - default: - return MISCREG_L2LATENCY; + break; } } - //Reserved for Branch Predictor, Cache and TCM operations break; case 10: if (opc1 == 0) { // crm 0, 1, 4, and 8, with op2 0 - 7, reserved for TLB lockdown if (crm == 2) { // TEX Remap Registers if (opc2 == 0) { - return MISCREG_PRRR; + // Selector is TTBCR.EAE + return MISCREG_PRRR_MAIR0; } else if (opc2 == 1) { - return MISCREG_NMRR; + // Selector is TTBCR.EAE + return MISCREG_NMRR_MAIR1; + } + } else if (crm == 3) { + if (opc2 == 0) { + return MISCREG_AMAIR0; + } else if (opc2 == 1) { + return MISCREG_AMAIR1; } } + } else if (opc1 == 4) { + // crm 0, 1, 4, and 8, with op2 0 - 7, reserved for TLB lockdown + if (crm == 2) { + if (opc2 == 0) + return MISCREG_HMAIR0; + else if (opc2 == 1) + return MISCREG_HMAIR1; + } else if (crm == 3) { + if (opc2 == 0) + return MISCREG_HAMAIR0; + else if (opc2 == 1) + return MISCREG_HAMAIR1; + } } break; case 11: @@ -498,6 +1847,9 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_ISR; } } + } else if (opc1 == 4) { + if (crm == 0 && opc2 == 0) + return MISCREG_HVBAR; } break; case 13: @@ -505,7 +1857,7 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) if (crm == 0) { switch (opc2) { case 0: - return MISCREG_FCEIDR; + return MISCREG_FCSEIDR; case 1: return MISCREG_CONTEXTIDR; case 2: @@ -516,14 +1868,1682 @@ decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2) return MISCREG_TPIDRPRW; } } + } else if (opc1 == 4) { + if (crm == 0 && opc2 == 2) + return MISCREG_HTPIDR; + } + break; + case 14: + if (opc1 == 0) { + switch (crm) { + case 0: + if (opc2 == 0) + return MISCREG_CNTFRQ; + break; + case 1: + if (opc2 == 0) + return MISCREG_CNTKCTL; + break; + case 2: + if (opc2 == 0) + return MISCREG_CNTP_TVAL; + else if (opc2 == 1) + return MISCREG_CNTP_CTL; + break; + case 3: + if (opc2 == 0) + return MISCREG_CNTV_TVAL; + else if (opc2 == 1) + return MISCREG_CNTV_CTL; + break; + } + } else if (opc1 == 4) { + if (crm == 1 && opc2 == 0) { + return MISCREG_CNTHCTL; + } else if (crm == 2) { + if (opc2 == 0) + return MISCREG_CNTHP_TVAL; + else if (opc2 == 1) + return MISCREG_CNTHP_CTL; + } } break; case 15: // Implementation defined - return MISCREG_CRN15; + return MISCREG_CP15_UNIMPL; } // Unrecognized register - return NUM_MISCREGS; + return MISCREG_CP15_UNIMPL; +} + +MiscRegIndex +decodeCP15Reg64(unsigned crm, unsigned opc1) +{ + switch (crm) { + case 2: + switch (opc1) { + case 0: + return MISCREG_TTBR0; + case 1: + return MISCREG_TTBR1; + case 4: + return MISCREG_HTTBR; + case 6: + return MISCREG_VTTBR; + } + break; + case 7: + if (opc1 == 0) + return MISCREG_PAR; + break; + case 14: + switch (opc1) { + case 0: + return MISCREG_CNTPCT; + case 1: + return MISCREG_CNTVCT; + case 2: + return MISCREG_CNTP_CVAL; + case 3: + return MISCREG_CNTV_CVAL; + case 4: + return MISCREG_CNTVOFF; + case 6: + return MISCREG_CNTHP_CVAL; + } + break; + case 15: + if (opc1 == 0) + return MISCREG_CPUMERRSR; + else if (opc1 == 1) + return MISCREG_L2MERRSR; + break; + } + // Unrecognized register + return MISCREG_CP15_UNIMPL; +} + +bool +canReadCoprocReg(MiscRegIndex reg, SCR scr, CPSR cpsr, ThreadContext *tc) +{ + bool secure = !scr.ns; + bool canRead; + + switch (cpsr.mode) { + case MODE_USER: + canRead = secure ? miscRegInfo[reg][MISCREG_USR_S_RD] : + miscRegInfo[reg][MISCREG_USR_NS_RD]; + break; + case MODE_FIQ: + case MODE_IRQ: + case MODE_SVC: + case MODE_ABORT: + case MODE_UNDEFINED: + case MODE_SYSTEM: + canRead = secure ? miscRegInfo[reg][MISCREG_PRI_S_RD] : + miscRegInfo[reg][MISCREG_PRI_NS_RD]; + break; + case MODE_MON: + canRead = secure ? miscRegInfo[reg][MISCREG_MON_NS0_RD] : + miscRegInfo[reg][MISCREG_MON_NS1_RD]; + break; + case MODE_HYP: + canRead = miscRegInfo[reg][MISCREG_HYP_RD]; + break; + default: + panic("Unrecognized mode setting in CPSR.\n"); + } + // can't do permissions checkes on the root of a banked pair of regs + assert(!miscRegInfo[reg][MISCREG_BANKED]); + return canRead; +} + +bool +canWriteCoprocReg(MiscRegIndex reg, SCR scr, CPSR cpsr, ThreadContext *tc) +{ + bool secure = !scr.ns; + bool canWrite; + + switch (cpsr.mode) { + case MODE_USER: + canWrite = secure ? miscRegInfo[reg][MISCREG_USR_S_WR] : + miscRegInfo[reg][MISCREG_USR_NS_WR]; + break; + case MODE_FIQ: + case MODE_IRQ: + case MODE_SVC: + case MODE_ABORT: + case MODE_UNDEFINED: + case MODE_SYSTEM: + canWrite = secure ? miscRegInfo[reg][MISCREG_PRI_S_WR] : + miscRegInfo[reg][MISCREG_PRI_NS_WR]; + break; + case MODE_MON: + canWrite = secure ? miscRegInfo[reg][MISCREG_MON_NS0_WR] : + miscRegInfo[reg][MISCREG_MON_NS1_WR]; + break; + case MODE_HYP: + canWrite = miscRegInfo[reg][MISCREG_HYP_WR]; + break; + default: + panic("Unrecognized mode setting in CPSR.\n"); + } + // can't do permissions checkes on the root of a banked pair of regs + assert(!miscRegInfo[reg][MISCREG_BANKED]); + return canWrite; +} + +int +flattenMiscRegNsBanked(int reg, ThreadContext *tc) +{ + if (miscRegInfo[reg][MISCREG_BANKED]) { + SCR scr = tc->readMiscReg(MISCREG_SCR); + reg += (ArmSystem::haveSecurity(tc) && !scr.ns) ? 2 : 1; + } + return reg; +} + +int +flattenMiscRegNsBanked(int reg, ThreadContext *tc, bool ns) +{ + if (miscRegInfo[reg][MISCREG_BANKED]) { + reg += (ArmSystem::haveSecurity(tc) && !ns) ? 2 : 1; + } + return reg; } + +/** + * If the reg is a child reg of a banked set, then the parent is the last + * banked one in the list. This is messy, and the wish is to eventually have + * the bitmap replaced with a better data structure. the preUnflatten function + * initializes a lookup table to speed up the search for these banked + * registers. + */ + +int unflattenResultMiscReg[NUM_MISCREGS]; + +void +preUnflattenMiscReg() +{ + int reg = -1; + for (int i = 0 ; i < NUM_MISCREGS; i++){ + if (miscRegInfo[i][MISCREG_BANKED]) + reg = i; + if (miscRegInfo[i][MISCREG_BANKED_CHILD]) + unflattenResultMiscReg[i] = reg; + else + unflattenResultMiscReg[i] = i; + // if this assert fails, no parent was found, and something is broken + assert(unflattenResultMiscReg[i] > -1); + } } + +int +unflattenMiscReg(int reg) +{ + return unflattenResultMiscReg[reg]; +} + +bool +canReadAArch64SysReg(MiscRegIndex reg, SCR scr, CPSR cpsr, ThreadContext *tc) +{ + // Check for SP_EL0 access while SPSEL == 0 + if ((reg == MISCREG_SP_EL0) && (tc->readMiscReg(MISCREG_SPSEL) == 0)) + return false; + + // Check for RVBAR access + if (reg == MISCREG_RVBAR_EL1) { + ExceptionLevel highest_el = ArmSystem::highestEL(tc); + if (highest_el == EL2 || highest_el == EL3) + return false; + } + if (reg == MISCREG_RVBAR_EL2) { + ExceptionLevel highest_el = ArmSystem::highestEL(tc); + if (highest_el == EL3) + return false; + } + + bool secure = ArmSystem::haveSecurity(tc) && !scr.ns; + + switch (opModeToEL((OperatingMode) (uint8_t) cpsr.mode)) { + case EL0: + return secure ? miscRegInfo[reg][MISCREG_USR_S_RD] : + miscRegInfo[reg][MISCREG_USR_NS_RD]; + case EL1: + return secure ? miscRegInfo[reg][MISCREG_PRI_S_RD] : + miscRegInfo[reg][MISCREG_PRI_NS_RD]; + // @todo: uncomment this to enable Virtualization + // case EL2: + // return miscRegInfo[reg][MISCREG_HYP_RD]; + case EL3: + return secure ? miscRegInfo[reg][MISCREG_MON_NS0_RD] : + miscRegInfo[reg][MISCREG_MON_NS1_RD]; + default: + panic("Invalid exception level"); + } +} + +bool +canWriteAArch64SysReg(MiscRegIndex reg, SCR scr, CPSR cpsr, ThreadContext *tc) +{ + // Check for SP_EL0 access while SPSEL == 0 + if ((reg == MISCREG_SP_EL0) && (tc->readMiscReg(MISCREG_SPSEL) == 0)) + return false; + ExceptionLevel el = opModeToEL((OperatingMode) (uint8_t) cpsr.mode); + if (reg == MISCREG_DAIF) { + SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR_EL1); + if (el == EL0 && !sctlr.uma) + return false; + } + if (reg == MISCREG_DC_ZVA_Xt) { + SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR_EL1); + if (el == EL0 && !sctlr.dze) + return false; + } + if (reg == MISCREG_DC_CVAC_Xt || reg == MISCREG_DC_CIVAC_Xt) { + SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR_EL1); + if (el == EL0 && !sctlr.uci) + return false; + } + + bool secure = ArmSystem::haveSecurity(tc) && !scr.ns; + + switch (el) { + case EL0: + return secure ? miscRegInfo[reg][MISCREG_USR_S_WR] : + miscRegInfo[reg][MISCREG_USR_NS_WR]; + case EL1: + return secure ? miscRegInfo[reg][MISCREG_PRI_S_WR] : + miscRegInfo[reg][MISCREG_PRI_NS_WR]; + // @todo: uncomment this to enable Virtualization + // case EL2: + // return miscRegInfo[reg][MISCREG_HYP_WR]; + case EL3: + return secure ? miscRegInfo[reg][MISCREG_MON_NS0_WR] : + miscRegInfo[reg][MISCREG_MON_NS1_WR]; + default: + panic("Invalid exception level"); + } +} + +MiscRegIndex +decodeAArch64SysReg(unsigned op0, unsigned op1, + unsigned crn, unsigned crm, + unsigned op2) +{ + switch (op0) { + case 1: + switch (crn) { + case 7: + switch (op1) { + case 0: + switch (crm) { + case 1: + switch (op2) { + case 0: + return MISCREG_IC_IALLUIS; + } + break; + case 5: + switch (op2) { + case 0: + return MISCREG_IC_IALLU; + } + break; + case 6: + switch (op2) { + case 1: + return MISCREG_DC_IVAC_Xt; + case 2: + return MISCREG_DC_ISW_Xt; + } + break; + case 8: + switch (op2) { + case 0: + return MISCREG_AT_S1E1R_Xt; + case 1: + return MISCREG_AT_S1E1W_Xt; + case 2: + return MISCREG_AT_S1E0R_Xt; + case 3: + return MISCREG_AT_S1E0W_Xt; + } + break; + case 10: + switch (op2) { + case 2: + return MISCREG_DC_CSW_Xt; + } + break; + case 14: + switch (op2) { + case 2: + return MISCREG_DC_CISW_Xt; + } + break; + } + break; + case 3: + switch (crm) { + case 4: + switch (op2) { + case 1: + return MISCREG_DC_ZVA_Xt; + } + break; + case 5: + switch (op2) { + case 1: + return MISCREG_IC_IVAU_Xt; + } + break; + case 10: + switch (op2) { + case 1: + return MISCREG_DC_CVAC_Xt; + } + break; + case 11: + switch (op2) { + case 1: + return MISCREG_DC_CVAU_Xt; + } + break; + case 14: + switch (op2) { + case 1: + return MISCREG_DC_CIVAC_Xt; + } + break; + } + break; + case 4: + switch (crm) { + case 8: + switch (op2) { + case 0: + return MISCREG_AT_S1E2R_Xt; + case 1: + return MISCREG_AT_S1E2W_Xt; + case 4: + return MISCREG_AT_S12E1R_Xt; + case 5: + return MISCREG_AT_S12E1W_Xt; + case 6: + return MISCREG_AT_S12E0R_Xt; + case 7: + return MISCREG_AT_S12E0W_Xt; + } + break; + } + break; + case 6: + switch (crm) { + case 8: + switch (op2) { + case 0: + return MISCREG_AT_S1E3R_Xt; + case 1: + return MISCREG_AT_S1E3W_Xt; + } + break; + } + break; + } + break; + case 8: + switch (op1) { + case 0: + switch (crm) { + case 3: + switch (op2) { + case 0: + return MISCREG_TLBI_VMALLE1IS; + case 1: + return MISCREG_TLBI_VAE1IS_Xt; + case 2: + return MISCREG_TLBI_ASIDE1IS_Xt; + case 3: + return MISCREG_TLBI_VAAE1IS_Xt; + case 5: + return MISCREG_TLBI_VALE1IS_Xt; + case 7: + return MISCREG_TLBI_VAALE1IS_Xt; + } + break; + case 7: + switch (op2) { + case 0: + return MISCREG_TLBI_VMALLE1; + case 1: + return MISCREG_TLBI_VAE1_Xt; + case 2: + return MISCREG_TLBI_ASIDE1_Xt; + case 3: + return MISCREG_TLBI_VAAE1_Xt; + case 5: + return MISCREG_TLBI_VALE1_Xt; + case 7: + return MISCREG_TLBI_VAALE1_Xt; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 1: + return MISCREG_TLBI_IPAS2E1IS_Xt; + case 5: + return MISCREG_TLBI_IPAS2LE1IS_Xt; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_TLBI_ALLE2IS; + case 1: + return MISCREG_TLBI_VAE2IS_Xt; + case 4: + return MISCREG_TLBI_ALLE1IS; + case 5: + return MISCREG_TLBI_VALE2IS_Xt; + case 6: + return MISCREG_TLBI_VMALLS12E1IS; + } + break; + case 4: + switch (op2) { + case 1: + return MISCREG_TLBI_IPAS2E1_Xt; + case 5: + return MISCREG_TLBI_IPAS2LE1_Xt; + } + break; + case 7: + switch (op2) { + case 0: + return MISCREG_TLBI_ALLE2; + case 1: + return MISCREG_TLBI_VAE2_Xt; + case 4: + return MISCREG_TLBI_ALLE1; + case 5: + return MISCREG_TLBI_VALE2_Xt; + case 6: + return MISCREG_TLBI_VMALLS12E1; + } + break; + } + break; + case 6: + switch (crm) { + case 3: + switch (op2) { + case 0: + return MISCREG_TLBI_ALLE3IS; + case 1: + return MISCREG_TLBI_VAE3IS_Xt; + case 5: + return MISCREG_TLBI_VALE3IS_Xt; + } + break; + case 7: + switch (op2) { + case 0: + return MISCREG_TLBI_ALLE3; + case 1: + return MISCREG_TLBI_VAE3_Xt; + case 5: + return MISCREG_TLBI_VALE3_Xt; + } + break; + } + break; + } + break; + } + break; + case 2: + switch (crn) { + case 0: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 2: + return MISCREG_OSDTRRX_EL1; + case 4: + return MISCREG_DBGBVR0_EL1; + case 5: + return MISCREG_DBGBCR0_EL1; + case 6: + return MISCREG_DBGWVR0_EL1; + case 7: + return MISCREG_DBGWCR0_EL1; + } + break; + case 1: + switch (op2) { + case 4: + return MISCREG_DBGBVR1_EL1; + case 5: + return MISCREG_DBGBCR1_EL1; + case 6: + return MISCREG_DBGWVR1_EL1; + case 7: + return MISCREG_DBGWCR1_EL1; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_MDCCINT_EL1; + case 2: + return MISCREG_MDSCR_EL1; + case 4: + return MISCREG_DBGBVR2_EL1; + case 5: + return MISCREG_DBGBCR2_EL1; + case 6: + return MISCREG_DBGWVR2_EL1; + case 7: + return MISCREG_DBGWCR2_EL1; + } + break; + case 3: + switch (op2) { + case 2: + return MISCREG_OSDTRTX_EL1; + case 4: + return MISCREG_DBGBVR3_EL1; + case 5: + return MISCREG_DBGBCR3_EL1; + case 6: + return MISCREG_DBGWVR3_EL1; + case 7: + return MISCREG_DBGWCR3_EL1; + } + break; + case 4: + switch (op2) { + case 4: + return MISCREG_DBGBVR4_EL1; + case 5: + return MISCREG_DBGBCR4_EL1; + } + break; + case 5: + switch (op2) { + case 4: + return MISCREG_DBGBVR5_EL1; + case 5: + return MISCREG_DBGBCR5_EL1; + } + break; + case 6: + switch (op2) { + case 2: + return MISCREG_OSECCR_EL1; + } + break; + } + break; + case 2: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_TEECR32_EL1; + } + break; + } + break; + case 3: + switch (crm) { + case 1: + switch (op2) { + case 0: + return MISCREG_MDCCSR_EL0; + } + break; + case 4: + switch (op2) { + case 0: + return MISCREG_MDDTR_EL0; + } + break; + case 5: + switch (op2) { + case 0: + return MISCREG_MDDTRRX_EL0; + } + break; + } + break; + case 4: + switch (crm) { + case 7: + switch (op2) { + case 0: + return MISCREG_DBGVCR32_EL2; + } + break; + } + break; + } + break; + case 1: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_MDRAR_EL1; + case 4: + return MISCREG_OSLAR_EL1; + } + break; + case 1: + switch (op2) { + case 4: + return MISCREG_OSLSR_EL1; + } + break; + case 3: + switch (op2) { + case 4: + return MISCREG_OSDLR_EL1; + } + break; + case 4: + switch (op2) { + case 4: + return MISCREG_DBGPRCR_EL1; + } + break; + } + break; + case 2: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_TEEHBR32_EL1; + } + break; + } + break; + } + break; + case 7: + switch (op1) { + case 0: + switch (crm) { + case 8: + switch (op2) { + case 6: + return MISCREG_DBGCLAIMSET_EL1; + } + break; + case 9: + switch (op2) { + case 6: + return MISCREG_DBGCLAIMCLR_EL1; + } + break; + case 14: + switch (op2) { + case 6: + return MISCREG_DBGAUTHSTATUS_EL1; + } + break; + } + break; + } + break; + } + break; + case 3: + switch (crn) { + case 0: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_MIDR_EL1; + case 5: + return MISCREG_MPIDR_EL1; + case 6: + return MISCREG_REVIDR_EL1; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_ID_PFR0_EL1; + case 1: + return MISCREG_ID_PFR1_EL1; + case 2: + return MISCREG_ID_DFR0_EL1; + case 3: + return MISCREG_ID_AFR0_EL1; + case 4: + return MISCREG_ID_MMFR0_EL1; + case 5: + return MISCREG_ID_MMFR1_EL1; + case 6: + return MISCREG_ID_MMFR2_EL1; + case 7: + return MISCREG_ID_MMFR3_EL1; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_ID_ISAR0_EL1; + case 1: + return MISCREG_ID_ISAR1_EL1; + case 2: + return MISCREG_ID_ISAR2_EL1; + case 3: + return MISCREG_ID_ISAR3_EL1; + case 4: + return MISCREG_ID_ISAR4_EL1; + case 5: + return MISCREG_ID_ISAR5_EL1; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_MVFR0_EL1; + case 1: + return MISCREG_MVFR1_EL1; + case 2: + return MISCREG_MVFR2_EL1; + case 3 ... 7: + return MISCREG_RAZ; + } + break; + case 4: + switch (op2) { + case 0: + return MISCREG_ID_AA64PFR0_EL1; + case 1: + return MISCREG_ID_AA64PFR1_EL1; + case 2 ... 7: + return MISCREG_RAZ; + } + break; + case 5: + switch (op2) { + case 0: + return MISCREG_ID_AA64DFR0_EL1; + case 1: + return MISCREG_ID_AA64DFR1_EL1; + case 4: + return MISCREG_ID_AA64AFR0_EL1; + case 5: + return MISCREG_ID_AA64AFR1_EL1; + case 2: + case 3: + case 6: + case 7: + return MISCREG_RAZ; + } + break; + case 6: + switch (op2) { + case 0: + return MISCREG_ID_AA64ISAR0_EL1; + case 1: + return MISCREG_ID_AA64ISAR1_EL1; + case 2 ... 7: + return MISCREG_RAZ; + } + break; + case 7: + switch (op2) { + case 0: + return MISCREG_ID_AA64MMFR0_EL1; + case 1: + return MISCREG_ID_AA64MMFR1_EL1; + case 2 ... 7: + return MISCREG_RAZ; + } + break; + } + break; + case 1: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_CCSIDR_EL1; + case 1: + return MISCREG_CLIDR_EL1; + case 7: + return MISCREG_AIDR_EL1; + } + break; + } + break; + case 2: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_CSSELR_EL1; + } + break; + } + break; + case 3: + switch (crm) { + case 0: + switch (op2) { + case 1: + return MISCREG_CTR_EL0; + case 7: + return MISCREG_DCZID_EL0; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_VPIDR_EL2; + case 5: + return MISCREG_VMPIDR_EL2; + } + break; + } + break; + } + break; + case 1: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_SCTLR_EL1; + case 1: + return MISCREG_ACTLR_EL1; + case 2: + return MISCREG_CPACR_EL1; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_SCTLR_EL2; + case 1: + return MISCREG_ACTLR_EL2; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_HCR_EL2; + case 1: + return MISCREG_MDCR_EL2; + case 2: + return MISCREG_CPTR_EL2; + case 3: + return MISCREG_HSTR_EL2; + case 7: + return MISCREG_HACR_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_SCTLR_EL3; + case 1: + return MISCREG_ACTLR_EL3; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_SCR_EL3; + case 1: + return MISCREG_SDER32_EL3; + case 2: + return MISCREG_CPTR_EL3; + } + break; + case 3: + switch (op2) { + case 1: + return MISCREG_MDCR_EL3; + } + break; + } + break; + } + break; + case 2: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_TTBR0_EL1; + case 1: + return MISCREG_TTBR1_EL1; + case 2: + return MISCREG_TCR_EL1; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_TTBR0_EL2; + case 2: + return MISCREG_TCR_EL2; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_VTTBR_EL2; + case 2: + return MISCREG_VTCR_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_TTBR0_EL3; + case 2: + return MISCREG_TCR_EL3; + } + break; + } + break; + } + break; + case 3: + switch (op1) { + case 4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_DACR32_EL2; + } + break; + } + break; + } + break; + case 4: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_SPSR_EL1; + case 1: + return MISCREG_ELR_EL1; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_SP_EL0; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_SPSEL; + case 2: + return MISCREG_CURRENTEL; + } + break; + } + break; + case 3: + switch (crm) { + case 2: + switch (op2) { + case 0: + return MISCREG_NZCV; + case 1: + return MISCREG_DAIF; + } + break; + case 4: + switch (op2) { + case 0: + return MISCREG_FPCR; + case 1: + return MISCREG_FPSR; + } + break; + case 5: + switch (op2) { + case 0: + return MISCREG_DSPSR_EL0; + case 1: + return MISCREG_DLR_EL0; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_SPSR_EL2; + case 1: + return MISCREG_ELR_EL2; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_SP_EL1; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_SPSR_IRQ_AA64; + case 1: + return MISCREG_SPSR_ABT_AA64; + case 2: + return MISCREG_SPSR_UND_AA64; + case 3: + return MISCREG_SPSR_FIQ_AA64; + } + break; + } + break; + case 6: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_SPSR_EL3; + case 1: + return MISCREG_ELR_EL3; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_SP_EL2; + } + break; + } + break; + } + break; + case 5: + switch (op1) { + case 0: + switch (crm) { + case 1: + switch (op2) { + case 0: + return MISCREG_AFSR0_EL1; + case 1: + return MISCREG_AFSR1_EL1; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_ESR_EL1; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 1: + return MISCREG_IFSR32_EL2; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_AFSR0_EL2; + case 1: + return MISCREG_AFSR1_EL2; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_ESR_EL2; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_FPEXC32_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 1: + switch (op2) { + case 0: + return MISCREG_AFSR0_EL3; + case 1: + return MISCREG_AFSR1_EL3; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_ESR_EL3; + } + break; + } + break; + } + break; + case 6: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_FAR_EL1; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_FAR_EL2; + case 4: + return MISCREG_HPFAR_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_FAR_EL3; + } + break; + } + break; + } + break; + case 7: + switch (op1) { + case 0: + switch (crm) { + case 4: + switch (op2) { + case 0: + return MISCREG_PAR_EL1; + } + break; + } + break; + } + break; + case 9: + switch (op1) { + case 0: + switch (crm) { + case 14: + switch (op2) { + case 1: + return MISCREG_PMINTENSET_EL1; + case 2: + return MISCREG_PMINTENCLR_EL1; + } + break; + } + break; + case 3: + switch (crm) { + case 12: + switch (op2) { + case 0: + return MISCREG_PMCR_EL0; + case 1: + return MISCREG_PMCNTENSET_EL0; + case 2: + return MISCREG_PMCNTENCLR_EL0; + case 3: + return MISCREG_PMOVSCLR_EL0; + case 4: + return MISCREG_PMSWINC_EL0; + case 5: + return MISCREG_PMSELR_EL0; + case 6: + return MISCREG_PMCEID0_EL0; + case 7: + return MISCREG_PMCEID1_EL0; + } + break; + case 13: + switch (op2) { + case 0: + return MISCREG_PMCCNTR_EL0; + case 1: + return MISCREG_PMCCFILTR_EL0; + case 2: + return MISCREG_PMXEVCNTR_EL0; + } + break; + case 14: + switch (op2) { + case 0: + return MISCREG_PMUSERENR_EL0; + case 3: + return MISCREG_PMOVSSET_EL0; + } + break; + } + break; + } + break; + case 10: + switch (op1) { + case 0: + switch (crm) { + case 2: + switch (op2) { + case 0: + return MISCREG_MAIR_EL1; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_AMAIR_EL1; + } + break; + } + break; + case 4: + switch (crm) { + case 2: + switch (op2) { + case 0: + return MISCREG_MAIR_EL2; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_AMAIR_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 2: + switch (op2) { + case 0: + return MISCREG_MAIR_EL3; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_AMAIR_EL3; + } + break; + } + break; + } + break; + case 11: + switch (op1) { + case 1: + switch (crm) { + case 0: + switch (op2) { + case 2: + return MISCREG_L2CTLR_EL1; + case 3: + return MISCREG_L2ECTLR_EL1; + } + break; + } + break; + } + break; + case 12: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_VBAR_EL1; + case 1: + return MISCREG_RVBAR_EL1; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_ISR_EL1; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_VBAR_EL2; + case 1: + return MISCREG_RVBAR_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_VBAR_EL3; + case 1: + return MISCREG_RVBAR_EL3; + case 2: + return MISCREG_RMR_EL3; + } + break; + } + break; + } + break; + case 13: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 1: + return MISCREG_CONTEXTIDR_EL1; + case 4: + return MISCREG_TPIDR_EL1; + } + break; + } + break; + case 3: + switch (crm) { + case 0: + switch (op2) { + case 2: + return MISCREG_TPIDR_EL0; + case 3: + return MISCREG_TPIDRRO_EL0; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 2: + return MISCREG_TPIDR_EL2; + } + break; + } + break; + case 6: + switch (crm) { + case 0: + switch (op2) { + case 2: + return MISCREG_TPIDR_EL3; + } + break; + } + break; + } + break; + case 14: + switch (op1) { + case 0: + switch (crm) { + case 1: + switch (op2) { + case 0: + return MISCREG_CNTKCTL_EL1; + } + break; + } + break; + case 3: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_CNTFRQ_EL0; + case 1: + return MISCREG_CNTPCT_EL0; + case 2: + return MISCREG_CNTVCT_EL0; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_CNTP_TVAL_EL0; + case 1: + return MISCREG_CNTP_CTL_EL0; + case 2: + return MISCREG_CNTP_CVAL_EL0; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_CNTV_TVAL_EL0; + case 1: + return MISCREG_CNTV_CTL_EL0; + case 2: + return MISCREG_CNTV_CVAL_EL0; + } + break; + case 8: + switch (op2) { + case 0: + return MISCREG_PMEVCNTR0_EL0; + case 1: + return MISCREG_PMEVCNTR1_EL0; + case 2: + return MISCREG_PMEVCNTR2_EL0; + case 3: + return MISCREG_PMEVCNTR3_EL0; + case 4: + return MISCREG_PMEVCNTR4_EL0; + case 5: + return MISCREG_PMEVCNTR5_EL0; + } + break; + case 12: + switch (op2) { + case 0: + return MISCREG_PMEVTYPER0_EL0; + case 1: + return MISCREG_PMEVTYPER1_EL0; + case 2: + return MISCREG_PMEVTYPER2_EL0; + case 3: + return MISCREG_PMEVTYPER3_EL0; + case 4: + return MISCREG_PMEVTYPER4_EL0; + case 5: + return MISCREG_PMEVTYPER5_EL0; + } + break; + } + break; + case 4: + switch (crm) { + case 0: + switch (op2) { + case 3: + return MISCREG_CNTVOFF_EL2; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_CNTHCTL_EL2; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_CNTHP_TVAL_EL2; + case 1: + return MISCREG_CNTHP_CTL_EL2; + case 2: + return MISCREG_CNTHP_CVAL_EL2; + } + break; + } + break; + case 7: + switch (crm) { + case 2: + switch (op2) { + case 0: + return MISCREG_CNTPS_TVAL_EL1; + case 1: + return MISCREG_CNTPS_CTL_EL1; + case 2: + return MISCREG_CNTPS_CVAL_EL1; + } + break; + } + break; + } + break; + case 15: + switch (op1) { + case 0: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_IL1DATA0_EL1; + case 1: + return MISCREG_IL1DATA1_EL1; + case 2: + return MISCREG_IL1DATA2_EL1; + case 3: + return MISCREG_IL1DATA3_EL1; + } + break; + case 1: + switch (op2) { + case 0: + return MISCREG_DL1DATA0_EL1; + case 1: + return MISCREG_DL1DATA1_EL1; + case 2: + return MISCREG_DL1DATA2_EL1; + case 3: + return MISCREG_DL1DATA3_EL1; + case 4: + return MISCREG_DL1DATA4_EL1; + } + break; + } + break; + case 1: + switch (crm) { + case 0: + switch (op2) { + case 0: + return MISCREG_L2ACTLR_EL1; + } + break; + case 2: + switch (op2) { + case 0: + return MISCREG_CPUACTLR_EL1; + case 1: + return MISCREG_CPUECTLR_EL1; + case 2: + return MISCREG_CPUMERRSR_EL1; + case 3: + return MISCREG_L2MERRSR_EL1; + } + break; + case 3: + switch (op2) { + case 0: + return MISCREG_CBAR_EL1; + + } + break; + } + break; + } + break; + } + break; + } + + return MISCREG_UNKNOWN; +} + +} // namespace ArmISA diff --git a/src/arch/arm/miscregs.hh b/src/arch/arm/miscregs.hh index 13234ddf5..c447dcd27 100644 --- a/src/arch/arm/miscregs.hh +++ b/src/arch/arm/miscregs.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -38,13 +38,19 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Gabe Black + * Giacomo Gabrielli */ #ifndef __ARCH_ARM_MISCREGS_HH__ #define __ARCH_ARM_MISCREGS_HH__ +#include <bitset> + #include "base/bitunion.hh" #include "base/compiler.hh" +class ThreadContext; + + namespace ArmISA { enum ConditionCode { @@ -67,284 +73,1330 @@ namespace ArmISA }; enum MiscRegIndex { - MISCREG_CPSR = 0, - MISCREG_CPSR_Q, - MISCREG_SPSR, - MISCREG_SPSR_FIQ, - MISCREG_SPSR_IRQ, - MISCREG_SPSR_SVC, - MISCREG_SPSR_MON, - MISCREG_SPSR_UND, - MISCREG_SPSR_ABT, - MISCREG_FPSR, - MISCREG_FPSID, - MISCREG_FPSCR, - MISCREG_FPSCR_QC, // Cumulative saturation flag - MISCREG_FPSCR_EXC, // Cumulative FP exception flags - MISCREG_FPEXC, - MISCREG_MVFR0, - MISCREG_MVFR1, - MISCREG_SCTLR_RST, - MISCREG_SEV_MAILBOX, - - // CP14 registers - MISCREG_CP14_START, - MISCREG_DBGDIDR = MISCREG_CP14_START, - MISCREG_DBGDSCR_INT, - MISCREG_DBGDTRRX_INT, - MISCREG_DBGTRTX_INT, - MISCREG_DBGWFAR, - MISCREG_DBGVCR, - MISCREG_DBGECR, - MISCREG_DBGDSCCR, - MISCREG_DBGSMCR, - MISCREG_DBGDTRRX_EXT, - MISCREG_DBGDSCR_EXT, - MISCREG_DBGDTRTX_EXT, - MISCREG_DBGDRCR, - MISCREG_DBGBVR, - MISCREG_DBGBCR, - MISCREG_DBGBVR_M, - MISCREG_DBGBCR_M, - MISCREG_DBGDRAR, - MISCREG_DBGBXVR_M, - MISCREG_DBGOSLAR, - MISCREG_DBGOSSRR, - MISCREG_DBGOSDLR, - MISCREG_DBGPRCR, - MISCREG_DBGPRSR, - MISCREG_DBGDSAR, - MISCREG_DBGITCTRL, - MISCREG_DBGCLAIMSET, - MISCREG_DBGCLAIMCLR, - MISCREG_DBGAUTHSTATUS, - MISCREG_DBGDEVID2, - MISCREG_DBGDEVID1, - MISCREG_DBGDEVID, - MISCREG_TEEHBR, - - // CP15 registers - MISCREG_CP15_START, - MISCREG_SCTLR = MISCREG_CP15_START, - MISCREG_DCCISW, - MISCREG_DCCIMVAC, - MISCREG_DCCMVAC, - MISCREG_CONTEXTIDR, - MISCREG_TPIDRURW, - MISCREG_TPIDRURO, - MISCREG_TPIDRPRW, - MISCREG_CP15ISB, - MISCREG_CP15DSB, - MISCREG_CP15DMB, - MISCREG_CPACR, - MISCREG_CLIDR, - MISCREG_CCSIDR, - MISCREG_CSSELR, - MISCREG_ICIALLUIS, - MISCREG_ICIALLU, - MISCREG_ICIMVAU, - MISCREG_BPIMVA, - MISCREG_BPIALLIS, - MISCREG_BPIALL, - MISCREG_MIDR, - MISCREG_TTBR0, - MISCREG_TTBR1, - MISCREG_TLBTR, - MISCREG_DACR, - MISCREG_TLBIALLIS, - MISCREG_TLBIMVAIS, - MISCREG_TLBIASIDIS, - MISCREG_TLBIMVAAIS, - MISCREG_ITLBIALL, - MISCREG_ITLBIMVA, - MISCREG_ITLBIASID, - MISCREG_DTLBIALL, - MISCREG_DTLBIMVA, - MISCREG_DTLBIASID, - MISCREG_TLBIALL, - MISCREG_TLBIMVA, - MISCREG_TLBIASID, - MISCREG_TLBIMVAA, - MISCREG_DFSR, - MISCREG_IFSR, - MISCREG_DFAR, - MISCREG_IFAR, - MISCREG_MPIDR, - MISCREG_PRRR, - MISCREG_NMRR, - MISCREG_TTBCR, - MISCREG_ID_PFR0, - MISCREG_CTR, - MISCREG_SCR, - MISCREG_SDER, - MISCREG_PAR, - MISCREG_V2PCWPR, - MISCREG_V2PCWPW, - MISCREG_V2PCWUR, - MISCREG_V2PCWUW, - MISCREG_V2POWPR, - MISCREG_V2POWPW, - MISCREG_V2POWUR, - MISCREG_V2POWUW, - MISCREG_ID_MMFR0, - MISCREG_ID_MMFR2, - MISCREG_ID_MMFR3, - MISCREG_ACTLR, - MISCREG_PMCR, - MISCREG_PMCCNTR, - MISCREG_PMCNTENSET, - MISCREG_PMCNTENCLR, - MISCREG_PMOVSR, - MISCREG_PMSWINC, - MISCREG_PMSELR, - MISCREG_PMCEID0, - MISCREG_PMCEID1, - MISCREG_PMC_OTHER, - MISCREG_PMXEVCNTR, - MISCREG_PMUSERENR, - MISCREG_PMINTENSET, - MISCREG_PMINTENCLR, - MISCREG_ID_ISAR0, - MISCREG_ID_ISAR1, - MISCREG_ID_ISAR2, - MISCREG_ID_ISAR3, - MISCREG_ID_ISAR4, - MISCREG_ID_ISAR5, - MISCREG_LOCKFLAG, - MISCREG_LOCKADDR, - MISCREG_ID_PFR1, - MISCREG_L2CTLR, - MISCREG_CP15_UNIMP_START, - MISCREG_TCMTR = MISCREG_CP15_UNIMP_START, - MISCREG_ID_DFR0, - MISCREG_ID_AFR0, - MISCREG_ID_MMFR1, - MISCREG_AIDR, - MISCREG_ADFSR, - MISCREG_AIFSR, - MISCREG_DCIMVAC, - MISCREG_DCISW, - MISCREG_MCCSW, - MISCREG_DCCMVAU, - MISCREG_NSACR, - MISCREG_VBAR, - MISCREG_MVBAR, - MISCREG_ISR, - MISCREG_FCEIDR, - MISCREG_L2LATENCY, - MISCREG_CRN15, - - - MISCREG_CP15_END, - - // Dummy indices - MISCREG_NOP = MISCREG_CP15_END, - MISCREG_RAZ, - - NUM_MISCREGS + MISCREG_CPSR = 0, // 0 + MISCREG_SPSR, // 1 + MISCREG_SPSR_FIQ, // 2 + MISCREG_SPSR_IRQ, // 3 + MISCREG_SPSR_SVC, // 4 + MISCREG_SPSR_MON, // 5 + MISCREG_SPSR_ABT, // 6 + MISCREG_SPSR_HYP, // 7 + MISCREG_SPSR_UND, // 8 + MISCREG_ELR_HYP, // 9 + MISCREG_FPSID, // 10 + MISCREG_FPSCR, // 11 + MISCREG_MVFR1, // 12 + MISCREG_MVFR0, // 13 + MISCREG_FPEXC, // 14 + + // Helper registers + MISCREG_CPSR_MODE, // 15 + MISCREG_CPSR_Q, // 16 + MISCREG_FPSCR_EXC, // 17 + MISCREG_FPSCR_QC, // 18 + MISCREG_LOCKADDR, // 19 + MISCREG_LOCKFLAG, // 20 + MISCREG_PRRR_MAIR0, // 21 + MISCREG_PRRR_MAIR0_NS, // 22 + MISCREG_PRRR_MAIR0_S, // 23 + MISCREG_NMRR_MAIR1, // 24 + MISCREG_NMRR_MAIR1_NS, // 25 + MISCREG_NMRR_MAIR1_S, // 26 + MISCREG_PMXEVTYPER_PMCCFILTR, // 27 + MISCREG_SCTLR_RST, // 28 + MISCREG_SEV_MAILBOX, // 29 + + // AArch32 CP14 registers (debug/trace/ThumbEE/Jazelle control) + MISCREG_DBGDIDR, // 30 + MISCREG_DBGDSCRint, // 31 + MISCREG_DBGDCCINT, // 32 + MISCREG_DBGDTRTXint, // 33 + MISCREG_DBGDTRRXint, // 34 + MISCREG_DBGWFAR, // 35 + MISCREG_DBGVCR, // 36 + MISCREG_DBGDTRRXext, // 37 + MISCREG_DBGDSCRext, // 38 + MISCREG_DBGDTRTXext, // 39 + MISCREG_DBGOSECCR, // 40 + MISCREG_DBGBVR0, // 41 + MISCREG_DBGBVR1, // 42 + MISCREG_DBGBVR2, // 43 + MISCREG_DBGBVR3, // 44 + MISCREG_DBGBVR4, // 45 + MISCREG_DBGBVR5, // 46 + MISCREG_DBGBCR0, // 47 + MISCREG_DBGBCR1, // 48 + MISCREG_DBGBCR2, // 49 + MISCREG_DBGBCR3, // 50 + MISCREG_DBGBCR4, // 51 + MISCREG_DBGBCR5, // 52 + MISCREG_DBGWVR0, // 53 + MISCREG_DBGWVR1, // 54 + MISCREG_DBGWVR2, // 55 + MISCREG_DBGWVR3, // 56 + MISCREG_DBGWCR0, // 57 + MISCREG_DBGWCR1, // 58 + MISCREG_DBGWCR2, // 59 + MISCREG_DBGWCR3, // 60 + MISCREG_DBGDRAR, // 61 + MISCREG_DBGBXVR4, // 62 + MISCREG_DBGBXVR5, // 63 + MISCREG_DBGOSLAR, // 64 + MISCREG_DBGOSLSR, // 65 + MISCREG_DBGOSDLR, // 66 + MISCREG_DBGPRCR, // 67 + MISCREG_DBGDSAR, // 68 + MISCREG_DBGCLAIMSET, // 69 + MISCREG_DBGCLAIMCLR, // 70 + MISCREG_DBGAUTHSTATUS, // 71 + MISCREG_DBGDEVID2, // 72 + MISCREG_DBGDEVID1, // 73 + MISCREG_DBGDEVID0, // 74 + MISCREG_TEECR, // 75 + MISCREG_JIDR, // 76 + MISCREG_TEEHBR, // 77 + MISCREG_JOSCR, // 78 + MISCREG_JMCR, // 79 + + // AArch32 CP15 registers (system control) + MISCREG_MIDR, // 80 + MISCREG_CTR, // 81 + MISCREG_TCMTR, // 82 + MISCREG_TLBTR, // 83 + MISCREG_MPIDR, // 84 + MISCREG_REVIDR, // 85 + MISCREG_ID_PFR0, // 86 + MISCREG_ID_PFR1, // 87 + MISCREG_ID_DFR0, // 88 + MISCREG_ID_AFR0, // 89 + MISCREG_ID_MMFR0, // 90 + MISCREG_ID_MMFR1, // 91 + MISCREG_ID_MMFR2, // 92 + MISCREG_ID_MMFR3, // 93 + MISCREG_ID_ISAR0, // 94 + MISCREG_ID_ISAR1, // 95 + MISCREG_ID_ISAR2, // 96 + MISCREG_ID_ISAR3, // 97 + MISCREG_ID_ISAR4, // 98 + MISCREG_ID_ISAR5, // 99 + MISCREG_CCSIDR, // 100 + MISCREG_CLIDR, // 101 + MISCREG_AIDR, // 102 + MISCREG_CSSELR, // 103 + MISCREG_CSSELR_NS, // 104 + MISCREG_CSSELR_S, // 105 + MISCREG_VPIDR, // 106 + MISCREG_VMPIDR, // 107 + MISCREG_SCTLR, // 108 + MISCREG_SCTLR_NS, // 109 + MISCREG_SCTLR_S, // 110 + MISCREG_ACTLR, // 111 + MISCREG_ACTLR_NS, // 112 + MISCREG_ACTLR_S, // 113 + MISCREG_CPACR, // 114 + MISCREG_SCR, // 115 + MISCREG_SDER, // 116 + MISCREG_NSACR, // 117 + MISCREG_HSCTLR, // 118 + MISCREG_HACTLR, // 119 + MISCREG_HCR, // 120 + MISCREG_HDCR, // 121 + MISCREG_HCPTR, // 122 + MISCREG_HSTR, // 123 + MISCREG_HACR, // 124 + MISCREG_TTBR0, // 125 + MISCREG_TTBR0_NS, // 126 + MISCREG_TTBR0_S, // 127 + MISCREG_TTBR1, // 128 + MISCREG_TTBR1_NS, // 129 + MISCREG_TTBR1_S, // 130 + MISCREG_TTBCR, // 131 + MISCREG_TTBCR_NS, // 132 + MISCREG_TTBCR_S, // 133 + MISCREG_HTCR, // 134 + MISCREG_VTCR, // 135 + MISCREG_DACR, // 136 + MISCREG_DACR_NS, // 137 + MISCREG_DACR_S, // 138 + MISCREG_DFSR, // 139 + MISCREG_DFSR_NS, // 140 + MISCREG_DFSR_S, // 141 + MISCREG_IFSR, // 142 + MISCREG_IFSR_NS, // 143 + MISCREG_IFSR_S, // 144 + MISCREG_ADFSR, // 145 + MISCREG_ADFSR_NS, // 146 + MISCREG_ADFSR_S, // 147 + MISCREG_AIFSR, // 148 + MISCREG_AIFSR_NS, // 149 + MISCREG_AIFSR_S, // 150 + MISCREG_HADFSR, // 151 + MISCREG_HAIFSR, // 152 + MISCREG_HSR, // 153 + MISCREG_DFAR, // 154 + MISCREG_DFAR_NS, // 155 + MISCREG_DFAR_S, // 156 + MISCREG_IFAR, // 157 + MISCREG_IFAR_NS, // 158 + MISCREG_IFAR_S, // 159 + MISCREG_HDFAR, // 160 + MISCREG_HIFAR, // 161 + MISCREG_HPFAR, // 162 + MISCREG_ICIALLUIS, // 163 + MISCREG_BPIALLIS, // 164 + MISCREG_PAR, // 165 + MISCREG_PAR_NS, // 166 + MISCREG_PAR_S, // 167 + MISCREG_ICIALLU, // 168 + MISCREG_ICIMVAU, // 169 + MISCREG_CP15ISB, // 170 + MISCREG_BPIALL, // 171 + MISCREG_BPIMVA, // 172 + MISCREG_DCIMVAC, // 173 + MISCREG_DCISW, // 174 + MISCREG_ATS1CPR, // 175 + MISCREG_ATS1CPW, // 176 + MISCREG_ATS1CUR, // 177 + MISCREG_ATS1CUW, // 178 + MISCREG_ATS12NSOPR, // 179 + MISCREG_ATS12NSOPW, // 180 + MISCREG_ATS12NSOUR, // 181 + MISCREG_ATS12NSOUW, // 182 + MISCREG_DCCMVAC, // 183 + MISCREG_DCCSW, // 184 + MISCREG_CP15DSB, // 185 + MISCREG_CP15DMB, // 186 + MISCREG_DCCMVAU, // 187 + MISCREG_DCCIMVAC, // 188 + MISCREG_DCCISW, // 189 + MISCREG_ATS1HR, // 190 + MISCREG_ATS1HW, // 191 + MISCREG_TLBIALLIS, // 192 + MISCREG_TLBIMVAIS, // 193 + MISCREG_TLBIASIDIS, // 194 + MISCREG_TLBIMVAAIS, // 195 + MISCREG_TLBIMVALIS, // 196 + MISCREG_TLBIMVAALIS, // 197 + MISCREG_ITLBIALL, // 198 + MISCREG_ITLBIMVA, // 199 + MISCREG_ITLBIASID, // 200 + MISCREG_DTLBIALL, // 201 + MISCREG_DTLBIMVA, // 202 + MISCREG_DTLBIASID, // 203 + MISCREG_TLBIALL, // 204 + MISCREG_TLBIMVA, // 205 + MISCREG_TLBIASID, // 206 + MISCREG_TLBIMVAA, // 207 + MISCREG_TLBIMVAL, // 208 + MISCREG_TLBIMVAAL, // 209 + MISCREG_TLBIIPAS2IS, // 210 + MISCREG_TLBIIPAS2LIS, // 211 + MISCREG_TLBIALLHIS, // 212 + MISCREG_TLBIMVAHIS, // 213 + MISCREG_TLBIALLNSNHIS, // 214 + MISCREG_TLBIMVALHIS, // 215 + MISCREG_TLBIIPAS2, // 216 + MISCREG_TLBIIPAS2L, // 217 + MISCREG_TLBIALLH, // 218 + MISCREG_TLBIMVAH, // 219 + MISCREG_TLBIALLNSNH, // 220 + MISCREG_TLBIMVALH, // 221 + MISCREG_PMCR, // 222 + MISCREG_PMCNTENSET, // 223 + MISCREG_PMCNTENCLR, // 224 + MISCREG_PMOVSR, // 225 + MISCREG_PMSWINC, // 226 + MISCREG_PMSELR, // 227 + MISCREG_PMCEID0, // 228 + MISCREG_PMCEID1, // 229 + MISCREG_PMCCNTR, // 230 + MISCREG_PMXEVTYPER, // 231 + MISCREG_PMCCFILTR, // 232 + MISCREG_PMXEVCNTR, // 233 + MISCREG_PMUSERENR, // 234 + MISCREG_PMINTENSET, // 235 + MISCREG_PMINTENCLR, // 236 + MISCREG_PMOVSSET, // 237 + MISCREG_L2CTLR, // 238 + MISCREG_L2ECTLR, // 239 + MISCREG_PRRR, // 240 + MISCREG_PRRR_NS, // 241 + MISCREG_PRRR_S, // 242 + MISCREG_MAIR0, // 243 + MISCREG_MAIR0_NS, // 244 + MISCREG_MAIR0_S, // 245 + MISCREG_NMRR, // 246 + MISCREG_NMRR_NS, // 247 + MISCREG_NMRR_S, // 248 + MISCREG_MAIR1, // 249 + MISCREG_MAIR1_NS, // 250 + MISCREG_MAIR1_S, // 251 + MISCREG_AMAIR0, // 252 + MISCREG_AMAIR0_NS, // 253 + MISCREG_AMAIR0_S, // 254 + MISCREG_AMAIR1, // 255 + MISCREG_AMAIR1_NS, // 256 + MISCREG_AMAIR1_S, // 257 + MISCREG_HMAIR0, // 258 + MISCREG_HMAIR1, // 259 + MISCREG_HAMAIR0, // 260 + MISCREG_HAMAIR1, // 261 + MISCREG_VBAR, // 262 + MISCREG_VBAR_NS, // 263 + MISCREG_VBAR_S, // 264 + MISCREG_MVBAR, // 265 + MISCREG_RMR, // 266 + MISCREG_ISR, // 267 + MISCREG_HVBAR, // 268 + MISCREG_FCSEIDR, // 269 + MISCREG_CONTEXTIDR, // 270 + MISCREG_CONTEXTIDR_NS, // 271 + MISCREG_CONTEXTIDR_S, // 272 + MISCREG_TPIDRURW, // 273 + MISCREG_TPIDRURW_NS, // 274 + MISCREG_TPIDRURW_S, // 275 + MISCREG_TPIDRURO, // 276 + MISCREG_TPIDRURO_NS, // 277 + MISCREG_TPIDRURO_S, // 278 + MISCREG_TPIDRPRW, // 279 + MISCREG_TPIDRPRW_NS, // 280 + MISCREG_TPIDRPRW_S, // 281 + MISCREG_HTPIDR, // 282 + MISCREG_CNTFRQ, // 283 + MISCREG_CNTKCTL, // 284 + MISCREG_CNTP_TVAL, // 285 + MISCREG_CNTP_TVAL_NS, // 286 + MISCREG_CNTP_TVAL_S, // 287 + MISCREG_CNTP_CTL, // 288 + MISCREG_CNTP_CTL_NS, // 289 + MISCREG_CNTP_CTL_S, // 290 + MISCREG_CNTV_TVAL, // 291 + MISCREG_CNTV_CTL, // 292 + MISCREG_CNTHCTL, // 293 + MISCREG_CNTHP_TVAL, // 294 + MISCREG_CNTHP_CTL, // 295 + MISCREG_IL1DATA0, // 296 + MISCREG_IL1DATA1, // 297 + MISCREG_IL1DATA2, // 298 + MISCREG_IL1DATA3, // 299 + MISCREG_DL1DATA0, // 300 + MISCREG_DL1DATA1, // 301 + MISCREG_DL1DATA2, // 302 + MISCREG_DL1DATA3, // 303 + MISCREG_DL1DATA4, // 304 + MISCREG_RAMINDEX, // 305 + MISCREG_L2ACTLR, // 306 + MISCREG_CBAR, // 307 + MISCREG_HTTBR, // 308 + MISCREG_VTTBR, // 309 + MISCREG_CNTPCT, // 310 + MISCREG_CNTVCT, // 311 + MISCREG_CNTP_CVAL, // 312 + MISCREG_CNTP_CVAL_NS, // 313 + MISCREG_CNTP_CVAL_S, // 314 + MISCREG_CNTV_CVAL, // 315 + MISCREG_CNTVOFF, // 316 + MISCREG_CNTHP_CVAL, // 317 + MISCREG_CPUMERRSR, // 318 + MISCREG_L2MERRSR, // 319 + + // AArch64 registers (Op0=2) + MISCREG_MDCCINT_EL1, // 320 + MISCREG_OSDTRRX_EL1, // 321 + MISCREG_MDSCR_EL1, // 322 + MISCREG_OSDTRTX_EL1, // 323 + MISCREG_OSECCR_EL1, // 324 + MISCREG_DBGBVR0_EL1, // 325 + MISCREG_DBGBVR1_EL1, // 326 + MISCREG_DBGBVR2_EL1, // 327 + MISCREG_DBGBVR3_EL1, // 328 + MISCREG_DBGBVR4_EL1, // 329 + MISCREG_DBGBVR5_EL1, // 330 + MISCREG_DBGBCR0_EL1, // 331 + MISCREG_DBGBCR1_EL1, // 332 + MISCREG_DBGBCR2_EL1, // 333 + MISCREG_DBGBCR3_EL1, // 334 + MISCREG_DBGBCR4_EL1, // 335 + MISCREG_DBGBCR5_EL1, // 336 + MISCREG_DBGWVR0_EL1, // 337 + MISCREG_DBGWVR1_EL1, // 338 + MISCREG_DBGWVR2_EL1, // 339 + MISCREG_DBGWVR3_EL1, // 340 + MISCREG_DBGWCR0_EL1, // 341 + MISCREG_DBGWCR1_EL1, // 342 + MISCREG_DBGWCR2_EL1, // 343 + MISCREG_DBGWCR3_EL1, // 344 + MISCREG_MDCCSR_EL0, // 345 + MISCREG_MDDTR_EL0, // 346 + MISCREG_MDDTRTX_EL0, // 347 + MISCREG_MDDTRRX_EL0, // 348 + MISCREG_DBGVCR32_EL2, // 349 + MISCREG_MDRAR_EL1, // 350 + MISCREG_OSLAR_EL1, // 351 + MISCREG_OSLSR_EL1, // 352 + MISCREG_OSDLR_EL1, // 353 + MISCREG_DBGPRCR_EL1, // 354 + MISCREG_DBGCLAIMSET_EL1, // 355 + MISCREG_DBGCLAIMCLR_EL1, // 356 + MISCREG_DBGAUTHSTATUS_EL1, // 357 + MISCREG_TEECR32_EL1, // 358 + MISCREG_TEEHBR32_EL1, // 359 + + // AArch64 registers (Op0=1,3) + MISCREG_MIDR_EL1, // 360 + MISCREG_MPIDR_EL1, // 361 + MISCREG_REVIDR_EL1, // 362 + MISCREG_ID_PFR0_EL1, // 363 + MISCREG_ID_PFR1_EL1, // 364 + MISCREG_ID_DFR0_EL1, // 365 + MISCREG_ID_AFR0_EL1, // 366 + MISCREG_ID_MMFR0_EL1, // 367 + MISCREG_ID_MMFR1_EL1, // 368 + MISCREG_ID_MMFR2_EL1, // 369 + MISCREG_ID_MMFR3_EL1, // 370 + MISCREG_ID_ISAR0_EL1, // 371 + MISCREG_ID_ISAR1_EL1, // 372 + MISCREG_ID_ISAR2_EL1, // 373 + MISCREG_ID_ISAR3_EL1, // 374 + MISCREG_ID_ISAR4_EL1, // 375 + MISCREG_ID_ISAR5_EL1, // 376 + MISCREG_MVFR0_EL1, // 377 + MISCREG_MVFR1_EL1, // 378 + MISCREG_MVFR2_EL1, // 379 + MISCREG_ID_AA64PFR0_EL1, // 380 + MISCREG_ID_AA64PFR1_EL1, // 381 + MISCREG_ID_AA64DFR0_EL1, // 382 + MISCREG_ID_AA64DFR1_EL1, // 383 + MISCREG_ID_AA64AFR0_EL1, // 384 + MISCREG_ID_AA64AFR1_EL1, // 385 + MISCREG_ID_AA64ISAR0_EL1, // 386 + MISCREG_ID_AA64ISAR1_EL1, // 387 + MISCREG_ID_AA64MMFR0_EL1, // 388 + MISCREG_ID_AA64MMFR1_EL1, // 389 + MISCREG_CCSIDR_EL1, // 390 + MISCREG_CLIDR_EL1, // 391 + MISCREG_AIDR_EL1, // 392 + MISCREG_CSSELR_EL1, // 393 + MISCREG_CTR_EL0, // 394 + MISCREG_DCZID_EL0, // 395 + MISCREG_VPIDR_EL2, // 396 + MISCREG_VMPIDR_EL2, // 397 + MISCREG_SCTLR_EL1, // 398 + MISCREG_ACTLR_EL1, // 399 + MISCREG_CPACR_EL1, // 400 + MISCREG_SCTLR_EL2, // 401 + MISCREG_ACTLR_EL2, // 402 + MISCREG_HCR_EL2, // 403 + MISCREG_MDCR_EL2, // 404 + MISCREG_CPTR_EL2, // 405 + MISCREG_HSTR_EL2, // 406 + MISCREG_HACR_EL2, // 407 + MISCREG_SCTLR_EL3, // 408 + MISCREG_ACTLR_EL3, // 409 + MISCREG_SCR_EL3, // 410 + MISCREG_SDER32_EL3, // 411 + MISCREG_CPTR_EL3, // 412 + MISCREG_MDCR_EL3, // 413 + MISCREG_TTBR0_EL1, // 414 + MISCREG_TTBR1_EL1, // 415 + MISCREG_TCR_EL1, // 416 + MISCREG_TTBR0_EL2, // 417 + MISCREG_TCR_EL2, // 418 + MISCREG_VTTBR_EL2, // 419 + MISCREG_VTCR_EL2, // 420 + MISCREG_TTBR0_EL3, // 421 + MISCREG_TCR_EL3, // 422 + MISCREG_DACR32_EL2, // 423 + MISCREG_SPSR_EL1, // 424 + MISCREG_ELR_EL1, // 425 + MISCREG_SP_EL0, // 426 + MISCREG_SPSEL, // 427 + MISCREG_CURRENTEL, // 428 + MISCREG_NZCV, // 429 + MISCREG_DAIF, // 430 + MISCREG_FPCR, // 431 + MISCREG_FPSR, // 432 + MISCREG_DSPSR_EL0, // 433 + MISCREG_DLR_EL0, // 434 + MISCREG_SPSR_EL2, // 435 + MISCREG_ELR_EL2, // 436 + MISCREG_SP_EL1, // 437 + MISCREG_SPSR_IRQ_AA64, // 438 + MISCREG_SPSR_ABT_AA64, // 439 + MISCREG_SPSR_UND_AA64, // 440 + MISCREG_SPSR_FIQ_AA64, // 441 + MISCREG_SPSR_EL3, // 442 + MISCREG_ELR_EL3, // 443 + MISCREG_SP_EL2, // 444 + MISCREG_AFSR0_EL1, // 445 + MISCREG_AFSR1_EL1, // 446 + MISCREG_ESR_EL1, // 447 + MISCREG_IFSR32_EL2, // 448 + MISCREG_AFSR0_EL2, // 449 + MISCREG_AFSR1_EL2, // 450 + MISCREG_ESR_EL2, // 451 + MISCREG_FPEXC32_EL2, // 452 + MISCREG_AFSR0_EL3, // 453 + MISCREG_AFSR1_EL3, // 454 + MISCREG_ESR_EL3, // 455 + MISCREG_FAR_EL1, // 456 + MISCREG_FAR_EL2, // 457 + MISCREG_HPFAR_EL2, // 458 + MISCREG_FAR_EL3, // 459 + MISCREG_IC_IALLUIS, // 460 + MISCREG_PAR_EL1, // 461 + MISCREG_IC_IALLU, // 462 + MISCREG_DC_IVAC_Xt, // 463 + MISCREG_DC_ISW_Xt, // 464 + MISCREG_AT_S1E1R_Xt, // 465 + MISCREG_AT_S1E1W_Xt, // 466 + MISCREG_AT_S1E0R_Xt, // 467 + MISCREG_AT_S1E0W_Xt, // 468 + MISCREG_DC_CSW_Xt, // 469 + MISCREG_DC_CISW_Xt, // 470 + MISCREG_DC_ZVA_Xt, // 471 + MISCREG_IC_IVAU_Xt, // 472 + MISCREG_DC_CVAC_Xt, // 473 + MISCREG_DC_CVAU_Xt, // 474 + MISCREG_DC_CIVAC_Xt, // 475 + MISCREG_AT_S1E2R_Xt, // 476 + MISCREG_AT_S1E2W_Xt, // 477 + MISCREG_AT_S12E1R_Xt, // 478 + MISCREG_AT_S12E1W_Xt, // 479 + MISCREG_AT_S12E0R_Xt, // 480 + MISCREG_AT_S12E0W_Xt, // 481 + MISCREG_AT_S1E3R_Xt, // 482 + MISCREG_AT_S1E3W_Xt, // 483 + MISCREG_TLBI_VMALLE1IS, // 484 + MISCREG_TLBI_VAE1IS_Xt, // 485 + MISCREG_TLBI_ASIDE1IS_Xt, // 486 + MISCREG_TLBI_VAAE1IS_Xt, // 487 + MISCREG_TLBI_VALE1IS_Xt, // 488 + MISCREG_TLBI_VAALE1IS_Xt, // 489 + MISCREG_TLBI_VMALLE1, // 490 + MISCREG_TLBI_VAE1_Xt, // 491 + MISCREG_TLBI_ASIDE1_Xt, // 492 + MISCREG_TLBI_VAAE1_Xt, // 493 + MISCREG_TLBI_VALE1_Xt, // 494 + MISCREG_TLBI_VAALE1_Xt, // 495 + MISCREG_TLBI_IPAS2E1IS_Xt, // 496 + MISCREG_TLBI_IPAS2LE1IS_Xt, // 497 + MISCREG_TLBI_ALLE2IS, // 498 + MISCREG_TLBI_VAE2IS_Xt, // 499 + MISCREG_TLBI_ALLE1IS, // 500 + MISCREG_TLBI_VALE2IS_Xt, // 501 + MISCREG_TLBI_VMALLS12E1IS, // 502 + MISCREG_TLBI_IPAS2E1_Xt, // 503 + MISCREG_TLBI_IPAS2LE1_Xt, // 504 + MISCREG_TLBI_ALLE2, // 505 + MISCREG_TLBI_VAE2_Xt, // 506 + MISCREG_TLBI_ALLE1, // 507 + MISCREG_TLBI_VALE2_Xt, // 508 + MISCREG_TLBI_VMALLS12E1, // 509 + MISCREG_TLBI_ALLE3IS, // 510 + MISCREG_TLBI_VAE3IS_Xt, // 511 + MISCREG_TLBI_VALE3IS_Xt, // 512 + MISCREG_TLBI_ALLE3, // 513 + MISCREG_TLBI_VAE3_Xt, // 514 + MISCREG_TLBI_VALE3_Xt, // 515 + MISCREG_PMINTENSET_EL1, // 516 + MISCREG_PMINTENCLR_EL1, // 517 + MISCREG_PMCR_EL0, // 518 + MISCREG_PMCNTENSET_EL0, // 519 + MISCREG_PMCNTENCLR_EL0, // 520 + MISCREG_PMOVSCLR_EL0, // 521 + MISCREG_PMSWINC_EL0, // 522 + MISCREG_PMSELR_EL0, // 523 + MISCREG_PMCEID0_EL0, // 524 + MISCREG_PMCEID1_EL0, // 525 + MISCREG_PMCCNTR_EL0, // 526 + MISCREG_PMXEVTYPER_EL0, // 527 + MISCREG_PMCCFILTR_EL0, // 528 + MISCREG_PMXEVCNTR_EL0, // 529 + MISCREG_PMUSERENR_EL0, // 530 + MISCREG_PMOVSSET_EL0, // 531 + MISCREG_MAIR_EL1, // 532 + MISCREG_AMAIR_EL1, // 533 + MISCREG_MAIR_EL2, // 534 + MISCREG_AMAIR_EL2, // 535 + MISCREG_MAIR_EL3, // 536 + MISCREG_AMAIR_EL3, // 537 + MISCREG_L2CTLR_EL1, // 538 + MISCREG_L2ECTLR_EL1, // 539 + MISCREG_VBAR_EL1, // 540 + MISCREG_RVBAR_EL1, // 541 + MISCREG_ISR_EL1, // 542 + MISCREG_VBAR_EL2, // 543 + MISCREG_RVBAR_EL2, // 544 + MISCREG_VBAR_EL3, // 545 + MISCREG_RVBAR_EL3, // 546 + MISCREG_RMR_EL3, // 547 + MISCREG_CONTEXTIDR_EL1, // 548 + MISCREG_TPIDR_EL1, // 549 + MISCREG_TPIDR_EL0, // 550 + MISCREG_TPIDRRO_EL0, // 551 + MISCREG_TPIDR_EL2, // 552 + MISCREG_TPIDR_EL3, // 553 + MISCREG_CNTKCTL_EL1, // 554 + MISCREG_CNTFRQ_EL0, // 555 + MISCREG_CNTPCT_EL0, // 556 + MISCREG_CNTVCT_EL0, // 557 + MISCREG_CNTP_TVAL_EL0, // 558 + MISCREG_CNTP_CTL_EL0, // 559 + MISCREG_CNTP_CVAL_EL0, // 560 + MISCREG_CNTV_TVAL_EL0, // 561 + MISCREG_CNTV_CTL_EL0, // 562 + MISCREG_CNTV_CVAL_EL0, // 563 + MISCREG_PMEVCNTR0_EL0, // 564 + MISCREG_PMEVCNTR1_EL0, // 565 + MISCREG_PMEVCNTR2_EL0, // 566 + MISCREG_PMEVCNTR3_EL0, // 567 + MISCREG_PMEVCNTR4_EL0, // 568 + MISCREG_PMEVCNTR5_EL0, // 569 + MISCREG_PMEVTYPER0_EL0, // 570 + MISCREG_PMEVTYPER1_EL0, // 571 + MISCREG_PMEVTYPER2_EL0, // 572 + MISCREG_PMEVTYPER3_EL0, // 573 + MISCREG_PMEVTYPER4_EL0, // 574 + MISCREG_PMEVTYPER5_EL0, // 575 + MISCREG_CNTVOFF_EL2, // 576 + MISCREG_CNTHCTL_EL2, // 577 + MISCREG_CNTHP_TVAL_EL2, // 578 + MISCREG_CNTHP_CTL_EL2, // 579 + MISCREG_CNTHP_CVAL_EL2, // 580 + MISCREG_CNTPS_TVAL_EL1, // 581 + MISCREG_CNTPS_CTL_EL1, // 582 + MISCREG_CNTPS_CVAL_EL1, // 583 + MISCREG_IL1DATA0_EL1, // 584 + MISCREG_IL1DATA1_EL1, // 585 + MISCREG_IL1DATA2_EL1, // 586 + MISCREG_IL1DATA3_EL1, // 587 + MISCREG_DL1DATA0_EL1, // 588 + MISCREG_DL1DATA1_EL1, // 589 + MISCREG_DL1DATA2_EL1, // 590 + MISCREG_DL1DATA3_EL1, // 591 + MISCREG_DL1DATA4_EL1, // 592 + MISCREG_L2ACTLR_EL1, // 593 + MISCREG_CPUACTLR_EL1, // 594 + MISCREG_CPUECTLR_EL1, // 595 + MISCREG_CPUMERRSR_EL1, // 596 + MISCREG_L2MERRSR_EL1, // 597 + MISCREG_CBAR_EL1, // 598 + + // Dummy registers + MISCREG_NOP, // 599 + MISCREG_RAZ, // 600 + MISCREG_CP14_UNIMPL, // 601 + MISCREG_CP15_UNIMPL, // 602 + MISCREG_A64_UNIMPL, // 603 + MISCREG_UNKNOWN, // 604 + + NUM_MISCREGS // 605 }; + enum MiscRegInfo { + MISCREG_IMPLEMENTED, + MISCREG_WARN_NOT_FAIL, // If MISCREG_IMPLEMENTED is deasserted, it + // tells whether the instruction should raise a + // warning or fail + MISCREG_MUTEX, // True if the register corresponds to a pair of + // mutually exclusive registers + MISCREG_BANKED, // True if the register is banked between the two + // security states, and this is the parent node of the + // two banked registers + MISCREG_BANKED_CHILD, // The entry is one of the child registers that + // forms a banked set of regs (along with the + // other child regs) + + // Access permissions + // User mode + MISCREG_USR_NS_RD, + MISCREG_USR_NS_WR, + MISCREG_USR_S_RD, + MISCREG_USR_S_WR, + // Privileged modes other than hypervisor or monitor + MISCREG_PRI_NS_RD, + MISCREG_PRI_NS_WR, + MISCREG_PRI_S_RD, + MISCREG_PRI_S_WR, + // Hypervisor mode + MISCREG_HYP_RD, + MISCREG_HYP_WR, + // Monitor mode, SCR.NS == 0 + MISCREG_MON_NS0_RD, + MISCREG_MON_NS0_WR, + // Monitor mode, SCR.NS == 1 + MISCREG_MON_NS1_RD, + MISCREG_MON_NS1_WR, + + NUM_MISCREG_INFOS + }; + + extern std::bitset<NUM_MISCREG_INFOS> miscRegInfo[NUM_MISCREGS]; + + // Decodes 32-bit CP14 registers accessible through MCR/MRC instructions MiscRegIndex decodeCP14Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2); + MiscRegIndex decodeAArch64SysReg(unsigned op0, unsigned op1, + unsigned crn, unsigned crm, + unsigned op2); + // Whether a particular AArch64 system register is -always- read only. + bool aarch64SysRegReadOnly(MiscRegIndex miscReg); + // Decodes 32-bit CP15 registers accessible through MCR/MRC instructions MiscRegIndex decodeCP15Reg(unsigned crn, unsigned opc1, unsigned crm, unsigned opc2); + // Decodes 64-bit CP15 registers accessible through MCRR/MRRC instructions + MiscRegIndex decodeCP15Reg64(unsigned crm, unsigned opc1); + const char * const miscRegName[] = { - "cpsr", "cpsr_q", "spsr", "spsr_fiq", "spsr_irq", "spsr_svc", - "spsr_mon", "spsr_und", "spsr_abt", - "fpsr", "fpsid", "fpscr", "fpscr_qc", "fpscr_exc", "fpexc", - "mvfr0", "mvfr1", - "sctlr_rst", "sev_mailbox", - "DBGDIDR", - "DBGDSCR_INT", - "DBGDTRRX_INT", - "DBGTRTX_INT", - "DBGWFAR", - "DBGVCR", - "DBGECR", - "DBGDSCCR", - "DBGSMCR", - "DBGDTRRX_EXT", - "DBGDSCR_EXT", - "DBGDTRTX_EXT", - "DBGDRCR", - "DBGBVR", - "DBGBCR", - "DBGBVR_M", - "DBGBCR_M", - "DBGDRAR", - "DBGBXVR_M", - "DBGOSLAR", - "DBGOSSRR", - "DBGOSDLR", - "DBGPRCR", - "DBGPRSR", - "DBGDSAR", - "DBGITCTRL", - "DBGCLAIMSET", - "DBGCLAIMCLR", - "DBGAUTHSTATUS", - "DBGDEVID2", - "DBGDEVID1", - "DBGDEVID", - "TEEHBR", - "sctlr", "dccisw", "dccimvac", "dccmvac", - "contextidr", "tpidrurw", "tpidruro", "tpidrprw", - "cp15isb", "cp15dsb", "cp15dmb", "cpacr", - "clidr", "ccsidr", "csselr", - "icialluis", "iciallu", "icimvau", - "bpimva", "bpiallis", "bpiall", - "midr", "ttbr0", "ttbr1", "tlbtr", "dacr", - "tlbiallis", "tlbimvais", "tlbiasidis", "tlbimvaais", - "itlbiall", "itlbimva", "itlbiasid", - "dtlbiall", "dtlbimva", "dtlbiasid", - "tlbiall", "tlbimva", "tlbiasid", "tlbimvaa", - "dfsr", "ifsr", "dfar", "ifar", "mpidr", - "prrr", "nmrr", "ttbcr", "id_pfr0", "ctr", - "scr", "sder", "par", - "v2pcwpr", "v2pcwpw", "v2pcwur", "v2pcwuw", - "v2powpr", "v2powpw", "v2powur", "v2powuw", - "id_mmfr0", "id_mmfr2", "id_mmfr3", "actlr", "pmcr", "pmccntr", - "pmcntenset", "pmcntenclr", "pmovsr", - "pmswinc", "pmselr", "pmceid0", - "pmceid1", "pmc_other", "pmxevcntr", - "pmuserenr", "pmintenset", "pmintenclr", - "id_isar0", "id_isar1", "id_isar2", "id_isar3", "id_isar4", "id_isar5", - "lockflag", "lockaddr", "id_pfr1", - "l2ctlr", - // Unimplemented below + "cpsr", + "spsr", + "spsr_fiq", + "spsr_irq", + "spsr_svc", + "spsr_mon", + "spsr_abt", + "spsr_hyp", + "spsr_und", + "elr_hyp", + "fpsid", + "fpscr", + "mvfr1", + "mvfr0", + "fpexc", + + // Helper registers + "cpsr_mode", + "cpsr_q", + "fpscr_exc", + "fpscr_qc", + "lockaddr", + "lockflag", + "prrr_mair0", + "prrr_mair0_ns", + "prrr_mair0_s", + "nmrr_mair1", + "nmrr_mair1_ns", + "nmrr_mair1_s", + "pmxevtyper_pmccfiltr", + "sctlr_rst", + "sev_mailbox", + + // AArch32 CP14 registers + "dbgdidr", + "dbgdscrint", + "dbgdccint", + "dbgdtrtxint", + "dbgdtrrxint", + "dbgwfar", + "dbgvcr", + "dbgdtrrxext", + "dbgdscrext", + "dbgdtrtxext", + "dbgoseccr", + "dbgbvr0", + "dbgbvr1", + "dbgbvr2", + "dbgbvr3", + "dbgbvr4", + "dbgbvr5", + "dbgbcr0", + "dbgbcr1", + "dbgbcr2", + "dbgbcr3", + "dbgbcr4", + "dbgbcr5", + "dbgwvr0", + "dbgwvr1", + "dbgwvr2", + "dbgwvr3", + "dbgwcr0", + "dbgwcr1", + "dbgwcr2", + "dbgwcr3", + "dbgdrar", + "dbgbxvr4", + "dbgbxvr5", + "dbgoslar", + "dbgoslsr", + "dbgosdlr", + "dbgprcr", + "dbgdsar", + "dbgclaimset", + "dbgclaimclr", + "dbgauthstatus", + "dbgdevid2", + "dbgdevid1", + "dbgdevid0", + "teecr", + "jidr", + "teehbr", + "joscr", + "jmcr", + + // AArch32 CP15 registers + "midr", + "ctr", "tcmtr", - "id_dfr0", "id_afr0", + "tlbtr", + "mpidr", + "revidr", + "id_pfr0", + "id_pfr1", + "id_dfr0", + "id_afr0", + "id_mmfr0", "id_mmfr1", - "aidr", "adfsr", "aifsr", - "dcimvac", "dcisw", "mccsw", - "dccmvau", + "id_mmfr2", + "id_mmfr3", + "id_isar0", + "id_isar1", + "id_isar2", + "id_isar3", + "id_isar4", + "id_isar5", + "ccsidr", + "clidr", + "aidr", + "csselr", + "csselr_ns", + "csselr_s", + "vpidr", + "vmpidr", + "sctlr", + "sctlr_ns", + "sctlr_s", + "actlr", + "actlr_ns", + "actlr_s", + "cpacr", + "scr", + "sder", "nsacr", - "vbar", "mvbar", "isr", "fceidr", "l2latency", - "crn15", - "nop", "raz" + "hsctlr", + "hactlr", + "hcr", + "hdcr", + "hcptr", + "hstr", + "hacr", + "ttbr0", + "ttbr0_ns", + "ttbr0_s", + "ttbr1", + "ttbr1_ns", + "ttbr1_s", + "ttbcr", + "ttbcr_ns", + "ttbcr_s", + "htcr", + "vtcr", + "dacr", + "dacr_ns", + "dacr_s", + "dfsr", + "dfsr_ns", + "dfsr_s", + "ifsr", + "ifsr_ns", + "ifsr_s", + "adfsr", + "adfsr_ns", + "adfsr_s", + "aifsr", + "aifsr_ns", + "aifsr_s", + "hadfsr", + "haifsr", + "hsr", + "dfar", + "dfar_ns", + "dfar_s", + "ifar", + "ifar_ns", + "ifar_s", + "hdfar", + "hifar", + "hpfar", + "icialluis", + "bpiallis", + "par", + "par_ns", + "par_s", + "iciallu", + "icimvau", + "cp15isb", + "bpiall", + "bpimva", + "dcimvac", + "dcisw", + "ats1cpr", + "ats1cpw", + "ats1cur", + "ats1cuw", + "ats12nsopr", + "ats12nsopw", + "ats12nsour", + "ats12nsouw", + "dccmvac", + "dccsw", + "cp15dsb", + "cp15dmb", + "dccmvau", + "dccimvac", + "dccisw", + "ats1hr", + "ats1hw", + "tlbiallis", + "tlbimvais", + "tlbiasidis", + "tlbimvaais", + "tlbimvalis", + "tlbimvaalis", + "itlbiall", + "itlbimva", + "itlbiasid", + "dtlbiall", + "dtlbimva", + "dtlbiasid", + "tlbiall", + "tlbimva", + "tlbiasid", + "tlbimvaa", + "tlbimval", + "tlbimvaal", + "tlbiipas2is", + "tlbiipas2lis", + "tlbiallhis", + "tlbimvahis", + "tlbiallnsnhis", + "tlbimvalhis", + "tlbiipas2", + "tlbiipas2l", + "tlbiallh", + "tlbimvah", + "tlbiallnsnh", + "tlbimvalh", + "pmcr", + "pmcntenset", + "pmcntenclr", + "pmovsr", + "pmswinc", + "pmselr", + "pmceid0", + "pmceid1", + "pmccntr", + "pmxevtyper", + "pmccfiltr", + "pmxevcntr", + "pmuserenr", + "pmintenset", + "pmintenclr", + "pmovsset", + "l2ctlr", + "l2ectlr", + "prrr", + "prrr_ns", + "prrr_s", + "mair0", + "mair0_ns", + "mair0_s", + "nmrr", + "nmrr_ns", + "nmrr_s", + "mair1", + "mair1_ns", + "mair1_s", + "amair0", + "amair0_ns", + "amair0_s", + "amair1", + "amair1_ns", + "amair1_s", + "hmair0", + "hmair1", + "hamair0", + "hamair1", + "vbar", + "vbar_ns", + "vbar_s", + "mvbar", + "rmr", + "isr", + "hvbar", + "fcseidr", + "contextidr", + "contextidr_ns", + "contextidr_s", + "tpidrurw", + "tpidrurw_ns", + "tpidrurw_s", + "tpidruro", + "tpidruro_ns", + "tpidruro_s", + "tpidrprw", + "tpidrprw_ns", + "tpidrprw_s", + "htpidr", + "cntfrq", + "cntkctl", + "cntp_tval", + "cntp_tval_ns", + "cntp_tval_s", + "cntp_ctl", + "cntp_ctl_ns", + "cntp_ctl_s", + "cntv_tval", + "cntv_ctl", + "cnthctl", + "cnthp_tval", + "cnthp_ctl", + "il1data0", + "il1data1", + "il1data2", + "il1data3", + "dl1data0", + "dl1data1", + "dl1data2", + "dl1data3", + "dl1data4", + "ramindex", + "l2actlr", + "cbar", + "httbr", + "vttbr", + "cntpct", + "cntvct", + "cntp_cval", + "cntp_cval_ns", + "cntp_cval_s", + "cntv_cval", + "cntvoff", + "cnthp_cval", + "cpumerrsr", + "l2merrsr", + + // AArch64 registers (Op0=2) + "mdccint_el1", + "osdtrrx_el1", + "mdscr_el1", + "osdtrtx_el1", + "oseccr_el1", + "dbgbvr0_el1", + "dbgbvr1_el1", + "dbgbvr2_el1", + "dbgbvr3_el1", + "dbgbvr4_el1", + "dbgbvr5_el1", + "dbgbcr0_el1", + "dbgbcr1_el1", + "dbgbcr2_el1", + "dbgbcr3_el1", + "dbgbcr4_el1", + "dbgbcr5_el1", + "dbgwvr0_el1", + "dbgwvr1_el1", + "dbgwvr2_el1", + "dbgwvr3_el1", + "dbgwcr0_el1", + "dbgwcr1_el1", + "dbgwcr2_el1", + "dbgwcr3_el1", + "mdccsr_el0", + "mddtr_el0", + "mddtrtx_el0", + "mddtrrx_el0", + "dbgvcr32_el2", + "mdrar_el1", + "oslar_el1", + "oslsr_el1", + "osdlr_el1", + "dbgprcr_el1", + "dbgclaimset_el1", + "dbgclaimclr_el1", + "dbgauthstatus_el1", + "teecr32_el1", + "teehbr32_el1", + + // AArch64 registers (Op0=1,3) + "midr_el1", + "mpidr_el1", + "revidr_el1", + "id_pfr0_el1", + "id_pfr1_el1", + "id_dfr0_el1", + "id_afr0_el1", + "id_mmfr0_el1", + "id_mmfr1_el1", + "id_mmfr2_el1", + "id_mmfr3_el1", + "id_isar0_el1", + "id_isar1_el1", + "id_isar2_el1", + "id_isar3_el1", + "id_isar4_el1", + "id_isar5_el1", + "mvfr0_el1", + "mvfr1_el1", + "mvfr2_el1", + "id_aa64pfr0_el1", + "id_aa64pfr1_el1", + "id_aa64dfr0_el1", + "id_aa64dfr1_el1", + "id_aa64afr0_el1", + "id_aa64afr1_el1", + "id_aa64isar0_el1", + "id_aa64isar1_el1", + "id_aa64mmfr0_el1", + "id_aa64mmfr1_el1", + "ccsidr_el1", + "clidr_el1", + "aidr_el1", + "csselr_el1", + "ctr_el0", + "dczid_el0", + "vpidr_el2", + "vmpidr_el2", + "sctlr_el1", + "actlr_el1", + "cpacr_el1", + "sctlr_el2", + "actlr_el2", + "hcr_el2", + "mdcr_el2", + "cptr_el2", + "hstr_el2", + "hacr_el2", + "sctlr_el3", + "actlr_el3", + "scr_el3", + "sder32_el3", + "cptr_el3", + "mdcr_el3", + "ttbr0_el1", + "ttbr1_el1", + "tcr_el1", + "ttbr0_el2", + "tcr_el2", + "vttbr_el2", + "vtcr_el2", + "ttbr0_el3", + "tcr_el3", + "dacr32_el2", + "spsr_el1", + "elr_el1", + "sp_el0", + "spsel", + "currentel", + "nzcv", + "daif", + "fpcr", + "fpsr", + "dspsr_el0", + "dlr_el0", + "spsr_el2", + "elr_el2", + "sp_el1", + "spsr_irq_aa64", + "spsr_abt_aa64", + "spsr_und_aa64", + "spsr_fiq_aa64", + "spsr_el3", + "elr_el3", + "sp_el2", + "afsr0_el1", + "afsr1_el1", + "esr_el1", + "ifsr32_el2", + "afsr0_el2", + "afsr1_el2", + "esr_el2", + "fpexc32_el2", + "afsr0_el3", + "afsr1_el3", + "esr_el3", + "far_el1", + "far_el2", + "hpfar_el2", + "far_el3", + "ic_ialluis", + "par_el1", + "ic_iallu", + "dc_ivac_xt", + "dc_isw_xt", + "at_s1e1r_xt", + "at_s1e1w_xt", + "at_s1e0r_xt", + "at_s1e0w_xt", + "dc_csw_xt", + "dc_cisw_xt", + "dc_zva_xt", + "ic_ivau_xt", + "dc_cvac_xt", + "dc_cvau_xt", + "dc_civac_xt", + "at_s1e2r_xt", + "at_s1e2w_xt", + "at_s12e1r_xt", + "at_s12e1w_xt", + "at_s12e0r_xt", + "at_s12e0w_xt", + "at_s1e3r_xt", + "at_s1e3w_xt", + "tlbi_vmalle1is", + "tlbi_vae1is_xt", + "tlbi_aside1is_xt", + "tlbi_vaae1is_xt", + "tlbi_vale1is_xt", + "tlbi_vaale1is_xt", + "tlbi_vmalle1", + "tlbi_vae1_xt", + "tlbi_aside1_xt", + "tlbi_vaae1_xt", + "tlbi_vale1_xt", + "tlbi_vaale1_xt", + "tlbi_ipas2e1is_xt", + "tlbi_ipas2le1is_xt", + "tlbi_alle2is", + "tlbi_vae2is_xt", + "tlbi_alle1is", + "tlbi_vale2is_xt", + "tlbi_vmalls12e1is", + "tlbi_ipas2e1_xt", + "tlbi_ipas2le1_xt", + "tlbi_alle2", + "tlbi_vae2_xt", + "tlbi_alle1", + "tlbi_vale2_xt", + "tlbi_vmalls12e1", + "tlbi_alle3is", + "tlbi_vae3is_xt", + "tlbi_vale3is_xt", + "tlbi_alle3", + "tlbi_vae3_xt", + "tlbi_vale3_xt", + "pmintenset_el1", + "pmintenclr_el1", + "pmcr_el0", + "pmcntenset_el0", + "pmcntenclr_el0", + "pmovsclr_el0", + "pmswinc_el0", + "pmselr_el0", + "pmceid0_el0", + "pmceid1_el0", + "pmccntr_el0", + "pmxevtyper_el0", + "pmccfiltr_el0", + "pmxevcntr_el0", + "pmuserenr_el0", + "pmovsset_el0", + "mair_el1", + "amair_el1", + "mair_el2", + "amair_el2", + "mair_el3", + "amair_el3", + "l2ctlr_el1", + "l2ectlr_el1", + "vbar_el1", + "rvbar_el1", + "isr_el1", + "vbar_el2", + "rvbar_el2", + "vbar_el3", + "rvbar_el3", + "rmr_el3", + "contextidr_el1", + "tpidr_el1", + "tpidr_el0", + "tpidrro_el0", + "tpidr_el2", + "tpidr_el3", + "cntkctl_el1", + "cntfrq_el0", + "cntpct_el0", + "cntvct_el0", + "cntp_tval_el0", + "cntp_ctl_el0", + "cntp_cval_el0", + "cntv_tval_el0", + "cntv_ctl_el0", + "cntv_cval_el0", + "pmevcntr0_el0", + "pmevcntr1_el0", + "pmevcntr2_el0", + "pmevcntr3_el0", + "pmevcntr4_el0", + "pmevcntr5_el0", + "pmevtyper0_el0", + "pmevtyper1_el0", + "pmevtyper2_el0", + "pmevtyper3_el0", + "pmevtyper4_el0", + "pmevtyper5_el0", + "cntvoff_el2", + "cnthctl_el2", + "cnthp_tval_el2", + "cnthp_ctl_el2", + "cnthp_cval_el2", + "cntps_tval_el1", + "cntps_ctl_el1", + "cntps_cval_el1", + "il1data0_el1", + "il1data1_el1", + "il1data2_el1", + "il1data3_el1", + "dl1data0_el1", + "dl1data1_el1", + "dl1data2_el1", + "dl1data3_el1", + "dl1data4_el1", + "l2actlr_el1", + "cpuactlr_el1", + "cpuectlr_el1", + "cpumerrsr_el1", + "l2merrsr_el1", + "cbar_el1", + + // Dummy registers + "nop", + "raz", + "cp14_unimpl", + "cp15_unimpl", + "a64_unimpl", + "unknown" }; static_assert(sizeof(miscRegName) / sizeof(*miscRegName) == NUM_MISCREGS, "The miscRegName array and NUM_MISCREGS are inconsistent."); BitUnion32(CPSR) - Bitfield<31,30> nz; + Bitfield<31, 30> nz; Bitfield<29> c; Bitfield<28> v; Bitfield<27> q; - Bitfield<26,25> it1; + Bitfield<26, 25> it1; Bitfield<24> j; + Bitfield<23, 22> res0_23_22; + Bitfield<21> ss; // AArch64 + Bitfield<20> il; // AArch64 Bitfield<19, 16> ge; - Bitfield<15,10> it2; + Bitfield<15, 10> it2; + Bitfield<9> d; // AArch64 Bitfield<9> e; Bitfield<8> a; Bitfield<7> i; Bitfield<6> f; + Bitfield<9, 6> daif; // AArch64 Bitfield<5> t; + Bitfield<4> width; // AArch64 + Bitfield<3, 2> el; // AArch64 Bitfield<4, 0> mode; + Bitfield<0> sp; // AArch64 EndBitUnion(CPSR) // This mask selects bits of the CPSR that actually go in the CondCodes @@ -352,32 +1404,190 @@ namespace ArmISA static const uint32_t CondCodesMask = 0xF00F0000; static const uint32_t CpsrMaskQ = 0x08000000; + BitUnion32(HDCR) + Bitfield<11> tdra; + Bitfield<10> tdosa; + Bitfield<9> tda; + Bitfield<8> tde; + Bitfield<7> hpme; + Bitfield<6> tpm; + Bitfield<5> tpmcr; + Bitfield<4, 0> hpmn; + EndBitUnion(HDCR) + + BitUnion32(HCPTR) + Bitfield<31> tcpac; + Bitfield<20> tta; + Bitfield<15> tase; + Bitfield<13> tcp13; + Bitfield<12> tcp12; + Bitfield<11> tcp11; + Bitfield<10> tcp10; + Bitfield<10> tfp; // AArch64 + Bitfield<9> tcp9; + Bitfield<8> tcp8; + Bitfield<7> tcp7; + Bitfield<6> tcp6; + Bitfield<5> tcp5; + Bitfield<4> tcp4; + Bitfield<3> tcp3; + Bitfield<2> tcp2; + Bitfield<1> tcp1; + Bitfield<0> tcp0; + EndBitUnion(HCPTR) + + BitUnion32(HSTR) + Bitfield<17> tjdbx; + Bitfield<16> ttee; + Bitfield<15> t15; + Bitfield<13> t13; + Bitfield<12> t12; + Bitfield<11> t11; + Bitfield<10> t10; + Bitfield<9> t9; + Bitfield<8> t8; + Bitfield<7> t7; + Bitfield<6> t6; + Bitfield<5> t5; + Bitfield<4> t4; + Bitfield<3> t3; + Bitfield<2> t2; + Bitfield<1> t1; + Bitfield<0> t0; + EndBitUnion(HSTR) + + BitUnion64(HCR) + Bitfield<33> id; // AArch64 + Bitfield<32> cd; // AArch64 + Bitfield<31> rw; // AArch64 + Bitfield<30> trvm; // AArch64 + Bitfield<29> hcd; // AArch64 + Bitfield<28> tdz; // AArch64 + + Bitfield<27> tge; + Bitfield<26> tvm; + Bitfield<25> ttlb; + Bitfield<24> tpu; + Bitfield<23> tpc; + Bitfield<22> tsw; + Bitfield<21> tac; + Bitfield<21> tacr; // AArch64 + Bitfield<20> tidcp; + Bitfield<19> tsc; + Bitfield<18> tid3; + Bitfield<17> tid2; + Bitfield<16> tid1; + Bitfield<15> tid0; + Bitfield<14> twe; + Bitfield<13> twi; + Bitfield<12> dc; + Bitfield<11, 10> bsu; + Bitfield<9> fb; + Bitfield<8> va; + Bitfield<8> vse; // AArch64 + Bitfield<7> vi; + Bitfield<6> vf; + Bitfield<5> amo; + Bitfield<4> imo; + Bitfield<3> fmo; + Bitfield<2> ptw; + Bitfield<1> swio; + Bitfield<0> vm; + EndBitUnion(HCR) + + BitUnion32(NSACR) + Bitfield<20> nstrcdis; + Bitfield<19> rfr; + Bitfield<15> nsasedis; + Bitfield<14> nsd32dis; + Bitfield<13> cp13; + Bitfield<12> cp12; + Bitfield<11> cp11; + Bitfield<10> cp10; + Bitfield<9> cp9; + Bitfield<8> cp8; + Bitfield<7> cp7; + Bitfield<6> cp6; + Bitfield<5> cp5; + Bitfield<4> cp4; + Bitfield<3> cp3; + Bitfield<2> cp2; + Bitfield<1> cp1; + Bitfield<0> cp0; + EndBitUnion(NSACR) + + BitUnion32(SCR) + Bitfield<13> twe; + Bitfield<12> twi; + Bitfield<11> st; // AArch64 + Bitfield<10> rw; // AArch64 + Bitfield<9> sif; + Bitfield<8> hce; + Bitfield<7> scd; + Bitfield<7> smd; // AArch64 + Bitfield<6> nEt; + Bitfield<5> aw; + Bitfield<4> fw; + Bitfield<3> ea; + Bitfield<2> fiq; + Bitfield<1> irq; + Bitfield<0> ns; + EndBitUnion(SCR) + BitUnion32(SCTLR) - Bitfield<31> ie; // Instruction endianness - Bitfield<30> te; // Thumb Exception Enable - Bitfield<29> afe; // Access flag enable - Bitfield<28> tre; // TEX Remap bit - Bitfield<27> nmfi;// Non-maskable fast interrupts enable - Bitfield<25> ee; // Exception Endianness bit - Bitfield<24> ve; // Interrupt vectors enable - Bitfield<23> xp; // Extended page table enable bit - Bitfield<22> u; // Alignment (now unused) - Bitfield<21> fi; // Fast interrupts configuration enable - Bitfield<19> dz; // Divide by Zero fault enable bit - Bitfield<18> rao2;// Read as one - Bitfield<17> br; // Background region bit - Bitfield<16> rao3;// Read as one - Bitfield<14> rr; // Round robin cache replacement - Bitfield<13> v; // Base address for exception vectors - Bitfield<12> i; // instruction cache enable - Bitfield<11> z; // branch prediction enable bit - Bitfield<10> sw; // Enable swp/swpb - Bitfield<9,8> rs; // deprecated protection bits - Bitfield<6,3> rao4;// Read as one - Bitfield<7> b; // Endianness support (unused) - Bitfield<2> c; // Cache enable bit - Bitfield<1> a; // Alignment fault checking - Bitfield<0> m; // MMU enable bit + Bitfield<30> te; // Thumb Exception Enable (AArch32 only) + Bitfield<29> afe; // Access flag enable (AArch32 only) + Bitfield<28> tre; // TEX remap enable (AArch32 only) + Bitfield<27> nmfi; // Non-maskable FIQ support (ARMv7 only) + Bitfield<26> uci; // Enable EL0 access to DC CVAU, DC CIVAC, + // DC CVAC and IC IVAU instructions + // (AArch64 SCTLR_EL1 only) + Bitfield<25> ee; // Exception Endianness + Bitfield<24> ve; // Interrupt Vectors Enable (ARMv7 only) + Bitfield<24> e0e; // Endianness of explicit data accesses at EL0 + // (AArch64 SCTLR_EL1 only) + Bitfield<23> xp; // Extended page table enable (dropped in ARMv7) + Bitfield<22> u; // Alignment (dropped in ARMv7) + Bitfield<21> fi; // Fast interrupts configuration enable + // (ARMv7 only) + Bitfield<20> uwxn; // Unprivileged write permission implies EL1 XN + // (AArch32 only) + Bitfield<19> dz; // Divide by Zero fault enable + // (dropped in ARMv7) + Bitfield<19> wxn; // Write permission implies XN + Bitfield<18> ntwe; // Not trap WFE + // (ARMv8 AArch32 and AArch64 SCTLR_EL1 only) + Bitfield<18> rao2; // Read as one + Bitfield<16> ntwi; // Not trap WFI + // (ARMv8 AArch32 and AArch64 SCTLR_EL1 only) + Bitfield<16> rao3; // Read as one + Bitfield<15> uct; // Enable EL0 access to CTR_EL0 + // (AArch64 SCTLR_EL1 only) + Bitfield<14> rr; // Round Robin select (ARMv7 only) + Bitfield<14> dze; // Enable EL0 access to DC ZVA + // (AArch64 SCTLR_EL1 only) + Bitfield<13> v; // Vectors bit (AArch32 only) + Bitfield<12> i; // Instruction cache enable + Bitfield<11> z; // Branch prediction enable (ARMv7 only) + Bitfield<10> sw; // SWP/SWPB enable (ARMv7 only) + Bitfield<9, 8> rs; // Deprecated protection bits (dropped in ARMv7) + Bitfield<9> uma; // User mask access (AArch64 SCTLR_EL1 only) + Bitfield<8> sed; // SETEND disable + // (ARMv8 AArch32 and AArch64 SCTLR_EL1 only) + Bitfield<7> b; // Endianness support (dropped in ARMv7) + Bitfield<7> itd; // IT disable + // (ARMv8 AArch32 and AArch64 SCTLR_EL1 only) + Bitfield<6, 3> rao4; // Read as one + Bitfield<6> thee; // ThumbEE enable + // (ARMv8 AArch32 and AArch64 SCTLR_EL1 only) + Bitfield<5> cp15ben; // CP15 barrier enable + // (AArch32 and AArch64 SCTLR_EL1 only) + Bitfield<4> sa0; // Stack Alignment Check Enable for EL0 + // (AArch64 SCTLR_EL1 only) + Bitfield<3> sa; // Stack Alignment Check Enable (AArch64 only) + Bitfield<2> c; // Cache enable + Bitfield<1> a; // Alignment check enable + Bitfield<0> m; // MMU enable EndBitUnion(SCTLR) BitUnion32(CPACR) @@ -392,20 +1602,25 @@ namespace ArmISA Bitfield<17, 16> cp8; Bitfield<19, 18> cp9; Bitfield<21, 20> cp10; + Bitfield<21, 20> fpen; // AArch64 Bitfield<23, 22> cp11; Bitfield<25, 24> cp12; Bitfield<27, 26> cp13; Bitfield<29, 28> rsvd; + Bitfield<28> tta; // AArch64 Bitfield<30> d32dis; Bitfield<31> asedis; EndBitUnion(CPACR) BitUnion32(FSR) Bitfield<3, 0> fsLow; + Bitfield<5, 0> status; // LPAE Bitfield<7, 4> domain; + Bitfield<9> lpae; Bitfield<10> fsHigh; Bitfield<11> wnr; Bitfield<12> ext; + Bitfield<13> cm; // LPAE EndBitUnion(FSR) BitUnion32(FPSCR) @@ -470,6 +1685,52 @@ namespace ArmISA Bitfield<31, 28> raz; EndBitUnion(MVFR1) + BitUnion64(TTBCR) + // Short-descriptor translation table format + Bitfield<2, 0> n; + Bitfield<4> pd0; + Bitfield<5> pd1; + // Long-descriptor translation table format + Bitfield<5, 0> t0sz; + Bitfield<7> epd0; + Bitfield<9, 8> irgn0; + Bitfield<11, 10> orgn0; + Bitfield<13, 12> sh0; + Bitfield<14> tg0; + Bitfield<21, 16> t1sz; + Bitfield<22> a1; + Bitfield<23> epd1; + Bitfield<25, 24> irgn1; + Bitfield<27, 26> orgn1; + Bitfield<29, 28> sh1; + Bitfield<30> tg1; + Bitfield<34, 32> ips; + Bitfield<36> as; + Bitfield<37> tbi0; + Bitfield<38> tbi1; + // Common + Bitfield<31> eae; + // TCR_EL2/3 (AArch64) + Bitfield<18, 16> ps; + Bitfield<20> tbi; + EndBitUnion(TTBCR) + + BitUnion32(HTCR) + Bitfield<2, 0> t0sz; + Bitfield<9, 8> irgn0; + Bitfield<11, 10> orgn0; + Bitfield<13, 12> sh0; + EndBitUnion(HTCR) + + BitUnion32(VTCR_t) + Bitfield<3, 0> t0sz; + Bitfield<4> s; + Bitfield<7, 6> sl0; + Bitfield<9, 8> irgn0; + Bitfield<11, 10> orgn0; + Bitfield<13, 12> sh0; + EndBitUnion(VTCR_t) + BitUnion32(PRRR) Bitfield<1,0> tr0; Bitfield<3,2> tr1; @@ -544,6 +1805,72 @@ namespace ArmISA Bitfield<28> raz_28; Bitfield<31,29> format; EndBitUnion(CTR) + + BitUnion32(PMSELR) + Bitfield<4, 0> sel; + EndBitUnion(PMSELR) + + BitUnion64(PAR) + // 64-bit format + Bitfield<63, 56> attr; + Bitfield<39, 12> pa; + Bitfield<11> lpae; + Bitfield<9> ns; + Bitfield<8, 7> sh; + Bitfield<0> f; + EndBitUnion(PAR) + + BitUnion32(ESR) + Bitfield<31, 26> ec; + Bitfield<25> il; + Bitfield<15, 0> imm16; + EndBitUnion(ESR) + + BitUnion32(CPTR) + Bitfield<31> tcpac; + Bitfield<20> tta; + Bitfield<13, 12> res1_13_12_el2; + Bitfield<10> tfp; + Bitfield<9, 0> res1_9_0_el2; + EndBitUnion(CPTR) + + + // Checks read access permissions to coproc. registers + bool canReadCoprocReg(MiscRegIndex reg, SCR scr, CPSR cpsr, + ThreadContext *tc); + + // Checks write access permissions to coproc. registers + bool canWriteCoprocReg(MiscRegIndex reg, SCR scr, CPSR cpsr, + ThreadContext *tc); + + // Checks read access permissions to AArch64 system registers + bool canReadAArch64SysReg(MiscRegIndex reg, SCR scr, CPSR cpsr, + ThreadContext *tc); + + // Checks write access permissions to AArch64 system registers + bool canWriteAArch64SysReg(MiscRegIndex reg, SCR scr, CPSR cpsr, + ThreadContext *tc); + + // Uses just the scr.ns bit to pre flatten the misc regs. This is useful + // for MCR/MRC instructions + int + flattenMiscRegNsBanked(int reg, ThreadContext *tc); + + // Flattens a misc reg index using the specified security state. This is + // used for opperations (eg address translations) where the security + // state of the register access may differ from the current state of the + // processor + int + flattenMiscRegNsBanked(int reg, ThreadContext *tc, bool ns); + + // Takes a misc reg index and returns the root reg if its one of a set of + // banked registers + void + preUnflattenMiscReg(); + + int + unflattenMiscReg(int reg); + } #endif // __ARCH_ARM_MISCREGS_HH__ diff --git a/src/arch/arm/nativetrace.cc b/src/arch/arm/nativetrace.cc index 21dff8b7c..9ba3fa84a 100644 --- a/src/arch/arm/nativetrace.cc +++ b/src/arch/arm/nativetrace.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2011 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -124,7 +124,7 @@ Trace::ArmNativeTrace::ThreadState::update(ThreadContext *tc) newState[STATE_CPSR] = cpsr; changed[STATE_CPSR] = (newState[STATE_CPSR] != oldState[STATE_CPSR]); - for (int i = 0; i < NumFloatArchRegs; i += 2) { + for (int i = 0; i < NumFloatV7ArchRegs; i += 2) { newState[STATE_F0 + (i >> 1)] = static_cast<uint64_t>(tc->readFloatRegBits(i + 1)) << 32 | tc->readFloatRegBits(i); diff --git a/src/arch/arm/pagetable.hh b/src/arch/arm/pagetable.hh index 898ab3191..591ec9807 100644 --- a/src/arch/arm/pagetable.hh +++ b/src/arch/arm/pagetable.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -43,6 +43,8 @@ #ifndef __ARCH_ARM_PAGETABLE_H__ #define __ARCH_ARM_PAGETABLE_H__ +#include <cstdint> + #include "arch/arm/isa_traits.hh" #include "arch/arm/utility.hh" #include "arch/arm/vtophys.hh" @@ -71,69 +73,107 @@ struct PTE }; +// Lookup level +enum LookupLevel { + L0 = 0, // AArch64 only + L1, + L2, + L3, + MAX_LOOKUP_LEVELS +}; + // ITB/DTB table entry struct TlbEntry { public: - enum MemoryType { + enum class MemoryType : std::uint8_t { StronglyOrdered, Device, Normal }; - enum DomainType { - DomainNoAccess = 0, - DomainClient, - DomainReserved, - DomainManager + + enum class DomainType : std::uint8_t { + NoAccess = 0, + Client, + Reserved, + Manager }; // Matching variables Addr pfn; Addr size; // Size of this entry, == Type of TLB Rec Addr vpn; // Virtual Page Number - uint32_t asid; // Address Space Identifier + uint64_t attributes; // Memory attributes formatted for PAR + + LookupLevel lookupLevel; // Lookup level where the descriptor was fetched + // from. Used to set the FSR for faults + // occurring while the long desc. format is in + // use (AArch32 w/ LPAE and AArch64) + + uint16_t asid; // Address Space Identifier + uint8_t vmid; // Virtual machine Identifier uint8_t N; // Number of bits in pagesize + uint8_t innerAttrs; + uint8_t outerAttrs; + uint8_t ap; // Access permissions bits + uint8_t hap; // Hyp access permissions bits + DomainType domain; // Access Domain + + MemoryType mtype; + + // True if the long descriptor format is used for this entry (LPAE only) + bool longDescFormat; // @todo use this in the update attribute bethod + + bool isHyp; bool global; bool valid; + // True if the entry targets the non-secure physical address space + bool ns; + // True if the entry was brought in from a non-secure page table + bool nstid; + // Exception level on insert, AARCH64 EL0&1, AARCH32 -> el=1 + uint8_t el; + // Type of memory bool nonCacheable; // Can we wrap this in mtype? - bool sNp; // Section descriptor // Memory Attributes - MemoryType mtype; - uint8_t innerAttrs; - uint8_t outerAttrs; bool shareable; - uint32_t attributes; // Memory attributes formatted for PAR - + bool outerShareable; // Access permissions bool xn; // Execute Never - uint8_t ap; // Access permissions bits - uint8_t domain; // Access Domain + bool pxn; // Privileged Execute Never (LPAE only) //Construct an entry that maps to physical address addr for SE mode - TlbEntry(Addr _asn, Addr _vaddr, Addr _paddr) + TlbEntry(Addr _asn, Addr _vaddr, Addr _paddr) : + pfn(_paddr >> PageShift), size(PageBytes - 1), vpn(_vaddr >> PageShift), + attributes(0), lookupLevel(L1), asid(_asn), vmid(0), N(0), + innerAttrs(0), outerAttrs(0), ap(0), hap(0x3), + domain(DomainType::Client), mtype(MemoryType::StronglyOrdered), + longDescFormat(false), isHyp(false), global(false), valid(true), + ns(true), nstid(true), el(0), nonCacheable(false), shareable(false), + outerShareable(false), xn(0), pxn(0) { - pfn = _paddr >> PageShift; - size = PageBytes - 1; - asid = _asn; - global = false; - valid = true; + // no restrictions by default, hap = 0x3 - vpn = _vaddr >> PageShift; + // @todo Check the memory type + } - nonCacheable = sNp = false; + TlbEntry() : + pfn(0), size(0), vpn(0), attributes(0), lookupLevel(L1), asid(0), + vmid(0), N(0), innerAttrs(0), outerAttrs(0), ap(0), hap(0x3), + domain(DomainType::Client), mtype(MemoryType::StronglyOrdered), + longDescFormat(false), isHyp(false), global(false), valid(true), + ns(true), nstid(true), el(0), nonCacheable(false), + shareable(false), outerShareable(false), xn(0), pxn(0) + { + // no restrictions by default, hap = 0x3 - xn = 0; - ap = 0; // ??? - domain = DomainClient; //??? + // @todo Check the memory type } - TlbEntry() - {} - void updateVaddr(Addr new_vaddr) { @@ -141,67 +181,165 @@ struct TlbEntry } Addr - pageStart() + pageStart() const { return pfn << PageShift; } bool - match(Addr va, uint8_t cid) + match(Addr va, uint8_t _vmid, bool hypLookUp, bool secure_lookup, + uint8_t target_el) const + { + return match(va, 0, _vmid, hypLookUp, secure_lookup, true, target_el); + } + + bool + match(Addr va, uint16_t asn, uint8_t _vmid, bool hypLookUp, + bool secure_lookup, bool ignore_asn, uint8_t target_el) const { + bool match = false; Addr v = vpn << N; - if (valid && va >= v && va <= v + size && (global || cid == asid)) - return true; - return false; + + if (valid && va >= v && va <= v + size && (secure_lookup == !nstid) && + (hypLookUp == isHyp)) + { + if (target_el == 2 || target_el == 3) + match = (el == target_el); + else + match = (el == 0) || (el == 1); + if (match && !ignore_asn) { + match = global || (asn == asid); + } + if (match && nstid) { + match = isHyp || (_vmid == vmid); + } + } + return match; } Addr - pAddr(Addr va) + pAddr(Addr va) const { return (pfn << N) | (va & size); } void + updateAttributes() + { + uint64_t mask; + uint64_t newBits; + + // chec bit 11 to determine if its currently LPAE or VMSA format. + if ( attributes & (1 << 11) ) { + newBits = ((outerShareable ? 0x2 : + shareable ? 0x3 : 0) << 7); + mask = 0x180; + } else { + /** Formatting for Physical Address Register (PAR) + * Only including lower bits (TLB info here) + * PAR (32-bit format): + * PA [31:12] + * LPAE [11] (Large Physical Address Extension) + * TLB info [10:1] + * NOS [10] (Not Outer Sharable) + * NS [9] (Non-Secure) + * -- [8] (Implementation Defined) + * SH [7] (Sharable) + * Inner[6:4](Inner memory attributes) + * Outer[3:2](Outer memory attributes) + * SS [1] (SuperSection) + * F [0] (Fault, Fault Status in [6:1] if faulted) + */ + newBits = ((outerShareable ? 0:1) << 10) | + ((shareable ? 1:0) << 7) | + (innerAttrs << 4) | + (outerAttrs << 2); + // TODO: Supersection bit + mask = 0x4FC; + } + // common bits + newBits |= ns << 9; // NS bit + mask |= 1 << 9; + // add in the new bits + attributes &= ~mask; + attributes |= newBits; + } + + void + setAttributes(bool lpae) + { + attributes = lpae ? (1 << 11) : 0; + updateAttributes(); + } + + std::string + print() const + { + return csprintf("%#x, asn %d vmn %d hyp %d ppn %#x size: %#x ap:%d " + "ns:%d nstid:%d g:%d el:%d", vpn << N, asid, vmid, + isHyp, pfn << N, size, ap, ns, nstid, global, el); + } + + void serialize(std::ostream &os) { + SERIALIZE_SCALAR(longDescFormat); SERIALIZE_SCALAR(pfn); SERIALIZE_SCALAR(size); SERIALIZE_SCALAR(vpn); SERIALIZE_SCALAR(asid); + SERIALIZE_SCALAR(vmid); + SERIALIZE_SCALAR(isHyp); SERIALIZE_SCALAR(N); SERIALIZE_SCALAR(global); SERIALIZE_SCALAR(valid); + SERIALIZE_SCALAR(ns); + SERIALIZE_SCALAR(nstid); SERIALIZE_SCALAR(nonCacheable); - SERIALIZE_SCALAR(sNp); + SERIALIZE_ENUM(lookupLevel); SERIALIZE_ENUM(mtype); SERIALIZE_SCALAR(innerAttrs); SERIALIZE_SCALAR(outerAttrs); SERIALIZE_SCALAR(shareable); + SERIALIZE_SCALAR(outerShareable); SERIALIZE_SCALAR(attributes); SERIALIZE_SCALAR(xn); + SERIALIZE_SCALAR(pxn); SERIALIZE_SCALAR(ap); - SERIALIZE_SCALAR(domain); + SERIALIZE_SCALAR(hap); + uint8_t domain_ = static_cast<uint8_t>(domain); + paramOut(os, "domain", domain_); } void unserialize(Checkpoint *cp, const std::string §ion) { + UNSERIALIZE_SCALAR(longDescFormat); UNSERIALIZE_SCALAR(pfn); UNSERIALIZE_SCALAR(size); UNSERIALIZE_SCALAR(vpn); UNSERIALIZE_SCALAR(asid); + UNSERIALIZE_SCALAR(vmid); + UNSERIALIZE_SCALAR(isHyp); UNSERIALIZE_SCALAR(N); UNSERIALIZE_SCALAR(global); UNSERIALIZE_SCALAR(valid); + UNSERIALIZE_SCALAR(ns); + UNSERIALIZE_SCALAR(nstid); UNSERIALIZE_SCALAR(nonCacheable); - UNSERIALIZE_SCALAR(sNp); + UNSERIALIZE_ENUM(lookupLevel); UNSERIALIZE_ENUM(mtype); UNSERIALIZE_SCALAR(innerAttrs); UNSERIALIZE_SCALAR(outerAttrs); UNSERIALIZE_SCALAR(shareable); + UNSERIALIZE_SCALAR(outerShareable); UNSERIALIZE_SCALAR(attributes); UNSERIALIZE_SCALAR(xn); + UNSERIALIZE_SCALAR(pxn); UNSERIALIZE_SCALAR(ap); - UNSERIALIZE_SCALAR(domain); + UNSERIALIZE_SCALAR(hap); + uint8_t domain_; + paramIn(cp, section, "domain", domain_); + domain = static_cast<DomainType>(domain_); } }; diff --git a/src/arch/arm/process.cc b/src/arch/arm/process.cc index 37999c905..dd23a5e21 100644 --- a/src/arch/arm/process.cc +++ b/src/arch/arm/process.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -61,6 +61,12 @@ ArmLiveProcess::ArmLiveProcess(LiveProcessParams *params, ObjectFile *objFile, ObjectFile::Arch _arch) : LiveProcess(params, objFile), arch(_arch) { +} + +ArmLiveProcess32::ArmLiveProcess32(LiveProcessParams *params, + ObjectFile *objFile, ObjectFile::Arch _arch) + : ArmLiveProcess(params, objFile, _arch) +{ stack_base = 0xbf000000L; // Set pointer for next thread stack. Reserve 8M for main stack. @@ -74,11 +80,28 @@ ArmLiveProcess::ArmLiveProcess(LiveProcessParams *params, ObjectFile *objFile, mmap_start = mmap_end = 0x40000000L; } +ArmLiveProcess64::ArmLiveProcess64(LiveProcessParams *params, + ObjectFile *objFile, ObjectFile::Arch _arch) + : ArmLiveProcess(params, objFile, _arch) +{ + stack_base = 0x7fffff0000L; + + // Set pointer for next thread stack. Reserve 8M for main stack. + next_thread_stack_base = stack_base - (8 * 1024 * 1024); + + // Set up break point (Top of Heap) + brk_point = objFile->dataBase() + objFile->dataSize() + objFile->bssSize(); + brk_point = roundUp(brk_point, VMPageSize); + + // Set up region for mmaps. For now, start at bottom of kuseg space. + mmap_start = mmap_end = 0x4000000000L; +} + void -ArmLiveProcess::initState() +ArmLiveProcess32::initState() { LiveProcess::initState(); - argsInit(MachineBytes, VMPageSize); + argsInit<uint32_t>(VMPageSize, INTREG_SP); for (int i = 0; i < contextIds.size(); i++) { ThreadContext * tc = system->getThreadContext(contextIds[i]); CPACR cpacr = tc->readMiscReg(MISCREG_CPACR); @@ -94,9 +117,34 @@ ArmLiveProcess::initState() } void -ArmLiveProcess::argsInit(int intSize, int pageSize) +ArmLiveProcess64::initState() { - typedef AuxVector<uint32_t> auxv_t; + LiveProcess::initState(); + argsInit<uint64_t>(VMPageSize, INTREG_SP0); + for (int i = 0; i < contextIds.size(); i++) { + ThreadContext * tc = system->getThreadContext(contextIds[i]); + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + cpsr.mode = MODE_EL0T; + tc->setMiscReg(MISCREG_CPSR, cpsr); + CPACR cpacr = tc->readMiscReg(MISCREG_CPACR_EL1); + // Enable the floating point coprocessors. + cpacr.cp10 = 0x3; + cpacr.cp11 = 0x3; + tc->setMiscReg(MISCREG_CPACR_EL1, cpacr); + // Generically enable floating point support. + FPEXC fpexc = tc->readMiscReg(MISCREG_FPEXC); + fpexc.en = 1; + tc->setMiscReg(MISCREG_FPEXC, fpexc); + } +} + +template <class IntType> +void +ArmLiveProcess::argsInit(int pageSize, IntRegIndex spIndex) +{ + int intSize = sizeof(IntType); + + typedef AuxVector<IntType> auxv_t; std::vector<auxv_t> auxv; string filename; @@ -133,7 +181,7 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) //Auxilliary vectors are loaded only for elf formatted executables. ElfObject * elfObject = dynamic_cast<ElfObject *>(objFile); if (elfObject) { - uint32_t features = + IntType features = Arm_Swp | Arm_Half | Arm_Thumb | @@ -253,16 +301,16 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) allocateMem(roundDown(stack_min, pageSize), roundUp(stack_size, pageSize)); // map out initial stack contents - uint32_t sentry_base = stack_base - sentry_size; - uint32_t aux_data_base = sentry_base - aux_data_size; - uint32_t env_data_base = aux_data_base - env_data_size; - uint32_t arg_data_base = env_data_base - arg_data_size; - uint32_t platform_base = arg_data_base - platform_size; - uint32_t aux_random_base = platform_base - aux_random_size; - uint32_t auxv_array_base = aux_random_base - aux_array_size - aux_padding; - uint32_t envp_array_base = auxv_array_base - envp_array_size; - uint32_t argv_array_base = envp_array_base - argv_array_size; - uint32_t argc_base = argv_array_base - argc_size; + IntType sentry_base = stack_base - sentry_size; + IntType aux_data_base = sentry_base - aux_data_size; + IntType env_data_base = aux_data_base - env_data_size; + IntType arg_data_base = env_data_base - arg_data_size; + IntType platform_base = arg_data_base - platform_size; + IntType aux_random_base = platform_base - aux_random_size; + IntType auxv_array_base = aux_random_base - aux_array_size - aux_padding; + IntType envp_array_base = auxv_array_base - envp_array_size; + IntType argv_array_base = envp_array_base - argv_array_size; + IntType argc_base = argv_array_base - argc_size; DPRINTF(Stack, "The addresses of items on the initial stack:\n"); DPRINTF(Stack, "0x%x - aux data\n", aux_data_base); @@ -279,11 +327,11 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) // write contents to stack // figure out argc - uint32_t argc = argv.size(); - uint32_t guestArgc = ArmISA::htog(argc); + IntType argc = argv.size(); + IntType guestArgc = ArmISA::htog(argc); //Write out the sentry void * - uint32_t sentry_NULL = 0; + IntType sentry_NULL = 0; initVirtMem.writeBlob(sentry_base, (uint8_t*)&sentry_NULL, sentry_size); @@ -302,8 +350,7 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) } //Copy the aux stuff - for(int x = 0; x < auxv.size(); x++) - { + for (int x = 0; x < auxv.size(); x++) { initVirtMem.writeBlob(auxv_array_base + x * 2 * intSize, (uint8_t*)&(auxv[x].a_type), intSize); initVirtMem.writeBlob(auxv_array_base + (x * 2 + 1) * intSize, @@ -321,7 +368,7 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) ThreadContext *tc = system->getThreadContext(contextIds[0]); //Set the stack pointer register - tc->setIntReg(StackPointerReg, stack_min); + tc->setIntReg(spIndex, stack_min); //A pointer to a function to run when the program exits. We'll set this //to zero explicitly to make sure this isn't used. tc->setIntReg(ArgumentReg0, 0); @@ -342,6 +389,8 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) PCState pc; pc.thumb(arch == ObjectFile::Thumb); pc.nextThumb(pc.thumb()); + pc.aarch64(arch == ObjectFile::Arm64); + pc.nextAArch64(pc.aarch64()); pc.set(objFile->entryPoint() & ~mask(1)); tc->pcState(pc); @@ -350,14 +399,21 @@ ArmLiveProcess::argsInit(int intSize, int pageSize) } ArmISA::IntReg -ArmLiveProcess::getSyscallArg(ThreadContext *tc, int &i) +ArmLiveProcess32::getSyscallArg(ThreadContext *tc, int &i) { assert(i < 6); return tc->readIntReg(ArgumentReg0 + i++); } -uint64_t -ArmLiveProcess::getSyscallArg(ThreadContext *tc, int &i, int width) +ArmISA::IntReg +ArmLiveProcess64::getSyscallArg(ThreadContext *tc, int &i) +{ + assert(i < 8); + return tc->readIntReg(ArgumentReg0 + i++); +} + +ArmISA::IntReg +ArmLiveProcess32::getSyscallArg(ThreadContext *tc, int &i, int width) { assert(width == 32 || width == 64); if (width == 32) @@ -375,17 +431,37 @@ ArmLiveProcess::getSyscallArg(ThreadContext *tc, int &i, int width) return val; } +ArmISA::IntReg +ArmLiveProcess64::getSyscallArg(ThreadContext *tc, int &i, int width) +{ + return getSyscallArg(tc, i); +} + + +void +ArmLiveProcess32::setSyscallArg(ThreadContext *tc, int i, ArmISA::IntReg val) +{ + assert(i < 6); + tc->setIntReg(ArgumentReg0 + i, val); +} void -ArmLiveProcess::setSyscallArg(ThreadContext *tc, +ArmLiveProcess64::setSyscallArg(ThreadContext *tc, int i, ArmISA::IntReg val) { - assert(i < 4); + assert(i < 8); tc->setIntReg(ArgumentReg0 + i, val); } void -ArmLiveProcess::setSyscallReturn(ThreadContext *tc, +ArmLiveProcess32::setSyscallReturn(ThreadContext *tc, + SyscallReturn return_value) +{ + tc->setIntReg(ReturnValueReg, return_value.value()); +} + +void +ArmLiveProcess64::setSyscallReturn(ThreadContext *tc, SyscallReturn return_value) { tc->setIntReg(ReturnValueReg, return_value.value()); diff --git a/src/arch/arm/process.hh b/src/arch/arm/process.hh index f8d821037..34ce1dd02 100644 --- a/src/arch/arm/process.hh +++ b/src/arch/arm/process.hh @@ -1,4 +1,16 @@ /* +* Copyright (c) 2012 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2007-2008 The Florida State University * All rights reserved. * @@ -34,6 +46,7 @@ #include <string> #include <vector> +#include "arch/arm/intregs.hh" #include "base/loader/object_file.hh" #include "sim/process.hh" @@ -47,11 +60,37 @@ class ArmLiveProcess : public LiveProcess ObjectFile::Arch arch; ArmLiveProcess(LiveProcessParams * params, ObjectFile *objFile, ObjectFile::Arch _arch); + template<class IntType> + void argsInit(int pageSize, ArmISA::IntRegIndex spIndex); +}; + +class ArmLiveProcess32 : public ArmLiveProcess +{ + protected: + ObjectFile::Arch arch; + ArmLiveProcess32(LiveProcessParams * params, ObjectFile *objFile, + ObjectFile::Arch _arch); + + void initState(); + + public: + + ArmISA::IntReg getSyscallArg(ThreadContext *tc, int &i, int width); + ArmISA::IntReg getSyscallArg(ThreadContext *tc, int &i); + void setSyscallArg(ThreadContext *tc, int i, ArmISA::IntReg val); + void setSyscallReturn(ThreadContext *tc, SyscallReturn return_value); +}; + +class ArmLiveProcess64 : public ArmLiveProcess +{ + protected: + ObjectFile::Arch arch; + ArmLiveProcess64(LiveProcessParams * params, ObjectFile *objFile, + ObjectFile::Arch _arch); void initState(); public: - void argsInit(int intSize, int pageSize); ArmISA::IntReg getSyscallArg(ThreadContext *tc, int &i, int width); ArmISA::IntReg getSyscallArg(ThreadContext *tc, int &i); diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh index b9033fd5b..09041f306 100644 --- a/src/arch/arm/registers.hh +++ b/src/arch/arm/registers.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2011 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -74,11 +74,12 @@ typedef uint8_t CCReg; // Constants Related to the number of registers const int NumIntArchRegs = NUM_ARCH_INTREGS; // The number of single precision floating point registers -const int NumFloatArchRegs = 64; -const int NumFloatSpecialRegs = 8; +const int NumFloatV7ArchRegs = 64; +const int NumFloatV8ArchRegs = 128; +const int NumFloatSpecialRegs = 32; const int NumIntRegs = NUM_INTREGS; -const int NumFloatRegs = NumFloatArchRegs + NumFloatSpecialRegs; +const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs; const int NumCCRegs = 0; const int NumMiscRegs = NUM_MISCREGS; @@ -89,6 +90,7 @@ const int ReturnValueReg = 0; const int ReturnValueReg1 = 1; const int ReturnValueReg2 = 2; const int NumArgumentRegs = 4; +const int NumArgumentRegs64 = 8; const int ArgumentReg0 = 0; const int ArgumentReg1 = 1; const int ArgumentReg2 = 2; diff --git a/src/arch/arm/remote_gdb.cc b/src/arch/arm/remote_gdb.cc index 4078630d6..74c3c7ff3 100644 --- a/src/arch/arm/remote_gdb.cc +++ b/src/arch/arm/remote_gdb.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -138,6 +138,7 @@ #include "arch/arm/pagetable.hh" #include "arch/arm/registers.hh" #include "arch/arm/remote_gdb.hh" +#include "arch/arm/system.hh" #include "arch/arm/utility.hh" #include "arch/arm/vtophys.hh" #include "base/intmath.hh" @@ -159,7 +160,7 @@ using namespace std; using namespace ArmISA; RemoteGDB::RemoteGDB(System *_system, ThreadContext *tc) - : BaseRemoteGDB(_system, tc, NUMREGS) + : BaseRemoteGDB(_system, tc, MAX_NUMREGS) { } @@ -204,45 +205,65 @@ RemoteGDB::getregs() memset(gdbregs.regs, 0, gdbregs.bytes()); - // R0-R15 supervisor mode - // arm registers are 32 bits wide, gdb registers are 64 bits wide - // two arm registers are packed into one gdb register (little endian) - gdbregs.regs[REG_R0 + 0] = context->readIntReg(INTREG_R1) << 32 | - context->readIntReg(INTREG_R0); - gdbregs.regs[REG_R0 + 1] = context->readIntReg(INTREG_R3) << 32 | - context->readIntReg(INTREG_R2); - gdbregs.regs[REG_R0 + 2] = context->readIntReg(INTREG_R5) << 32 | - context->readIntReg(INTREG_R4); - gdbregs.regs[REG_R0 + 3] = context->readIntReg(INTREG_R7) << 32 | - context->readIntReg(INTREG_R6); - gdbregs.regs[REG_R0 + 4] = context->readIntReg(INTREG_R9) << 32 | - context->readIntReg(INTREG_R8); - gdbregs.regs[REG_R0 + 5] = context->readIntReg(INTREG_R11) << 32| - context->readIntReg(INTREG_R10); - gdbregs.regs[REG_R0 + 6] = context->readIntReg(INTREG_SP) << 32 | - context->readIntReg(INTREG_R12); - gdbregs.regs[REG_R0 + 7] = context->pcState().pc() << 32 | - context->readIntReg(INTREG_LR); - - // CPSR - gdbregs.regs[REG_CPSR] = context->readMiscRegNoEffect(MISCREG_CPSR); - - // vfpv3/neon floating point registers (32 double or 64 float) - - gdbregs.regs[REG_F0] = - static_cast<uint64_t>(context->readFloatRegBits(0)) << 32 | - gdbregs.regs[REG_CPSR]; - - for (int i = 1; i < (NumFloatArchRegs>>1); ++i) { - gdbregs.regs[i + REG_F0] = - static_cast<uint64_t>(context->readFloatRegBits(2*i)) << 32 | - context->readFloatRegBits(2*i-1); + if (inAArch64(context)) { // AArch64 + // x0-x31 + for (int i = 0; i < 32; ++i) { + gdbregs.regs[REG_X0 + i] = context->readIntReg(INTREG_X0 + i); + } + // pc + gdbregs.regs[REG_PC_64] = context->pcState().pc(); + // cpsr + gdbregs.regs[REG_CPSR_64] = context->readMiscRegNoEffect(MISCREG_CPSR); + // v0-v31 + for (int i = 0; i < 32; ++i) { + gdbregs.regs[REG_V0 + 2 * i] = static_cast<uint64_t>( + context->readFloatRegBits(i * 4 + 3)) << 32 | + context->readFloatRegBits(i * 4 + 2); + gdbregs.regs[REG_V0 + 2 * i + 1] = static_cast<uint64_t>( + context->readFloatRegBits(i * 4 + 1)) << 32 | + context->readFloatRegBits(i * 4 + 0); + } + } else { // AArch32 + // R0-R15 supervisor mode + // arm registers are 32 bits wide, gdb registers are 64 bits wide two + // arm registers are packed into one gdb register (little endian) + gdbregs.regs[REG_R0 + 0] = context->readIntReg(INTREG_R1) << 32 | + context->readIntReg(INTREG_R0); + gdbregs.regs[REG_R0 + 1] = context->readIntReg(INTREG_R3) << 32 | + context->readIntReg(INTREG_R2); + gdbregs.regs[REG_R0 + 2] = context->readIntReg(INTREG_R5) << 32 | + context->readIntReg(INTREG_R4); + gdbregs.regs[REG_R0 + 3] = context->readIntReg(INTREG_R7) << 32 | + context->readIntReg(INTREG_R6); + gdbregs.regs[REG_R0 + 4] = context->readIntReg(INTREG_R9) << 32 | + context->readIntReg(INTREG_R8); + gdbregs.regs[REG_R0 + 5] = context->readIntReg(INTREG_R11) << 32| + context->readIntReg(INTREG_R10); + gdbregs.regs[REG_R0 + 6] = context->readIntReg(INTREG_SP) << 32 | + context->readIntReg(INTREG_R12); + gdbregs.regs[REG_R0 + 7] = context->pcState().pc() << 32 | + context->readIntReg(INTREG_LR); + + // CPSR + gdbregs.regs[REG_CPSR] = context->readMiscRegNoEffect(MISCREG_CPSR); + + // vfpv3/neon floating point registers (32 double or 64 float) + + gdbregs.regs[REG_F0] = + static_cast<uint64_t>(context->readFloatRegBits(0)) << 32 | + gdbregs.regs[REG_CPSR]; + + for (int i = 1; i < (NumFloatV7ArchRegs>>1); ++i) { + gdbregs.regs[i + REG_F0] = + static_cast<uint64_t>(context->readFloatRegBits(2*i)) << 32 | + context->readFloatRegBits(2*i-1); + } + + // FPSCR + gdbregs.regs[REG_FPSCR] = static_cast<uint64_t>( + context->readMiscRegNoEffect(MISCREG_FPSCR)) << 32 | + context->readFloatRegBits(NumFloatV7ArchRegs - 1); } - - // FPSCR - gdbregs.regs[REG_FPSCR] = - static_cast<uint64_t>(context->readMiscRegNoEffect(MISCREG_FPSCR)) << 32 | - context->readFloatRegBits(NumFloatArchRegs - 1); } /* @@ -254,46 +275,66 @@ RemoteGDB::setregs() { DPRINTF(GDBAcc, "setregs in remotegdb \n"); + if (inAArch64(context)) { // AArch64 + // x0-x31 + for (int i = 0; i < 32; ++i) { + context->setIntReg(INTREG_X0 + i, gdbregs.regs[REG_X0 + i]); + } + // pc + context->pcState(gdbregs.regs[REG_PC_64]); + // cpsr + context->setMiscRegNoEffect(MISCREG_CPSR, gdbregs.regs[REG_CPSR_64]); + // v0-v31 + for (int i = 0; i < 32; ++i) { + context->setFloatRegBits(i * 4 + 3, + gdbregs.regs[REG_V0 + 2 * i] >> 32); + context->setFloatRegBits(i * 4 + 2, + gdbregs.regs[REG_V0 + 2 * i]); + context->setFloatRegBits(i * 4 + 1, + gdbregs.regs[REG_V0 + 2 * i + 1] >> 32); + context->setFloatRegBits(i * 4 + 0, + gdbregs.regs[REG_V0 + 2 * i + 1]); + } + } else { // AArch32 + // R0-R15 supervisor mode + // arm registers are 32 bits wide, gdb registers are 64 bits wide + // two arm registers are packed into one gdb register (little endian) + context->setIntReg(INTREG_R0 , bits(gdbregs.regs[REG_R0 + 0], 31, 0)); + context->setIntReg(INTREG_R1 , bits(gdbregs.regs[REG_R0 + 0], 63, 32)); + context->setIntReg(INTREG_R2 , bits(gdbregs.regs[REG_R0 + 1], 31, 0)); + context->setIntReg(INTREG_R3 , bits(gdbregs.regs[REG_R0 + 1], 63, 32)); + context->setIntReg(INTREG_R4 , bits(gdbregs.regs[REG_R0 + 2], 31, 0)); + context->setIntReg(INTREG_R5 , bits(gdbregs.regs[REG_R0 + 2], 63, 32)); + context->setIntReg(INTREG_R6 , bits(gdbregs.regs[REG_R0 + 3], 31, 0)); + context->setIntReg(INTREG_R7 , bits(gdbregs.regs[REG_R0 + 3], 63, 32)); + context->setIntReg(INTREG_R8 , bits(gdbregs.regs[REG_R0 + 4], 31, 0)); + context->setIntReg(INTREG_R9 , bits(gdbregs.regs[REG_R0 + 4], 63, 32)); + context->setIntReg(INTREG_R10, bits(gdbregs.regs[REG_R0 + 5], 31, 0)); + context->setIntReg(INTREG_R11, bits(gdbregs.regs[REG_R0 + 5], 63, 32)); + context->setIntReg(INTREG_R12, bits(gdbregs.regs[REG_R0 + 6], 31, 0)); + context->setIntReg(INTREG_SP , bits(gdbregs.regs[REG_R0 + 6], 63, 32)); + context->setIntReg(INTREG_LR , bits(gdbregs.regs[REG_R0 + 7], 31, 0)); + context->pcState(bits(gdbregs.regs[REG_R0 + 7], 63, 32)); + + //CPSR + context->setMiscRegNoEffect(MISCREG_CPSR, gdbregs.regs[REG_CPSR]); + + //vfpv3/neon floating point registers (32 double or 64 float) + context->setFloatRegBits(0, gdbregs.regs[REG_F0]>>32); + + for (int i = 1; i < NumFloatV7ArchRegs; ++i) { + if (i%2) { + int j = (i+1)/2; + context->setFloatRegBits(i, bits(gdbregs.regs[j + REG_F0], 31, 0)); + } else { + int j = i/2; + context->setFloatRegBits(i, gdbregs.regs[j + REG_F0]>>32); + } + } - // R0-R15 supervisor mode - // arm registers are 32 bits wide, gdb registers are 64 bits wide - // two arm registers are packed into one gdb register (little endian) - context->setIntReg(INTREG_R0 , bits(gdbregs.regs[REG_R0 + 0], 31, 0)); - context->setIntReg(INTREG_R1 , bits(gdbregs.regs[REG_R0 + 0], 63, 32)); - context->setIntReg(INTREG_R2 , bits(gdbregs.regs[REG_R0 + 1], 31, 0)); - context->setIntReg(INTREG_R3 , bits(gdbregs.regs[REG_R0 + 1], 63, 32)); - context->setIntReg(INTREG_R4 , bits(gdbregs.regs[REG_R0 + 2], 31, 0)); - context->setIntReg(INTREG_R5 , bits(gdbregs.regs[REG_R0 + 2], 63, 32)); - context->setIntReg(INTREG_R6 , bits(gdbregs.regs[REG_R0 + 3], 31, 0)); - context->setIntReg(INTREG_R7 , bits(gdbregs.regs[REG_R0 + 3], 63, 32)); - context->setIntReg(INTREG_R8 , bits(gdbregs.regs[REG_R0 + 4], 31, 0)); - context->setIntReg(INTREG_R9 , bits(gdbregs.regs[REG_R0 + 4], 63, 32)); - context->setIntReg(INTREG_R10, bits(gdbregs.regs[REG_R0 + 5], 31, 0)); - context->setIntReg(INTREG_R11, bits(gdbregs.regs[REG_R0 + 5], 63, 32)); - context->setIntReg(INTREG_R12, bits(gdbregs.regs[REG_R0 + 6], 31, 0)); - context->setIntReg(INTREG_SP , bits(gdbregs.regs[REG_R0 + 6], 63, 32)); - context->setIntReg(INTREG_LR , bits(gdbregs.regs[REG_R0 + 7], 31, 0)); - context->pcState(bits(gdbregs.regs[REG_R0 + 7], 63, 32)); - - //CPSR - context->setMiscRegNoEffect(MISCREG_CPSR, gdbregs.regs[REG_CPSR]); - - //vfpv3/neon floating point registers (32 double or 64 float) - context->setFloatRegBits(0, gdbregs.regs[REG_F0]>>32); - - for (int i = 1; i < NumFloatArchRegs; ++i) { - if(i%2){ - int j = (i+1)/2; - context->setFloatRegBits(i, bits(gdbregs.regs[j + REG_F0], 31, 0)); - } - else{ - int j = i/2; - context->setFloatRegBits(i, gdbregs.regs[j + REG_F0]>>32); - } + //FPSCR + context->setMiscReg(MISCREG_FPSCR, gdbregs.regs[REG_FPSCR]>>32); } - - //FPSCR - context->setMiscReg(MISCREG_FPSCR, gdbregs.regs[REG_FPSCR]>>32); } void diff --git a/src/arch/arm/remote_gdb.hh b/src/arch/arm/remote_gdb.hh index b75d921fb..a6b2b9d35 100644 --- a/src/arch/arm/remote_gdb.hh +++ b/src/arch/arm/remote_gdb.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2002-2005 The Regents of The University of Michigan * Copyright (c) 2007-2008 The Florida State University * All rights reserved. @@ -40,13 +52,24 @@ class ThreadContext; namespace ArmISA { -// registers for arm with vfpv3/neon + +// AArch32 registers with vfpv3/neon const int NUMREGS = 41; /* r0-r15, cpsr, d0-d31, fpscr */ const int REG_R0 = 0; const int REG_F0 = 8; const int REG_CPSR = 8; /* bit 512 to bit 543 */ const int REG_FPSCR = 40; /* bit 2592 to bit 2623 */ +// AArch64 registers +const int NUMREGS_64 = 98; // x0-x31, pc, cpsr (64-bit GPRs) + // v0-v31 (128-bit FPRs) +const int REG_X0 = 0; +const int REG_PC_64 = 32; +const int REG_CPSR_64 = 33; +const int REG_V0 = 34; + +const int MAX_NUMREGS = NUMREGS_64; + class RemoteGDB : public BaseRemoteGDB { diff --git a/src/arch/arm/stage2_lookup.cc b/src/arch/arm/stage2_lookup.cc new file mode 100755 index 000000000..1299ade68 --- /dev/null +++ b/src/arch/arm/stage2_lookup.cc @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2010-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ali Saidi + * Giacomo Gabrielli + */ + +#include "arch/arm/faults.hh" +#include "arch/arm/stage2_lookup.hh" +#include "arch/arm/system.hh" +#include "arch/arm/table_walker.hh" +#include "arch/arm/tlb.hh" +#include "cpu/base.hh" +#include "cpu/thread_context.hh" +#include "debug/Checkpoint.hh" +#include "debug/TLB.hh" +#include "debug/TLBVerbose.hh" +#include "sim/system.hh" + +using namespace ArmISA; + +Fault +Stage2LookUp::getTe(ThreadContext *tc, TlbEntry *destTe) + +{ + fault = stage2Tlb->getTE(&stage2Te, &req, tc, mode, this, timing, + functional, false, tranType); + // Call finish if we're done already + if ((fault != NoFault) || (stage2Te != NULL)) { + mergeTe(&req, mode); + *destTe = stage1Te; + } + return fault; +} + +void +Stage2LookUp::mergeTe(RequestPtr req, BaseTLB::Mode mode) +{ + // Since we directly requested the table entry (which we need later on to + // merge the attributes) then we've skipped some stage 2 permissinos + // checking. So call translate on stage 2 to do the checking. As the entry + // is now in the TLB this should always hit the cache. + if (fault == NoFault) { + fault = stage2Tlb->checkPermissions(stage2Te, req, mode); + } + + // Check again that we haven't got a fault + if (fault == NoFault) { + assert(stage2Te != NULL); + + // Now we have the table entries for both stages of translation + // merge them and insert the result into the stage 1 TLB. See + // CombineS1S2Desc() in pseudocode + stage1Te.N = stage2Te->N; + stage1Te.nonCacheable |= stage2Te->nonCacheable; + stage1Te.xn |= stage2Te->xn; + + if (stage1Te.size > stage2Te->size) { + // Size mismatch also implies vpn mismatch (this is shifted by + // sizebits!). + stage1Te.vpn = s1Req->getVaddr() / (stage2Te->size+1); + stage1Te.pfn = stage2Te->pfn; + stage1Te.size = stage2Te->size; + } else if (stage1Te.size < stage2Te->size) { + // Guest 4K could well be section-backed by host hugepage! In this + // case a 4K entry is added but pfn needs to be adjusted. New PFN = + // offset into section PFN given by stage2 IPA treated as a stage1 + // page size. + stage1Te.pfn = (stage2Te->pfn * ((stage2Te->size+1) / (stage1Te.size+1))) + + (stage2Te->vpn / (stage1Te.size+1)); + // Size remains smaller of the two. + } else { + // Matching sizes + stage1Te.pfn = stage2Te->pfn; + } + + if (stage2Te->mtype == TlbEntry::MemoryType::StronglyOrdered || + stage1Te.mtype == TlbEntry::MemoryType::StronglyOrdered) { + stage1Te.mtype = TlbEntry::MemoryType::StronglyOrdered; + } else if (stage2Te->mtype == TlbEntry::MemoryType::Device || + stage1Te.mtype == TlbEntry::MemoryType::Device) { + stage1Te.mtype = TlbEntry::MemoryType::Device; + } else { + stage1Te.mtype = TlbEntry::MemoryType::Normal; + } + + if (stage1Te.mtype == TlbEntry::MemoryType::Normal) { + + if (stage2Te->innerAttrs == 0 || + stage1Te.innerAttrs == 0) { + // either encoding Non-cacheable + stage1Te.innerAttrs = 0; + } else if (stage2Te->innerAttrs == 2 || + stage1Te.innerAttrs == 2) { + // either encoding Write-Through cacheable + stage1Te.innerAttrs = 2; + } else { + // both encodings Write-Back + stage1Te.innerAttrs = 3; + } + + if (stage2Te->outerAttrs == 0 || + stage1Te.outerAttrs == 0) { + // either encoding Non-cacheable + stage1Te.outerAttrs = 0; + } else if (stage2Te->outerAttrs == 2 || + stage1Te.outerAttrs == 2) { + // either encoding Write-Through cacheable + stage1Te.outerAttrs = 2; + } else { + // both encodings Write-Back + stage1Te.outerAttrs = 3; + } + + stage1Te.shareable |= stage2Te->shareable; + stage1Te.outerShareable |= stage2Te->outerShareable; + if (stage1Te.innerAttrs == 0 && + stage1Te.outerAttrs == 0) { + // something Non-cacheable at each level is outer shareable + stage1Te.shareable = true; + stage1Te.outerShareable = true; + } + } else { + stage1Te.shareable = true; + stage1Te.outerShareable = true; + } + stage1Te.updateAttributes(); + } + + // if there's a fault annotate it, + if (fault != NoFault) { + // If the second stage of translation generated a fault add the + // details of the original stage 1 virtual address + reinterpret_cast<ArmFault *>(fault.get())->annotate(ArmFault::OVA, + s1Req->getVaddr()); + } + complete = true; +} + +void +Stage2LookUp::finish(Fault _fault, RequestPtr req, + ThreadContext *tc, BaseTLB::Mode mode) +{ + fault = _fault; + // if we haven't got the table entry get it now + if ((fault == NoFault) && (stage2Te == NULL)) { + fault = stage2Tlb->getTE(&stage2Te, req, tc, mode, this, + timing, functional, false, tranType); + } + + // Now we have the stage 2 table entry we need to merge it with the stage + // 1 entry we were given at the start + mergeTe(req, mode); + + if (fault != NoFault) { + transState->finish(fault, req, tc, mode); + } else if (timing) { + // Now notify the original stage 1 translation that we finally have + // a result + stage1Tlb->translateComplete(s1Req, tc, transState, mode, tranType, true); + } + // if we have been asked to delete ourselfs do it now + if (selfDelete) { + delete this; + } +} + diff --git a/src/arch/arm/stage2_lookup.hh b/src/arch/arm/stage2_lookup.hh new file mode 100755 index 000000000..3a1228f46 --- /dev/null +++ b/src/arch/arm/stage2_lookup.hh @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2010-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Ali Saidi + * Giacomo Gabrielli + */ + +#ifndef __ARCH_ARM_STAGE2_LOOKUP_HH__ +#define __ARCH_ARM_STAGE2_LOOKUP_HH__ + +#include <list> + +#include "arch/arm/system.hh" +#include "arch/arm/table_walker.hh" +#include "arch/arm/tlb.hh" +#include "mem/request.hh" +#include "sim/tlb.hh" + +class ThreadContext; + +namespace ArmISA { +class Translation; +class TLB; + + +class Stage2LookUp : public BaseTLB::Translation +{ + private: + TLB *stage1Tlb; + TLB *stage2Tlb; + TlbEntry stage1Te; + RequestPtr s1Req; + TLB::Translation *transState; + BaseTLB::Mode mode; + bool timing; + bool functional; + TLB::ArmTranslationType tranType; + TlbEntry *stage2Te; + Request req; + Fault fault; + bool complete; + bool selfDelete; + + public: + Stage2LookUp(TLB *s1Tlb, TLB *s2Tlb, TlbEntry s1Te, RequestPtr _req, + TLB::Translation *_transState, BaseTLB::Mode _mode, bool _timing, + bool _functional, TLB::ArmTranslationType _tranType) : + stage1Tlb(s1Tlb), stage2Tlb(s2Tlb), stage1Te(s1Te), s1Req(_req), + transState(_transState), mode(_mode), timing(_timing), + functional(_functional), tranType(_tranType), fault(NoFault), + complete(false), selfDelete(false) + { + req.setVirt(0, s1Te.pAddr(s1Req->getVaddr()), s1Req->getSize(), + s1Req->getFlags(), s1Req->masterId(), 0); + } + + Fault getTe(ThreadContext *tc, TlbEntry *destTe); + + void mergeTe(RequestPtr req, BaseTLB::Mode mode); + + void setSelfDelete() { selfDelete = true; } + + bool isComplete() const { return complete; } + + void markDelayed() {} + + void finish(Fault fault, RequestPtr req, ThreadContext *tc, + BaseTLB::Mode mode); +}; + + +} // namespace ArmISA + +#endif //__ARCH_ARM_STAGE2_LOOKUP_HH__ + diff --git a/src/arch/arm/stage2_mmu.cc b/src/arch/arm/stage2_mmu.cc new file mode 100755 index 000000000..01451548c --- /dev/null +++ b/src/arch/arm/stage2_mmu.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Thomas Grocutt + */ + +#include "arch/arm/faults.hh" +#include "arch/arm/stage2_mmu.hh" +#include "arch/arm/system.hh" +#include "arch/arm/tlb.hh" +#include "cpu/base.hh" +#include "cpu/thread_context.hh" +#include "debug/Checkpoint.hh" +#include "debug/TLB.hh" +#include "debug/TLBVerbose.hh" + +using namespace ArmISA; + +Stage2MMU::Stage2MMU(const Params *p) + : SimObject(p), _stage1Tlb(p->tlb), _stage2Tlb(p->stage2_tlb) +{ + stage1Tlb()->setMMU(this); + stage2Tlb()->setMMU(this); +} + +Fault +Stage2MMU::readDataUntimed(ThreadContext *tc, Addr oVAddr, Addr descAddr, + uint8_t *data, int numBytes, Request::Flags flags, int masterId, + bool isFunctional) +{ + Fault fault; + + // translate to physical address using the second stage MMU + Request req = Request(); + req.setVirt(0, descAddr, numBytes, flags | Request::PT_WALK, masterId, 0); + if (isFunctional) { + fault = stage2Tlb()->translateFunctional(&req, tc, BaseTLB::Read); + } else { + fault = stage2Tlb()->translateAtomic(&req, tc, BaseTLB::Read); + } + + // Now do the access. + if (fault == NoFault && !req.getFlags().isSet(Request::NO_ACCESS)) { + Packet pkt = Packet(&req, MemCmd::ReadReq); + pkt.dataStatic(data); + if (isFunctional) { + stage1Tlb()->getWalkerPort().sendFunctional(&pkt); + } else { + stage1Tlb()->getWalkerPort().sendAtomic(&pkt); + } + assert(!pkt.isError()); + } + + // If there was a fault annotate it with the flag saying the foult occured + // while doing a translation for a stage 1 page table walk. + if (fault != NoFault) { + ArmFault *armFault = reinterpret_cast<ArmFault *>(fault.get()); + armFault->annotate(ArmFault::S1PTW, true); + armFault->annotate(ArmFault::OVA, oVAddr); + } + return fault; +} + +Fault +Stage2MMU::readDataTimed(ThreadContext *tc, Addr descAddr, + Stage2Translation *translation, int numBytes, Request::Flags flags, + int masterId) +{ + Fault fault; + // translate to physical address using the second stage MMU + translation->setVirt(descAddr, numBytes, flags | Request::PT_WALK, masterId); + fault = translation->translateTiming(tc); + return fault; +} + +Stage2MMU::Stage2Translation::Stage2Translation(Stage2MMU &_parent, + uint8_t *_data, Event *_event, Addr _oVAddr) + : data(_data), event(_event), parent(_parent), oVAddr(_oVAddr), + fault(NoFault) +{ +} + +void +Stage2MMU::Stage2Translation::finish(Fault _fault, RequestPtr req, ThreadContext *tc, + BaseTLB::Mode mode) +{ + fault = _fault; + + // If there was a fault annotate it with the flag saying the foult occured + // while doing a translation for a stage 1 page table walk. + if (fault != NoFault) { + ArmFault *armFault = reinterpret_cast<ArmFault *>(fault.get()); + armFault->annotate(ArmFault::S1PTW, true); + armFault->annotate(ArmFault::OVA, oVAddr); + } + + if (_fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) { + DmaPort& port = parent.stage1Tlb()->getWalkerPort(); + port.dmaAction(MemCmd::ReadReq, req->getPaddr(), numBytes, + event, data, tc->getCpuPtr()->clockPeriod(), + req->getFlags()); + } else { + // We can't do the DMA access as there's been a problem, so tell the + // event we're done + event->process(); + } +} + +ArmISA::Stage2MMU * +ArmStage2MMUParams::create() +{ + return new ArmISA::Stage2MMU(this); +} diff --git a/src/arch/arm/stage2_mmu.hh b/src/arch/arm/stage2_mmu.hh new file mode 100755 index 000000000..d1812c4ed --- /dev/null +++ b/src/arch/arm/stage2_mmu.hh @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Thomas Grocutt + */ + +#ifndef __ARCH_ARM_STAGE2_MMU_HH__ +#define __ARCH_ARM_STAGE2_MMU_HH__ + +#include "arch/arm/faults.hh" +#include "arch/arm/tlb.hh" +#include "mem/request.hh" +#include "params/ArmStage2MMU.hh" +#include "sim/eventq.hh" + +namespace ArmISA { + +class Stage2MMU : public SimObject +{ + private: + TLB *_stage1Tlb; + /** The TLB that will cache the stage 2 look ups. */ + TLB *_stage2Tlb; + + public: + /** This translation class is used to trigger the data fetch once a timing + translation returns the translated physical address */ + class Stage2Translation : public BaseTLB::Translation + { + private: + uint8_t *data; + int numBytes; + Request req; + Event *event; + Stage2MMU &parent; + Addr oVAddr; + + public: + Fault fault; + + Stage2Translation(Stage2MMU &_parent, uint8_t *_data, Event *_event, + Addr _oVAddr); + + void + markDelayed() {} + + void + finish(Fault fault, RequestPtr req, ThreadContext *tc, + BaseTLB::Mode mode); + + void setVirt(Addr vaddr, int size, Request::Flags flags, int masterId) + { + numBytes = size; + req.setVirt(0, vaddr, size, flags, masterId, 0); + } + + Fault translateTiming(ThreadContext *tc) + { + return (parent.stage2Tlb()->translateTiming(&req, tc, this, BaseTLB::Read)); + } + }; + + typedef ArmStage2MMUParams Params; + Stage2MMU(const Params *p); + + Fault readDataUntimed(ThreadContext *tc, Addr oVAddr, Addr descAddr, + uint8_t *data, int numBytes, Request::Flags flags, int masterId, + bool isFunctional); + Fault readDataTimed(ThreadContext *tc, Addr descAddr, + Stage2Translation *translation, int numBytes, Request::Flags flags, + int masterId); + + TLB* stage1Tlb() const { return _stage1Tlb; } + TLB* stage2Tlb() const { return _stage2Tlb; } +}; + + + +} // namespace ArmISA + +#endif //__ARCH_ARM_STAGE2_MMU_HH__ + diff --git a/src/arch/arm/system.cc b/src/arch/arm/system.cc index b09784b64..00d9d7613 100644 --- a/src/arch/arm/system.cc +++ b/src/arch/arm/system.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -48,19 +48,46 @@ #include "cpu/thread_context.hh" #include "mem/physical.hh" #include "mem/fs_translating_port_proxy.hh" +#include "sim/full_system.hh" using namespace std; using namespace Linux; ArmSystem::ArmSystem(Params *p) - : System(p), bootldr(NULL), multiProc(p->multi_proc) + : System(p), bootldr(NULL), _haveSecurity(p->have_security), + _haveLPAE(p->have_lpae), + _haveVirtualization(p->have_virtualization), + _haveGenericTimer(p->have_generic_timer), + _highestELIs64(p->highest_el_is_64), + _resetAddr64(p->reset_addr_64), + _physAddrRange64(p->phys_addr_range_64), + _haveLargeAsid64(p->have_large_asid_64), + multiProc(p->multi_proc) { + // Check if the physical address range is valid + if (_highestELIs64 && ( + _physAddrRange64 < 32 || + _physAddrRange64 > 48 || + (_physAddrRange64 % 4 != 0 && _physAddrRange64 != 42))) { + fatal("Invalid physical address range (%d)\n", _physAddrRange64); + } + if (p->boot_loader != "") { bootldr = createObjectFile(p->boot_loader); if (!bootldr) fatal("Could not read bootloader: %s\n", p->boot_loader); + if ((bootldr->getArch() == ObjectFile::Arm64) && !_highestELIs64) { + warn("Highest ARM exception-level set to AArch32 but bootloader " + "is for AArch64. Assuming you wanted these to match.\n"); + _highestELIs64 = true; + } else if ((bootldr->getArch() == ObjectFile::Arm) && _highestELIs64) { + warn("Highest ARM exception-level set to AArch64 but bootloader " + "is for AArch32. Assuming you wanted these to match.\n"); + _highestELIs64 = false; + } + bootldr->loadGlobalSymbols(debugSymbolTable); } @@ -81,11 +108,21 @@ ArmSystem::initState() if (bootldr) { bootldr->loadSections(physProxy); - uint8_t jump_to_bl[] = + uint8_t jump_to_bl_32[] = + { + 0x07, 0xf0, 0xa0, 0xe1 // branch to r7 in aarch32 + }; + + uint8_t jump_to_bl_64[] = { - 0x07, 0xf0, 0xa0, 0xe1 // branch to r7 + 0xe0, 0x00, 0x1f, 0xd6 // instruction "br x7" in aarch64 }; - physProxy.writeBlob(0x0, jump_to_bl, sizeof(jump_to_bl)); + + // write the jump to branch table into address 0 + if (!_highestELIs64) + physProxy.writeBlob(0x0, jump_to_bl_32, sizeof(jump_to_bl_32)); + else + physProxy.writeBlob(0x0, jump_to_bl_64, sizeof(jump_to_bl_64)); inform("Using bootloader at address %#x\n", bootldr->entryPoint()); @@ -96,24 +133,116 @@ ArmSystem::initState() fatal("gic_cpu_addr && flags_addr must be set with bootloader\n"); for (int i = 0; i < threadContexts.size(); i++) { - threadContexts[i]->setIntReg(3, kernelEntry & loadAddrMask); + if (!_highestELIs64) + threadContexts[i]->setIntReg(3, (kernelEntry & loadAddrMask) + + loadAddrOffset); threadContexts[i]->setIntReg(4, params()->gic_cpu_addr); threadContexts[i]->setIntReg(5, params()->flags_addr); threadContexts[i]->setIntReg(7, bootldr->entryPoint()); } + inform("Using kernel entry physical address at %#x\n", + (kernelEntry & loadAddrMask) + loadAddrOffset); } else { // Set the initial PC to be at start of the kernel code - threadContexts[0]->pcState(kernelEntry & loadAddrMask); + if (!_highestELIs64) + threadContexts[0]->pcState((kernelEntry & loadAddrMask) + + loadAddrOffset); + } +} + +GenericTimer::ArchTimer * +ArmSystem::getArchTimer(int cpu_id) const +{ + if (_genericTimer) { + return _genericTimer->getArchTimer(cpu_id); } + return NULL; } +GenericTimer::SystemCounter * +ArmSystem::getSystemCounter() const +{ + if (_genericTimer) { + return _genericTimer->getSystemCounter(); + } + return NULL; +} + +bool +ArmSystem::haveSecurity(ThreadContext *tc) +{ + if (!FullSystem) + return false; + + ArmSystem *a_sys = dynamic_cast<ArmSystem *>(tc->getSystemPtr()); + assert(a_sys); + return a_sys->haveSecurity(); +} + + ArmSystem::~ArmSystem() { if (debugPrintkEvent) delete debugPrintkEvent; } +bool +ArmSystem::haveLPAE(ThreadContext *tc) +{ + if (!FullSystem) + return false; + ArmSystem *a_sys = dynamic_cast<ArmSystem *>(tc->getSystemPtr()); + assert(a_sys); + return a_sys->haveLPAE(); +} + +bool +ArmSystem::haveVirtualization(ThreadContext *tc) +{ + if (!FullSystem) + return false; + + ArmSystem *a_sys = dynamic_cast<ArmSystem *>(tc->getSystemPtr()); + assert(a_sys); + return a_sys->haveVirtualization(); +} + +bool +ArmSystem::highestELIs64(ThreadContext *tc) +{ + return dynamic_cast<ArmSystem *>(tc->getSystemPtr())->highestELIs64(); +} + +ExceptionLevel +ArmSystem::highestEL(ThreadContext *tc) +{ + return dynamic_cast<ArmSystem *>(tc->getSystemPtr())->highestEL(); +} + +Addr +ArmSystem::resetAddr64(ThreadContext *tc) +{ + return dynamic_cast<ArmSystem *>(tc->getSystemPtr())->resetAddr64(); +} + +uint8_t +ArmSystem::physAddrRange(ThreadContext *tc) +{ + return dynamic_cast<ArmSystem *>(tc->getSystemPtr())->physAddrRange(); +} + +Addr +ArmSystem::physAddrMask(ThreadContext *tc) +{ + return dynamic_cast<ArmSystem *>(tc->getSystemPtr())->physAddrMask(); +} + +bool +ArmSystem::haveLargeAsid64(ThreadContext *tc) +{ + return dynamic_cast<ArmSystem *>(tc->getSystemPtr())->haveLargeAsid64(); +} ArmSystem * ArmSystemParams::create() { diff --git a/src/arch/arm/system.hh b/src/arch/arm/system.hh index 3135c5da1..f906dc2d2 100644 --- a/src/arch/arm/system.hh +++ b/src/arch/arm/system.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -46,11 +46,14 @@ #include <string> #include <vector> +#include "dev/arm/generic_timer.hh" #include "kern/linux/events.hh" #include "params/ArmSystem.hh" #include "sim/sim_object.hh" #include "sim/system.hh" +class ThreadContext; + class ArmSystem : public System { protected: @@ -65,6 +68,54 @@ class ArmSystem : public System */ ObjectFile *bootldr; + /** + * True if this system implements the Security Extensions + */ + const bool _haveSecurity; + + /** + * True if this system implements the Large Physical Address Extension + */ + const bool _haveLPAE; + + /** + * True if this system implements the virtualization Extensions + */ + const bool _haveVirtualization; + + /** + * True if this system implements the Generic Timer extension + */ + const bool _haveGenericTimer; + + /** + * Pointer to the Generic Timer wrapper. + */ + GenericTimer *_genericTimer; + + /** + * True if the register width of the highest implemented exception level is + * 64 bits (ARMv8) + */ + bool _highestELIs64; + + /** + * Reset address if the highest implemented exception level is 64 bits + * (ARMv8) + */ + const Addr _resetAddr64; + + /** + * Supported physical address range in bits if the highest implemented + * exception level is 64 bits (ARMv8) + */ + const uint8_t _physAddrRange64; + + /** + * True if ASID is 16 bits in AArch64 (ARMv8) + */ + const bool _haveLargeAsid64; + public: typedef ArmSystemParams Params; const Params * @@ -101,6 +152,120 @@ class ArmSystem : public System /** true if this a multiprocessor system */ bool multiProc; + + /** Returns true if this system implements the Security Extensions */ + bool haveSecurity() const { return _haveSecurity; } + + /** Returns true if this system implements the Large Physical Address + * Extension */ + bool haveLPAE() const { return _haveLPAE; } + + /** Returns true if this system implements the virtualization + * Extensions + */ + bool haveVirtualization() const { return _haveVirtualization; } + + /** Returns true if this system implements the Generic Timer extension. */ + bool haveGenericTimer() const { return _haveGenericTimer; } + + /** Sets the pointer to the Generic Timer. */ + void setGenericTimer(GenericTimer *generic_timer) + { + _genericTimer = generic_timer; + } + + /** Returns a pointer to the system counter. */ + GenericTimer::SystemCounter *getSystemCounter() const; + + /** Returns a pointer to the appropriate architected timer. */ + GenericTimer::ArchTimer *getArchTimer(int cpu_id) const; + + /** Returns true if the register width of the highest implemented exception + * level is 64 bits (ARMv8) */ + bool highestELIs64() const { return _highestELIs64; } + + /** Returns the highest implemented exception level */ + ExceptionLevel highestEL() const + { + if (_haveSecurity) + return EL3; + // @todo: uncomment this to enable Virtualization + // if (_haveVirtualization) + // return EL2; + return EL1; + } + + /** Returns the reset address if the highest implemented exception level is + * 64 bits (ARMv8) */ + Addr resetAddr64() const { return _resetAddr64; } + + /** Returns true if ASID is 16 bits in AArch64 (ARMv8) */ + bool haveLargeAsid64() const { return _haveLargeAsid64; } + + /** Returns the supported physical address range in bits if the highest + * implemented exception level is 64 bits (ARMv8) */ + uint8_t physAddrRange64() const { return _physAddrRange64; } + + /** Returns the supported physical address range in bits */ + uint8_t physAddrRange() const + { + if (_highestELIs64) + return _physAddrRange64; + if (_haveLPAE) + return 40; + return 32; + } + + /** Returns the physical address mask */ + Addr physAddrMask() const + { + return mask(physAddrRange()); + } + + /** Returns true if the system of a specific thread context implements the + * Security Extensions + */ + static bool haveSecurity(ThreadContext *tc); + + /** Returns true if the system of a specific thread context implements the + * virtualization Extensions + */ + static bool haveVirtualization(ThreadContext *tc); + + /** Returns true if the system of a specific thread context implements the + * Large Physical Address Extension + */ + static bool haveLPAE(ThreadContext *tc); + + /** Returns true if the register width of the highest implemented exception + * level for the system of a specific thread context is 64 bits (ARMv8) + */ + static bool highestELIs64(ThreadContext *tc); + + /** Returns the highest implemented exception level for the system of a + * specific thread context + */ + static ExceptionLevel highestEL(ThreadContext *tc); + + /** Returns the reset address if the highest implemented exception level for + * the system of a specific thread context is 64 bits (ARMv8) + */ + static Addr resetAddr64(ThreadContext *tc); + + /** Returns the supported physical address range in bits for the system of a + * specific thread context + */ + static uint8_t physAddrRange(ThreadContext *tc); + + /** Returns the physical address mask for the system of a specific thread + * context + */ + static Addr physAddrMask(ThreadContext *tc); + + /** Returns true if ASID is 16 bits for the system of a specific thread + * context while in AArch64 (ARMv8) */ + static bool haveLargeAsid64(ThreadContext *tc); + }; #endif diff --git a/src/arch/arm/table_walker.cc b/src/arch/arm/table_walker.cc index d419fdec5..7eda13b3e 100644 --- a/src/arch/arm/table_walker.cc +++ b/src/arch/arm/table_walker.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -35,9 +35,12 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Ali Saidi + * Giacomo Gabrielli */ #include "arch/arm/faults.hh" +#include "arch/arm/stage2_mmu.hh" +#include "arch/arm/system.hh" #include "arch/arm/table_walker.hh" #include "arch/arm/tlb.hh" #include "cpu/base.hh" @@ -51,13 +54,33 @@ using namespace ArmISA; TableWalker::TableWalker(const Params *p) - : MemObject(p), port(this, params()->sys), drainManager(NULL), - tlb(NULL), currState(NULL), pending(false), - masterId(p->sys->getMasterId(name())), + : MemObject(p), port(this, p->sys), drainManager(NULL), + stage2Mmu(NULL), isStage2(p->is_stage2), tlb(NULL), + currState(NULL), pending(false), masterId(p->sys->getMasterId(name())), numSquashable(p->num_squash_per_cycle), - doL1DescEvent(this), doL2DescEvent(this), doProcessEvent(this) + doL1DescEvent(this), doL2DescEvent(this), + doL0LongDescEvent(this), doL1LongDescEvent(this), doL2LongDescEvent(this), + doL3LongDescEvent(this), + doProcessEvent(this) { sctlr = 0; + + // Cache system-level properties + if (FullSystem) { + armSys = dynamic_cast<ArmSystem *>(p->sys); + assert(armSys); + haveSecurity = armSys->haveSecurity(); + _haveLPAE = armSys->haveLPAE(); + _haveVirtualization = armSys->haveVirtualization(); + physAddrRange = armSys->physAddrRange(); + _haveLargeAsid64 = armSys->haveLargeAsid64(); + } else { + armSys = NULL; + haveSecurity = _haveLPAE = _haveVirtualization = false; + _haveLargeAsid64 = false; + physAddrRange = 32; + } + } TableWalker::~TableWalker() @@ -65,10 +88,14 @@ TableWalker::~TableWalker() ; } +TableWalker::WalkerState::WalkerState() : stage2Tran(NULL), l2Desc(l1Desc) +{ +} + void TableWalker::completeDrain() { - if (drainManager && stateQueueL1.empty() && stateQueueL2.empty() && + if (drainManager && stateQueues[L1].empty() && stateQueues[L2].empty() && pendingQueue.empty()) { setDrainState(Drainable::Drained); DPRINTF(Drain, "TableWalker done draining, processing drain event\n"); @@ -82,21 +109,28 @@ TableWalker::drain(DrainManager *dm) { unsigned int count = port.drain(dm); - if (stateQueueL1.empty() && stateQueueL2.empty() && - pendingQueue.empty()) { - setDrainState(Drainable::Drained); - DPRINTF(Drain, "TableWalker free, no need to drain\n"); + bool state_queues_not_empty = false; - // table walker is drained, but its ports may still need to be drained - return count; - } else { + for (int i = 0; i < MAX_LOOKUP_LEVELS; ++i) { + if (!stateQueues[i].empty()) { + state_queues_not_empty = true; + break; + } + } + + if (state_queues_not_empty || pendingQueue.size()) { drainManager = dm; setDrainState(Drainable::Draining); DPRINTF(Drain, "TableWalker not drained\n"); // return port drain count plus the table walker itself needs to drain return count + 1; + } else { + setDrainState(Drainable::Drained); + DPRINTF(Drain, "TableWalker free, no need to drain\n"); + // table walker is drained, but its ports may still need to be drained + return count; } } @@ -120,10 +154,13 @@ TableWalker::getMasterPort(const std::string &if_name, PortID idx) } Fault -TableWalker::walk(RequestPtr _req, ThreadContext *_tc, uint8_t _cid, TLB::Mode _mode, - TLB::Translation *_trans, bool _timing, bool _functional) +TableWalker::walk(RequestPtr _req, ThreadContext *_tc, uint16_t _asid, + uint8_t _vmid, bool _isHyp, TLB::Mode _mode, + TLB::Translation *_trans, bool _timing, bool _functional, + bool secure, TLB::ArmTranslationType tranType) { assert(!(_functional && _timing)); + if (!currState) { // For atomic mode, a new WalkerState instance should be only created // once per TLB. For timing mode, a new instance is generated for every @@ -139,41 +176,113 @@ TableWalker::walk(RequestPtr _req, ThreadContext *_tc, uint8_t _cid, TLB::Mode _ // misprediction, in which case nothing will happen or we'll use // this fault to re-execute the faulting instruction which should clean // up everything. - if (currState->vaddr == _req->getVaddr()) { + if (currState->vaddr_tainted == _req->getVaddr()) { return new ReExec; } - panic("currState should always be empty in timing mode!\n"); } currState->tc = _tc; + currState->aarch64 = opModeIs64(currOpMode(_tc)); + currState->el = currEL(_tc); currState->transState = _trans; currState->req = _req; currState->fault = NoFault; - currState->contextId = _cid; + currState->asid = _asid; + currState->vmid = _vmid; + currState->isHyp = _isHyp; currState->timing = _timing; currState->functional = _functional; currState->mode = _mode; + currState->tranType = tranType; + currState->isSecure = secure; + currState->physAddrRange = physAddrRange; /** @todo These should be cached or grabbed from cached copies in the TLB, all these miscreg reads are expensive */ - currState->vaddr = currState->req->getVaddr(); - currState->sctlr = currState->tc->readMiscReg(MISCREG_SCTLR); + currState->vaddr_tainted = currState->req->getVaddr(); + if (currState->aarch64) + currState->vaddr = purifyTaggedAddr(currState->vaddr_tainted, + currState->tc, currState->el); + else + currState->vaddr = currState->vaddr_tainted; + + if (currState->aarch64) { + switch (currState->el) { + case EL0: + case EL1: + currState->sctlr = currState->tc->readMiscReg(MISCREG_SCTLR_EL1); + currState->ttbcr = currState->tc->readMiscReg(MISCREG_TCR_EL1); + break; + // @todo: uncomment this to enable Virtualization + // case EL2: + // assert(haveVirtualization); + // currState->sctlr = currState->tc->readMiscReg(MISCREG_SCTLR_EL2); + // currState->ttbcr = currState->tc->readMiscReg(MISCREG_TCR_EL2); + // break; + case EL3: + assert(haveSecurity); + currState->sctlr = currState->tc->readMiscReg(MISCREG_SCTLR_EL3); + currState->ttbcr = currState->tc->readMiscReg(MISCREG_TCR_EL3); + break; + default: + panic("Invalid exception level"); + break; + } + } else { + currState->sctlr = currState->tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_SCTLR, currState->tc, !currState->isSecure)); + currState->ttbcr = currState->tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_TTBCR, currState->tc, !currState->isSecure)); + currState->htcr = currState->tc->readMiscReg(MISCREG_HTCR); + currState->hcr = currState->tc->readMiscReg(MISCREG_HCR); + currState->vtcr = currState->tc->readMiscReg(MISCREG_VTCR); + } sctlr = currState->sctlr; - currState->N = currState->tc->readMiscReg(MISCREG_TTBCR); currState->isFetch = (currState->mode == TLB::Execute); currState->isWrite = (currState->mode == TLB::Write); + // We only do a second stage of translation if we're not secure, or in + // hyp mode, the second stage MMU is enabled, and this table walker + // instance is the first stage. + currState->doingStage2 = false; + // @todo: for now disable this in AArch64 (HCR is not set) + currState->stage2Req = !currState->aarch64 && currState->hcr.vm && + !isStage2 && !currState->isSecure && !currState->isHyp; + + bool long_desc_format = currState->aarch64 || + (_haveLPAE && currState->ttbcr.eae) || + _isHyp || isStage2; + + if (long_desc_format) { + // Helper variables used for hierarchical permissions + currState->secureLookup = currState->isSecure; + currState->rwTable = true; + currState->userTable = true; + currState->xnTable = false; + currState->pxnTable = false; + } - if (!currState->timing) - return processWalk(); + if (!currState->timing) { + if (currState->aarch64) + return processWalkAArch64(); + else if (long_desc_format) + return processWalkLPAE(); + else + return processWalk(); + } if (pending || pendingQueue.size()) { pendingQueue.push_back(currState); currState = NULL; } else { pending = true; - return processWalk(); + if (currState->aarch64) + return processWalkAArch64(); + else if (long_desc_format) + return processWalkLPAE(); + else + return processWalk(); } return NoFault; @@ -186,8 +295,17 @@ TableWalker::processWalkWrapper() assert(pendingQueue.size()); currState = pendingQueue.front(); + ExceptionLevel target_el = EL0; + if (currState->aarch64) + target_el = currEL(currState->tc); + else + target_el = EL1; + // Check if a previous walk filled this request already - TlbEntry* te = tlb->lookup(currState->vaddr, currState->contextId, true); + // @TODO Should this always be the TLB or should we look in the stage2 TLB? + TlbEntry* te = tlb->lookup(currState->vaddr, currState->asid, + currState->vmid, currState->isHyp, currState->isSecure, true, false, + target_el); // Check if we still need to have a walk for this request. If the requesting // instruction has been squashed, or a previous walk has filled the TLB with @@ -198,7 +316,12 @@ TableWalker::processWalkWrapper() // We've got a valid request, lets process it pending = true; pendingQueue.pop_front(); - processWalk(); + if (currState->aarch64) + processWalkAArch64(); + else if ((_haveLPAE && currState->ttbcr.eae) || currState->isHyp || isStage2) + processWalkLPAE(); + else + processWalk(); return; } @@ -212,7 +335,8 @@ TableWalker::processWalkWrapper() pendingQueue.pop_front(); num_squashed++; - DPRINTF(TLB, "Squashing table walk for address %#x\n", currState->vaddr); + DPRINTF(TLB, "Squashing table walk for address %#x\n", + currState->vaddr_tainted); if (currState->transState->squashed()) { // finish the translation which will delete the translation object @@ -220,8 +344,9 @@ TableWalker::processWalkWrapper() currState->req, currState->tc, currState->mode); } else { // translate the request now that we know it will work - currState->fault = tlb->translateTiming(currState->req, currState->tc, - currState->transState, currState->mode); + tlb->translateTiming(currState->req, currState->tc, + currState->transState, currState->mode); + } // delete the current request @@ -230,7 +355,9 @@ TableWalker::processWalkWrapper() // peak at the next one if (pendingQueue.size()) { currState = pendingQueue.front(); - te = tlb->lookup(currState->vaddr, currState->contextId, true); + te = tlb->lookup(currState->vaddr, currState->asid, + currState->vmid, currState->isHyp, currState->isSecure, true, + false, target_el); } else { // Terminate the loop, nothing more to do currState = NULL; @@ -249,32 +376,62 @@ TableWalker::processWalk() Addr ttbr = 0; // If translation isn't enabled, we shouldn't be here - assert(currState->sctlr.m); + assert(currState->sctlr.m || isStage2); - DPRINTF(TLB, "Begining table walk for address %#x, TTBCR: %#x, bits:%#x\n", - currState->vaddr, currState->N, mbits(currState->vaddr, 31, - 32-currState->N)); + DPRINTF(TLB, "Beginning table walk for address %#x, TTBCR: %#x, bits:%#x\n", + currState->vaddr_tainted, currState->ttbcr, mbits(currState->vaddr, 31, + 32 - currState->ttbcr.n)); - if (currState->N == 0 || !mbits(currState->vaddr, 31, 32-currState->N)) { + if (currState->ttbcr.n == 0 || !mbits(currState->vaddr, 31, + 32 - currState->ttbcr.n)) { DPRINTF(TLB, " - Selecting TTBR0\n"); - ttbr = currState->tc->readMiscReg(MISCREG_TTBR0); + // Check if table walk is allowed when Security Extensions are enabled + if (haveSecurity && currState->ttbcr.pd0) { + if (currState->isFetch) + return new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::VmsaTran); + else + return new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, currState->isWrite, + ArmFault::TranslationLL + L1, isStage2, + ArmFault::VmsaTran); + } + ttbr = currState->tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_TTBR0, currState->tc, !currState->isSecure)); } else { DPRINTF(TLB, " - Selecting TTBR1\n"); - ttbr = currState->tc->readMiscReg(MISCREG_TTBR1); - currState->N = 0; + // Check if table walk is allowed when Security Extensions are enabled + if (haveSecurity && currState->ttbcr.pd1) { + if (currState->isFetch) + return new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::VmsaTran); + else + return new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, currState->isWrite, + ArmFault::TranslationLL + L1, isStage2, + ArmFault::VmsaTran); + } + ttbr = currState->tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_TTBR1, currState->tc, !currState->isSecure)); + currState->ttbcr.n = 0; } - Addr l1desc_addr = mbits(ttbr, 31, 14-currState->N) | - (bits(currState->vaddr,31-currState->N,20) << 2); - DPRINTF(TLB, " - Descriptor at address %#x\n", l1desc_addr); - + Addr l1desc_addr = mbits(ttbr, 31, 14 - currState->ttbcr.n) | + (bits(currState->vaddr, 31 - currState->ttbcr.n, 20) << 2); + DPRINTF(TLB, " - Descriptor at address %#x (%s)\n", l1desc_addr, + currState->isSecure ? "s" : "ns"); // Trickbox address check Fault f; - f = tlb->walkTrickBoxCheck(l1desc_addr, currState->vaddr, sizeof(uint32_t), - currState->isFetch, currState->isWrite, 0, true); + f = tlb->walkTrickBoxCheck(l1desc_addr, currState->isSecure, + currState->vaddr, sizeof(uint32_t), currState->isFetch, + currState->isWrite, TlbEntry::DomainType::NoAccess, L1); if (f) { - DPRINTF(TLB, "Trickbox check caused fault on %#x\n", currState->vaddr); + DPRINTF(TLB, "Trickbox check caused fault on %#x\n", currState->vaddr_tainted); if (currState->timing) { pending = false; nextWalk(currState->tc); @@ -291,28 +448,422 @@ TableWalker::processWalk() flag = Request::UNCACHEABLE; } + bool delayed; + delayed = fetchDescriptor(l1desc_addr, (uint8_t*)&currState->l1Desc.data, + sizeof(uint32_t), flag, L1, &doL1DescEvent, + &TableWalker::doL1Descriptor); + if (!delayed) { + f = currState->fault; + } + + return f; +} + +Fault +TableWalker::processWalkLPAE() +{ + Addr ttbr, ttbr0_max, ttbr1_min, desc_addr; + int tsz, n; + LookupLevel start_lookup_level = L1; + + DPRINTF(TLB, "Beginning table walk for address %#x, TTBCR: %#x\n", + currState->vaddr_tainted, currState->ttbcr); + + Request::Flags flag = 0; + if (currState->isSecure) + flag.set(Request::SECURE); + + // work out which base address register to use, if in hyp mode we always + // use HTTBR + if (isStage2) { + DPRINTF(TLB, " - Selecting VTTBR (long-desc.)\n"); + ttbr = currState->tc->readMiscReg(MISCREG_VTTBR); + tsz = sext<4>(currState->vtcr.t0sz); + start_lookup_level = currState->vtcr.sl0 ? L1 : L2; + } else if (currState->isHyp) { + DPRINTF(TLB, " - Selecting HTTBR (long-desc.)\n"); + ttbr = currState->tc->readMiscReg(MISCREG_HTTBR); + tsz = currState->htcr.t0sz; + } else { + assert(_haveLPAE && currState->ttbcr.eae); + + // Determine boundaries of TTBR0/1 regions + if (currState->ttbcr.t0sz) + ttbr0_max = (1ULL << (32 - currState->ttbcr.t0sz)) - 1; + else if (currState->ttbcr.t1sz) + ttbr0_max = (1ULL << 32) - + (1ULL << (32 - currState->ttbcr.t1sz)) - 1; + else + ttbr0_max = (1ULL << 32) - 1; + if (currState->ttbcr.t1sz) + ttbr1_min = (1ULL << 32) - (1ULL << (32 - currState->ttbcr.t1sz)); + else + ttbr1_min = (1ULL << (32 - currState->ttbcr.t0sz)); + + // The following code snippet selects the appropriate translation table base + // address (TTBR0 or TTBR1) and the appropriate starting lookup level + // depending on the address range supported by the translation table (ARM + // ARM issue C B3.6.4) + if (currState->vaddr <= ttbr0_max) { + DPRINTF(TLB, " - Selecting TTBR0 (long-desc.)\n"); + // Check if table walk is allowed + if (currState->ttbcr.epd0) { + if (currState->isFetch) + return new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::LpaeTran); + else + return new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::LpaeTran); + } + ttbr = currState->tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_TTBR0, currState->tc, !currState->isSecure)); + tsz = currState->ttbcr.t0sz; + if (ttbr0_max < (1ULL << 30)) // Upper limit < 1 GB + start_lookup_level = L2; + } else if (currState->vaddr >= ttbr1_min) { + DPRINTF(TLB, " - Selecting TTBR1 (long-desc.)\n"); + // Check if table walk is allowed + if (currState->ttbcr.epd1) { + if (currState->isFetch) + return new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::LpaeTran); + else + return new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::LpaeTran); + } + ttbr = currState->tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_TTBR1, currState->tc, !currState->isSecure)); + tsz = currState->ttbcr.t1sz; + if (ttbr1_min >= (1ULL << 31) + (1ULL << 30)) // Lower limit >= 3 GB + start_lookup_level = L2; + } else { + // Out of boundaries -> translation fault + if (currState->isFetch) + return new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::LpaeTran); + else + return new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, ArmFault::TranslationLL + L1, + isStage2, ArmFault::LpaeTran); + } + + } + + // Perform lookup (ARM ARM issue C B3.6.6) + if (start_lookup_level == L1) { + n = 5 - tsz; + desc_addr = mbits(ttbr, 39, n) | + (bits(currState->vaddr, n + 26, 30) << 3); + DPRINTF(TLB, " - Descriptor at address %#x (%s) (long-desc.)\n", + desc_addr, currState->isSecure ? "s" : "ns"); + } else { + // Skip first-level lookup + n = (tsz >= 2 ? 14 - tsz : 12); + desc_addr = mbits(ttbr, 39, n) | + (bits(currState->vaddr, n + 17, 21) << 3); + DPRINTF(TLB, " - Descriptor at address %#x (%s) (long-desc.)\n", + desc_addr, currState->isSecure ? "s" : "ns"); + } + + // Trickbox address check + Fault f = tlb->walkTrickBoxCheck(desc_addr, currState->isSecure, + currState->vaddr, sizeof(uint64_t), currState->isFetch, + currState->isWrite, TlbEntry::DomainType::NoAccess, + start_lookup_level); + if (f) { + DPRINTF(TLB, "Trickbox check caused fault on %#x\n", currState->vaddr_tainted); + if (currState->timing) { + pending = false; + nextWalk(currState->tc); + currState = NULL; + } else { + currState->tc = NULL; + currState->req = NULL; + } + return f; + } + + if (currState->sctlr.c == 0) { + flag = Request::UNCACHEABLE; + } + + if (currState->isSecure) + flag.set(Request::SECURE); + + currState->longDesc.lookupLevel = start_lookup_level; + currState->longDesc.aarch64 = false; + currState->longDesc.largeGrain = false; + currState->longDesc.grainSize = 12; + + Event *event = start_lookup_level == L1 ? (Event *) &doL1LongDescEvent + : (Event *) &doL2LongDescEvent; + + bool delayed = fetchDescriptor(desc_addr, (uint8_t*)&currState->longDesc.data, + sizeof(uint64_t), flag, start_lookup_level, + event, &TableWalker::doLongDescriptor); + if (!delayed) { + f = currState->fault; + } + + return f; +} + +unsigned +TableWalker::adjustTableSizeAArch64(unsigned tsz) +{ + if (tsz < 25) + return 25; + if (tsz > 48) + return 48; + return tsz; +} + +bool +TableWalker::checkAddrSizeFaultAArch64(Addr addr, int currPhysAddrRange) +{ + return (currPhysAddrRange != MaxPhysAddrRange && + bits(addr, MaxPhysAddrRange - 1, currPhysAddrRange)); +} + +Fault +TableWalker::processWalkAArch64() +{ + assert(currState->aarch64); + + DPRINTF(TLB, "Beginning table walk for address %#llx, TTBCR: %#llx\n", + currState->vaddr_tainted, currState->ttbcr); + + // Determine TTBR, table size, granule size and phys. address range + Addr ttbr = 0; + int tsz = 0, ps = 0; + bool large_grain = false; + bool fault = false; + switch (currState->el) { + case EL0: + case EL1: + switch (bits(currState->vaddr, 63,48)) { + case 0: + DPRINTF(TLB, " - Selecting TTBR0 (AArch64)\n"); + ttbr = currState->tc->readMiscReg(MISCREG_TTBR0_EL1); + tsz = adjustTableSizeAArch64(64 - currState->ttbcr.t0sz); + large_grain = currState->ttbcr.tg0; + if (bits(currState->vaddr, 63, tsz) != 0x0 || + currState->ttbcr.epd0) + fault = true; + break; + case 0xffff: + DPRINTF(TLB, " - Selecting TTBR1 (AArch64)\n"); + ttbr = currState->tc->readMiscReg(MISCREG_TTBR1_EL1); + tsz = adjustTableSizeAArch64(64 - currState->ttbcr.t1sz); + large_grain = currState->ttbcr.tg1; + if (bits(currState->vaddr, 63, tsz) != mask(64-tsz) || + currState->ttbcr.epd1) + fault = true; + break; + default: + // top two bytes must be all 0s or all 1s, else invalid addr + fault = true; + } + ps = currState->ttbcr.ips; + break; + case EL2: + case EL3: + switch(bits(currState->vaddr, 63,48)) { + case 0: + DPRINTF(TLB, " - Selecting TTBR0 (AArch64)\n"); + if (currState->el == EL2) + ttbr = currState->tc->readMiscReg(MISCREG_TTBR0_EL2); + else + ttbr = currState->tc->readMiscReg(MISCREG_TTBR0_EL3); + tsz = adjustTableSizeAArch64(64 - currState->ttbcr.t0sz); + large_grain = currState->ttbcr.tg0; + break; + default: + // invalid addr if top two bytes are not all 0s + fault = true; + } + ps = currState->ttbcr.ps; + break; + } + + if (fault) { + Fault f; + if (currState->isFetch) + f = new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L0, isStage2, + ArmFault::LpaeTran); + else + f = new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, + ArmFault::TranslationLL + L0, + isStage2, ArmFault::LpaeTran); + + if (currState->timing) { + pending = false; + nextWalk(currState->tc); + currState = NULL; + } else { + currState->tc = NULL; + currState->req = NULL; + } + return f; + + } + + // Determine starting lookup level + LookupLevel start_lookup_level; + int grain_size, stride; + if (large_grain) { // 64 KB granule + grain_size = 16; + stride = grain_size - 3; + if (tsz > grain_size + 2 * stride) + start_lookup_level = L1; + else if (tsz > grain_size + stride) + start_lookup_level = L2; + else + start_lookup_level = L3; + } else { // 4 KB granule + grain_size = 12; + stride = grain_size - 3; + if (tsz > grain_size + 3 * stride) + start_lookup_level = L0; + else if (tsz > grain_size + 2 * stride) + start_lookup_level = L1; + else + start_lookup_level = L2; + } + + // Determine table base address + int base_addr_lo = 3 + tsz - stride * (3 - start_lookup_level) - + grain_size; + Addr base_addr = mbits(ttbr, 47, base_addr_lo); + + // Determine physical address size and raise an Address Size Fault if + // necessary + int pa_range = decodePhysAddrRange64(ps); + // Clamp to lower limit + if (pa_range > physAddrRange) + currState->physAddrRange = physAddrRange; + else + currState->physAddrRange = pa_range; + if (checkAddrSizeFaultAArch64(base_addr, currState->physAddrRange)) { + DPRINTF(TLB, "Address size fault before any lookup\n"); + Fault f; + if (currState->isFetch) + f = new PrefetchAbort(currState->vaddr_tainted, + ArmFault::AddressSizeLL + start_lookup_level, + isStage2, + ArmFault::LpaeTran); + else + f = new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, + ArmFault::AddressSizeLL + start_lookup_level, + isStage2, + ArmFault::LpaeTran); + + + if (currState->timing) { + pending = false; + nextWalk(currState->tc); + currState = NULL; + } else { + currState->tc = NULL; + currState->req = NULL; + } + return f; + + } + + // Determine descriptor address + Addr desc_addr = base_addr | + (bits(currState->vaddr, tsz - 1, + stride * (3 - start_lookup_level) + grain_size) << 3); + + // Trickbox address check + Fault f = tlb->walkTrickBoxCheck(desc_addr, currState->isSecure, + currState->vaddr, sizeof(uint64_t), currState->isFetch, + currState->isWrite, TlbEntry::DomainType::NoAccess, + start_lookup_level); + if (f) { + DPRINTF(TLB, "Trickbox check caused fault on %#x\n", currState->vaddr_tainted); + if (currState->timing) { + pending = false; + nextWalk(currState->tc); + currState = NULL; + } else { + currState->tc = NULL; + currState->req = NULL; + } + return f; + } + + Request::Flags flag = 0; + if (currState->sctlr.c == 0) { + flag = Request::UNCACHEABLE; + } + + currState->longDesc.lookupLevel = start_lookup_level; + currState->longDesc.aarch64 = true; + currState->longDesc.largeGrain = large_grain; + currState->longDesc.grainSize = grain_size; + if (currState->timing) { - port.dmaAction(MemCmd::ReadReq, l1desc_addr, sizeof(uint32_t), - &doL1DescEvent, (uint8_t*)&currState->l1Desc.data, + Event *event; + switch (start_lookup_level) { + case L0: + event = (Event *) &doL0LongDescEvent; + break; + case L1: + event = (Event *) &doL1LongDescEvent; + break; + case L2: + event = (Event *) &doL2LongDescEvent; + break; + case L3: + event = (Event *) &doL3LongDescEvent; + break; + default: + panic("Invalid table lookup level"); + break; + } + port.dmaAction(MemCmd::ReadReq, desc_addr, sizeof(uint64_t), event, + (uint8_t*) &currState->longDesc.data, currState->tc->getCpuPtr()->clockPeriod(), flag); - DPRINTF(TLBVerbose, "Adding to walker fifo: queue size before " - "adding: %d\n", - stateQueueL1.size()); - stateQueueL1.push_back(currState); + DPRINTF(TLBVerbose, + "Adding to walker fifo: queue size before adding: %d\n", + stateQueues[start_lookup_level].size()); + stateQueues[start_lookup_level].push_back(currState); currState = NULL; } else if (!currState->functional) { - port.dmaAction(MemCmd::ReadReq, l1desc_addr, sizeof(uint32_t), - NULL, (uint8_t*)&currState->l1Desc.data, + port.dmaAction(MemCmd::ReadReq, desc_addr, sizeof(uint64_t), + NULL, (uint8_t*) &currState->longDesc.data, currState->tc->getCpuPtr()->clockPeriod(), flag); - doL1Descriptor(); + doLongDescriptor(); f = currState->fault; } else { - RequestPtr req = new Request(l1desc_addr, sizeof(uint32_t), flag, masterId); - req->taskId(ContextSwitchTaskId::DMA); + RequestPtr req = new Request(desc_addr, sizeof(uint64_t), flag, + masterId); PacketPtr pkt = new Packet(req, MemCmd::ReadReq); - pkt->dataStatic((uint8_t*)&currState->l1Desc.data); + pkt->dataStatic((uint8_t*) &currState->longDesc.data); port.sendFunctional(pkt); - doL1Descriptor(); + doLongDescriptor(); delete req; delete pkt; f = currState->fault; @@ -330,38 +881,38 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, DPRINTF(TLBVerbose, "memAttrs texcb:%d s:%d\n", texcb, s); te.shareable = false; // default value te.nonCacheable = false; - bool outer_shareable = false; + te.outerShareable = false; if (sctlr.tre == 0 || ((sctlr.tre == 1) && (sctlr.m == 0))) { switch(texcb) { case 0: // Stongly-ordered te.nonCacheable = true; - te.mtype = TlbEntry::StronglyOrdered; + te.mtype = TlbEntry::MemoryType::StronglyOrdered; te.shareable = true; te.innerAttrs = 1; te.outerAttrs = 0; break; case 1: // Shareable Device te.nonCacheable = true; - te.mtype = TlbEntry::Device; + te.mtype = TlbEntry::MemoryType::Device; te.shareable = true; te.innerAttrs = 3; te.outerAttrs = 0; break; case 2: // Outer and Inner Write-Through, no Write-Allocate - te.mtype = TlbEntry::Normal; + te.mtype = TlbEntry::MemoryType::Normal; te.shareable = s; te.innerAttrs = 6; te.outerAttrs = bits(texcb, 1, 0); break; case 3: // Outer and Inner Write-Back, no Write-Allocate - te.mtype = TlbEntry::Normal; + te.mtype = TlbEntry::MemoryType::Normal; te.shareable = s; te.innerAttrs = 7; te.outerAttrs = bits(texcb, 1, 0); break; case 4: // Outer and Inner Non-cacheable te.nonCacheable = true; - te.mtype = TlbEntry::Normal; + te.mtype = TlbEntry::MemoryType::Normal; te.shareable = s; te.innerAttrs = 0; te.outerAttrs = bits(texcb, 1, 0); @@ -373,14 +924,14 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, panic("Implementation-defined texcb value!\n"); break; case 7: // Outer and Inner Write-Back, Write-Allocate - te.mtype = TlbEntry::Normal; + te.mtype = TlbEntry::MemoryType::Normal; te.shareable = s; te.innerAttrs = 5; te.outerAttrs = 1; break; case 8: // Non-shareable Device te.nonCacheable = true; - te.mtype = TlbEntry::Device; + te.mtype = TlbEntry::MemoryType::Device; te.shareable = false; te.innerAttrs = 3; te.outerAttrs = 0; @@ -389,7 +940,7 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, panic("Reserved texcb value!\n"); break; case 16 ... 31: // Cacheable Memory - te.mtype = TlbEntry::Normal; + te.mtype = TlbEntry::MemoryType::Normal; te.shareable = s; if (bits(texcb, 1,0) == 0 || bits(texcb, 3,2) == 0) te.nonCacheable = true; @@ -401,8 +952,10 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, } } else { assert(tc); - PRRR prrr = tc->readMiscReg(MISCREG_PRRR); - NMRR nmrr = tc->readMiscReg(MISCREG_NMRR); + PRRR prrr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_PRRR, + currState->tc, !currState->isSecure)); + NMRR nmrr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_NMRR, + currState->tc, !currState->isSecure)); DPRINTF(TLBVerbose, "memAttrs PRRR:%08x NMRR:%08x\n", prrr, nmrr); uint8_t curr_tr = 0, curr_ir = 0, curr_or = 0; switch(bits(texcb, 2,0)) { @@ -410,37 +963,37 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, curr_tr = prrr.tr0; curr_ir = nmrr.ir0; curr_or = nmrr.or0; - outer_shareable = (prrr.nos0 == 0); + te.outerShareable = (prrr.nos0 == 0); break; case 1: curr_tr = prrr.tr1; curr_ir = nmrr.ir1; curr_or = nmrr.or1; - outer_shareable = (prrr.nos1 == 0); + te.outerShareable = (prrr.nos1 == 0); break; case 2: curr_tr = prrr.tr2; curr_ir = nmrr.ir2; curr_or = nmrr.or2; - outer_shareable = (prrr.nos2 == 0); + te.outerShareable = (prrr.nos2 == 0); break; case 3: curr_tr = prrr.tr3; curr_ir = nmrr.ir3; curr_or = nmrr.or3; - outer_shareable = (prrr.nos3 == 0); + te.outerShareable = (prrr.nos3 == 0); break; case 4: curr_tr = prrr.tr4; curr_ir = nmrr.ir4; curr_or = nmrr.or4; - outer_shareable = (prrr.nos4 == 0); + te.outerShareable = (prrr.nos4 == 0); break; case 5: curr_tr = prrr.tr5; curr_ir = nmrr.ir5; curr_or = nmrr.or5; - outer_shareable = (prrr.nos5 == 0); + te.outerShareable = (prrr.nos5 == 0); break; case 6: panic("Imp defined type\n"); @@ -448,14 +1001,14 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, curr_tr = prrr.tr7; curr_ir = nmrr.ir7; curr_or = nmrr.or7; - outer_shareable = (prrr.nos7 == 0); + te.outerShareable = (prrr.nos7 == 0); break; } switch(curr_tr) { case 0: DPRINTF(TLBVerbose, "StronglyOrdered\n"); - te.mtype = TlbEntry::StronglyOrdered; + te.mtype = TlbEntry::MemoryType::StronglyOrdered; te.nonCacheable = true; te.innerAttrs = 1; te.outerAttrs = 0; @@ -464,7 +1017,7 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, case 1: DPRINTF(TLBVerbose, "Device ds1:%d ds0:%d s:%d\n", prrr.ds1, prrr.ds0, s); - te.mtype = TlbEntry::Device; + te.mtype = TlbEntry::MemoryType::Device; te.nonCacheable = true; te.innerAttrs = 3; te.outerAttrs = 0; @@ -476,7 +1029,7 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, case 2: DPRINTF(TLBVerbose, "Normal ns1:%d ns0:%d s:%d\n", prrr.ns1, prrr.ns0, s); - te.mtype = TlbEntry::Normal; + te.mtype = TlbEntry::MemoryType::Normal; if (prrr.ns1 && s) te.shareable = true; if (prrr.ns0 && !s) @@ -486,7 +1039,7 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, panic("Reserved type"); } - if (te.mtype == TlbEntry::Normal){ + if (te.mtype == TlbEntry::MemoryType::Normal){ switch(curr_ir) { case 0: te.nonCacheable = true; @@ -523,40 +1076,192 @@ TableWalker::memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, DPRINTF(TLBVerbose, "memAttrs: shareable: %d, innerAttrs: %d, \ outerAttrs: %d\n", te.shareable, te.innerAttrs, te.outerAttrs); + te.setAttributes(false); +} + +void +TableWalker::memAttrsLPAE(ThreadContext *tc, TlbEntry &te, + LongDescriptor &lDescriptor) +{ + assert(_haveLPAE); + + uint8_t attr; + uint8_t sh = lDescriptor.sh(); + // Different format and source of attributes if this is a stage 2 + // translation + if (isStage2) { + attr = lDescriptor.memAttr(); + uint8_t attr_3_2 = (attr >> 2) & 0x3; + uint8_t attr_1_0 = attr & 0x3; + + DPRINTF(TLBVerbose, "memAttrsLPAE MemAttr:%#x sh:%#x\n", attr, sh); + + if (attr_3_2 == 0) { + te.mtype = attr_1_0 == 0 ? TlbEntry::MemoryType::StronglyOrdered + : TlbEntry::MemoryType::Device; + te.outerAttrs = 0; + te.innerAttrs = attr_1_0 == 0 ? 1 : 3; + te.nonCacheable = true; + } else { + te.mtype = TlbEntry::MemoryType::Normal; + te.outerAttrs = attr_3_2 == 1 ? 0 : + attr_3_2 == 2 ? 2 : 1; + te.innerAttrs = attr_1_0 == 1 ? 0 : + attr_1_0 == 2 ? 6 : 5; + te.nonCacheable = (attr_3_2 == 1) || (attr_1_0 == 1); + } + } else { + uint8_t attrIndx = lDescriptor.attrIndx(); + + // LPAE always uses remapping of memory attributes, irrespective of the + // value of SCTLR.TRE + int reg = attrIndx & 0x4 ? MISCREG_MAIR1 : MISCREG_MAIR0; + reg = flattenMiscRegNsBanked(reg, currState->tc, !currState->isSecure); + uint32_t mair = currState->tc->readMiscReg(reg); + attr = (mair >> (8 * (attrIndx % 4))) & 0xff; + uint8_t attr_7_4 = bits(attr, 7, 4); + uint8_t attr_3_0 = bits(attr, 3, 0); + DPRINTF(TLBVerbose, "memAttrsLPAE AttrIndx:%#x sh:%#x, attr %#x\n", attrIndx, sh, attr); + + // Note: the memory subsystem only cares about the 'cacheable' memory + // attribute. The other attributes are only used to fill the PAR register + // accordingly to provide the illusion of full support + te.nonCacheable = false; + + switch (attr_7_4) { + case 0x0: + // Strongly-ordered or Device memory + if (attr_3_0 == 0x0) + te.mtype = TlbEntry::MemoryType::StronglyOrdered; + else if (attr_3_0 == 0x4) + te.mtype = TlbEntry::MemoryType::Device; + else + panic("Unpredictable behavior\n"); + te.nonCacheable = true; + te.outerAttrs = 0; + break; + case 0x4: + // Normal memory, Outer Non-cacheable + te.mtype = TlbEntry::MemoryType::Normal; + te.outerAttrs = 0; + if (attr_3_0 == 0x4) + // Inner Non-cacheable + te.nonCacheable = true; + else if (attr_3_0 < 0x8) + panic("Unpredictable behavior\n"); + break; + case 0x8: + case 0x9: + case 0xa: + case 0xb: + case 0xc: + case 0xd: + case 0xe: + case 0xf: + if (attr_7_4 & 0x4) { + te.outerAttrs = (attr_7_4 & 1) ? 1 : 3; + } else { + te.outerAttrs = 0x2; + } + // Normal memory, Outer Cacheable + te.mtype = TlbEntry::MemoryType::Normal; + if (attr_3_0 != 0x4 && attr_3_0 < 0x8) + panic("Unpredictable behavior\n"); + break; + default: + panic("Unpredictable behavior\n"); + break; + } + + switch (attr_3_0) { + case 0x0: + te.innerAttrs = 0x1; + break; + case 0x4: + te.innerAttrs = attr_7_4 == 0 ? 0x3 : 0; + break; + case 0x8: + case 0x9: + case 0xA: + case 0xB: + te.innerAttrs = 6; + break; + case 0xC: + case 0xD: + case 0xE: + case 0xF: + te.innerAttrs = attr_3_0 & 1 ? 0x5 : 0x7; + break; + default: + panic("Unpredictable behavior\n"); + break; + } + } + + te.outerShareable = sh == 2; + te.shareable = (sh & 0x2) ? true : false; + te.setAttributes(true); + te.attributes |= (uint64_t) attr << 56; +} - /** Formatting for Physical Address Register (PAR) - * Only including lower bits (TLB info here) - * PAR: - * PA [31:12] - * Reserved [11] - * TLB info [10:1] - * NOS [10] (Not Outer Sharable) - * NS [9] (Non-Secure) - * -- [8] (Implementation Defined) - * SH [7] (Sharable) - * Inner[6:4](Inner memory attributes) - * Outer[3:2](Outer memory attributes) - * SS [1] (SuperSection) - * F [0] (Fault, Fault Status in [6:1] if faulted) - */ - te.attributes = ( - ((outer_shareable ? 0:1) << 10) | - // TODO: NS Bit - ((te.shareable ? 1:0) << 7) | - (te.innerAttrs << 4) | - (te.outerAttrs << 2) - // TODO: Supersection bit - // TODO: Fault bit - ); +void +TableWalker::memAttrsAArch64(ThreadContext *tc, TlbEntry &te, uint8_t attrIndx, + uint8_t sh) +{ + DPRINTF(TLBVerbose, "memAttrsAArch64 AttrIndx:%#x sh:%#x\n", attrIndx, sh); + + // Select MAIR + uint64_t mair; + switch (currState->el) { + case EL0: + case EL1: + mair = tc->readMiscReg(MISCREG_MAIR_EL1); + break; + case EL2: + mair = tc->readMiscReg(MISCREG_MAIR_EL2); + break; + case EL3: + mair = tc->readMiscReg(MISCREG_MAIR_EL3); + break; + default: + panic("Invalid exception level"); + break; + } + + // Select attributes + uint8_t attr = bits(mair, 8 * attrIndx + 7, 8 * attrIndx); + uint8_t attr_lo = bits(attr, 3, 0); + uint8_t attr_hi = bits(attr, 7, 4); + // Memory type + te.mtype = attr_hi == 0 ? TlbEntry::MemoryType::Device : TlbEntry::MemoryType::Normal; + // Cacheability + te.nonCacheable = false; + if (te.mtype == TlbEntry::MemoryType::Device || // Device memory + attr_hi == 0x8 || // Normal memory, Outer Non-cacheable + attr_lo == 0x8) { // Normal memory, Inner Non-cacheable + te.nonCacheable = true; + } + + te.shareable = sh == 2; + te.outerShareable = (sh & 0x2) ? true : false; + // Attributes formatted according to the 64-bit PAR + te.attributes = ((uint64_t) attr << 56) | + (1 << 11) | // LPAE bit + (te.ns << 9) | // NS bit + (sh << 7); } void TableWalker::doL1Descriptor() { + if (currState->fault != NoFault) { + return; + } + DPRINTF(TLB, "L1 descriptor for %#x is %#x\n", - currState->vaddr, currState->l1Desc.data); + currState->vaddr_tainted, currState->l1Desc.data); TlbEntry te; switch (currState->l1Desc.type()) { @@ -569,11 +1274,17 @@ TableWalker::doL1Descriptor() DPRINTF(TLB, "L1 Descriptor Reserved/Ignore, causing fault\n"); if (currState->isFetch) currState->fault = - new PrefetchAbort(currState->vaddr, ArmFault::Translation0); + new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L1, + isStage2, + ArmFault::VmsaTran); else currState->fault = - new DataAbort(currState->vaddr, 0, currState->isWrite, - ArmFault::Translation0); + new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, + ArmFault::TranslationLL + L1, isStage2, + ArmFault::VmsaTran); return; case L1Descriptor::Section: if (currState->sctlr.afe && bits(currState->l1Desc.ap(), 0) == 0) { @@ -582,85 +1293,251 @@ TableWalker::doL1Descriptor() * AccessFlag0 */ - currState->fault = new DataAbort(currState->vaddr, - currState->l1Desc.domain(), currState->isWrite, - ArmFault::AccessFlag0); + currState->fault = new DataAbort(currState->vaddr_tainted, + currState->l1Desc.domain(), + currState->isWrite, + ArmFault::AccessFlagLL + L1, + isStage2, + ArmFault::VmsaTran); } if (currState->l1Desc.supersection()) { panic("Haven't implemented supersections\n"); } - te.N = 20; - te.pfn = currState->l1Desc.pfn(); - te.size = (1<<te.N) - 1; - te.global = !currState->l1Desc.global(); - te.valid = true; - te.vpn = currState->vaddr >> te.N; - te.sNp = true; - te.xn = currState->l1Desc.xn(); - te.ap = currState->l1Desc.ap(); - te.domain = currState->l1Desc.domain(); - te.asid = currState->contextId; - memAttrs(currState->tc, te, currState->sctlr, - currState->l1Desc.texcb(), currState->l1Desc.shareable()); - - DPRINTF(TLB, "Inserting Section Descriptor into TLB\n"); - DPRINTF(TLB, " - N:%d pfn:%#x size: %#x global:%d valid: %d\n", - te.N, te.pfn, te.size, te.global, te.valid); - DPRINTF(TLB, " - vpn:%#x sNp: %d xn:%d ap:%d domain: %d asid:%d nc:%d\n", - te.vpn, te.sNp, te.xn, te.ap, te.domain, te.asid, - te.nonCacheable); - DPRINTF(TLB, " - domain from l1 desc: %d data: %#x bits:%d\n", - currState->l1Desc.domain(), currState->l1Desc.data, - (currState->l1Desc.data >> 5) & 0xF ); + insertTableEntry(currState->l1Desc, false); + return; + case L1Descriptor::PageTable: + { + Addr l2desc_addr; + l2desc_addr = currState->l1Desc.l2Addr() | + (bits(currState->vaddr, 19, 12) << 2); + DPRINTF(TLB, "L1 descriptor points to page table at: %#x (%s)\n", + l2desc_addr, currState->isSecure ? "s" : "ns"); + + // Trickbox address check + currState->fault = tlb->walkTrickBoxCheck( + l2desc_addr, currState->isSecure, currState->vaddr, + sizeof(uint32_t), currState->isFetch, currState->isWrite, + currState->l1Desc.domain(), L2); + + if (currState->fault) { + if (!currState->timing) { + currState->tc = NULL; + currState->req = NULL; + } + return; + } + + Request::Flags flag = 0; + if (currState->isSecure) + flag.set(Request::SECURE); + + bool delayed; + delayed = fetchDescriptor(l2desc_addr, + (uint8_t*)&currState->l2Desc.data, + sizeof(uint32_t), flag, -1, &doL2DescEvent, + &TableWalker::doL2Descriptor); + if (delayed) { + currState->delayed = true; + } + return; + } + default: + panic("A new type in a 2 bit field?\n"); + } +} + +void +TableWalker::doLongDescriptor() +{ + if (currState->fault != NoFault) { + return; + } + + DPRINTF(TLB, "L%d descriptor for %#llx is %#llx (%s)\n", + currState->longDesc.lookupLevel, currState->vaddr_tainted, + currState->longDesc.data, + currState->aarch64 ? "AArch64" : "long-desc."); + + if ((currState->longDesc.type() == LongDescriptor::Block) || + (currState->longDesc.type() == LongDescriptor::Page)) { + DPRINTF(TLBVerbose, "Analyzing L%d descriptor: %#llx, pxn: %d, " + "xn: %d, ap: %d, af: %d, type: %d\n", + currState->longDesc.lookupLevel, + currState->longDesc.data, + currState->longDesc.pxn(), + currState->longDesc.xn(), + currState->longDesc.ap(), + currState->longDesc.af(), + currState->longDesc.type()); + } else { + DPRINTF(TLBVerbose, "Analyzing L%d descriptor: %#llx, type: %d\n", + currState->longDesc.lookupLevel, + currState->longDesc.data, + currState->longDesc.type()); + } + + TlbEntry te; + + switch (currState->longDesc.type()) { + case LongDescriptor::Invalid: if (!currState->timing) { currState->tc = NULL; currState->req = NULL; } - tlb->insert(currState->vaddr, te); + DPRINTF(TLB, "L%d descriptor Invalid, causing fault type %d\n", + currState->longDesc.lookupLevel, + ArmFault::TranslationLL + currState->longDesc.lookupLevel); + if (currState->isFetch) + currState->fault = new PrefetchAbort( + currState->vaddr_tainted, + ArmFault::TranslationLL + currState->longDesc.lookupLevel, + isStage2, + ArmFault::LpaeTran); + else + currState->fault = new DataAbort( + currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, + currState->isWrite, + ArmFault::TranslationLL + currState->longDesc.lookupLevel, + isStage2, + ArmFault::LpaeTran); return; - case L1Descriptor::PageTable: - Addr l2desc_addr; - l2desc_addr = currState->l1Desc.l2Addr() | - (bits(currState->vaddr, 19,12) << 2); - DPRINTF(TLB, "L1 descriptor points to page table at: %#x\n", - l2desc_addr); - - // Trickbox address check - currState->fault = tlb->walkTrickBoxCheck(l2desc_addr, currState->vaddr, - sizeof(uint32_t), currState->isFetch, currState->isWrite, - currState->l1Desc.domain(), false); - - if (currState->fault) { - if (!currState->timing) { - currState->tc = NULL; - currState->req = NULL; + case LongDescriptor::Block: + case LongDescriptor::Page: + { + bool fault = false; + bool aff = false; + // Check for address size fault + if (checkAddrSizeFaultAArch64( + mbits(currState->longDesc.data, MaxPhysAddrRange - 1, + currState->longDesc.offsetBits()), + currState->physAddrRange)) { + fault = true; + DPRINTF(TLB, "L%d descriptor causing Address Size Fault\n", + currState->longDesc.lookupLevel); + // Check for access fault + } else if (currState->longDesc.af() == 0) { + fault = true; + DPRINTF(TLB, "L%d descriptor causing Access Fault\n", + currState->longDesc.lookupLevel); + aff = true; + } + if (fault) { + if (currState->isFetch) + currState->fault = new PrefetchAbort( + currState->vaddr_tainted, + (aff ? ArmFault::AccessFlagLL : ArmFault::AddressSizeLL) + + currState->longDesc.lookupLevel, + isStage2, + ArmFault::LpaeTran); + else + currState->fault = new DataAbort( + currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, currState->isWrite, + (aff ? ArmFault::AccessFlagLL : ArmFault::AddressSizeLL) + + currState->longDesc.lookupLevel, + isStage2, + ArmFault::LpaeTran); + } else { + insertTableEntry(currState->longDesc, true); } - return; } + return; + case LongDescriptor::Table: + { + // Set hierarchical permission flags + currState->secureLookup = currState->secureLookup && + currState->longDesc.secureTable(); + currState->rwTable = currState->rwTable && + currState->longDesc.rwTable(); + currState->userTable = currState->userTable && + currState->longDesc.userTable(); + currState->xnTable = currState->xnTable || + currState->longDesc.xnTable(); + currState->pxnTable = currState->pxnTable || + currState->longDesc.pxnTable(); + + // Set up next level lookup + Addr next_desc_addr = currState->longDesc.nextDescAddr( + currState->vaddr); + + DPRINTF(TLB, "L%d descriptor points to L%d descriptor at: %#x (%s)\n", + currState->longDesc.lookupLevel, + currState->longDesc.lookupLevel + 1, + next_desc_addr, + currState->secureLookup ? "s" : "ns"); + + // Check for address size fault + if (currState->aarch64 && checkAddrSizeFaultAArch64( + next_desc_addr, currState->physAddrRange)) { + DPRINTF(TLB, "L%d descriptor causing Address Size Fault\n", + currState->longDesc.lookupLevel); + if (currState->isFetch) + currState->fault = new PrefetchAbort( + currState->vaddr_tainted, + ArmFault::AddressSizeLL + + currState->longDesc.lookupLevel, + isStage2, + ArmFault::LpaeTran); + else + currState->fault = new DataAbort( + currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, currState->isWrite, + ArmFault::AddressSizeLL + + currState->longDesc.lookupLevel, + isStage2, + ArmFault::LpaeTran); + return; + } + // Trickbox address check + currState->fault = tlb->walkTrickBoxCheck( + next_desc_addr, currState->vaddr, + currState->vaddr, sizeof(uint64_t), + currState->isFetch, currState->isWrite, + TlbEntry::DomainType::Client, + toLookupLevel(currState->longDesc.lookupLevel +1)); + + if (currState->fault) { + if (!currState->timing) { + currState->tc = NULL; + currState->req = NULL; + } + return; + } - if (currState->timing) { - currState->delayed = true; - port.dmaAction(MemCmd::ReadReq, l2desc_addr, sizeof(uint32_t), - &doL2DescEvent, (uint8_t*)&currState->l2Desc.data, - currState->tc->getCpuPtr()->clockPeriod()); - } else if (!currState->functional) { - port.dmaAction(MemCmd::ReadReq, l2desc_addr, sizeof(uint32_t), - NULL, (uint8_t*)&currState->l2Desc.data, - currState->tc->getCpuPtr()->clockPeriod()); - doL2Descriptor(); - } else { - RequestPtr req = new Request(l2desc_addr, sizeof(uint32_t), 0, - masterId); - req->taskId(ContextSwitchTaskId::DMA); - PacketPtr pkt = new Packet(req, MemCmd::ReadReq); - pkt->dataStatic((uint8_t*)&currState->l2Desc.data); - port.sendFunctional(pkt); - doL2Descriptor(); - delete req; - delete pkt; + Request::Flags flag = 0; + if (currState->secureLookup) + flag.set(Request::SECURE); + + currState->longDesc.lookupLevel = + (LookupLevel) (currState->longDesc.lookupLevel + 1); + Event *event = NULL; + switch (currState->longDesc.lookupLevel) { + case L1: + assert(currState->aarch64); + event = &doL1LongDescEvent; + break; + case L2: + event = &doL2LongDescEvent; + break; + case L3: + event = &doL3LongDescEvent; + break; + default: + panic("Wrong lookup level in table walk\n"); + break; + } + + bool delayed; + delayed = fetchDescriptor(next_desc_addr, (uint8_t*)&currState->longDesc.data, + sizeof(uint64_t), flag, -1, event, + &TableWalker::doLongDescriptor); + if (delayed) { + currState->delayed = true; + } } return; default: @@ -671,8 +1548,12 @@ TableWalker::doL1Descriptor() void TableWalker::doL2Descriptor() { + if (currState->fault != NoFault) { + return; + } + DPRINTF(TLB, "L2 descriptor for %#x is %#x\n", - currState->vaddr, currState->l2Desc.data); + currState->vaddr_tainted, currState->l2Desc.data); TlbEntry te; if (currState->l2Desc.invalid()) { @@ -683,11 +1564,16 @@ TableWalker::doL2Descriptor() } if (currState->isFetch) currState->fault = - new PrefetchAbort(currState->vaddr, ArmFault::Translation1); + new PrefetchAbort(currState->vaddr_tainted, + ArmFault::TranslationLL + L2, + isStage2, + ArmFault::VmsaTran); else currState->fault = - new DataAbort(currState->vaddr, currState->l1Desc.domain(), - currState->isWrite, ArmFault::Translation1); + new DataAbort(currState->vaddr_tainted, currState->l1Desc.domain(), + currState->isWrite, ArmFault::TranslationLL + L2, + isStage2, + ArmFault::VmsaTran); return; } @@ -695,53 +1581,38 @@ TableWalker::doL2Descriptor() /** @todo: check sctlr.ha (bit[17]) if Hardware Access Flag is enabled * if set, do l2.Desc.setAp0() instead of generating AccessFlag0 */ + DPRINTF(TLB, "Generating access fault at L2, afe: %d, ap: %d\n", + currState->sctlr.afe, currState->l2Desc.ap()); currState->fault = - new DataAbort(currState->vaddr, 0, currState->isWrite, - ArmFault::AccessFlag1); - + new DataAbort(currState->vaddr_tainted, + TlbEntry::DomainType::NoAccess, currState->isWrite, + ArmFault::AccessFlagLL + L2, isStage2, + ArmFault::VmsaTran); } - if (currState->l2Desc.large()) { - te.N = 16; - te.pfn = currState->l2Desc.pfn(); - } else { - te.N = 12; - te.pfn = currState->l2Desc.pfn(); - } - - te.valid = true; - te.size = (1 << te.N) - 1; - te.asid = currState->contextId; - te.sNp = false; - te.vpn = currState->vaddr >> te.N; - te.global = currState->l2Desc.global(); - te.xn = currState->l2Desc.xn(); - te.ap = currState->l2Desc.ap(); - te.domain = currState->l1Desc.domain(); - memAttrs(currState->tc, te, currState->sctlr, currState->l2Desc.texcb(), - currState->l2Desc.shareable()); - - if (!currState->timing) { - currState->tc = NULL; - currState->req = NULL; - } - tlb->insert(currState->vaddr, te); + insertTableEntry(currState->l2Desc, false); } void TableWalker::doL1DescriptorWrapper() { - currState = stateQueueL1.front(); + currState = stateQueues[L1].front(); currState->delayed = false; + // if there's a stage2 translation object we don't need it any more + if (currState->stage2Tran) { + delete currState->stage2Tran; + currState->stage2Tran = NULL; + } + DPRINTF(TLBVerbose, "L1 Desc object host addr: %p\n",&currState->l1Desc.data); DPRINTF(TLBVerbose, "L1 Desc object data: %08x\n",currState->l1Desc.data); - DPRINTF(TLBVerbose, "calling doL1Descriptor for vaddr:%#x\n", currState->vaddr); + DPRINTF(TLBVerbose, "calling doL1Descriptor for vaddr:%#x\n", currState->vaddr_tainted); doL1Descriptor(); - stateQueueL1.pop_front(); + stateQueues[L1].pop_front(); completeDrain(); // Check if fault was generated if (currState->fault != NoFault) { @@ -758,9 +1629,12 @@ TableWalker::doL1DescriptorWrapper() } else if (!currState->delayed) { // delay is not set so there is no L2 to do - DPRINTF(TLBVerbose, "calling translateTiming again\n"); - currState->fault = tlb->translateTiming(currState->req, currState->tc, - currState->transState, currState->mode); + // Don't finish the translation if a stage 2 look up is underway + if (!currState->doingStage2) { + DPRINTF(TLBVerbose, "calling translateTiming again\n"); + currState->fault = tlb->translateTiming(currState->req, currState->tc, + currState->transState, currState->mode); + } pending = false; nextWalk(currState->tc); @@ -771,7 +1645,7 @@ TableWalker::doL1DescriptorWrapper() delete currState; } else { // need to do L2 descriptor - stateQueueL2.push_back(currState); + stateQueues[L2].push_back(currState); } currState = NULL; } @@ -779,11 +1653,16 @@ TableWalker::doL1DescriptorWrapper() void TableWalker::doL2DescriptorWrapper() { - currState = stateQueueL2.front(); + currState = stateQueues[L2].front(); assert(currState->delayed); + // if there's a stage2 translation object we don't need it any more + if (currState->stage2Tran) { + delete currState->stage2Tran; + currState->stage2Tran = NULL; + } DPRINTF(TLBVerbose, "calling doL2Descriptor for vaddr:%#x\n", - currState->vaddr); + currState->vaddr_tainted); doL2Descriptor(); // Check if fault was generated @@ -792,13 +1671,16 @@ TableWalker::doL2DescriptorWrapper() currState->tc, currState->mode); } else { - DPRINTF(TLBVerbose, "calling translateTiming again\n"); - currState->fault = tlb->translateTiming(currState->req, currState->tc, - currState->transState, currState->mode); + // Don't finish the translation if a stage 2 look up is underway + if (!currState->doingStage2) { + DPRINTF(TLBVerbose, "calling translateTiming again\n"); + currState->fault = tlb->translateTiming(currState->req, + currState->tc, currState->transState, currState->mode); + } } - stateQueueL2.pop_front(); + stateQueues[L2].pop_front(); completeDrain(); pending = false; nextWalk(currState->tc); @@ -812,13 +1694,234 @@ TableWalker::doL2DescriptorWrapper() } void +TableWalker::doL0LongDescriptorWrapper() +{ + doLongDescriptorWrapper(L0); +} + +void +TableWalker::doL1LongDescriptorWrapper() +{ + doLongDescriptorWrapper(L1); +} + +void +TableWalker::doL2LongDescriptorWrapper() +{ + doLongDescriptorWrapper(L2); +} + +void +TableWalker::doL3LongDescriptorWrapper() +{ + doLongDescriptorWrapper(L3); +} + +void +TableWalker::doLongDescriptorWrapper(LookupLevel curr_lookup_level) +{ + currState = stateQueues[curr_lookup_level].front(); + assert(curr_lookup_level == currState->longDesc.lookupLevel); + currState->delayed = false; + + // if there's a stage2 translation object we don't need it any more + if (currState->stage2Tran) { + delete currState->stage2Tran; + currState->stage2Tran = NULL; + } + + DPRINTF(TLBVerbose, "calling doLongDescriptor for vaddr:%#x\n", + currState->vaddr_tainted); + doLongDescriptor(); + + stateQueues[curr_lookup_level].pop_front(); + + if (currState->fault != NoFault) { + // A fault was generated + currState->transState->finish(currState->fault, currState->req, + currState->tc, currState->mode); + + pending = false; + nextWalk(currState->tc); + + currState->req = NULL; + currState->tc = NULL; + currState->delayed = false; + delete currState; + } else if (!currState->delayed) { + // No additional lookups required + // Don't finish the translation if a stage 2 look up is underway + if (!currState->doingStage2) { + DPRINTF(TLBVerbose, "calling translateTiming again\n"); + currState->fault = tlb->translateTiming(currState->req, currState->tc, + currState->transState, + currState->mode); + } + + pending = false; + nextWalk(currState->tc); + + currState->req = NULL; + currState->tc = NULL; + currState->delayed = false; + delete currState; + } else { + if (curr_lookup_level >= MAX_LOOKUP_LEVELS - 1) + panic("Max. number of lookups already reached in table walk\n"); + // Need to perform additional lookups + stateQueues[currState->longDesc.lookupLevel].push_back(currState); + } + currState = NULL; +} + + +void TableWalker::nextWalk(ThreadContext *tc) { if (pendingQueue.size()) schedule(doProcessEvent, clockEdge(Cycles(1))); } +bool +TableWalker::fetchDescriptor(Addr descAddr, uint8_t *data, int numBytes, + Request::Flags flags, int queueIndex, Event *event, + void (TableWalker::*doDescriptor)()) +{ + bool isTiming = currState->timing; + + // do the requests for the page table descriptors have to go through the + // second stage MMU + if (currState->stage2Req) { + Fault fault; + flags = flags | TLB::MustBeOne; + + if (isTiming) { + Stage2MMU::Stage2Translation *tran = new + Stage2MMU::Stage2Translation(*stage2Mmu, data, event, + currState->vaddr); + currState->stage2Tran = tran; + stage2Mmu->readDataTimed(currState->tc, descAddr, tran, numBytes, + flags, masterId); + fault = tran->fault; + } else { + fault = stage2Mmu->readDataUntimed(currState->tc, + currState->vaddr, descAddr, data, numBytes, flags, masterId, + currState->functional); + } + + if (fault != NoFault) { + currState->fault = fault; + } + if (isTiming) { + if (queueIndex >= 0) { + DPRINTF(TLBVerbose, "Adding to walker fifo: queue size before adding: %d\n", + stateQueues[queueIndex].size()); + stateQueues[queueIndex].push_back(currState); + currState = NULL; + } + } else { + (this->*doDescriptor)(); + } + } else { + if (isTiming) { + port.dmaAction(MemCmd::ReadReq, descAddr, numBytes, event, data, + currState->tc->getCpuPtr()->clockPeriod(), flags); + if (queueIndex >= 0) { + DPRINTF(TLBVerbose, "Adding to walker fifo: queue size before adding: %d\n", + stateQueues[queueIndex].size()); + stateQueues[queueIndex].push_back(currState); + currState = NULL; + } + } else if (!currState->functional) { + port.dmaAction(MemCmd::ReadReq, descAddr, numBytes, NULL, data, + currState->tc->getCpuPtr()->clockPeriod(), flags); + (this->*doDescriptor)(); + } else { + RequestPtr req = new Request(descAddr, numBytes, flags, masterId); + req->taskId(ContextSwitchTaskId::DMA); + PacketPtr pkt = new Packet(req, MemCmd::ReadReq); + pkt->dataStatic(data); + port.sendFunctional(pkt); + (this->*doDescriptor)(); + delete req; + delete pkt; + } + } + return (isTiming); +} + +void +TableWalker::insertTableEntry(DescriptorBase &descriptor, bool longDescriptor) +{ + TlbEntry te; + // Create and fill a new page table entry + te.valid = true; + te.longDescFormat = longDescriptor; + te.isHyp = currState->isHyp; + te.asid = currState->asid; + te.vmid = currState->vmid; + te.N = descriptor.offsetBits(); + te.vpn = currState->vaddr >> te.N; + te.size = (1<<te.N) - 1; + te.pfn = descriptor.pfn(); + te.domain = descriptor.domain(); + te.lookupLevel = descriptor.lookupLevel; + te.ns = !descriptor.secure(haveSecurity, currState) || isStage2; + te.nstid = !currState->isSecure; + te.xn = descriptor.xn(); + if (currState->aarch64) + te.el = currState->el; + else + te.el = 1; + + // ASID has no meaning for stage 2 TLB entries, so mark all stage 2 entries + // as global + te.global = descriptor.global(currState) || isStage2; + if (longDescriptor) { + LongDescriptor lDescriptor = + dynamic_cast<LongDescriptor &>(descriptor); + + te.xn |= currState->xnTable; + te.pxn = currState->pxnTable || lDescriptor.pxn(); + if (isStage2) { + // this is actually the HAP field, but its stored in the same bit + // possitions as the AP field in a stage 1 translation. + te.hap = lDescriptor.ap(); + } else { + te.ap = ((!currState->rwTable || descriptor.ap() >> 1) << 1) | + (currState->userTable && (descriptor.ap() & 0x1)); + } + if (currState->aarch64) + memAttrsAArch64(currState->tc, te, currState->longDesc.attrIndx(), + currState->longDesc.sh()); + else + memAttrsLPAE(currState->tc, te, lDescriptor); + } else { + te.ap = descriptor.ap(); + memAttrs(currState->tc, te, currState->sctlr, descriptor.texcb(), + descriptor.shareable()); + } + + // Debug output + DPRINTF(TLB, descriptor.dbgHeader().c_str()); + DPRINTF(TLB, " - N:%d pfn:%#x size:%#x global:%d valid:%d\n", + te.N, te.pfn, te.size, te.global, te.valid); + DPRINTF(TLB, " - vpn:%#x xn:%d pxn:%d ap:%d domain:%d asid:%d " + "vmid:%d hyp:%d nc:%d ns:%d\n", te.vpn, te.xn, te.pxn, + te.ap, static_cast<uint8_t>(te.domain), te.asid, te.vmid, te.isHyp, + te.nonCacheable, te.ns); + DPRINTF(TLB, " - domain from L%d desc:%d data:%#x\n", + descriptor.lookupLevel, static_cast<uint8_t>(descriptor.domain()), + descriptor.getRawData()); + + // Insert the entry into the TLB + tlb->insert(currState->vaddr, te); + if (!currState->timing) { + currState->tc = NULL; + currState->req = NULL; + } +} ArmISA::TableWalker * ArmTableWalkerParams::create() @@ -826,3 +1929,17 @@ ArmTableWalkerParams::create() return new ArmISA::TableWalker(this); } +LookupLevel +TableWalker::toLookupLevel(uint8_t lookup_level_as_int) +{ + switch (lookup_level_as_int) { + case L1: + return L1; + case L2: + return L2; + case L3: + return L3; + default: + panic("Invalid lookup level conversion"); + } +} diff --git a/src/arch/arm/table_walker.hh b/src/arch/arm/table_walker.hh index 23464f56d..4753fe6a0 100644 --- a/src/arch/arm/table_walker.hh +++ b/src/arch/arm/table_walker.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -35,6 +35,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Authors: Ali Saidi + * Giacomo Gabrielli */ #ifndef __ARCH_ARM_TABLE_WALKER_HH__ @@ -43,6 +44,7 @@ #include <list> #include "arch/arm/miscregs.hh" +#include "arch/arm/system.hh" #include "arch/arm/tlb.hh" #include "dev/dma_device.hh" #include "mem/mem_object.hh" @@ -56,11 +58,39 @@ class ThreadContext; namespace ArmISA { class Translation; class TLB; +class Stage2MMU; class TableWalker : public MemObject { public: - struct L1Descriptor { + class WalkerState; + + class DescriptorBase { + public: + /** Current lookup level for this descriptor */ + LookupLevel lookupLevel; + + virtual Addr pfn() const = 0; + virtual TlbEntry::DomainType domain() const = 0; + virtual bool xn() const = 0; + virtual uint8_t ap() const = 0; + virtual bool global(WalkerState *currState) const = 0; + virtual uint8_t offsetBits() const = 0; + virtual bool secure(bool have_security, WalkerState *currState) const = 0; + virtual std::string dbgHeader() const = 0; + virtual uint64_t getRawData() const = 0; + virtual uint8_t texcb() const + { + panic("texcb() not implemented for this class\n"); + } + virtual bool shareable() const + { + panic("shareable() not implemented for this class\n"); + } + }; + + class L1Descriptor : public DescriptorBase { + public: /** Type of page table entry ARM DDI 0406B: B3-8*/ enum EntryType { Ignore, @@ -76,6 +106,27 @@ class TableWalker : public MemObject * written back to memory */ bool _dirty; + /** Default ctor */ + L1Descriptor() + { + lookupLevel = L1; + } + + virtual uint64_t getRawData() const + { + return (data); + } + + virtual std::string dbgHeader() const + { + return "Inserting Section Descriptor into TLB\n"; + } + + virtual uint8_t offsetBits() const + { + return 20; + } + EntryType type() const { return (EntryType)(data & 0x3); @@ -112,9 +163,9 @@ class TableWalker : public MemObject } /** Is the translation global (no asid used)? */ - bool global() const + bool global(WalkerState *currState) const { - return bits(data, 17); + return !bits(data, 17); } /** Is the translation not allow execution? */ @@ -130,9 +181,9 @@ class TableWalker : public MemObject } /** Domain Client/Manager: ARM DDI 0406B: B3-31 */ - uint8_t domain() const + TlbEntry::DomainType domain() const { - return bits(data, 8, 5); + return static_cast<TlbEntry::DomainType>(bits(data, 8, 5)); } /** Address of L2 descriptor if it exists */ @@ -171,18 +222,70 @@ class TableWalker : public MemObject { return _dirty; } + + /** + * Returns true if this entry targets the secure physical address + * map. + */ + bool secure(bool have_security, WalkerState *currState) const + { + if (have_security) { + if (type() == PageTable) + return !bits(data, 3); + else + return !bits(data, 19); + } + return false; + } }; /** Level 2 page table descriptor */ - struct L2Descriptor { - + class L2Descriptor : public DescriptorBase { + public: /** The raw bits of the entry. */ - uint32_t data; + uint32_t data; + L1Descriptor *l1Parent; /** This entry has been modified (access flag set) and needs to be * written back to memory */ bool _dirty; + /** Default ctor */ + L2Descriptor() + { + lookupLevel = L2; + } + + L2Descriptor(L1Descriptor &parent) : l1Parent(&parent) + { + lookupLevel = L2; + } + + virtual uint64_t getRawData() const + { + return (data); + } + + virtual std::string dbgHeader() const + { + return "Inserting L2 Descriptor into TLB\n"; + } + + virtual TlbEntry::DomainType domain() const + { + return l1Parent->domain(); + } + + bool secure(bool have_security, WalkerState *currState) const + { + return l1Parent->secure(have_security, currState); + } + + virtual uint8_t offsetBits() const + { + return large() ? 16 : 12; + } + /** Is the entry invalid */ bool invalid() const { @@ -202,7 +305,7 @@ class TableWalker : public MemObject } /** Is the translation global (no asid used)? */ - bool global() const + bool global(WalkerState *currState) const { return !bits(data, 11); } @@ -259,49 +362,329 @@ class TableWalker : public MemObject }; - protected: + /** Long-descriptor format (LPAE) */ + class LongDescriptor : public DescriptorBase { + public: + /** Descriptor type */ + enum EntryType { + Invalid, + Table, + Block, + Page + }; - /** - * A snooping DMA port that currently does nothing besides - * extending the DMA port to accept snoops without complaining. - */ - class SnoopingDmaPort : public DmaPort - { + /** The raw bits of the entry */ + uint64_t data; - protected: + /** This entry has been modified (access flag set) and needs to be + * written back to memory */ + bool _dirty; - virtual void recvTimingSnoopReq(PacketPtr pkt) - { } + virtual uint64_t getRawData() const + { + return (data); + } - virtual Tick recvAtomicSnoop(PacketPtr pkt) - { return 0; } + virtual std::string dbgHeader() const + { + if (type() == LongDescriptor::Page) { + assert(lookupLevel == L3); + return "Inserting Page descriptor into TLB\n"; + } else { + assert(lookupLevel < L3); + return "Inserting Block descriptor into TLB\n"; + } + } - virtual void recvFunctionalSnoop(PacketPtr pkt) - { } + /** + * Returns true if this entry targets the secure physical address + * map. + */ + bool secure(bool have_security, WalkerState *currState) const + { + assert(type() == Block || type() == Page); + return have_security && (currState->secureLookup && !bits(data, 5)); + } - virtual bool isSnooping() const { return true; } + /** True if the current lookup is performed in AArch64 state */ + bool aarch64; - public: + /** True if the granule size is 64 KB (AArch64 only) */ + bool largeGrain; - /** - * A snooping DMA port merely calls the construtor of the DMA - * port. - */ - SnoopingDmaPort(MemObject *dev, System *s) : - DmaPort(dev, s) - { } + /** Width of the granule size in bits */ + int grainSize; + + /** Return the descriptor type */ + EntryType type() const + { + switch (bits(data, 1, 0)) { + case 0x1: + // In AArch64 blocks are not allowed at L0 for the 4 KB granule + // and at L1 for the 64 KB granule + if (largeGrain) + return lookupLevel == L2 ? Block : Invalid; + return lookupLevel == L0 || lookupLevel == L3 ? Invalid : Block; + case 0x3: + return lookupLevel == L3 ? Page : Table; + default: + return Invalid; + } + } + + /** Return the bit width of the page/block offset */ + uint8_t offsetBits() const + { + assert(type() == Block || type() == Page); + if (largeGrain) { + if (type() == Block) + return 29 /* 512 MB */; + return 16 /* 64 KB */; // type() == Page + } else { + if (type() == Block) + return lookupLevel == L1 ? 30 /* 1 GB */ : 21 /* 2 MB */; + return 12 /* 4 KB */; // type() == Page + } + } + + /** Return the physical frame, bits shifted right */ + Addr pfn() const + { + if (aarch64) + return bits(data, 47, offsetBits()); + return bits(data, 39, offsetBits()); + } + + /** Return the complete physical address given a VA */ + Addr paddr(Addr va) const + { + int n = offsetBits(); + if (aarch64) + return mbits(data, 47, n) | mbits(va, n - 1, 0); + return mbits(data, 39, n) | mbits(va, n - 1, 0); + } + + /** Return the physical address of the entry */ + Addr paddr() const + { + if (aarch64) + return mbits(data, 47, offsetBits()); + return mbits(data, 39, offsetBits()); + } + + /** Return the address of the next page table */ + Addr nextTableAddr() const + { + assert(type() == Table); + if (aarch64) + return mbits(data, 47, grainSize); + else + return mbits(data, 39, 12); + } + + /** Return the address of the next descriptor */ + Addr nextDescAddr(Addr va) const + { + assert(type() == Table); + Addr pa = 0; + if (aarch64) { + int stride = grainSize - 3; + int va_lo = stride * (3 - (lookupLevel + 1)) + grainSize; + int va_hi = va_lo + stride - 1; + pa = nextTableAddr() | (bits(va, va_hi, va_lo) << 3); + } else { + if (lookupLevel == L1) + pa = nextTableAddr() | (bits(va, 29, 21) << 3); + else // lookupLevel == L2 + pa = nextTableAddr() | (bits(va, 20, 12) << 3); + } + return pa; + } + + /** Is execution allowed on this mapping? */ + bool xn() const + { + assert(type() == Block || type() == Page); + return bits(data, 54); + } + + /** Is privileged execution allowed on this mapping? (LPAE only) */ + bool pxn() const + { + assert(type() == Block || type() == Page); + return bits(data, 53); + } + + /** Contiguous hint bit. */ + bool contiguousHint() const + { + assert(type() == Block || type() == Page); + return bits(data, 52); + } + + /** Is the translation global (no asid used)? */ + bool global(WalkerState *currState) const + { + assert(currState && (type() == Block || type() == Page)); + if (!currState->aarch64 && (currState->isSecure && + !currState->secureLookup)) { + return false; // ARM ARM issue C B3.6.3 + } else if (currState->aarch64) { + if (currState->el == EL2 || currState->el == EL3) { + return true; // By default translations are treated as global + // in AArch64 EL2 and EL3 + } else if (currState->isSecure && !currState->secureLookup) { + return false; + } + } + return !bits(data, 11); + } + + /** Returns true if the access flag (AF) is set. */ + bool af() const + { + assert(type() == Block || type() == Page); + return bits(data, 10); + } + + /** 2-bit shareability field */ + uint8_t sh() const + { + assert(type() == Block || type() == Page); + return bits(data, 9, 8); + } + + /** 2-bit access protection flags */ + uint8_t ap() const + { + assert(type() == Block || type() == Page); + // Long descriptors only support the AP[2:1] scheme + return bits(data, 7, 6); + } + + /** Read/write access protection flag */ + bool rw() const + { + assert(type() == Block || type() == Page); + return !bits(data, 7); + } + + /** User/privileged level access protection flag */ + bool user() const + { + assert(type() == Block || type() == Page); + return bits(data, 6); + } + + /** Return the AP bits as compatible with the AP[2:0] format. Utility + * function used to simplify the code in the TLB for performing + * permission checks. */ + static uint8_t ap(bool rw, bool user) + { + return ((!rw) << 2) | (user << 1); + } + + TlbEntry::DomainType domain() const + { + // Long-desc. format only supports Client domain + assert(type() == Block || type() == Page); + return TlbEntry::DomainType::Client; + } + + /** Attribute index */ + uint8_t attrIndx() const + { + assert(type() == Block || type() == Page); + return bits(data, 4, 2); + } + + /** Memory attributes, only used by stage 2 translations */ + uint8_t memAttr() const + { + assert(type() == Block || type() == Page); + return bits(data, 5, 2); + } + + /** Set access flag that this entry has been touched. Mark the entry as + * requiring a writeback, in the future. */ + void setAf() + { + data |= 1 << 10; + _dirty = true; + } + + /** This entry needs to be written back to memory */ + bool dirty() const + { + return _dirty; + } + + /** Whether the subsequent levels of lookup are secure */ + bool secureTable() const + { + assert(type() == Table); + return !bits(data, 63); + } + + /** Two bit access protection flags for subsequent levels of lookup */ + uint8_t apTable() const + { + assert(type() == Table); + return bits(data, 62, 61); + } + + /** R/W protection flag for subsequent levels of lookup */ + uint8_t rwTable() const + { + assert(type() == Table); + return !bits(data, 62); + } + + /** User/privileged mode protection flag for subsequent levels of + * lookup */ + uint8_t userTable() const + { + assert(type() == Table); + return !bits(data, 61); + } + + /** Is execution allowed on subsequent lookup levels? */ + bool xnTable() const + { + assert(type() == Table); + return bits(data, 60); + } + + /** Is privileged execution allowed on subsequent lookup levels? */ + bool pxnTable() const + { + assert(type() == Table); + return bits(data, 59); + } }; - struct WalkerState //: public SimObject + class WalkerState { + public: /** Thread context that we're doing the walk for */ ThreadContext *tc; + /** If the access is performed in AArch64 state */ + bool aarch64; + + /** Current exception level */ + ExceptionLevel el; + + /** Current physical address range in bits */ + int physAddrRange; + /** Request that is currently being serviced */ RequestPtr req; - /** Context ID that we're servicing the request under */ - uint8_t contextId; + /** ASID that we're servicing the request under */ + uint16_t asid; + uint8_t vmid; + bool isHyp; /** Translation state for delayed requests */ TLB::Translation *transState; @@ -309,14 +692,32 @@ class TableWalker : public MemObject /** The fault that we are going to return */ Fault fault; - /** The virtual address that is being translated */ + /** The virtual address that is being translated with tagging removed.*/ Addr vaddr; + /** The virtual address that is being translated */ + Addr vaddr_tainted; + /** Cached copy of the sctlr as it existed when translation began */ SCTLR sctlr; - /** Width of the base address held in TTRB0 */ - uint32_t N; + /** Cached copy of the scr as it existed when translation began */ + SCR scr; + + /** Cached copy of the cpsr as it existed when translation began */ + CPSR cpsr; + + /** Cached copy of the ttbcr as it existed when translation began. */ + TTBCR ttbcr; + + /** Cached copy of the htcr as it existed when translation began. */ + HTCR htcr; + + /** Cached copy of the htcr as it existed when translation began. */ + HCR hcr; + + /** Cached copy of the vtcr as it existed when translation began. */ + VTCR_t vtcr; /** If the access is a write */ bool isWrite; @@ -324,6 +725,28 @@ class TableWalker : public MemObject /** If the access is a fetch (for execution, and no-exec) must be checked?*/ bool isFetch; + /** If the access comes from the secure state. */ + bool isSecure; + + /** Helper variables used to implement hierarchical access permissions + * when the long-desc. format is used (LPAE only) */ + bool secureLookup; + bool rwTable; + bool userTable; + bool xnTable; + bool pxnTable; + + /** Flag indicating if a second stage of lookup is required */ + bool stage2Req; + + /** Indicates whether the translation has been passed onto the second + * stage mmu, and no more work is required from the first stage. + */ + bool doingStage2; + + /** A pointer to the stage 2 translation that's in progress */ + TLB::Translation *stage2Tran; + /** If the mode is timing or atomic */ bool timing; @@ -333,10 +756,18 @@ class TableWalker : public MemObject /** Save mode for use in delayed response */ BaseTLB::Mode mode; + /** The translation type that has been requested */ + TLB::ArmTranslationType tranType; + + /** Short-format descriptors */ L1Descriptor l1Desc; L2Descriptor l2Desc; - /** Whether L1/L2 descriptor response is delayed in timing mode */ + /** Long-format descriptor (LPAE and AArch64) */ + LongDescriptor longDesc; + + /** Whether the response is delayed in timing mode due to additional + * lookups */ bool delayed; TableWalker *tableWalker; @@ -344,16 +775,48 @@ class TableWalker : public MemObject void doL1Descriptor(); void doL2Descriptor(); - std::string name() const {return tableWalker->name();} + void doLongDescriptor(); + + WalkerState(); + + std::string name() const { return tableWalker->name(); } }; + protected: + + /** + * A snooping DMA port that currently does nothing besides + * extending the DMA port to accept snoops without complaining. + */ + class SnoopingDmaPort : public DmaPort + { - /** Queue of requests that need processing first level translation */ - std::list<WalkerState *> stateQueueL1; + protected: - /** Queue of requests that have passed first level translation and - * require an additional level. */ - std::list<WalkerState *> stateQueueL2; + virtual void recvTimingSnoopReq(PacketPtr pkt) + { } + + virtual Tick recvAtomicSnoop(PacketPtr pkt) + { return 0; } + + virtual void recvFunctionalSnoop(PacketPtr pkt) + { } + + virtual bool isSnooping() const { return true; } + + public: + + /** + * A snooping DMA port merely calls the construtor of the DMA + * port. + */ + SnoopingDmaPort(MemObject *dev, System *s) : + DmaPort(dev, s) + { } + }; + + /** Queues of requests for all the different lookup levels */ + std::list<WalkerState *> stateQueues[MAX_LOOKUP_LEVELS]; /** Queue of requests that have passed are waiting because the walker is * currently busy. */ @@ -366,6 +829,12 @@ class TableWalker : public MemObject /** If we're draining keep the drain event around until we're drained */ DrainManager *drainManager; + /** The MMU to forward second stage look upts to */ + Stage2MMU *stage2Mmu; + + /** Indicates whether this table walker is part of the stage 2 mmu */ + const bool isStage2; + /** TLB that is initiating these table walks */ TLB *tlb; @@ -384,8 +853,16 @@ class TableWalker : public MemObject * removed from the pendingQueue per cycle. */ unsigned numSquashable; + /** Cached copies of system-level properties */ + bool haveSecurity; + bool _haveLPAE; + bool _haveVirtualization; + uint8_t physAddrRange; + bool _haveLargeAsid64; + ArmSystem *armSys; + public: - typedef ArmTableWalkerParams Params; + typedef ArmTableWalkerParams Params; TableWalker(const Params *p); virtual ~TableWalker(); @@ -395,38 +872,90 @@ class TableWalker : public MemObject return dynamic_cast<const Params *>(_params); } + bool haveLPAE() const { return _haveLPAE; } + bool haveVirtualization() const { return _haveVirtualization; } + bool haveLargeAsid64() const { return _haveLargeAsid64; } /** Checks if all state is cleared and if so, completes drain */ void completeDrain(); unsigned int drain(DrainManager *dm); - void drainResume(); + virtual void drainResume(); virtual BaseMasterPort& getMasterPort(const std::string &if_name, PortID idx = InvalidPortID); - Fault walk(RequestPtr req, ThreadContext *tc, uint8_t cid, TLB::Mode mode, - TLB::Translation *_trans, bool timing, bool functional = false); + /** + * Allow the MMU (overseeing both stage 1 and stage 2 TLBs) to + * access the table walker port through the TLB so that it can + * orchestrate staged translations. + * + * @return Our DMA port + */ + DmaPort& getWalkerPort() { return port; } + + Fault walk(RequestPtr req, ThreadContext *tc, uint16_t asid, uint8_t _vmid, + bool _isHyp, TLB::Mode mode, TLB::Translation *_trans, + bool timing, bool functional, bool secure, + TLB::ArmTranslationType tranType); void setTlb(TLB *_tlb) { tlb = _tlb; } + TLB* getTlb() { return tlb; } + void setMMU(Stage2MMU *m) { stage2Mmu = m; } void memAttrs(ThreadContext *tc, TlbEntry &te, SCTLR sctlr, uint8_t texcb, bool s); + void memAttrsLPAE(ThreadContext *tc, TlbEntry &te, + LongDescriptor &lDescriptor); + void memAttrsAArch64(ThreadContext *tc, TlbEntry &te, uint8_t attrIndx, + uint8_t sh); + + static LookupLevel toLookupLevel(uint8_t lookup_level_as_int); private: void doL1Descriptor(); void doL1DescriptorWrapper(); - EventWrapper<TableWalker, &TableWalker::doL1DescriptorWrapper> doL1DescEvent; + EventWrapper<TableWalker, + &TableWalker::doL1DescriptorWrapper> doL1DescEvent; void doL2Descriptor(); void doL2DescriptorWrapper(); - EventWrapper<TableWalker, &TableWalker::doL2DescriptorWrapper> doL2DescEvent; + EventWrapper<TableWalker, + &TableWalker::doL2DescriptorWrapper> doL2DescEvent; + + void doLongDescriptor(); + + void doL0LongDescriptorWrapper(); + EventWrapper<TableWalker, + &TableWalker::doL0LongDescriptorWrapper> doL0LongDescEvent; + void doL1LongDescriptorWrapper(); + EventWrapper<TableWalker, + &TableWalker::doL1LongDescriptorWrapper> doL1LongDescEvent; + void doL2LongDescriptorWrapper(); + EventWrapper<TableWalker, + &TableWalker::doL2LongDescriptorWrapper> doL2LongDescEvent; + void doL3LongDescriptorWrapper(); + EventWrapper<TableWalker, + &TableWalker::doL3LongDescriptorWrapper> doL3LongDescEvent; + + void doLongDescriptorWrapper(LookupLevel curr_lookup_level); + + bool fetchDescriptor(Addr descAddr, uint8_t *data, int numBytes, + Request::Flags flags, int queueIndex, Event *event, + void (TableWalker::*doDescriptor)()); + + void insertTableEntry(DescriptorBase &descriptor, bool longDescriptor); Fault processWalk(); + Fault processWalkLPAE(); + static unsigned adjustTableSizeAArch64(unsigned tsz); + /// Returns true if the address exceeds the range permitted by the + /// system-wide setting or by the TCR_ELx IPS/PS setting + static bool checkAddrSizeFaultAArch64(Addr addr, int currPhysAddrRange); + Fault processWalkAArch64(); void processWalkWrapper(); EventWrapper<TableWalker, &TableWalker::processWalkWrapper> doProcessEvent; void nextWalk(ThreadContext *tc); }; - } // namespace ArmISA #endif //__ARCH_ARM_TABLE_WALKER_HH__ diff --git a/src/arch/arm/tlb.cc b/src/arch/arm/tlb.cc index 805898576..037f7490e 100644 --- a/src/arch/arm/tlb.cc +++ b/src/arch/arm/tlb.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -49,6 +49,8 @@ #include "arch/arm/pagetable.hh" #include "arch/arm/system.hh" #include "arch/arm/table_walker.hh" +#include "arch/arm/stage2_lookup.hh" +#include "arch/arm/stage2_mmu.hh" #include "arch/arm/tlb.hh" #include "arch/arm/utility.hh" #include "base/inifile.hh" @@ -67,28 +69,51 @@ using namespace std; using namespace ArmISA; -TLB::TLB(const Params *p) - : BaseTLB(p), size(p->size) , tableWalker(p->walker), - rangeMRU(1), bootUncacheability(false), miscRegValid(false) +TLB::TLB(const ArmTLBParams *p) + : BaseTLB(p), table(new TlbEntry[p->size]), size(p->size), + isStage2(p->is_stage2), tableWalker(p->walker), stage2Tlb(NULL), + stage2Mmu(NULL), rangeMRU(1), bootUncacheability(false), + miscRegValid(false), curTranType(NormalTran) { - table = new TlbEntry[size]; - memset(table, 0, sizeof(TlbEntry) * size); - tableWalker->setTlb(this); + + // Cache system-level properties + haveLPAE = tableWalker->haveLPAE(); + haveVirtualization = tableWalker->haveVirtualization(); + haveLargeAsid64 = tableWalker->haveLargeAsid64(); } TLB::~TLB() { - if (table) - delete [] table; + delete[] table; +} + +void +TLB::init() +{ + if (stage2Mmu && !isStage2) + stage2Tlb = stage2Mmu->stage2Tlb(); +} + +void +TLB::setMMU(Stage2MMU *m) +{ + stage2Mmu = m; + tableWalker->setMMU(m); } bool TLB::translateFunctional(ThreadContext *tc, Addr va, Addr &pa) { - if (!miscRegValid) - updateMiscReg(tc); - TlbEntry *e = lookup(va, contextId, true); + updateMiscReg(tc); + + if (directToStage2) { + assert(stage2Tlb); + return stage2Tlb->translateFunctional(tc, va, pa); + } + + TlbEntry *e = lookup(va, asid, vmid, isHyp, isSecure, true, false, + aarch64 ? aarch64EL : EL1); if (!e) return false; pa = e->pAddr(va); @@ -102,22 +127,24 @@ TLB::finalizePhysical(RequestPtr req, ThreadContext *tc, Mode mode) const } TlbEntry* -TLB::lookup(Addr va, uint8_t cid, bool functional) +TLB::lookup(Addr va, uint16_t asn, uint8_t vmid, bool hyp, bool secure, + bool functional, bool ignore_asn, uint8_t target_el) { TlbEntry *retval = NULL; - // Maitaining LRU array - + // Maintaining LRU array int x = 0; while (retval == NULL && x < size) { - if (table[x].match(va, cid)) { - - // We only move the hit entry ahead when the position is higher than rangeMRU + if ((!ignore_asn && table[x].match(va, asn, vmid, hyp, secure, false, + target_el)) || + (ignore_asn && table[x].match(va, vmid, hyp, secure, target_el))) { + // We only move the hit entry ahead when the position is higher + // than rangeMRU if (x > rangeMRU && !functional) { TlbEntry tmp_entry = table[x]; for(int i = x; i > 0; i--) - table[i] = table[i-1]; + table[i] = table[i - 1]; table[0] = tmp_entry; retval = &table[0]; } else { @@ -125,14 +152,19 @@ TLB::lookup(Addr va, uint8_t cid, bool functional) } break; } - x++; + ++x; } - DPRINTF(TLBVerbose, "Lookup %#x, cid %#x -> %s ppn %#x size: %#x pa: %#x ap:%d\n", - va, cid, retval ? "hit" : "miss", retval ? retval->pfn : 0, - retval ? retval->size : 0, retval ? retval->pAddr(va) : 0, - retval ? retval->ap : 0); - ; + DPRINTF(TLBVerbose, "Lookup %#x, asn %#x -> %s vmn 0x%x hyp %d secure %d " + "ppn %#x size: %#x pa: %#x ap:%d ns:%d nstid:%d g:%d asid: %d " + "el: %d\n", + va, asn, retval ? "hit" : "miss", vmid, hyp, secure, + retval ? retval->pfn : 0, retval ? retval->size : 0, + retval ? retval->pAddr(va) : 0, retval ? retval->ap : 0, + retval ? retval->ns : 0, retval ? retval->nstid : 0, + retval ? retval->global : 0, retval ? retval->asid : 0, + retval ? retval->el : 0, retval ? retval->el : 0); + return retval; } @@ -141,122 +173,176 @@ void TLB::insert(Addr addr, TlbEntry &entry) { DPRINTF(TLB, "Inserting entry into TLB with pfn:%#x size:%#x vpn: %#x" - " asid:%d N:%d global:%d valid:%d nc:%d sNp:%d xn:%d ap:%#x" - " domain:%#x\n", entry.pfn, entry.size, entry.vpn, entry.asid, - entry.N, entry.global, entry.valid, entry.nonCacheable, entry.sNp, - entry.xn, entry.ap, entry.domain); - - if (table[size-1].valid) - DPRINTF(TLB, " - Replacing Valid entry %#x, asn %d ppn %#x size: %#x ap:%d\n", + " asid:%d vmid:%d N:%d global:%d valid:%d nc:%d xn:%d" + " ap:%#x domain:%#x ns:%d nstid:%d isHyp:%d\n", entry.pfn, + entry.size, entry.vpn, entry.asid, entry.vmid, entry.N, + entry.global, entry.valid, entry.nonCacheable, entry.xn, + entry.ap, static_cast<uint8_t>(entry.domain), entry.ns, entry.nstid, + entry.isHyp); + + if (table[size - 1].valid) + DPRINTF(TLB, " - Replacing Valid entry %#x, asn %d vmn %d ppn %#x " + "size: %#x ap:%d ns:%d nstid:%d g:%d isHyp:%d el: %d\n", table[size-1].vpn << table[size-1].N, table[size-1].asid, - table[size-1].pfn << table[size-1].N, table[size-1].size, - table[size-1].ap); + table[size-1].vmid, table[size-1].pfn << table[size-1].N, + table[size-1].size, table[size-1].ap, table[size-1].ns, + table[size-1].nstid, table[size-1].global, table[size-1].isHyp, + table[size-1].el); //inserting to MRU position and evicting the LRU one - for(int i = size-1; i > 0; i--) - table[i] = table[i-1]; + for (int i = size - 1; i > 0; --i) + table[i] = table[i-1]; table[0] = entry; inserts++; } void -TLB::printTlb() +TLB::printTlb() const { int x = 0; TlbEntry *te; DPRINTF(TLB, "Current TLB contents:\n"); while (x < size) { - te = &table[x]; - if (te->valid) - DPRINTF(TLB, " * %#x, asn %d ppn %#x size: %#x ap:%d\n", - te->vpn << te->N, te->asid, te->pfn << te->N, te->size, te->ap); - x++; + te = &table[x]; + if (te->valid) + DPRINTF(TLB, " * %s\n", te->print()); + ++x; } } - void -TLB::flushAll() +TLB::flushAllSecurity(bool secure_lookup, uint8_t target_el, bool ignore_el) { - DPRINTF(TLB, "Flushing all TLB entries\n"); + DPRINTF(TLB, "Flushing all TLB entries (%s lookup)\n", + (secure_lookup ? "secure" : "non-secure")); int x = 0; TlbEntry *te; while (x < size) { - te = &table[x]; - if (te->valid) { - DPRINTF(TLB, " - %#x, asn %d ppn %#x size: %#x ap:%d\n", - te->vpn << te->N, te->asid, te->pfn << te->N, te->size, te->ap); - flushedEntries++; - } - x++; - } + te = &table[x]; + if (te->valid && secure_lookup == !te->nstid && + (te->vmid == vmid || secure_lookup) && + checkELMatch(target_el, te->el, ignore_el)) { - memset(table, 0, sizeof(TlbEntry) * size); + DPRINTF(TLB, " - %s\n", te->print()); + te->valid = false; + flushedEntries++; + } + ++x; + } flushTlb++; -} + // If there's a second stage TLB (and we're not it) then flush it as well + // if we're currently in hyp mode + if (!isStage2 && isHyp) { + stage2Tlb->flushAllSecurity(secure_lookup, true); + } +} void -TLB::flushMvaAsid(Addr mva, uint64_t asn) +TLB::flushAllNs(bool hyp, uint8_t target_el, bool ignore_el) { - DPRINTF(TLB, "Flushing mva %#x asid: %#x\n", mva, asn); + DPRINTF(TLB, "Flushing all NS TLB entries (%s lookup)\n", + (hyp ? "hyp" : "non-hyp")); + int x = 0; TlbEntry *te; + while (x < size) { + te = &table[x]; + if (te->valid && te->nstid && te->isHyp == hyp && + checkELMatch(target_el, te->el, ignore_el)) { - te = lookup(mva, asn); - while (te != NULL) { - DPRINTF(TLB, " - %#x, asn %d ppn %#x size: %#x ap:%d\n", - te->vpn << te->N, te->asid, te->pfn << te->N, te->size, te->ap); - te->valid = false; - flushedEntries++; - te = lookup(mva,asn); + DPRINTF(TLB, " - %s\n", te->print()); + flushedEntries++; + te->valid = false; + } + ++x; + } + + flushTlb++; + + // If there's a second stage TLB (and we're not it) then flush it as well + if (!isStage2 && !hyp) { + stage2Tlb->flushAllNs(false, true); } +} + +void +TLB::flushMvaAsid(Addr mva, uint64_t asn, bool secure_lookup, uint8_t target_el) +{ + DPRINTF(TLB, "Flushing TLB entries with mva: %#x, asid: %#x " + "(%s lookup)\n", mva, asn, (secure_lookup ? + "secure" : "non-secure")); + _flushMva(mva, asn, secure_lookup, false, false, target_el); flushTlbMvaAsid++; } void -TLB::flushAsid(uint64_t asn) +TLB::flushAsid(uint64_t asn, bool secure_lookup, uint8_t target_el) { - DPRINTF(TLB, "Flushing all entries with asid: %#x\n", asn); + DPRINTF(TLB, "Flushing TLB entries with asid: %#x (%s lookup)\n", asn, + (secure_lookup ? "secure" : "non-secure")); - int x = 0; + int x = 0 ; TlbEntry *te; while (x < size) { te = &table[x]; - if (te->asid == asn) { + if (te->valid && te->asid == asn && secure_lookup == !te->nstid && + (te->vmid == vmid || secure_lookup) && + checkELMatch(target_el, te->el, false)) { + te->valid = false; - DPRINTF(TLB, " - %#x, asn %d ppn %#x size: %#x ap:%d\n", - te->vpn << te->N, te->asid, te->pfn << te->N, te->size, te->ap); + DPRINTF(TLB, " - %s\n", te->print()); flushedEntries++; } - x++; + ++x; } flushTlbAsid++; } void -TLB::flushMva(Addr mva) +TLB::flushMva(Addr mva, bool secure_lookup, bool hyp, uint8_t target_el) { - DPRINTF(TLB, "Flushing all entries with mva: %#x\n", mva); + DPRINTF(TLB, "Flushing TLB entries with mva: %#x (%s lookup)\n", mva, + (secure_lookup ? "secure" : "non-secure")); + _flushMva(mva, 0xbeef, secure_lookup, hyp, true, target_el); + flushTlbMva++; +} - int x = 0; +void +TLB::_flushMva(Addr mva, uint64_t asn, bool secure_lookup, bool hyp, + bool ignore_asn, uint8_t target_el) +{ TlbEntry *te; - - while (x < size) { - te = &table[x]; - Addr v = te->vpn << te->N; - if (mva >= v && mva < v + te->size) { + // D5.7.2: Sign-extend address to 64 bits + mva = sext<56>(mva); + te = lookup(mva, asn, vmid, hyp, secure_lookup, false, ignore_asn, + target_el); + while (te != NULL) { + if (secure_lookup == !te->nstid) { + DPRINTF(TLB, " - %s\n", te->print()); te->valid = false; - DPRINTF(TLB, " - %#x, asn %d ppn %#x size: %#x ap:%d\n", - te->vpn << te->N, te->asid, te->pfn << te->N, te->size, te->ap); flushedEntries++; } - x++; + te = lookup(mva, asn, vmid, hyp, secure_lookup, false, ignore_asn, + target_el); } - flushTlbMva++; +} + +bool +TLB::checkELMatch(uint8_t target_el, uint8_t tentry_el, bool ignore_el) +{ + bool elMatch = true; + if (!ignore_el) { + if (target_el == 2 || target_el == 3) { + elMatch = (tentry_el == target_el); + } else { + elMatch = (tentry_el == 0) || (tentry_el == 1); + } + } + return elMatch; } void @@ -273,6 +359,10 @@ TLB::serialize(ostream &os) DPRINTF(Checkpoint, "Serializing Arm TLB\n"); SERIALIZE_SCALAR(_attr); + SERIALIZE_SCALAR(haveLPAE); + SERIALIZE_SCALAR(directToStage2); + SERIALIZE_SCALAR(stage2Req); + SERIALIZE_SCALAR(bootUncacheability); int num_entries = size; SERIALIZE_SCALAR(num_entries); @@ -288,6 +378,11 @@ TLB::unserialize(Checkpoint *cp, const string §ion) DPRINTF(Checkpoint, "Unserializing Arm TLB\n"); UNSERIALIZE_SCALAR(_attr); + UNSERIALIZE_SCALAR(haveLPAE); + UNSERIALIZE_SCALAR(directToStage2); + UNSERIALIZE_SCALAR(stage2Req); + UNSERIALIZE_SCALAR(bootUncacheability); + int num_entries; UNSERIALIZE_SCALAR(num_entries); for(int i = 0; i < min(size, num_entries); i++){ @@ -413,11 +508,15 @@ TLB::regStats() Fault TLB::translateSe(RequestPtr req, ThreadContext *tc, Mode mode, - Translation *translation, bool &delay, bool timing) + Translation *translation, bool &delay, bool timing) { - if (!miscRegValid) - updateMiscReg(tc); - Addr vaddr = req->getVaddr(); + updateMiscReg(tc); + Addr vaddr_tainted = req->getVaddr(); + Addr vaddr = 0; + if (aarch64) + vaddr = purifyTaggedAddr(vaddr_tainted, tc, aarch64EL); + else + vaddr = vaddr_tainted; uint32_t flags = req->getFlags(); bool is_fetch = (mode == Execute); @@ -426,8 +525,12 @@ TLB::translateSe(RequestPtr req, ThreadContext *tc, Mode mode, if (!is_fetch) { assert(flags & MustBeOne); if (sctlr.a || !(flags & AllowUnaligned)) { - if (vaddr & flags & AlignmentMask) { - return new DataAbort(vaddr, 0, is_write, ArmFault::AlignmentFault); + if (vaddr & mask(flags & AlignmentMask)) { + // LPAE is always disabled in SE mode + return new DataAbort(vaddr_tainted, + TlbEntry::DomainType::NoAccess, is_write, + ArmFault::AlignmentFault, isStage2, + ArmFault::VmsaTran); } } } @@ -436,56 +539,411 @@ TLB::translateSe(RequestPtr req, ThreadContext *tc, Mode mode, Process *p = tc->getProcessPtr(); if (!p->pTable->translate(vaddr, paddr)) - return Fault(new GenericPageTableFault(vaddr)); + return Fault(new GenericPageTableFault(vaddr_tainted)); req->setPaddr(paddr); return NoFault; } Fault -TLB::trickBoxCheck(RequestPtr req, Mode mode, uint8_t domain, bool sNp) +TLB::trickBoxCheck(RequestPtr req, Mode mode, TlbEntry::DomainType domain) { return NoFault; } Fault -TLB::walkTrickBoxCheck(Addr pa, Addr va, Addr sz, bool is_exec, - bool is_write, uint8_t domain, bool sNp) +TLB::walkTrickBoxCheck(Addr pa, bool is_secure, Addr va, Addr sz, bool is_exec, + bool is_write, TlbEntry::DomainType domain, LookupLevel lookup_level) +{ + return NoFault; +} + +Fault +TLB::checkPermissions(TlbEntry *te, RequestPtr req, Mode mode) +{ + Addr vaddr = req->getVaddr(); // 32-bit don't have to purify + uint32_t flags = req->getFlags(); + bool is_fetch = (mode == Execute); + bool is_write = (mode == Write); + bool is_priv = isPriv && !(flags & UserMode); + + // Get the translation type from the actuall table entry + ArmFault::TranMethod tranMethod = te->longDescFormat ? ArmFault::LpaeTran + : ArmFault::VmsaTran; + + // If this is the second stage of translation and the request is for a + // stage 1 page table walk then we need to check the HCR.PTW bit. This + // allows us to generate a fault if the request targets an area marked + // as a device or strongly ordered. + if (isStage2 && req->isPTWalk() && hcr.ptw && + (te->mtype != TlbEntry::MemoryType::Normal)) { + return new DataAbort(vaddr, te->domain, is_write, + ArmFault::PermissionLL + te->lookupLevel, + isStage2, tranMethod); + } + + // Generate an alignment fault for unaligned data accesses to device or + // strongly ordered memory + if (!is_fetch) { + if (te->mtype != TlbEntry::MemoryType::Normal) { + if (vaddr & mask(flags & AlignmentMask)) { + alignFaults++; + return new DataAbort(vaddr, TlbEntry::DomainType::NoAccess, is_write, + ArmFault::AlignmentFault, isStage2, + tranMethod); + } + } + } + + if (te->nonCacheable) { + // Prevent prefetching from I/O devices. + if (req->isPrefetch()) { + // Here we can safely use the fault status for the short + // desc. format in all cases + return new PrefetchAbort(vaddr, ArmFault::PrefetchUncacheable, + isStage2, tranMethod); + } + } + + if (!te->longDescFormat) { + switch ((dacr >> (static_cast<uint8_t>(te->domain) * 2)) & 0x3) { + case 0: + domainFaults++; + DPRINTF(TLB, "TLB Fault: Data abort on domain. DACR: %#x" + " domain: %#x write:%d\n", dacr, + static_cast<uint8_t>(te->domain), is_write); + if (is_fetch) + return new PrefetchAbort(vaddr, + ArmFault::DomainLL + te->lookupLevel, + isStage2, tranMethod); + else + return new DataAbort(vaddr, te->domain, is_write, + ArmFault::DomainLL + te->lookupLevel, + isStage2, tranMethod); + case 1: + // Continue with permissions check + break; + case 2: + panic("UNPRED domain\n"); + case 3: + return NoFault; + } + } + + // The 'ap' variable is AP[2:0] or {AP[2,1],1b'0}, i.e. always three bits + uint8_t ap = te->longDescFormat ? te->ap << 1 : te->ap; + uint8_t hap = te->hap; + + if (sctlr.afe == 1 || te->longDescFormat) + ap |= 1; + + bool abt; + bool isWritable = true; + // If this is a stage 2 access (eg for reading stage 1 page table entries) + // then don't perform the AP permissions check, we stil do the HAP check + // below. + if (isStage2) { + abt = false; + } else { + switch (ap) { + case 0: + DPRINTF(TLB, "Access permissions 0, checking rs:%#x\n", + (int)sctlr.rs); + if (!sctlr.xp) { + switch ((int)sctlr.rs) { + case 2: + abt = is_write; + break; + case 1: + abt = is_write || !is_priv; + break; + case 0: + case 3: + default: + abt = true; + break; + } + } else { + abt = true; + } + break; + case 1: + abt = !is_priv; + break; + case 2: + abt = !is_priv && is_write; + isWritable = is_priv; + break; + case 3: + abt = false; + break; + case 4: + panic("UNPRED premissions\n"); + case 5: + abt = !is_priv || is_write; + isWritable = false; + break; + case 6: + case 7: + abt = is_write; + isWritable = false; + break; + default: + panic("Unknown permissions %#x\n", ap); + } + } + + bool hapAbt = is_write ? !(hap & 2) : !(hap & 1); + bool xn = te->xn || (isWritable && sctlr.wxn) || + (ap == 3 && sctlr.uwxn && is_priv); + if (is_fetch && (abt || xn || + (te->longDescFormat && te->pxn && !is_priv) || + (isSecure && te->ns && scr.sif))) { + permsFaults++; + DPRINTF(TLB, "TLB Fault: Prefetch abort on permission check. AP:%d " + "priv:%d write:%d ns:%d sif:%d sctlr.afe: %d \n", + ap, is_priv, is_write, te->ns, scr.sif,sctlr.afe); + return new PrefetchAbort(vaddr, + ArmFault::PermissionLL + te->lookupLevel, + isStage2, tranMethod); + } else if (abt | hapAbt) { + permsFaults++; + DPRINTF(TLB, "TLB Fault: Data abort on permission check. AP:%d priv:%d" + " write:%d\n", ap, is_priv, is_write); + return new DataAbort(vaddr, te->domain, is_write, + ArmFault::PermissionLL + te->lookupLevel, + isStage2 | !abt, tranMethod); + } + return NoFault; +} + + +Fault +TLB::checkPermissions64(TlbEntry *te, RequestPtr req, Mode mode, + ThreadContext *tc) { + assert(aarch64); + + Addr vaddr_tainted = req->getVaddr(); + Addr vaddr = purifyTaggedAddr(vaddr_tainted, tc, aarch64EL); + + uint32_t flags = req->getFlags(); + bool is_fetch = (mode == Execute); + bool is_write = (mode == Write); + bool is_priv M5_VAR_USED = isPriv && !(flags & UserMode); + + updateMiscReg(tc, curTranType); + + // If this is the second stage of translation and the request is for a + // stage 1 page table walk then we need to check the HCR.PTW bit. This + // allows us to generate a fault if the request targets an area marked + // as a device or strongly ordered. + if (isStage2 && req->isPTWalk() && hcr.ptw && + (te->mtype != TlbEntry::MemoryType::Normal)) { + return new DataAbort(vaddr_tainted, te->domain, is_write, + ArmFault::PermissionLL + te->lookupLevel, + isStage2, ArmFault::LpaeTran); + } + + // Generate an alignment fault for unaligned accesses to device or + // strongly ordered memory + if (!is_fetch) { + if (te->mtype != TlbEntry::MemoryType::Normal) { + if (vaddr & mask(flags & AlignmentMask)) { + alignFaults++; + return new DataAbort(vaddr_tainted, + TlbEntry::DomainType::NoAccess, is_write, + ArmFault::AlignmentFault, isStage2, + ArmFault::LpaeTran); + } + } + } + + if (te->nonCacheable) { + // Prevent prefetching from I/O devices. + if (req->isPrefetch()) { + // Here we can safely use the fault status for the short + // desc. format in all cases + return new PrefetchAbort(vaddr_tainted, + ArmFault::PrefetchUncacheable, + isStage2, ArmFault::LpaeTran); + } + } + + uint8_t ap = 0x3 & (te->ap); // 2-bit access protection field + bool grant = false; + + uint8_t xn = te->xn; + uint8_t pxn = te->pxn; + bool r = !is_write && !is_fetch; + bool w = is_write; + bool x = is_fetch; + DPRINTF(TLBVerbose, "Checking permissions: ap:%d, xn:%d, pxn:%d, r:%d, " + "w:%d, x:%d\n", ap, xn, pxn, r, w, x); + + if (isStage2) { + panic("Virtualization in AArch64 state is not supported yet"); + } else { + switch (aarch64EL) { + case EL0: + { + uint8_t perm = (ap << 2) | (xn << 1) | pxn; + switch (perm) { + case 0: + case 1: + case 8: + case 9: + grant = x; + break; + case 4: + case 5: + grant = r || w || (x && !sctlr.wxn); + break; + case 6: + case 7: + grant = r || w; + break; + case 12: + case 13: + grant = r || x; + break; + case 14: + case 15: + grant = r; + break; + default: + grant = false; + } + } + break; + case EL1: + { + uint8_t perm = (ap << 2) | (xn << 1) | pxn; + switch (perm) { + case 0: + case 2: + grant = r || w || (x && !sctlr.wxn); + break; + case 1: + case 3: + case 4: + case 5: + case 6: + case 7: + // regions that are writeable at EL0 should not be + // executable at EL1 + grant = r || w; + break; + case 8: + case 10: + case 12: + case 14: + grant = r || x; + break; + case 9: + case 11: + case 13: + case 15: + grant = r; + break; + default: + grant = false; + } + } + break; + case EL2: + case EL3: + { + uint8_t perm = (ap & 0x2) | xn; + switch (perm) { + case 0: + grant = r || w || (x && !sctlr.wxn) ; + break; + case 1: + grant = r || w; + break; + case 2: + grant = r || x; + break; + case 3: + grant = r; + break; + default: + grant = false; + } + } + break; + } + } + + if (!grant) { + if (is_fetch) { + permsFaults++; + DPRINTF(TLB, "TLB Fault: Prefetch abort on permission check. " + "AP:%d priv:%d write:%d ns:%d sif:%d " + "sctlr.afe: %d\n", + ap, is_priv, is_write, te->ns, scr.sif, sctlr.afe); + // Use PC value instead of vaddr because vaddr might be aligned to + // cache line and should not be the address reported in FAR + return new PrefetchAbort(req->getPC(), + ArmFault::PermissionLL + te->lookupLevel, + isStage2, ArmFault::LpaeTran); + } else { + permsFaults++; + DPRINTF(TLB, "TLB Fault: Data abort on permission check. AP:%d " + "priv:%d write:%d\n", ap, is_priv, is_write); + return new DataAbort(vaddr_tainted, te->domain, is_write, + ArmFault::PermissionLL + te->lookupLevel, + isStage2, ArmFault::LpaeTran); + } + } + return NoFault; } Fault TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode, - Translation *translation, bool &delay, bool timing, bool functional) + Translation *translation, bool &delay, bool timing, + TLB::ArmTranslationType tranType, bool functional) { // No such thing as a functional timing access assert(!(timing && functional)); - if (!miscRegValid) { - updateMiscReg(tc); - DPRINTF(TLBVerbose, "TLB variables changed!\n"); - } + updateMiscReg(tc, tranType); - Addr vaddr = req->getVaddr(); + Addr vaddr_tainted = req->getVaddr(); + Addr vaddr = 0; + if (aarch64) + vaddr = purifyTaggedAddr(vaddr_tainted, tc, aarch64EL); + else + vaddr = vaddr_tainted; uint32_t flags = req->getFlags(); - bool is_fetch = (mode == Execute); - bool is_write = (mode == Write); - bool is_priv = isPriv && !(flags & UserMode); + bool is_fetch = (mode == Execute); + bool is_write = (mode == Write); + bool long_desc_format = aarch64 || (haveLPAE && ttbcr.eae); + ArmFault::TranMethod tranMethod = long_desc_format ? ArmFault::LpaeTran + : ArmFault::VmsaTran; + + req->setAsid(asid); - req->setAsid(contextId.asid); - if (is_priv) - req->setFlags(Request::PRIVILEGED); + DPRINTF(TLBVerbose, "CPSR is priv:%d UserMode:%d secure:%d S1S2NsTran:%d\n", + isPriv, flags & UserMode, isSecure, tranType & S1S2NsTran); - req->taskId(tc->getCpuPtr()->taskId()); + DPRINTF(TLB, "translateFs addr %#x, mode %d, st2 %d, scr %#x sctlr %#x " + "flags %#x tranType 0x%x\n", vaddr_tainted, mode, isStage2, + scr, sctlr, flags, tranType); + + // Generate an alignment fault for unaligned PC + if (aarch64 && is_fetch && (req->getPC() & mask(2))) { + return new PCAlignmentFault(req->getPC()); + } - DPRINTF(TLBVerbose, "CPSR is priv:%d UserMode:%d\n", - isPriv, flags & UserMode); // If this is a clrex instruction, provide a PA of 0 with no fault // This will force the monitor to set the tracked address to 0 // a bit of a hack but this effectively clrears this processors monitor if (flags & Request::CLEAR_LL){ + // @todo: check implications of security extensions req->setPaddr(0); req->setFlags(Request::UNCACHEABLE); req->setFlags(Request::CLEAR_LL); @@ -498,209 +956,139 @@ TLB::translateFs(RequestPtr req, ThreadContext *tc, Mode mode, if (!is_fetch) { assert(flags & MustBeOne); if (sctlr.a || !(flags & AllowUnaligned)) { - if (vaddr & flags & AlignmentMask) { + if (vaddr & mask(flags & AlignmentMask)) { alignFaults++; - return new DataAbort(vaddr, 0, is_write, ArmFault::AlignmentFault); + return new DataAbort(vaddr_tainted, + TlbEntry::DomainType::NoAccess, is_write, + ArmFault::AlignmentFault, isStage2, + tranMethod); } } } - Fault fault; + // If guest MMU is off or hcr.vm=0 go straight to stage2 + if ((isStage2 && !hcr.vm) || (!isStage2 && !sctlr.m)) { - if (!sctlr.m) { req->setPaddr(vaddr); - if (sctlr.tre == 0) { + // When the MMU is off the security attribute corresponds to the + // security state of the processor + if (isSecure) + req->setFlags(Request::SECURE); + + // @todo: double check this (ARM ARM issue C B3.2.1) + if (long_desc_format || sctlr.tre == 0) { req->setFlags(Request::UNCACHEABLE); } else { if (nmrr.ir0 == 0 || nmrr.or0 == 0 || prrr.tr0 != 0x2) - req->setFlags(Request::UNCACHEABLE); + req->setFlags(Request::UNCACHEABLE); } // Set memory attributes TlbEntry temp_te; - tableWalker->memAttrs(tc, temp_te, sctlr, 0, 1); - temp_te.shareable = true; + temp_te.ns = !isSecure; + if (isStage2 || hcr.dc == 0 || isSecure || + (isHyp && !(tranType & S1CTran))) { + + temp_te.mtype = is_fetch ? TlbEntry::MemoryType::Normal + : TlbEntry::MemoryType::StronglyOrdered; + temp_te.innerAttrs = 0x0; + temp_te.outerAttrs = 0x0; + temp_te.shareable = true; + temp_te.outerShareable = true; + } else { + temp_te.mtype = TlbEntry::MemoryType::Normal; + temp_te.innerAttrs = 0x3; + temp_te.outerAttrs = 0x3; + temp_te.shareable = false; + temp_te.outerShareable = false; + } + temp_te.setAttributes(long_desc_format); DPRINTF(TLBVerbose, "(No MMU) setting memory attributes: shareable:\ - %d, innerAttrs: %d, outerAttrs: %d\n", temp_te.shareable, - temp_te.innerAttrs, temp_te.outerAttrs); + %d, innerAttrs: %d, outerAttrs: %d, isStage2: %d\n", + temp_te.shareable, temp_te.innerAttrs, temp_te.outerAttrs, + isStage2); setAttr(temp_te.attributes); - return trickBoxCheck(req, mode, 0, false); + return trickBoxCheck(req, mode, TlbEntry::DomainType::NoAccess); } - DPRINTF(TLBVerbose, "Translating vaddr=%#x context=%d\n", vaddr, contextId); + DPRINTF(TLBVerbose, "Translating %s=%#x context=%d\n", + isStage2 ? "IPA" : "VA", vaddr_tainted, asid); // Translation enabled - TlbEntry *te = lookup(vaddr, contextId); - if (te == NULL) { - if (req->isPrefetch()){ - //if the request is a prefetch don't attempt to fill the TLB - //or go any further with the memory access - prefetchFaults++; - return new PrefetchAbort(vaddr, ArmFault::PrefetchTLBMiss); - } - - if (is_fetch) - instMisses++; - else if (is_write) - writeMisses++; - else - readMisses++; + TlbEntry *te = NULL; + TlbEntry mergeTe; + Fault fault = getResultTe(&te, req, tc, mode, translation, timing, + functional, &mergeTe); + // only proceed if we have a valid table entry + if ((te == NULL) && (fault == NoFault)) delay = true; - // start translation table walk, pass variables rather than - // re-retreaving in table walker for speed - DPRINTF(TLB, "TLB Miss: Starting hardware table walker for %#x(%d)\n", - vaddr, contextId); - fault = tableWalker->walk(req, tc, contextId, mode, translation, - timing, functional); - if (timing && fault == NoFault) { - delay = true; - // for timing mode, return and wait for table walk - return fault; + // If we have the table entry transfer some of the attributes to the + // request that triggered the translation + if (te != NULL) { + // Set memory attributes + DPRINTF(TLBVerbose, + "Setting memory attributes: shareable: %d, innerAttrs: %d, \ + outerAttrs: %d, mtype: %d, isStage2: %d\n", + te->shareable, te->innerAttrs, te->outerAttrs, + static_cast<uint8_t>(te->mtype), isStage2); + setAttr(te->attributes); + if (te->nonCacheable) { + req->setFlags(Request::UNCACHEABLE); } - if (fault) - return fault; - - te = lookup(vaddr, contextId); - if (!te) - printTlb(); - assert(te); - } else { - if (is_fetch) - instHits++; - else if (is_write) - writeHits++; - else - readHits++; - } - - // Set memory attributes - DPRINTF(TLBVerbose, - "Setting memory attributes: shareable: %d, innerAttrs: %d, \ - outerAttrs: %d\n", - te->shareable, te->innerAttrs, te->outerAttrs); - setAttr(te->attributes); - if (te->nonCacheable) { - req->setFlags(Request::UNCACHEABLE); - // Prevent prefetching from I/O devices. - if (req->isPrefetch()) { - return new PrefetchAbort(vaddr, ArmFault::PrefetchUncacheable); + if (!bootUncacheability && + ((ArmSystem*)tc->getSystemPtr())->adderBootUncacheable(vaddr)) { + req->setFlags(Request::UNCACHEABLE); } - } - - if (!bootUncacheability && - ((ArmSystem*)tc->getSystemPtr())->adderBootUncacheable(vaddr)) - req->setFlags(Request::UNCACHEABLE); - switch ( (dacr >> (te->domain * 2)) & 0x3) { - case 0: - domainFaults++; - DPRINTF(TLB, "TLB Fault: Data abort on domain. DACR: %#x domain: %#x" - " write:%d sNp:%d\n", dacr, te->domain, is_write, te->sNp); - if (is_fetch) - return new PrefetchAbort(vaddr, - (te->sNp ? ArmFault::Domain0 : ArmFault::Domain1)); - else - return new DataAbort(vaddr, te->domain, is_write, - (te->sNp ? ArmFault::Domain0 : ArmFault::Domain1)); - case 1: - // Continue with permissions check - break; - case 2: - panic("UNPRED domain\n"); - case 3: req->setPaddr(te->pAddr(vaddr)); - fault = trickBoxCheck(req, mode, te->domain, te->sNp); - if (fault) - return fault; - return NoFault; - } - - uint8_t ap = te->ap; - - if (sctlr.afe == 1) - ap |= 1; - - bool abt; + if (isSecure && !te->ns) { + req->setFlags(Request::SECURE); + } + if ((!is_fetch) && (vaddr & mask(flags & AlignmentMask)) && + (te->mtype != TlbEntry::MemoryType::Normal)) { + // Unaligned accesses to Device memory should always cause an + // abort regardless of sctlr.a + alignFaults++; + return new DataAbort(vaddr_tainted, + TlbEntry::DomainType::NoAccess, is_write, + ArmFault::AlignmentFault, isStage2, + tranMethod); + } - /* if (!sctlr.xp) - ap &= 0x3; -*/ - switch (ap) { - case 0: - DPRINTF(TLB, "Access permissions 0, checking rs:%#x\n", (int)sctlr.rs); - if (!sctlr.xp) { - switch ((int)sctlr.rs) { - case 2: - abt = is_write; - break; - case 1: - abt = is_write || !is_priv; - break; - case 0: - case 3: - default: - abt = true; - break; - } - } else { - abt = true; + // Check for a trickbox generated address fault + if (fault == NoFault) { + fault = trickBoxCheck(req, mode, te->domain); } - break; - case 1: - abt = !is_priv; - break; - case 2: - abt = !is_priv && is_write; - break; - case 3: - abt = false; - break; - case 4: - panic("UNPRED premissions\n"); - case 5: - abt = !is_priv || is_write; - break; - case 6: - case 7: - abt = is_write; - break; - default: - panic("Unknown permissions\n"); - } - if ((is_fetch) && (abt || te->xn)) { - permsFaults++; - DPRINTF(TLB, "TLB Fault: Prefetch abort on permission check. AP:%d priv:%d" - " write:%d sNp:%d\n", ap, is_priv, is_write, te->sNp); - return new PrefetchAbort(vaddr, - (te->sNp ? ArmFault::Permission0 : - ArmFault::Permission1)); - } else if (abt) { - permsFaults++; - DPRINTF(TLB, "TLB Fault: Data abort on permission check. AP:%d priv:%d" - " write:%d sNp:%d\n", ap, is_priv, is_write, te->sNp); - return new DataAbort(vaddr, te->domain, is_write, - (te->sNp ? ArmFault::Permission0 : - ArmFault::Permission1)); } - req->setPaddr(te->pAddr(vaddr)); - // Check for a trickbox generated address fault - fault = trickBoxCheck(req, mode, te->domain, te->sNp); - if (fault) - return fault; + // Generate Illegal Inst Set State fault if IL bit is set in CPSR + if (fault == NoFault) { + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + if (aarch64 && is_fetch && cpsr.il == 1) { + return new IllegalInstSetStateFault(); + } + } - return NoFault; + return fault; } Fault -TLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode) +TLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode, + TLB::ArmTranslationType tranType) { + updateMiscReg(tc, tranType); + + if (directToStage2) { + assert(stage2Tlb); + return stage2Tlb->translateAtomic(req, tc, mode, tranType); + } + bool delay = false; Fault fault; if (FullSystem) - fault = translateFs(req, tc, mode, NULL, delay, false); + fault = translateFs(req, tc, mode, NULL, delay, false, tranType); else fault = translateSe(req, tc, mode, NULL, delay, false); assert(!delay); @@ -708,13 +1096,21 @@ TLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode) } Fault -TLB::translateFunctional(RequestPtr req, ThreadContext *tc, Mode mode) +TLB::translateFunctional(RequestPtr req, ThreadContext *tc, Mode mode, + TLB::ArmTranslationType tranType) { + updateMiscReg(tc, tranType); + + if (directToStage2) { + assert(stage2Tlb); + return stage2Tlb->translateFunctional(req, tc, mode, tranType); + } + bool delay = false; Fault fault; if (FullSystem) - fault = translateFs(req, tc, mode, NULL, delay, false, true); - else + fault = translateFs(req, tc, mode, NULL, delay, false, tranType, true); + else fault = translateSe(req, tc, mode, NULL, delay, false); assert(!delay); return fault; @@ -722,21 +1118,45 @@ TLB::translateFunctional(RequestPtr req, ThreadContext *tc, Mode mode) Fault TLB::translateTiming(RequestPtr req, ThreadContext *tc, - Translation *translation, Mode mode) + Translation *translation, Mode mode, TLB::ArmTranslationType tranType) { + updateMiscReg(tc, tranType); + + if (directToStage2) { + assert(stage2Tlb); + return stage2Tlb->translateTiming(req, tc, translation, mode, tranType); + } + assert(translation); + + return translateComplete(req, tc, translation, mode, tranType, isStage2); +} + +Fault +TLB::translateComplete(RequestPtr req, ThreadContext *tc, + Translation *translation, Mode mode, TLB::ArmTranslationType tranType, + bool callFromS2) +{ bool delay = false; Fault fault; if (FullSystem) - fault = translateFs(req, tc, mode, translation, delay, true); + fault = translateFs(req, tc, mode, translation, delay, true, tranType); else fault = translateSe(req, tc, mode, translation, delay, true); DPRINTF(TLBVerbose, "Translation returning delay=%d fault=%d\n", delay, fault != NoFault); - if (!delay) - translation->finish(fault, req, tc, mode); - else - translation->markDelayed(); + // If we have a translation, and we're not in the middle of doing a stage + // 2 translation tell the translation that we've either finished or its + // going to take a while. By not doing this when we're in the middle of a + // stage 2 translation we prevent marking the translation as delayed twice, + // one when the translation starts and again when the stage 1 translation + // completes. + if (translation && (callFromS2 || !stage2Req || req->hasPaddr() || fault != NoFault)) { + if (!delay) + translation->finish(fault, req, tc, mode); + else + translation->markDelayed(); + } return fault; } @@ -746,7 +1166,229 @@ TLB::getMasterPort() return &tableWalker->getMasterPort("port"); } +DmaPort& +TLB::getWalkerPort() +{ + return tableWalker->getWalkerPort(); +} + +void +TLB::updateMiscReg(ThreadContext *tc, ArmTranslationType tranType) +{ + // check if the regs have changed, or the translation mode is different. + // NOTE: the tran type doesn't affect stage 2 TLB's as they only handle + // one type of translation anyway + if (miscRegValid && ((tranType == curTranType) || isStage2)) { + return; + } + + DPRINTF(TLBVerbose, "TLB variables changed!\n"); + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + // Dependencies: SCR/SCR_EL3, CPSR + isSecure = inSecureState(tc); + isSecure &= (tranType & HypMode) == 0; + isSecure &= (tranType & S1S2NsTran) == 0; + aarch64 = !cpsr.width; + if (aarch64) { // AArch64 + aarch64EL = (ExceptionLevel) (uint8_t) cpsr.el; + switch (aarch64EL) { + case EL0: + case EL1: + { + sctlr = tc->readMiscReg(MISCREG_SCTLR_EL1); + ttbcr = tc->readMiscReg(MISCREG_TCR_EL1); + uint64_t ttbr_asid = ttbcr.a1 ? + tc->readMiscReg(MISCREG_TTBR1_EL1) : + tc->readMiscReg(MISCREG_TTBR0_EL1); + asid = bits(ttbr_asid, + (haveLargeAsid64 && ttbcr.as) ? 63 : 55, 48); + } + break; + case EL2: + sctlr = tc->readMiscReg(MISCREG_SCTLR_EL2); + ttbcr = tc->readMiscReg(MISCREG_TCR_EL2); + asid = -1; + break; + case EL3: + sctlr = tc->readMiscReg(MISCREG_SCTLR_EL3); + ttbcr = tc->readMiscReg(MISCREG_TCR_EL3); + asid = -1; + break; + } + scr = tc->readMiscReg(MISCREG_SCR_EL3); + isPriv = aarch64EL != EL0; + // @todo: modify this behaviour to support Virtualization in + // AArch64 + vmid = 0; + isHyp = false; + directToStage2 = false; + stage2Req = false; + } else { // AArch32 + sctlr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_SCTLR, tc, + !isSecure)); + ttbcr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_TTBCR, tc, + !isSecure)); + scr = tc->readMiscReg(MISCREG_SCR); + isPriv = cpsr.mode != MODE_USER; + if (haveLPAE && ttbcr.eae) { + // Long-descriptor translation table format in use + uint64_t ttbr_asid = tc->readMiscReg( + flattenMiscRegNsBanked(ttbcr.a1 ? MISCREG_TTBR1 + : MISCREG_TTBR0, + tc, !isSecure)); + asid = bits(ttbr_asid, 55, 48); + } else { + // Short-descriptor translation table format in use + CONTEXTIDR context_id = tc->readMiscReg(flattenMiscRegNsBanked( + MISCREG_CONTEXTIDR, tc,!isSecure)); + asid = context_id.asid; + } + prrr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_PRRR, tc, + !isSecure)); + nmrr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_NMRR, tc, + !isSecure)); + dacr = tc->readMiscReg(flattenMiscRegNsBanked(MISCREG_DACR, tc, + !isSecure)); + hcr = tc->readMiscReg(MISCREG_HCR); + + if (haveVirtualization) { + vmid = bits(tc->readMiscReg(MISCREG_VTTBR), 55, 48); + isHyp = cpsr.mode == MODE_HYP; + isHyp |= tranType & HypMode; + isHyp &= (tranType & S1S2NsTran) == 0; + isHyp &= (tranType & S1CTran) == 0; + if (isHyp) { + sctlr = tc->readMiscReg(MISCREG_HSCTLR); + } + // Work out if we should skip the first stage of translation and go + // directly to stage 2. This value is cached so we don't have to + // compute it for every translation. + stage2Req = hcr.vm && !isStage2 && !isHyp && !isSecure && + !(tranType & S1CTran); + directToStage2 = stage2Req && !sctlr.m; + } else { + vmid = 0; + stage2Req = false; + isHyp = false; + directToStage2 = false; + } + } + miscRegValid = true; + curTranType = tranType; +} + +Fault +TLB::getTE(TlbEntry **te, RequestPtr req, ThreadContext *tc, Mode mode, + Translation *translation, bool timing, bool functional, + bool is_secure, TLB::ArmTranslationType tranType) +{ + bool is_fetch = (mode == Execute); + bool is_write = (mode == Write); + + Addr vaddr_tainted = req->getVaddr(); + Addr vaddr = 0; + ExceptionLevel target_el = aarch64 ? aarch64EL : EL1; + if (aarch64) { + vaddr = purifyTaggedAddr(vaddr_tainted, tc, target_el); + } else { + vaddr = vaddr_tainted; + } + *te = lookup(vaddr, asid, vmid, isHyp, is_secure, false, false, target_el); + if (*te == NULL) { + if (req->isPrefetch()) { + // if the request is a prefetch don't attempt to fill the TLB or go + // any further with the memory access (here we can safely use the + // fault status for the short desc. format in all cases) + prefetchFaults++; + return new PrefetchAbort(vaddr_tainted, ArmFault::PrefetchTLBMiss, isStage2); + } + + if (is_fetch) + instMisses++; + else if (is_write) + writeMisses++; + else + readMisses++; + + // start translation table walk, pass variables rather than + // re-retreaving in table walker for speed + DPRINTF(TLB, "TLB Miss: Starting hardware table walker for %#x(%d:%d)\n", + vaddr_tainted, asid, vmid); + Fault fault; + fault = tableWalker->walk(req, tc, asid, vmid, isHyp, mode, + translation, timing, functional, is_secure, + tranType); + // for timing mode, return and wait for table walk, + if (timing || fault != NoFault) { + return fault; + } + + *te = lookup(vaddr, asid, vmid, isHyp, is_secure, false, false, target_el); + if (!*te) + printTlb(); + assert(*te); + } else { + if (is_fetch) + instHits++; + else if (is_write) + writeHits++; + else + readHits++; + } + return NoFault; +} +Fault +TLB::getResultTe(TlbEntry **te, RequestPtr req, ThreadContext *tc, Mode mode, + Translation *translation, bool timing, bool functional, + TlbEntry *mergeTe) +{ + Fault fault; + TlbEntry *s1Te = NULL; + + Addr vaddr_tainted = req->getVaddr(); + + // Get the stage 1 table entry + fault = getTE(&s1Te, req, tc, mode, translation, timing, functional, + isSecure, curTranType); + // only proceed if we have a valid table entry + if ((s1Te != NULL) && (fault == NoFault)) { + // Check stage 1 permissions before checking stage 2 + if (aarch64) + fault = checkPermissions64(s1Te, req, mode, tc); + else + fault = checkPermissions(s1Te, req, mode); + if (stage2Req & (fault == NoFault)) { + Stage2LookUp *s2Lookup = new Stage2LookUp(this, stage2Tlb, *s1Te, + req, translation, mode, timing, functional, curTranType); + fault = s2Lookup->getTe(tc, mergeTe); + if (s2Lookup->isComplete()) { + *te = mergeTe; + // We've finished with the lookup so delete it + delete s2Lookup; + } else { + // The lookup hasn't completed, so we can't delete it now. We + // get round this by asking the object to self delete when the + // translation is complete. + s2Lookup->setSelfDelete(); + } + } else { + // This case deals with an S1 hit (or bypass), followed by + // an S2 hit-but-perms issue + if (isStage2) { + DPRINTF(TLBVerbose, "s2TLB: reqVa %#x, reqPa %#x, fault %p\n", + vaddr_tainted, req->hasPaddr() ? req->getPaddr() : ~0, fault); + if (fault != NoFault) { + ArmFault *armFault = reinterpret_cast<ArmFault *>(fault.get()); + armFault->annotate(ArmFault::S1PTW, false); + armFault->annotate(ArmFault::OVA, vaddr_tainted); + } + } + *te = s1Te; + } + } + return fault; +} ArmISA::TLB * ArmTLBParams::create() diff --git a/src/arch/arm/tlb.hh b/src/arch/arm/tlb.hh index a66e28b06..ac8c672bf 100644 --- a/src/arch/arm/tlb.hh +++ b/src/arch/arm/tlb.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -43,13 +43,13 @@ #ifndef __ARCH_ARM_TLB_HH__ #define __ARCH_ARM_TLB_HH__ -#include <map> #include "arch/arm/isa_traits.hh" #include "arch/arm/pagetable.hh" #include "arch/arm/utility.hh" #include "arch/arm/vtophys.hh" #include "base/statistics.hh" +#include "dev/dma_device.hh" #include "mem/request.hh" #include "params/ArmTLB.hh" #include "sim/fault_fwd.hh" @@ -60,36 +60,51 @@ class ThreadContext; namespace ArmISA { class TableWalker; +class Stage2LookUp; +class Stage2MMU; class TLB : public BaseTLB { public: enum ArmFlags { - AlignmentMask = 0x1f, + AlignmentMask = 0x7, AlignByte = 0x0, AlignHalfWord = 0x1, - AlignWord = 0x3, - AlignDoubleWord = 0x7, - AlignQuadWord = 0xf, - AlignOctWord = 0x1f, + AlignWord = 0x2, + AlignDoubleWord = 0x3, + AlignQuadWord = 0x4, + AlignOctWord = 0x5, - AllowUnaligned = 0x20, + AllowUnaligned = 0x8, // Priv code operating as if it wasn't - UserMode = 0x40, + UserMode = 0x10, // Because zero otherwise looks like a valid setting and may be used // accidentally, this bit must be non-zero to show it was used on // purpose. - MustBeOne = 0x80 + MustBeOne = 0x40 }; - protected: - - TlbEntry *table; // the Page Table - int size; // TLB Size - uint32_t _attr; // Memory attributes for last accessed TLB entry + enum ArmTranslationType { + NormalTran = 0, + S1CTran = 0x1, + HypMode = 0x2, + // Secure code operating as if it wasn't (required by some Address + // Translate operations) + S1S2NsTran = 0x4 + }; + protected: + TlbEntry* table; // the Page Table + int size; // TLB Size + bool isStage2; // Indicates this TLB is part of the second stage MMU + bool stage2Req; // Indicates whether a stage 2 lookup is also required + uint64_t _attr; // Memory attributes for last accessed TLB entry + bool directToStage2; // Indicates whether all translation requests should + // be routed directly to the stage 2 TLB TableWalker *tableWalker; + TLB *stage2Tlb; + Stage2MMU *stage2Mmu; // Access Stats mutable Stats::Scalar instHits; @@ -121,51 +136,101 @@ class TLB : public BaseTLB bool bootUncacheability; public: - typedef ArmTLBParams Params; - TLB(const Params *p); + TLB(const ArmTLBParams *p); + TLB(const Params *p, int _size, TableWalker *_walker); /** Lookup an entry in the TLB * @param vpn virtual address * @param asn context id/address space id to use + * @param vmid The virtual machine ID used for stage 2 translation + * @param secure if the lookup is secure + * @param hyp if the lookup is done from hyp mode * @param functional if the lookup should modify state - * @return pointer to TLB entrry if it exists + * @param ignore_asn if on lookup asn should be ignored + * @return pointer to TLB entry if it exists */ - TlbEntry *lookup(Addr vpn, uint8_t asn, bool functional = false); + TlbEntry *lookup(Addr vpn, uint16_t asn, uint8_t vmid, bool hyp, + bool secure, bool functional, + bool ignore_asn, uint8_t target_el); virtual ~TLB(); + + /// setup all the back pointers + virtual void init(); + + void setMMU(Stage2MMU *m); + int getsize() const { return size; } void insert(Addr vaddr, TlbEntry &pte); - /** Reset the entire TLB */ - void flushAll(); + Fault getTE(TlbEntry **te, RequestPtr req, ThreadContext *tc, Mode mode, + Translation *translation, bool timing, bool functional, + bool is_secure, ArmTranslationType tranType); + + Fault getResultTe(TlbEntry **te, RequestPtr req, ThreadContext *tc, + Mode mode, Translation *translation, bool timing, + bool functional, TlbEntry *mergeTe); + + Fault checkPermissions(TlbEntry *te, RequestPtr req, Mode mode); + Fault checkPermissions64(TlbEntry *te, RequestPtr req, Mode mode, + ThreadContext *tc); + + + /** Reset the entire TLB + * @param secure_lookup if the operation affects the secure world + */ + void flushAllSecurity(bool secure_lookup, uint8_t target_el, + bool ignore_el = false); + + /** Remove all entries in the non secure world, depending on whether they + * were allocated in hyp mode or not + * @param hyp if the opperation affects hyp mode + */ + void flushAllNs(bool hyp, uint8_t target_el, bool ignore_el = false); + + + /** Reset the entire TLB. Used for CPU switching to prevent stale + * translations after multiple switches + */ + void flushAll() + { + flushAllSecurity(false, 0, true); + flushAllSecurity(true, 0, true); + } /** Remove any entries that match both a va and asn * @param mva virtual address to flush * @param asn contextid/asn to flush on match + * @param secure_lookup if the operation affects the secure world */ - void flushMvaAsid(Addr mva, uint64_t asn); + void flushMvaAsid(Addr mva, uint64_t asn, bool secure_lookup, + uint8_t target_el); /** Remove any entries that match the asn * @param asn contextid/asn to flush on match + * @param secure_lookup if the operation affects the secure world */ - void flushAsid(uint64_t asn); + void flushAsid(uint64_t asn, bool secure_lookup, uint8_t target_el); /** Remove all entries that match the va regardless of asn * @param mva address to flush from cache + * @param secure_lookup if the operation affects the secure world + * @param hyp if the operation affects hyp mode */ - void flushMva(Addr mva); + void flushMva(Addr mva, bool secure_lookup, bool hyp, uint8_t target_el); - Fault trickBoxCheck(RequestPtr req, Mode mode, uint8_t domain, bool sNp); - Fault walkTrickBoxCheck(Addr pa, Addr va, Addr sz, bool is_exec, - bool is_write, uint8_t domain, bool sNp); + Fault trickBoxCheck(RequestPtr req, Mode mode, TlbEntry::DomainType domain); + Fault walkTrickBoxCheck(Addr pa, bool is_secure, Addr va, Addr sz, bool is_exec, + bool is_write, TlbEntry::DomainType domain, LookupLevel lookup_level); - void printTlb(); + void printTlb() const; void allCpusCaching() { bootUncacheability = true; } void demapPage(Addr vaddr, uint64_t asn) { - flushMvaAsid(vaddr, asn); + // needed for x86 only + panic("demapPage() is not implemented.\n"); } static bool validVirtualAddress(Addr vaddr); @@ -184,16 +249,18 @@ class TLB : public BaseTLB * Do a functional lookup on the TLB (for checker cpu) that * behaves like a normal lookup without modifying any page table state. */ - Fault translateFunctional(RequestPtr req, ThreadContext *tc, Mode mode); + Fault translateFunctional(RequestPtr req, ThreadContext *tc, Mode mode, + ArmTranslationType tranType = NormalTran); /** Accessor functions for memory attributes for last accessed TLB entry */ void - setAttr(uint32_t attr) + setAttr(uint64_t attr) { _attr = attr; } - uint32_t + + uint64_t getAttr() const { return _attr; @@ -201,12 +268,17 @@ class TLB : public BaseTLB Fault translateFs(RequestPtr req, ThreadContext *tc, Mode mode, Translation *translation, bool &delay, - bool timing, bool functional = false); + bool timing, ArmTranslationType tranType, bool functional = false); Fault translateSe(RequestPtr req, ThreadContext *tc, Mode mode, Translation *translation, bool &delay, bool timing); - Fault translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode); + Fault translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode, + ArmTranslationType tranType = NormalTran); Fault translateTiming(RequestPtr req, ThreadContext *tc, - Translation *translation, Mode mode); + Translation *translation, Mode mode, + ArmTranslationType tranType = NormalTran); + Fault translateComplete(RequestPtr req, ThreadContext *tc, + Translation *translation, Mode mode, ArmTranslationType tranType, + bool callFromS2); Fault finalizePhysical(RequestPtr req, ThreadContext *tc, Mode mode) const; void drainResume(); @@ -229,29 +301,45 @@ class TLB : public BaseTLB */ virtual BaseMasterPort* getMasterPort(); + /** + * Allow the MMU (overseeing both stage 1 and stage 2 TLBs) to + * access the table walker port of this TLB so that it can + * orchestrate staged translations. + * + * @return The table walker DMA port + */ + DmaPort& getWalkerPort(); + // Caching misc register values here. // Writing to misc registers needs to invalidate them. // translateFunctional/translateSe/translateFs checks if they are // invalid and call updateMiscReg if necessary. protected: + bool aarch64; + ExceptionLevel aarch64EL; SCTLR sctlr; + SCR scr; bool isPriv; - CONTEXTIDR contextId; + bool isSecure; + bool isHyp; + TTBCR ttbcr; + uint16_t asid; + uint8_t vmid; PRRR prrr; NMRR nmrr; + HCR hcr; uint32_t dacr; bool miscRegValid; - void updateMiscReg(ThreadContext *tc) - { - sctlr = tc->readMiscReg(MISCREG_SCTLR); - CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); - isPriv = cpsr.mode != MODE_USER; - contextId = tc->readMiscReg(MISCREG_CONTEXTIDR); - prrr = tc->readMiscReg(MISCREG_PRRR); - nmrr = tc->readMiscReg(MISCREG_NMRR); - dacr = tc->readMiscReg(MISCREG_DACR); - miscRegValid = true; - } + ArmTranslationType curTranType; + + // Cached copies of system-level properties + bool haveLPAE; + bool haveVirtualization; + bool haveLargeAsid64; + + void updateMiscReg(ThreadContext *tc, + ArmTranslationType tranType = NormalTran); + public: const Params * params() const @@ -259,6 +347,19 @@ public: return dynamic_cast<const Params *>(_params); } inline void invalidateMiscReg() { miscRegValid = false; } + +private: + /** Remove any entries that match both a va and asn + * @param mva virtual address to flush + * @param asn contextid/asn to flush on match + * @param secure_lookup if the operation affects the secure world + * @param hyp if the operation affects hyp mode + * @param ignore_asn if the flush should ignore the asn + */ + void _flushMva(Addr mva, uint64_t asn, bool secure_lookup, + bool hyp, bool ignore_asn, uint8_t target_el); + + bool checkELMatch(uint8_t target_el, uint8_t tentry_el, bool ignore_el); }; } // namespace ArmISA diff --git a/src/arch/arm/types.hh b/src/arch/arm/types.hh index cd0b74b2d..7b736492b 100644 --- a/src/arch/arm/types.hh +++ b/src/arch/arm/types.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -82,6 +82,7 @@ namespace ArmISA // Bitfields to select mode. Bitfield<36> thumb; Bitfield<35> bigThumb; + Bitfield<34> aarch64; // Made up bitfields that make life easier. Bitfield<33> sevenAndFour; @@ -143,9 +144,9 @@ namespace ArmISA Bitfield<3, 0> immedLo3_0; Bitfield<15, 0> regList; - + Bitfield<23, 0> offset; - + Bitfield<23, 0> immed23_0; Bitfield<11, 8> cpNum; @@ -213,7 +214,8 @@ namespace ArmISA enum FlagBits { ThumbBit = (1 << 0), - JazelleBit = (1 << 1) + JazelleBit = (1 << 1), + AArch64Bit = (1 << 2) }; uint8_t flags; uint8_t nextFlags; @@ -304,6 +306,37 @@ namespace ArmISA nextFlags &= ~JazelleBit; } + bool + aarch64() const + { + return flags & AArch64Bit; + } + + void + aarch64(bool val) + { + if (val) + flags |= AArch64Bit; + else + flags &= ~AArch64Bit; + } + + bool + nextAArch64() const + { + return nextFlags & AArch64Bit; + } + + void + nextAArch64(bool val) + { + if (val) + nextFlags |= AArch64Bit; + else + nextFlags &= ~AArch64Bit; + } + + uint8_t itstate() const { @@ -374,9 +407,15 @@ namespace ArmISA } void - instNPC(uint32_t val) + instNPC(Addr val) { - npc(val &~ mask(nextThumb() ? 1 : 2)); + // @todo: review this when AArch32/64 interprocessing is + // supported + if (aarch64()) + npc(val); // AArch64 doesn't force PC alignment, a PC + // Alignment Fault can be raised instead + else + npc(val &~ mask(nextThumb() ? 1 : 2)); } Addr @@ -387,7 +426,7 @@ namespace ArmISA // Perform an interworking branch. void - instIWNPC(uint32_t val) + instIWNPC(Addr val) { bool thumbEE = (thumb() && jazelle()); @@ -417,7 +456,7 @@ namespace ArmISA // Perform an interworking branch in ARM mode, a regular branch // otherwise. void - instAIWNPC(uint32_t val) + instAIWNPC(Addr val) { if (!thumb() && !jazelle()) instIWNPC(val); @@ -470,6 +509,18 @@ namespace ArmISA ROR }; + // Extension types for ARM instructions + enum ArmExtendType { + UXTB = 0, + UXTH = 1, + UXTW = 2, + UXTX = 3, + SXTB = 4, + SXTH = 5, + SXTW = 6, + SXTX = 7 + }; + typedef uint64_t LargestRead; // Need to use 64 bits to make sure that read requests get handled properly @@ -508,28 +559,163 @@ namespace ArmISA RND_NEAREST }; + enum ExceptionLevel { + EL0 = 0, + EL1, + EL2, + EL3 + }; + enum OperatingMode { + MODE_EL0T = 0x0, + MODE_EL1T = 0x4, + MODE_EL1H = 0x5, + MODE_EL2T = 0x8, + MODE_EL2H = 0x9, + MODE_EL3T = 0xC, + MODE_EL3H = 0xD, MODE_USER = 16, MODE_FIQ = 17, MODE_IRQ = 18, MODE_SVC = 19, MODE_MON = 22, MODE_ABORT = 23, + MODE_HYP = 26, MODE_UNDEFINED = 27, MODE_SYSTEM = 31, MODE_MAXMODE = MODE_SYSTEM }; + enum ExceptionClass { + EC_INVALID = -1, + EC_UNKNOWN = 0x0, + EC_TRAPPED_WFI_WFE = 0x1, + EC_TRAPPED_CP15_MCR_MRC = 0x3, + EC_TRAPPED_CP15_MCRR_MRRC = 0x4, + EC_TRAPPED_CP14_MCR_MRC = 0x5, + EC_TRAPPED_CP14_LDC_STC = 0x6, + EC_TRAPPED_HCPTR = 0x7, + EC_TRAPPED_SIMD_FP = 0x7, // AArch64 alias + EC_TRAPPED_CP10_MRC_VMRS = 0x8, + EC_TRAPPED_BXJ = 0xA, + EC_TRAPPED_CP14_MCRR_MRRC = 0xC, + EC_ILLEGAL_INST = 0xE, + EC_SVC_TO_HYP = 0x11, + EC_SVC = 0x11, // AArch64 alias + EC_HVC = 0x12, + EC_SMC_TO_HYP = 0x13, + EC_SMC = 0x13, // AArch64 alias + EC_SVC_64 = 0x15, + EC_HVC_64 = 0x16, + EC_SMC_64 = 0x17, + EC_TRAPPED_MSR_MRS_64 = 0x18, + EC_PREFETCH_ABORT_TO_HYP = 0x20, + EC_PREFETCH_ABORT_LOWER_EL = 0x20, // AArch64 alias + EC_PREFETCH_ABORT_FROM_HYP = 0x21, + EC_PREFETCH_ABORT_CURR_EL = 0x21, // AArch64 alias + EC_PC_ALIGNMENT = 0x22, + EC_DATA_ABORT_TO_HYP = 0x24, + EC_DATA_ABORT_LOWER_EL = 0x24, // AArch64 alias + EC_DATA_ABORT_FROM_HYP = 0x25, + EC_DATA_ABORT_CURR_EL = 0x25, // AArch64 alias + EC_STACK_PTR_ALIGNMENT = 0x26, + EC_FP_EXCEPTION = 0x28, + EC_FP_EXCEPTION_64 = 0x2C, + EC_SERROR = 0x2F + }; + + BitUnion8(OperatingMode64) + Bitfield<0> spX; + Bitfield<3, 2> el; + Bitfield<4> width; + EndBitUnion(OperatingMode64) + + static bool inline + opModeIs64(OperatingMode mode) + { + return ((OperatingMode64)(uint8_t)mode).width == 0; + } + + static bool inline + opModeIsH(OperatingMode mode) + { + return (mode == MODE_EL1H || mode == MODE_EL2H || mode == MODE_EL3H); + } + + static bool inline + opModeIsT(OperatingMode mode) + { + return (mode == MODE_EL0T || mode == MODE_EL1T || mode == MODE_EL2T || + mode == MODE_EL3T); + } + + static ExceptionLevel inline + opModeToEL(OperatingMode mode) + { + bool aarch32 = ((mode >> 4) & 1) ? true : false; + if (aarch32) { + switch (mode) { + case MODE_USER: + return EL0; + case MODE_FIQ: + case MODE_IRQ: + case MODE_SVC: + case MODE_ABORT: + case MODE_UNDEFINED: + case MODE_SYSTEM: + return EL1; + case MODE_HYP: + return EL2; + case MODE_MON: + return EL3; + default: + panic("Invalid operating mode: %d", mode); + break; + } + } else { + // aarch64 + return (ExceptionLevel) ((mode >> 2) & 3); + } + } + static inline bool badMode(OperatingMode mode) { switch (mode) { + case MODE_EL0T: + case MODE_EL1T: + case MODE_EL1H: + case MODE_EL2T: + case MODE_EL2H: + case MODE_EL3T: + case MODE_EL3H: + case MODE_USER: + case MODE_FIQ: + case MODE_IRQ: + case MODE_SVC: + case MODE_MON: + case MODE_ABORT: + case MODE_HYP: + case MODE_UNDEFINED: + case MODE_SYSTEM: + return false; + default: + return true; + } + } + + + static inline bool + badMode32(OperatingMode mode) + { + switch (mode) { case MODE_USER: case MODE_FIQ: case MODE_IRQ: case MODE_SVC: case MODE_MON: case MODE_ABORT: + case MODE_HYP: case MODE_UNDEFINED: case MODE_SYSTEM: return false; diff --git a/src/arch/arm/utility.cc b/src/arch/arm/utility.cc index cddc2c5c4..3d7d9c4fc 100644 --- a/src/arch/arm/utility.cc +++ b/src/arch/arm/utility.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2012 ARM Limited + * Copyright (c) 2009-2013 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -40,6 +40,7 @@ #include "arch/arm/faults.hh" #include "arch/arm/isa_traits.hh" +#include "arch/arm/system.hh" #include "arch/arm/tlb.hh" #include "arch/arm/utility.hh" #include "arch/arm/vtophys.hh" @@ -70,51 +71,68 @@ getArgument(ThreadContext *tc, int &number, uint16_t size, bool fp) M5_DUMMY_RETURN } - if (size == (uint16_t)(-1)) - size = ArmISA::MachineBytes; if (fp) panic("getArgument(): Floating point arguments not implemented\n"); - if (number < NumArgumentRegs) { - // If the argument is 64 bits, it must be in an even regiser - // number. Increment the number here if it isn't even. - if (size == sizeof(uint64_t)) { - if ((number % 2) != 0) - number++; - // Read the two halves of the data. Number is inc here to - // get the second half of the 64 bit reg. - uint64_t tmp; - tmp = tc->readIntReg(number++); - tmp |= tc->readIntReg(number) << 32; - return tmp; + if (inAArch64(tc)) { + if (size == (uint16_t)(-1)) + size = sizeof(uint64_t); + + if (number < 8 /*NumArgumentRegs64*/) { + return tc->readIntReg(number); } else { - return tc->readIntReg(number); + panic("getArgument(): No support reading stack args for AArch64\n"); } } else { - Addr sp = tc->readIntReg(StackPointerReg); - FSTranslatingPortProxy &vp = tc->getVirtProxy(); - uint64_t arg; - if (size == sizeof(uint64_t)) { - // If the argument is even it must be aligned - if ((number % 2) != 0) - number++; - arg = vp.read<uint64_t>(sp + - (number-NumArgumentRegs) * sizeof(uint32_t)); - // since two 32 bit args == 1 64 bit arg, increment number - number++; + if (size == (uint16_t)(-1)) + size = ArmISA::MachineBytes; + + if (number < NumArgumentRegs) { + // If the argument is 64 bits, it must be in an even regiser + // number. Increment the number here if it isn't even. + if (size == sizeof(uint64_t)) { + if ((number % 2) != 0) + number++; + // Read the two halves of the data. Number is inc here to + // get the second half of the 64 bit reg. + uint64_t tmp; + tmp = tc->readIntReg(number++); + tmp |= tc->readIntReg(number) << 32; + return tmp; + } else { + return tc->readIntReg(number); + } } else { - arg = vp.read<uint32_t>(sp + - (number-NumArgumentRegs) * sizeof(uint32_t)); + Addr sp = tc->readIntReg(StackPointerReg); + FSTranslatingPortProxy &vp = tc->getVirtProxy(); + uint64_t arg; + if (size == sizeof(uint64_t)) { + // If the argument is even it must be aligned + if ((number % 2) != 0) + number++; + arg = vp.read<uint64_t>(sp + + (number-NumArgumentRegs) * sizeof(uint32_t)); + // since two 32 bit args == 1 64 bit arg, increment number + number++; + } else { + arg = vp.read<uint32_t>(sp + + (number-NumArgumentRegs) * sizeof(uint32_t)); + } + return arg; } - return arg; } + panic("getArgument() should always return\n"); } void skipFunction(ThreadContext *tc) { PCState newPC = tc->pcState(); - newPC.set(tc->readIntReg(ReturnAddressReg) & ~ULL(1)); + if (inAArch64(tc)) { + newPC.set(tc->readIntReg(INTREG_X30)); + } else { + newPC.set(tc->readIntReg(ReturnAddressReg) & ~ULL(1)); + } CheckerCPU *checker = tc->getCheckerCpuPtr(); if (checker) { @@ -151,6 +169,128 @@ copyRegs(ThreadContext *src, ThreadContext *dest) dest->getDTBPtr()->invalidateMiscReg(); } +bool +inSecureState(ThreadContext *tc) +{ + SCR scr = inAArch64(tc) ? tc->readMiscReg(MISCREG_SCR_EL3) : + tc->readMiscReg(MISCREG_SCR); + return ArmSystem::haveSecurity(tc) && inSecureState( + scr, tc->readMiscReg(MISCREG_CPSR)); +} + +bool +inAArch64(ThreadContext *tc) +{ + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + return opModeIs64((OperatingMode) (uint8_t) cpsr.mode); +} + +bool +longDescFormatInUse(ThreadContext *tc) +{ + TTBCR ttbcr = tc->readMiscReg(MISCREG_TTBCR); + return ArmSystem::haveLPAE(tc) && ttbcr.eae; +} + +uint32_t +getMPIDR(ArmSystem *arm_sys, ThreadContext *tc) +{ + if (arm_sys->multiProc) { + return 0x80000000 | // multiprocessor extensions available + tc->cpuId(); + } else { + return 0x80000000 | // multiprocessor extensions available + 0x40000000 | // in up system + tc->cpuId(); + } +} + +bool +ELIs64(ThreadContext *tc, ExceptionLevel el) +{ + if (ArmSystem::highestEL(tc) == el) + // Register width is hard-wired + return ArmSystem::highestELIs64(tc); + + switch (el) { + case EL0: + return opModeIs64(currOpMode(tc)); + case EL1: + { + // @todo: uncomment this to enable Virtualization + // if (ArmSystem::haveVirtualization(tc)) { + // HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + // return hcr.rw; + // } + assert(ArmSystem::haveSecurity(tc)); + SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); + return scr.rw; + } + case EL2: + { + assert(ArmSystem::haveSecurity(tc)); + SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); + return scr.rw; + } + default: + panic("Invalid exception level"); + break; + } +} + +bool +isBigEndian64(ThreadContext *tc) +{ + switch (opModeToEL(currOpMode(tc))) { + case EL3: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL3)).ee; + case EL2: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL2)).ee; + case EL1: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL1)).ee; + case EL0: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL1)).e0e; + default: + panic("Invalid exception level"); + break; + } +} + +Addr +purifyTaggedAddr(Addr addr, ThreadContext *tc, ExceptionLevel el) +{ + TTBCR tcr; + + switch (el) { + case EL0: + case EL1: + tcr = tc->readMiscReg(MISCREG_TCR_EL1); + if (bits(addr, 55, 48) == 0xFF && tcr.tbi1) + return addr | mask(63, 55); + else if (!bits(addr, 55, 48) && tcr.tbi0) + return bits(addr,55, 0); + break; + // @todo: uncomment this to enable Virtualization + // case EL2: + // assert(ArmSystem::haveVirtualization()); + // tcr = tc->readMiscReg(MISCREG_TCR_EL2); + // if (tcr.tbi) + // return addr & mask(56); + // break; + case EL3: + assert(ArmSystem::haveSecurity(tc)); + tcr = tc->readMiscReg(MISCREG_TCR_EL3); + if (tcr.tbi) + return addr & mask(56); + break; + default: + panic("Invalid exception level"); + break; + } + + return addr; // Nothing to do if this is not a tagged address +} + Addr truncPage(Addr addr) { @@ -163,4 +303,667 @@ roundPage(Addr addr) return (addr + PageBytes - 1) & ~(PageBytes - 1); } +bool +mcrMrc15TrapToHyp(const MiscRegIndex miscReg, HCR hcr, CPSR cpsr, SCR scr, + HDCR hdcr, HSTR hstr, HCPTR hcptr, uint32_t iss) +{ + bool isRead; + uint32_t crm; + IntRegIndex rt; + uint32_t crn; + uint32_t opc1; + uint32_t opc2; + bool trapToHype = false; + + + if (!inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + mcrMrcIssExtract(iss, isRead, crm, rt, crn, opc1, opc2); + trapToHype = ((uint32_t) hstr) & (1 << crn); + trapToHype |= hdcr.tpm && (crn == 9) && (crm >= 12); + trapToHype |= hcr.tidcp && ( + ((crn == 9) && ((crm <= 2) || ((crm >= 5) && (crm <= 8)))) || + ((crn == 10) && ((crm <= 1) || (crm == 4) || (crm == 8))) || + ((crn == 11) && ((crm <= 8) || (crm == 15))) ); + + if (!trapToHype) { + switch (unflattenMiscReg(miscReg)) { + case MISCREG_CPACR: + trapToHype = hcptr.tcpac; + break; + case MISCREG_REVIDR: + case MISCREG_TCMTR: + case MISCREG_TLBTR: + case MISCREG_AIDR: + trapToHype = hcr.tid1; + break; + case MISCREG_CTR: + case MISCREG_CCSIDR: + case MISCREG_CLIDR: + case MISCREG_CSSELR: + trapToHype = hcr.tid2; + break; + case MISCREG_ID_PFR0: + case MISCREG_ID_PFR1: + case MISCREG_ID_DFR0: + case MISCREG_ID_AFR0: + case MISCREG_ID_MMFR0: + case MISCREG_ID_MMFR1: + case MISCREG_ID_MMFR2: + case MISCREG_ID_MMFR3: + case MISCREG_ID_ISAR0: + case MISCREG_ID_ISAR1: + case MISCREG_ID_ISAR2: + case MISCREG_ID_ISAR3: + case MISCREG_ID_ISAR4: + case MISCREG_ID_ISAR5: + trapToHype = hcr.tid3; + break; + case MISCREG_DCISW: + case MISCREG_DCCSW: + case MISCREG_DCCISW: + trapToHype = hcr.tsw; + break; + case MISCREG_DCIMVAC: + case MISCREG_DCCIMVAC: + case MISCREG_DCCMVAC: + trapToHype = hcr.tpc; + break; + case MISCREG_ICIMVAU: + case MISCREG_ICIALLU: + case MISCREG_ICIALLUIS: + case MISCREG_DCCMVAU: + trapToHype = hcr.tpu; + break; + case MISCREG_TLBIALLIS: + case MISCREG_TLBIMVAIS: + case MISCREG_TLBIASIDIS: + case MISCREG_TLBIMVAAIS: + case MISCREG_DTLBIALL: + case MISCREG_ITLBIALL: + case MISCREG_DTLBIMVA: + case MISCREG_ITLBIMVA: + case MISCREG_DTLBIASID: + case MISCREG_ITLBIASID: + case MISCREG_TLBIMVAA: + case MISCREG_TLBIALL: + case MISCREG_TLBIMVA: + case MISCREG_TLBIASID: + trapToHype = hcr.ttlb; + break; + case MISCREG_ACTLR: + trapToHype = hcr.tac; + break; + case MISCREG_SCTLR: + case MISCREG_TTBR0: + case MISCREG_TTBR1: + case MISCREG_TTBCR: + case MISCREG_DACR: + case MISCREG_DFSR: + case MISCREG_IFSR: + case MISCREG_DFAR: + case MISCREG_IFAR: + case MISCREG_ADFSR: + case MISCREG_AIFSR: + case MISCREG_PRRR: + case MISCREG_NMRR: + case MISCREG_MAIR0: + case MISCREG_MAIR1: + case MISCREG_CONTEXTIDR: + trapToHype = hcr.tvm & !isRead; + break; + case MISCREG_PMCR: + trapToHype = hdcr.tpmcr; + break; + // No default action needed + default: + break; + } + } + } + return trapToHype; +} + + +bool +mcrMrc14TrapToHyp(const MiscRegIndex miscReg, HCR hcr, CPSR cpsr, SCR scr, + HDCR hdcr, HSTR hstr, HCPTR hcptr, uint32_t iss) +{ + bool isRead; + uint32_t crm; + IntRegIndex rt; + uint32_t crn; + uint32_t opc1; + uint32_t opc2; + bool trapToHype = false; + + if (!inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + mcrMrcIssExtract(iss, isRead, crm, rt, crn, opc1, opc2); + inform("trap check M:%x N:%x 1:%x 2:%x hdcr %x, hcptr %x, hstr %x\n", + crm, crn, opc1, opc2, hdcr, hcptr, hstr); + trapToHype = hdcr.tda && (opc1 == 0); + trapToHype |= hcptr.tta && (opc1 == 1); + if (!trapToHype) { + switch (unflattenMiscReg(miscReg)) { + case MISCREG_DBGOSLSR: + case MISCREG_DBGOSLAR: + case MISCREG_DBGOSDLR: + case MISCREG_DBGPRCR: + trapToHype = hdcr.tdosa; + break; + case MISCREG_DBGDRAR: + case MISCREG_DBGDSAR: + trapToHype = hdcr.tdra; + break; + case MISCREG_JIDR: + trapToHype = hcr.tid0; + break; + case MISCREG_JOSCR: + case MISCREG_JMCR: + trapToHype = hstr.tjdbx; + break; + case MISCREG_TEECR: + case MISCREG_TEEHBR: + trapToHype = hstr.ttee; + break; + // No default action needed + default: + break; + } + } + } + return trapToHype; +} + +bool +mcrrMrrc15TrapToHyp(const MiscRegIndex miscReg, CPSR cpsr, SCR scr, HSTR hstr, + HCR hcr, uint32_t iss) +{ + uint32_t crm; + IntRegIndex rt; + uint32_t crn; + uint32_t opc1; + uint32_t opc2; + bool isRead; + bool trapToHype = false; + + if (!inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + // This is technically the wrong function, but we can re-use it for + // the moment because we only need one field, which overlaps with the + // mcrmrc layout + mcrMrcIssExtract(iss, isRead, crm, rt, crn, opc1, opc2); + trapToHype = ((uint32_t) hstr) & (1 << crm); + + if (!trapToHype) { + switch (unflattenMiscReg(miscReg)) { + case MISCREG_SCTLR: + case MISCREG_TTBR0: + case MISCREG_TTBR1: + case MISCREG_TTBCR: + case MISCREG_DACR: + case MISCREG_DFSR: + case MISCREG_IFSR: + case MISCREG_DFAR: + case MISCREG_IFAR: + case MISCREG_ADFSR: + case MISCREG_AIFSR: + case MISCREG_PRRR: + case MISCREG_NMRR: + case MISCREG_MAIR0: + case MISCREG_MAIR1: + case MISCREG_CONTEXTIDR: + trapToHype = hcr.tvm & !isRead; + break; + // No default action needed + default: + break; + } + } + } + return trapToHype; +} + +bool +msrMrs64TrapToSup(const MiscRegIndex miscReg, ExceptionLevel el, + CPACR cpacr /* CPACR_EL1 */) +{ + bool trapToSup = false; + switch (miscReg) { + case MISCREG_FPCR: + case MISCREG_FPSR: + case MISCREG_FPEXC32_EL2: + if ((el == EL0 && cpacr.fpen != 0x3) || + (el == EL1 && !(cpacr.fpen & 0x1))) + trapToSup = true; + break; + default: + break; + } + return trapToSup; +} + +bool +msrMrs64TrapToHyp(const MiscRegIndex miscReg, bool isRead, + CPTR cptr /* CPTR_EL2 */, + HCR hcr /* HCR_EL2 */, + bool * isVfpNeon) +{ + bool trapToHyp = false; + *isVfpNeon = false; + + switch (miscReg) { + // FP/SIMD regs + case MISCREG_FPCR: + case MISCREG_FPSR: + case MISCREG_FPEXC32_EL2: + trapToHyp = cptr.tfp; + *isVfpNeon = true; + break; + // CPACR + case MISCREG_CPACR_EL1: + trapToHyp = cptr.tcpac; + break; + // Virtual memory control regs + case MISCREG_SCTLR_EL1: + case MISCREG_TTBR0_EL1: + case MISCREG_TTBR1_EL1: + case MISCREG_TCR_EL1: + case MISCREG_ESR_EL1: + case MISCREG_FAR_EL1: + case MISCREG_AFSR0_EL1: + case MISCREG_AFSR1_EL1: + case MISCREG_MAIR_EL1: + case MISCREG_AMAIR_EL1: + case MISCREG_CONTEXTIDR_EL1: + trapToHyp = (hcr.trvm && isRead) || (hcr.tvm && !isRead); + break; + // TLB maintenance instructions + case MISCREG_TLBI_VMALLE1: + case MISCREG_TLBI_VAE1_Xt: + case MISCREG_TLBI_ASIDE1_Xt: + case MISCREG_TLBI_VAAE1_Xt: + case MISCREG_TLBI_VALE1_Xt: + case MISCREG_TLBI_VAALE1_Xt: + case MISCREG_TLBI_VMALLE1IS: + case MISCREG_TLBI_VAE1IS_Xt: + case MISCREG_TLBI_ASIDE1IS_Xt: + case MISCREG_TLBI_VAAE1IS_Xt: + case MISCREG_TLBI_VALE1IS_Xt: + case MISCREG_TLBI_VAALE1IS_Xt: + trapToHyp = hcr.ttlb; + break; + // Cache maintenance instructions to the point of unification + case MISCREG_IC_IVAU_Xt: + case MISCREG_ICIALLU: + case MISCREG_ICIALLUIS: + case MISCREG_DC_CVAU_Xt: + trapToHyp = hcr.tpu; + break; + // Data/Unified cache maintenance instructions to the point of coherency + case MISCREG_DC_IVAC_Xt: + case MISCREG_DC_CIVAC_Xt: + case MISCREG_DC_CVAC_Xt: + trapToHyp = hcr.tpc; + break; + // Data/Unified cache maintenance instructions by set/way + case MISCREG_DC_ISW_Xt: + case MISCREG_DC_CSW_Xt: + case MISCREG_DC_CISW_Xt: + trapToHyp = hcr.tsw; + break; + // ACTLR + case MISCREG_ACTLR_EL1: + trapToHyp = hcr.tacr; + break; + + // @todo: Trap implementation-dependent functionality based on + // hcr.tidcp + + // ID regs, group 3 + case MISCREG_ID_PFR0_EL1: + case MISCREG_ID_PFR1_EL1: + case MISCREG_ID_DFR0_EL1: + case MISCREG_ID_AFR0_EL1: + case MISCREG_ID_MMFR0_EL1: + case MISCREG_ID_MMFR1_EL1: + case MISCREG_ID_MMFR2_EL1: + case MISCREG_ID_MMFR3_EL1: + case MISCREG_ID_ISAR0_EL1: + case MISCREG_ID_ISAR1_EL1: + case MISCREG_ID_ISAR2_EL1: + case MISCREG_ID_ISAR3_EL1: + case MISCREG_ID_ISAR4_EL1: + case MISCREG_ID_ISAR5_EL1: + case MISCREG_MVFR0_EL1: + case MISCREG_MVFR1_EL1: + case MISCREG_MVFR2_EL1: + case MISCREG_ID_AA64PFR0_EL1: + case MISCREG_ID_AA64PFR1_EL1: + case MISCREG_ID_AA64DFR0_EL1: + case MISCREG_ID_AA64DFR1_EL1: + case MISCREG_ID_AA64ISAR0_EL1: + case MISCREG_ID_AA64ISAR1_EL1: + case MISCREG_ID_AA64MMFR0_EL1: + case MISCREG_ID_AA64MMFR1_EL1: + case MISCREG_ID_AA64AFR0_EL1: + case MISCREG_ID_AA64AFR1_EL1: + assert(isRead); + trapToHyp = hcr.tid3; + break; + // ID regs, group 2 + case MISCREG_CTR_EL0: + case MISCREG_CCSIDR_EL1: + case MISCREG_CLIDR_EL1: + case MISCREG_CSSELR_EL1: + trapToHyp = hcr.tid2; + break; + // ID regs, group 1 + case MISCREG_AIDR_EL1: + case MISCREG_REVIDR_EL1: + assert(isRead); + trapToHyp = hcr.tid1; + break; + default: + break; + } + return trapToHyp; +} + +bool +msrMrs64TrapToMon(const MiscRegIndex miscReg, CPTR cptr /* CPTR_EL3 */, + ExceptionLevel el, bool * isVfpNeon) +{ + bool trapToMon = false; + *isVfpNeon = false; + + switch (miscReg) { + // FP/SIMD regs + case MISCREG_FPCR: + case MISCREG_FPSR: + case MISCREG_FPEXC32_EL2: + trapToMon = cptr.tfp; + *isVfpNeon = true; + break; + // CPACR, CPTR + case MISCREG_CPACR_EL1: + if (el == EL1) { + trapToMon = cptr.tcpac; + } + break; + case MISCREG_CPTR_EL2: + if (el == EL2) { + trapToMon = cptr.tcpac; + } + break; + default: + break; + } + return trapToMon; +} + +bool +decodeMrsMsrBankedReg(uint8_t sysM, bool r, bool &isIntReg, int ®Idx, + CPSR cpsr, SCR scr, NSACR nsacr, bool checkSecurity) +{ + OperatingMode mode; + bool ok = true; + + // R mostly indicates if its a int register or a misc reg, we override + // below if the few corner cases + isIntReg = !r; + // Loosely based on ARM ARM issue C section B9.3.10 + if (r) { + switch (sysM) + { + case 0xE: + regIdx = MISCREG_SPSR_FIQ; + mode = MODE_FIQ; + break; + case 0x10: + regIdx = MISCREG_SPSR_IRQ; + mode = MODE_IRQ; + break; + case 0x12: + regIdx = MISCREG_SPSR_SVC; + mode = MODE_SVC; + break; + case 0x14: + regIdx = MISCREG_SPSR_ABT; + mode = MODE_ABORT; + break; + case 0x16: + regIdx = MISCREG_SPSR_UND; + mode = MODE_UNDEFINED; + break; + case 0x1C: + regIdx = MISCREG_SPSR_MON; + mode = MODE_MON; + break; + case 0x1E: + regIdx = MISCREG_SPSR_HYP; + mode = MODE_HYP; + break; + default: + ok = false; + break; + } + } else { + int sysM4To3 = bits(sysM, 4, 3); + + if (sysM4To3 == 0) { + mode = MODE_USER; + regIdx = intRegInMode(mode, bits(sysM, 2, 0) + 8); + } else if (sysM4To3 == 1) { + mode = MODE_FIQ; + regIdx = intRegInMode(mode, bits(sysM, 2, 0) + 8); + } else if (sysM4To3 == 3) { + if (bits(sysM, 1) == 0) { + mode = MODE_MON; + regIdx = intRegInMode(mode, 14 - bits(sysM, 0)); + } else { + mode = MODE_HYP; + if (bits(sysM, 0) == 1) { + regIdx = intRegInMode(mode, 13); // R13 in HYP + } else { + isIntReg = false; + regIdx = MISCREG_ELR_HYP; + } + } + } else { // Other Banked registers + int sysM2 = bits(sysM, 2); + int sysM1 = bits(sysM, 1); + + mode = (OperatingMode) ( ((sysM2 || sysM1) << 0) | + (1 << 1) | + ((sysM2 && !sysM1) << 2) | + ((sysM2 && sysM1) << 3) | + (1 << 4) ); + regIdx = intRegInMode(mode, 14 - bits(sysM, 0)); + // Don't flatten the register here. This is going to go through + // setIntReg() which will do the flattening + ok &= mode != cpsr.mode; + } + } + + // Check that the requested register is accessable from the current mode + if (ok && checkSecurity && mode != cpsr.mode) { + switch (cpsr.mode) + { + case MODE_USER: + ok = false; + break; + case MODE_FIQ: + ok &= mode != MODE_HYP; + ok &= (mode != MODE_MON) || !scr.ns; + break; + case MODE_HYP: + ok &= mode != MODE_MON; + ok &= (mode != MODE_FIQ) || !nsacr.rfr; + break; + case MODE_IRQ: + case MODE_SVC: + case MODE_ABORT: + case MODE_UNDEFINED: + case MODE_SYSTEM: + ok &= mode != MODE_HYP; + ok &= (mode != MODE_MON) || !scr.ns; + ok &= (mode != MODE_FIQ) || !nsacr.rfr; + break; + // can access everything, no further checks required + case MODE_MON: + break; + default: + panic("unknown Mode 0x%x\n", cpsr.mode); + break; + } + } + return (ok); +} + +bool +vfpNeonEnabled(uint32_t &seq, HCPTR hcptr, NSACR nsacr, CPACR cpacr, CPSR cpsr, + uint32_t &iss, bool &trap, ThreadContext *tc, FPEXC fpexc, + bool isSIMD) +{ + iss = 0; + trap = false; + bool undefined = false; + bool haveSecurity = ArmSystem::haveSecurity(tc); + bool haveVirtualization = ArmSystem::haveVirtualization(tc); + bool isSecure = inSecureState(tc); + + // Non-secure view of CPACR and HCPTR determines behavior + // Copy register values + uint8_t cpacr_cp10 = cpacr.cp10; + bool cpacr_asedis = cpacr.asedis; + bool hcptr_cp10 = false; + bool hcptr_tase = false; + + bool cp10_enabled = cpacr.cp10 == 0x3 + || (cpacr.cp10 == 0x1 && inPrivilegedMode(cpsr)); + + bool cp11_enabled = cpacr.cp11 == 0x3 + || (cpacr.cp11 == 0x1 && inPrivilegedMode(cpsr)); + + if (cp11_enabled) { + undefined |= !(fpexc.en && cp10_enabled); + } else { + undefined |= !(fpexc.en && cp10_enabled && (cpacr.cp11 == cpacr.cp10)); + } + + if (haveVirtualization) { + hcptr_cp10 = hcptr.tcp10; + undefined |= hcptr.tcp10 != hcptr.tcp11; + hcptr_tase = hcptr.tase; + } + + if (haveSecurity) { + undefined |= nsacr.cp10 != nsacr.cp11; + if (!isSecure) { + // Modify register values to the Non-secure view + if (!nsacr.cp10) { + cpacr_cp10 = 0; + if (haveVirtualization) { + hcptr_cp10 = true; + } + } + if (nsacr.nsasedis) { + cpacr_asedis = true; + if (haveVirtualization) { + hcptr_tase = true; + } + } + } + } + + // Check Coprocessor Access Control Register for permission to use CP10/11. + if (!haveVirtualization || (cpsr.mode != MODE_HYP)) { + switch (cpacr_cp10) + { + case 0: + undefined = true; + break; + case 1: + undefined |= inUserMode(cpsr); + break; + } + + // Check if SIMD operations are disabled + if (isSIMD && cpacr_asedis) undefined = true; + } + + // If required, check FPEXC enabled bit. + undefined |= !fpexc.en; + + if (haveSecurity && haveVirtualization && !isSecure) { + if (hcptr_cp10 || (isSIMD && hcptr_tase)) { + iss = isSIMD ? (1 << 5) : 0xA; + trap = true; + } + } + + return (!undefined); +} + +bool +SPAlignmentCheckEnabled(ThreadContext* tc) +{ + switch (opModeToEL(currOpMode(tc))) { + case EL3: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL3)).sa; + case EL2: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL2)).sa; + case EL1: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL1)).sa; + case EL0: + return ((SCTLR) tc->readMiscReg(MISCREG_SCTLR_EL1)).sa0; + default: + panic("Invalid exception level"); + break; + } +} + +int +decodePhysAddrRange64(uint8_t pa_enc) +{ + switch (pa_enc) { + case 0x0: + return 32; + case 0x1: + return 36; + case 0x2: + return 40; + case 0x3: + return 42; + case 0x4: + return 44; + case 0x5: + case 0x6: + case 0x7: + return 48; + default: + panic("Invalid phys. address range encoding"); + } +} + +uint8_t +encodePhysAddrRange64(int pa_size) +{ + switch (pa_size) { + case 32: + return 0x0; + case 36: + return 0x1; + case 40: + return 0x2; + case 42: + return 0x3; + case 44: + return 0x4; + case 48: + return 0x5; + default: + panic("Invalid phys. address range"); + } +} + } // namespace ArmISA diff --git a/src/arch/arm/utility.hh b/src/arch/arm/utility.hh index e4fc658e0..1eea743bb 100644 --- a/src/arch/arm/utility.hh +++ b/src/arch/arm/utility.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -54,6 +54,8 @@ #include "cpu/static_inst.hh" #include "cpu/thread_context.hh" +class ArmSystem; + namespace ArmISA { inline PCState @@ -118,7 +120,7 @@ void initCPU(ThreadContext *tc, int cpuId); static inline bool inUserMode(CPSR cpsr) { - return cpsr.mode == MODE_USER; + return cpsr.mode == MODE_USER || cpsr.mode == MODE_EL0T; } static inline bool @@ -139,30 +141,139 @@ inPrivilegedMode(ThreadContext *tc) return !inUserMode(tc); } -static inline bool -vfpEnabled(CPACR cpacr, CPSR cpsr) +bool inAArch64(ThreadContext *tc); + +static inline OperatingMode +currOpMode(ThreadContext *tc) +{ + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + return (OperatingMode) (uint8_t) cpsr.mode; +} + +static inline ExceptionLevel +currEL(ThreadContext *tc) { - return cpacr.cp10 == 0x3 || - (cpacr.cp10 == 0x1 && inPrivilegedMode(cpsr)); + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + return (ExceptionLevel) (uint8_t) cpsr.el; } +bool ELIs64(ThreadContext *tc, ExceptionLevel el); + +bool isBigEndian64(ThreadContext *tc); + +/** + * Removes the tag from tagged addresses if that mode is enabled. + * @param addr The address to be purified. + * @param tc The thread context. + * @param el The controlled exception level. + * @return The purified address. + */ +Addr purifyTaggedAddr(Addr addr, ThreadContext *tc, ExceptionLevel el); + static inline bool -vfpEnabled(CPACR cpacr, CPSR cpsr, FPEXC fpexc) +inSecureState(SCR scr, CPSR cpsr) +{ + switch ((OperatingMode) (uint8_t) cpsr.mode) { + case MODE_MON: + case MODE_EL3T: + case MODE_EL3H: + return true; + case MODE_HYP: + case MODE_EL2T: + case MODE_EL2H: + return false; + default: + return !scr.ns; + } +} + +bool longDescFormatInUse(ThreadContext *tc); + +bool inSecureState(ThreadContext *tc); + +uint32_t getMPIDR(ArmSystem *arm_sys, ThreadContext *tc); + +static inline uint32_t +mcrMrcIssBuild(bool isRead, uint32_t crm, IntRegIndex rt, uint32_t crn, + uint32_t opc1, uint32_t opc2) +{ + return (isRead << 0) | + (crm << 1) | + (rt << 5) | + (crn << 10) | + (opc1 << 14) | + (opc2 << 17); +} + +static inline void +mcrMrcIssExtract(uint32_t iss, bool &isRead, uint32_t &crm, IntRegIndex &rt, + uint32_t &crn, uint32_t &opc1, uint32_t &opc2) +{ + isRead = (iss >> 0) & 0x1; + crm = (iss >> 1) & 0xF; + rt = (IntRegIndex) ((iss >> 5) & 0xF); + crn = (iss >> 10) & 0xF; + opc1 = (iss >> 14) & 0x7; + opc2 = (iss >> 17) & 0x7; +} + +static inline uint32_t +mcrrMrrcIssBuild(bool isRead, uint32_t crm, IntRegIndex rt, IntRegIndex rt2, + uint32_t opc1) { - if ((cpacr.cp11 == 0x3) || - ((cpacr.cp11 == 0x1) && inPrivilegedMode(cpsr))) - return fpexc.en && vfpEnabled(cpacr, cpsr); - else - return fpexc.en && vfpEnabled(cpacr, cpsr) && - (cpacr.cp11 == cpacr.cp10); + return (isRead << 0) | + (crm << 1) | + (rt << 5) | + (rt2 << 10) | + (opc1 << 16); } +static inline uint32_t +msrMrs64IssBuild(bool isRead, uint32_t op0, uint32_t op1, uint32_t crn, + uint32_t crm, uint32_t op2, IntRegIndex rt) +{ + return isRead | + (crm << 1) | + (rt << 5) | + (crn << 10) | + (op1 << 14) | + (op2 << 17) | + (op0 << 20); +} + +bool +mcrMrc15TrapToHyp(const MiscRegIndex miscReg, HCR hcr, CPSR cpsr, SCR scr, + HDCR hdcr, HSTR hstr, HCPTR hcptr, uint32_t iss); +bool +mcrMrc14TrapToHyp(const MiscRegIndex miscReg, HCR hcr, CPSR cpsr, SCR scr, + HDCR hdcr, HSTR hstr, HCPTR hcptr, uint32_t iss); +bool +mcrrMrrc15TrapToHyp(const MiscRegIndex miscReg, CPSR cpsr, SCR scr, HSTR hstr, + HCR hcr, uint32_t iss); + +bool msrMrs64TrapToSup(const MiscRegIndex miscReg, ExceptionLevel el, + CPACR cpacr); +bool msrMrs64TrapToHyp(const MiscRegIndex miscReg, bool isRead, CPTR cptr, + HCR hcr, bool * isVfpNeon); +bool msrMrs64TrapToMon(const MiscRegIndex miscReg, CPTR cptr, + ExceptionLevel el, bool * isVfpNeon); + +bool +vfpNeonEnabled(uint32_t &seq, HCPTR hcptr, NSACR nsacr, CPACR cpacr, CPSR cpsr, + uint32_t &iss, bool &trap, ThreadContext *tc, + FPEXC fpexc = (1<<30), bool isSIMD = false); + static inline bool -neonEnabled(CPACR cpacr, CPSR cpsr, FPEXC fpexc) +vfpNeon64Enabled(CPACR cpacr, ExceptionLevel el) { - return !cpacr.asedis && vfpEnabled(cpacr, cpsr, fpexc); + if ((el == EL0 && cpacr.fpen != 0x3) || + (el == EL1 && !(cpacr.fpen & 0x1))) + return false; + return true; } +bool SPAlignmentCheckEnabled(ThreadContext* tc); + uint64_t getArgument(ThreadContext *tc, int &number, uint16_t size, bool fp); void skipFunction(ThreadContext *tc); @@ -182,6 +293,36 @@ getExecutingAsid(ThreadContext *tc) return tc->readMiscReg(MISCREG_CONTEXTIDR); } +// Decodes the register index to access based on the fields used in a MSR +// or MRS instruction +bool +decodeMrsMsrBankedReg(uint8_t sysM, bool r, bool &isIntReg, int ®Idx, + CPSR cpsr, SCR scr, NSACR nsacr, + bool checkSecurity = true); + +// This wrapper function is used to turn the register index into a source +// parameter for the instruction. See Operands.isa +static inline int +decodeMrsMsrBankedIntRegIndex(uint8_t sysM, bool r) +{ + int regIdx; + bool isIntReg; + bool validReg; + + validReg = decodeMrsMsrBankedReg(sysM, r, isIntReg, regIdx, 0, 0, 0, false); + return (validReg && isIntReg) ? regIdx : INTREG_DUMMY; +} + +/** + * Returns the n. of PA bits corresponding to the specified encoding. + */ +int decodePhysAddrRange64(uint8_t pa_enc); + +/** + * Returns the encoding corresponding to the specified n. of PA bits. + */ +uint8_t encodePhysAddrRange64(int pa_size); + } #endif diff --git a/src/arch/arm/vtophys.cc b/src/arch/arm/vtophys.cc index 7c26962cb..bed76acbd 100644 --- a/src/arch/arm/vtophys.cc +++ b/src/arch/arm/vtophys.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -45,6 +45,7 @@ #include <string> +#include "arch/arm/faults.hh" #include "arch/arm/table_walker.hh" #include "arch/arm/tlb.hh" #include "arch/arm/vtophys.hh" @@ -65,66 +66,30 @@ ArmISA::vtophys(Addr vaddr) Addr ArmISA::vtophys(ThreadContext *tc, Addr addr) { - SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR); - if (!sctlr.m) { - // Translation is currently disabled PA == VA - return addr; - } - bool success; - Addr pa; + Fault fault; + // Set up a functional memory Request to pass to the TLB + // to get it to translate the vaddr to a paddr + Request req(0, addr, 64, 0x40, -1, 0, 0, 0); ArmISA::TLB *tlb; - // Check the TLBs far a translation - // It's possible that there is a validy translation in the tlb + // Check the TLBs for a translation + // It's possible that there is a valid translation in the tlb // that is no loger valid in the page table in memory // so we need to check here first + // + // Calling translateFunctional invokes a table-walk if required + // so we should always succeed tlb = static_cast<ArmISA::TLB*>(tc->getDTBPtr()); - success = tlb->translateFunctional(tc, addr, pa); - if (success) - return pa; + fault = tlb->translateFunctional(&req, tc, BaseTLB::Read, TLB::NormalTran); + if (fault == NoFault) + return req.getPaddr(); tlb = static_cast<ArmISA::TLB*>(tc->getITBPtr()); - success = tlb->translateFunctional(tc, addr, pa); - if (success) - return pa; + fault = tlb->translateFunctional(&req, tc, BaseTLB::Read, TLB::NormalTran); + if (fault == NoFault) + return req.getPaddr(); - // We've failed everything, so we need to do a - // hardware tlb walk without messing with any - // state - - uint32_t N = tc->readMiscReg(MISCREG_TTBCR); - Addr ttbr; - if (N == 0 || !mbits(addr, 31, 32-N)) { - ttbr = tc->readMiscReg(MISCREG_TTBR0); - } else { - ttbr = tc->readMiscReg(MISCREG_TTBR1); - N = 0; - } - - PortProxy &port = tc->getPhysProxy(); - Addr l1desc_addr = mbits(ttbr, 31, 14-N) | (bits(addr,31-N,20) << 2); - - TableWalker::L1Descriptor l1desc; - l1desc.data = port.read<uint32_t>(l1desc_addr); - if (l1desc.type() == TableWalker::L1Descriptor::Ignore || - l1desc.type() == TableWalker::L1Descriptor::Reserved) { - warn("Unable to translate virtual address: %#x\n", addr); - return -1; - } - if (l1desc.type() == TableWalker::L1Descriptor::Section) - return l1desc.paddr(addr); - - // Didn't find it at the first level, try againt - Addr l2desc_addr = l1desc.l2Addr() | (bits(addr, 19, 12) << 2); - TableWalker::L2Descriptor l2desc; - l2desc.data = port.read<uint32_t>(l2desc_addr); - - if (l2desc.invalid()) { - warn("Unable to translate virtual address: %#x\n", addr); - return -1; - } - - return l2desc.paddr(addr); + panic("Table walkers support functional accesses. We should never get here\n"); } bool diff --git a/src/base/loader/elf_object.cc b/src/base/loader/elf_object.cc index 6d4c29776..9445f1df9 100644 --- a/src/base/loader/elf_object.cc +++ b/src/base/loader/elf_object.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2003-2005 The Regents of The University of Michigan * All rights reserved. * @@ -61,7 +73,7 @@ ElfObject::tryFile(const string &fname, int fd, size_t len, uint8_t *data) assert(elf != NULL); // Check that we actually have a elf file - if (gelf_getehdr(elf, &ehdr) ==0) { + if (gelf_getehdr(elf, &ehdr) == 0) { DPRINTFR(Loader, "Not ELF\n"); elf_end(elf); return NULL; @@ -94,23 +106,27 @@ ElfObject::tryFile(const string &fname, int fd, size_t len, uint8_t *data) } else if (ehdr.e_machine == EM_386 && ehdr.e_ident[EI_CLASS] == ELFCLASS32) { arch = ObjectFile::I386; - } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) { - arch = ObjectFile::Alpha; - } else if (ehdr.e_machine == EM_ARM) { + } else if (ehdr.e_machine == EM_ARM && + ehdr.e_ident[EI_CLASS] == ELFCLASS32) { if (bits(ehdr.e_entry, 0)) { arch = ObjectFile::Thumb; } else { arch = ObjectFile::Arm; } + } else if ((ehdr.e_machine == EM_AARCH64) && + ehdr.e_ident[EI_CLASS] == ELFCLASS64) { + arch = ObjectFile::Arm64; + } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) { + arch = ObjectFile::Alpha; } else if (ehdr.e_machine == EM_PPC && ehdr.e_ident[EI_CLASS] == ELFCLASS32) { - if (ehdr.e_ident[EI_DATA] == ELFDATA2MSB) { - arch = ObjectFile::Power; - } else { - fatal("The binary you're trying to load is compiled for " + if (ehdr.e_ident[EI_DATA] == ELFDATA2MSB) { + arch = ObjectFile::Power; + } else { + fatal("The binary you're trying to load is compiled for " "little endian Power.\nM5 only supports big " "endian Power. Please recompile your binary.\n"); - } + } } else if (ehdr.e_machine == EM_PPC64) { fatal("The binary you're trying to load is compiled for 64-bit " "Power. M5\n only supports 32-bit Power. Please " @@ -121,9 +137,7 @@ ElfObject::tryFile(const string &fname, int fd, size_t len, uint8_t *data) } //Detect the operating system - switch (ehdr.e_ident[EI_OSABI]) - { - + switch (ehdr.e_ident[EI_OSABI]) { case ELFOSABI_LINUX: opSys = ObjectFile::Linux; break; @@ -206,7 +220,8 @@ ElfObject::tryFile(const string &fname, int fd, size_t len, uint8_t *data) if(phdr.p_offset <= e_phoff && phdr.p_offset + phdr.p_filesz > e_phoff) { - result->_programHeaderTable = phdr.p_paddr + e_phoff; + result->_programHeaderTable = + phdr.p_paddr + (e_phoff - phdr.p_offset); break; } } @@ -423,15 +438,15 @@ ElfObject::loadWeakSymbols(SymbolTable *symtab, Addr addrMask) } bool -ElfObject::loadSections(PortProxy& memProxy, Addr addrMask) +ElfObject::loadSections(PortProxy& memProxy, Addr addrMask, Addr offset) { - if (!ObjectFile::loadSections(memProxy, addrMask)) + if (!ObjectFile::loadSections(memProxy, addrMask, offset)) return false; vector<Segment>::iterator extraIt; for (extraIt = extraSegments.begin(); extraIt != extraSegments.end(); extraIt++) { - if (!loadSection(&(*extraIt), memProxy, addrMask)) { + if (!loadSection(&(*extraIt), memProxy, addrMask, offset)) { return false; } } diff --git a/src/base/loader/elf_object.hh b/src/base/loader/elf_object.hh index d3d3e5197..84b73b0a8 100644 --- a/src/base/loader/elf_object.hh +++ b/src/base/loader/elf_object.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2003-2005 The Regents of The University of Michigan * All rights reserved. * @@ -66,7 +78,8 @@ class ElfObject : public ObjectFile virtual ~ElfObject() {} bool loadSections(PortProxy& memProxy, - Addr addrMask = std::numeric_limits<Addr>::max()); + Addr addrMask = std::numeric_limits<Addr>::max(), + Addr offset = 0); virtual bool loadGlobalSymbols(SymbolTable *symtab, Addr addrMask = std::numeric_limits<Addr>::max()); virtual bool loadLocalSymbols(SymbolTable *symtab, Addr addrMask = diff --git a/src/base/loader/object_file.cc b/src/base/loader/object_file.cc index b9f84283b..170e18d5e 100644 --- a/src/base/loader/object_file.cc +++ b/src/base/loader/object_file.cc @@ -66,10 +66,10 @@ ObjectFile::~ObjectFile() bool -ObjectFile::loadSection(Section *sec, PortProxy& memProxy, Addr addrMask) +ObjectFile::loadSection(Section *sec, PortProxy& memProxy, Addr addrMask, Addr offset) { if (sec->size != 0) { - Addr addr = sec->baseAddr & addrMask; + Addr addr = (sec->baseAddr & addrMask) + offset; if (sec->fileImage) { memProxy.writeBlob(addr, sec->fileImage, sec->size); } @@ -83,11 +83,11 @@ ObjectFile::loadSection(Section *sec, PortProxy& memProxy, Addr addrMask) bool -ObjectFile::loadSections(PortProxy& memProxy, Addr addrMask) +ObjectFile::loadSections(PortProxy& memProxy, Addr addrMask, Addr offset) { - return (loadSection(&text, memProxy, addrMask) - && loadSection(&data, memProxy, addrMask) - && loadSection(&bss, memProxy, addrMask)); + return (loadSection(&text, memProxy, addrMask, offset) + && loadSection(&data, memProxy, addrMask, offset) + && loadSection(&bss, memProxy, addrMask, offset)); } diff --git a/src/base/loader/object_file.hh b/src/base/loader/object_file.hh index bdc9a31a1..09cde5b53 100644 --- a/src/base/loader/object_file.hh +++ b/src/base/loader/object_file.hh @@ -52,6 +52,7 @@ class ObjectFile Mips, X86_64, I386, + Arm64, Arm, Thumb, Power @@ -84,7 +85,8 @@ class ObjectFile void close(); virtual bool loadSections(PortProxy& memProxy, Addr addrMask = - std::numeric_limits<Addr>::max()); + std::numeric_limits<Addr>::max(), + Addr offset = 0); virtual bool loadGlobalSymbols(SymbolTable *symtab, Addr addrMask = std::numeric_limits<Addr>::max()) = 0; virtual bool loadLocalSymbols(SymbolTable *symtab, Addr addrMask = @@ -114,7 +116,8 @@ class ObjectFile Section data; Section bss; - bool loadSection(Section *sec, PortProxy& memProxy, Addr addrMask); + bool loadSection(Section *sec, PortProxy& memProxy, Addr addrMask, + Addr offset = 0); void setGlobalPointer(Addr global_ptr) { globalPtr = global_ptr; } public: diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py index cd82207cd..652af0b80 100644 --- a/src/cpu/BaseCPU.py +++ b/src/cpu/BaseCPU.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012 ARM Limited +# Copyright (c) 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -76,7 +76,7 @@ elif buildEnv['TARGET_ISA'] == 'mips': from MipsISA import MipsISA isa_class = MipsISA elif buildEnv['TARGET_ISA'] == 'arm': - from ArmTLB import ArmTLB + from ArmTLB import ArmTLB, ArmStage2IMMU, ArmStage2DMMU from ArmInterrupts import ArmInterrupts from ArmISA import ArmISA isa_class = ArmISA @@ -171,6 +171,8 @@ class BaseCPU(MemObject): elif buildEnv['TARGET_ISA'] == 'arm': dtb = Param.ArmTLB(ArmTLB(), "Data TLB") itb = Param.ArmTLB(ArmTLB(), "Instruction TLB") + istage2_mmu = Param.ArmStage2MMU(ArmStage2IMMU(), "Stage 2 trans") + dstage2_mmu = Param.ArmStage2MMU(ArmStage2DMMU(), "Stage 2 trans") interrupts = Param.ArmInterrupts( NULL, "Interrupt Controller") isa = VectorParam.ArmISA([ isa_class() ], "ISA instance") @@ -211,6 +213,9 @@ class BaseCPU(MemObject): if buildEnv['TARGET_ISA'] in ['x86', 'arm']: _cached_ports += ["itb.walker.port", "dtb.walker.port"] + if buildEnv['TARGET_ISA'] in ['arm']: + _cached_ports += ["istage2_mmu.stage2_tlb.walker.port", + "dstage2_mmu.stage2_tlb.walker.port"] _uncached_slave_ports = [] _uncached_master_ports = [] @@ -267,18 +272,35 @@ class BaseCPU(MemObject): if iwc and dwc: self.itb_walker_cache = iwc self.dtb_walker_cache = dwc - self.itb.walker.port = iwc.cpu_side - self.dtb.walker.port = dwc.cpu_side + if buildEnv['TARGET_ISA'] in ['arm']: + self.itb_walker_cache_bus = CoherentBus() + self.dtb_walker_cache_bus = CoherentBus() + self.itb_walker_cache_bus.master = iwc.cpu_side + self.dtb_walker_cache_bus.master = dwc.cpu_side + self.itb.walker.port = self.itb_walker_cache_bus.slave + self.dtb.walker.port = self.dtb_walker_cache_bus.slave + self.istage2_mmu.stage2_tlb.walker.port = self.itb_walker_cache_bus.slave + self.dstage2_mmu.stage2_tlb.walker.port = self.dtb_walker_cache_bus.slave + else: + self.itb.walker.port = iwc.cpu_side + self.dtb.walker.port = dwc.cpu_side self._cached_ports += ["itb_walker_cache.mem_side", \ "dtb_walker_cache.mem_side"] else: self._cached_ports += ["itb.walker.port", "dtb.walker.port"] + if buildEnv['TARGET_ISA'] in ['arm']: + self._cached_ports += ["istage2_mmu.stage2_tlb.walker.port", \ + "dstage2_mmu.stage2_tlb.walker.port"] + # Checker doesn't need its own tlb caches because it does # functional accesses only if self.checker != NULL: self._cached_ports += ["checker.itb.walker.port", \ "checker.dtb.walker.port"] + if buildEnv['TARGET_ISA'] in ['arm']: + self._cached_ports += ["checker.istage2_mmu.stage2_tlb.walker.port", \ + "checker.dstage2_mmu.stage2_tlb.walker.port"] def addTwoLevelCacheHierarchy(self, ic, dc, l2c, iwc = None, dwc = None): self.addPrivateSplitL1Caches(ic, dc, iwc, dwc) diff --git a/src/dev/arm/RealView.py b/src/dev/arm/RealView.py index b3c14580e..3c9c22ecc 100644 --- a/src/dev/arm/RealView.py +++ b/src/dev/arm/RealView.py @@ -1,4 +1,4 @@ -# Copyright (c) 2009-2012 ARM Limited +# Copyright (c) 2009-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -88,6 +88,17 @@ class RealViewCtrl(BasicPioDevice): proc_id1 = Param.UInt32(0x0C000222, "Processor ID, SYS_PROCID1") idreg = Param.UInt32(0x00000000, "ID Register, SYS_ID") +class VGic(PioDevice): + type = 'VGic' + cxx_header = "dev/arm/vgic.hh" + gic = Param.BaseGic(Parent.any, "Gic to use for interrupting") + platform = Param.Platform(Parent.any, "Platform this device is part of.") + vcpu_addr = Param.Addr(0, "Address for vcpu interfaces") + hv_addr = Param.Addr(0, "Address for hv control") + pio_delay = Param.Latency('10ns', "Delay for PIO r/w") + # The number of list registers is not currently configurable at runtime. + ppint = Param.UInt32("HV maintenance interrupt number") + class AmbaFake(AmbaPioDevice): type = 'AmbaFake' cxx_header = "dev/arm/amba_fake.hh" @@ -119,6 +130,15 @@ class CpuLocalTimer(BasicPioDevice): int_num_timer = Param.UInt32("Interrrupt number used per-cpu to GIC") int_num_watchdog = Param.UInt32("Interrupt number for per-cpu watchdog to GIC") +class GenericTimer(SimObject): + type = 'GenericTimer' + cxx_header = "dev/arm/generic_timer.hh" + system = Param.System(Parent.any, "system") + gic = Param.BaseGic(Parent.any, "GIC to use for interrupting") + int_num = Param.UInt32("Interrupt number used per-cpu to GIC") + # @todo: for now only one timer per CPU is supported, which is the + # normal behaviour when Security and Virt. extensions are disabled. + class PL031(AmbaIntDevice): type = 'PL031' cxx_header = "dev/arm/rtc_pl031.hh" @@ -166,6 +186,9 @@ class RealView(Platform): conf_table_reported = False) self.nvmem.port = mem_bus.master cur_sys.boot_loader = loc('boot.arm') + cur_sys.atags_addr = 0x100 + cur_sys.load_addr_mask = 0xfffffff + cur_sys.load_offset = 0 # Reference for memory map and interrupt number @@ -340,12 +363,14 @@ class VExpress_EMM(RealView): realview_io = RealViewCtrl(proc_id0=0x14000000, proc_id1=0x14000000, pio_addr=0x1C010000) gic = Pl390(dist_addr=0x2C001000, cpu_addr=0x2C002000) local_cpu_timer = CpuLocalTimer(int_num_timer=29, int_num_watchdog=30, pio_addr=0x2C080000) + generic_timer = GenericTimer(int_num=29) timer0 = Sp804(int_num0=34, int_num1=34, pio_addr=0x1C110000, clock0='1MHz', clock1='1MHz') timer1 = Sp804(int_num0=35, int_num1=35, pio_addr=0x1C120000, clock0='1MHz', clock1='1MHz') clcd = Pl111(pio_addr=0x1c1f0000, int_num=46) hdlcd = HDLcd(pio_addr=0x2b000000, int_num=117) kmi0 = Pl050(pio_addr=0x1c060000, int_num=44) kmi1 = Pl050(pio_addr=0x1c070000, int_num=45, is_mouse=True) + vgic = VGic(vcpu_addr=0x2c006000, hv_addr=0x2c004000, ppint=25) cf_ctrl = IdeController(disks=[], pci_func=0, pci_dev=0, pci_bus=2, io_shift = 2, ctrl_offset = 2, Command = 0x1, BAR0 = 0x1C1A0000, BAR0Size = '256B', @@ -380,7 +405,9 @@ class VExpress_EMM(RealView): conf_table_reported = False) self.nvmem.port = mem_bus.master cur_sys.boot_loader = loc('boot_emm.arm') - cur_sys.atags_addr = 0x80000100 + cur_sys.atags_addr = 0x8000000 + cur_sys.load_addr_mask = 0xfffffff + cur_sys.load_offset = 0x80000000 # Attach I/O devices that are on chip and also set the appropriate # ranges for the bridge @@ -396,6 +423,8 @@ class VExpress_EMM(RealView): AddrRange(0x40000000, size='512MB'), AddrRange(0x18000000, size='64MB'), AddrRange(0x1C000000, size='64MB')] + self.vgic.pio = bus.master + # Attach I/O devices to specified bus object. Can't do this # earlier, since the bus object itself is typically defined at the @@ -435,3 +464,13 @@ class VExpress_EMM(RealView): self.usb_fake.pio = bus.master self.mmc_fake.pio = bus.master +class VExpress_EMM64(VExpress_EMM): + def setupBootLoader(self, mem_bus, cur_sys, loc): + self.nvmem = SimpleMemory(range = AddrRange(0, size = '64MB')) + self.nvmem.port = mem_bus.master + cur_sys.boot_loader = loc('boot_emm.arm64') + cur_sys.atags_addr = 0x8000000 + cur_sys.load_addr_mask = 0xfffffff + cur_sys.load_offset = 0x80000000 + + diff --git a/src/dev/arm/SConscript b/src/dev/arm/SConscript index 68779ec64..419e2f471 100644 --- a/src/dev/arm/SConscript +++ b/src/dev/arm/SConscript @@ -1,6 +1,6 @@ # -*- mode:python -*- -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -47,6 +47,7 @@ if env['TARGET_ISA'] == 'arm': Source('amba_device.cc') Source('amba_fake.cc') Source('base_gic.cc') + Source('generic_timer.cc') Source('gic_pl390.cc') Source('pl011.cc') Source('pl111.cc') @@ -57,6 +58,7 @@ if env['TARGET_ISA'] == 'arm': Source('realview.cc') Source('rtc_pl031.cc') Source('timer_cpulocal.cc') + Source('vgic.cc') DebugFlag('AMBA') DebugFlag('HDLcd') @@ -64,3 +66,4 @@ if env['TARGET_ISA'] == 'arm': DebugFlag('Pl050') DebugFlag('GIC') DebugFlag('RVCTRL') + DebugFlag('VGIC') diff --git a/src/dev/arm/generic_timer.cc b/src/dev/arm/generic_timer.cc new file mode 100644 index 000000000..555c1050f --- /dev/null +++ b/src/dev/arm/generic_timer.cc @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2013 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Giacomo Gabrielli + */ + +#include "arch/arm/system.hh" +#include "debug/Checkpoint.hh" +#include "debug/Timer.hh" +#include "dev/arm/base_gic.hh" +#include "dev/arm/generic_timer.hh" + +void +GenericTimer::SystemCounter::setFreq(uint32_t freq) +{ + if (_freq != 0) { + // Altering the frequency after boot shouldn't be done in practice. + warn_once("The frequency of the system counter has already been set"); + } + _freq = freq; + _period = (1.0 / freq) * SimClock::Frequency; + _resetTick = curTick(); +} + +void +GenericTimer::SystemCounter::serialize(std::ostream &os) +{ + SERIALIZE_SCALAR(_freq); + SERIALIZE_SCALAR(_period); + SERIALIZE_SCALAR(_resetTick); +} + +void +GenericTimer::SystemCounter::unserialize(Checkpoint *cp, + const std::string §ion) +{ + UNSERIALIZE_SCALAR(_freq); + UNSERIALIZE_SCALAR(_period); + UNSERIALIZE_SCALAR(_resetTick); +} + +void +GenericTimer::ArchTimer::counterLimitReached() +{ + _control.istatus = 1; + + if (!_control.enable) + return; + + // DPRINTF(Timer, "Counter limit reached\n"); + + if (!_control.imask) { + // DPRINTF(Timer, "Causing interrupt\n"); + _parent->_gic->sendPPInt(_intNum, _cpuNum); + } +} + +void +GenericTimer::ArchTimer::setCompareValue(uint64_t val) +{ + _counterLimit = val; + if (_counterLimitReachedEvent.scheduled()) + _parent->deschedule(_counterLimitReachedEvent); + if (counterValue() >= _counterLimit) { + counterLimitReached(); + } else { + _control.istatus = 0; + _parent->schedule(_counterLimitReachedEvent, + curTick() + (_counterLimit - counterValue()) * _counter->period()); + } +} + +void +GenericTimer::ArchTimer::setTimerValue(uint32_t val) +{ + setCompareValue(counterValue() + sext<32>(val)); +} + +void +GenericTimer::ArchTimer::setControl(uint32_t val) +{ + ArchTimerCtrl new_ctl = val; + if ((new_ctl.enable && !new_ctl.imask) && + !(_control.enable && !_control.imask)) { + // Re-evalute the timer condition + if (_counterLimit >= counterValue()) { + _control.istatus = 1; + + DPRINTF(Timer, "Causing interrupt in control\n"); + //_parent->_gic->sendPPInt(_intNum, _cpuNum); + } + } + _control.enable = new_ctl.enable; + _control.imask = new_ctl.imask; +} + +void +GenericTimer::ArchTimer::serialize(std::ostream &os) +{ + SERIALIZE_SCALAR(_cpuNum); + SERIALIZE_SCALAR(_intNum); + uint32_t control_serial = _control; + SERIALIZE_SCALAR(control_serial); + SERIALIZE_SCALAR(_counterLimit); + bool event_scheduled = _counterLimitReachedEvent.scheduled(); + SERIALIZE_SCALAR(event_scheduled); + Tick event_time; + if (event_scheduled) { + event_time = _counterLimitReachedEvent.when(); + SERIALIZE_SCALAR(event_time); + } +} + +void +GenericTimer::ArchTimer::unserialize(Checkpoint *cp, const std::string §ion) +{ + UNSERIALIZE_SCALAR(_cpuNum); + UNSERIALIZE_SCALAR(_intNum); + uint32_t control_serial; + UNSERIALIZE_SCALAR(control_serial); + _control = control_serial; + bool event_scheduled; + UNSERIALIZE_SCALAR(event_scheduled); + Tick event_time; + if (event_scheduled) { + UNSERIALIZE_SCALAR(event_time); + _parent->schedule(_counterLimitReachedEvent, event_time); + } +} + +GenericTimer::GenericTimer(Params *p) + : SimObject(p), _gic(p->gic) +{ + for (int i = 0; i < CPU_MAX; ++i) { + std::stringstream oss; + oss << name() << ".arch_timer" << i; + _archTimers[i]._name = oss.str(); + _archTimers[i]._parent = this; + _archTimers[i]._counter = &_systemCounter; + _archTimers[i]._cpuNum = i; + _archTimers[i]._intNum = p->int_num; + } + + ((ArmSystem *) p->system)->setGenericTimer(this); +} + +void +GenericTimer::serialize(std::ostream &os) +{ + nameOut(os, csprintf("%s.sys_counter", name())); + _systemCounter.serialize(os); + for (int i = 0; i < CPU_MAX; ++i) { + nameOut(os, csprintf("%s.arch_timer%d", name(), i)); + _archTimers[i].serialize(os); + } +} + +void +GenericTimer::unserialize(Checkpoint *cp, const std::string §ion) +{ + _systemCounter.unserialize(cp, csprintf("%s.sys_counter", section)); + for (int i = 0; i < CPU_MAX; ++i) { + _archTimers[i].unserialize(cp, csprintf("%s.arch_timer%d", section, i)); + } +} + +GenericTimer * +GenericTimerParams::create() +{ + return new GenericTimer(this); +} diff --git a/src/dev/arm/generic_timer.hh b/src/dev/arm/generic_timer.hh new file mode 100644 index 000000000..bc43f8b3b --- /dev/null +++ b/src/dev/arm/generic_timer.hh @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2013 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Giacomo Gabrielli + */ + +#ifndef __DEV_ARM_GENERIC_TIMER_HH__ +#define __DEV_ARM_GENERIC_TIMER_HH__ + +#include "base/bitunion.hh" +#include "params/GenericTimer.hh" +#include "sim/core.hh" +#include "sim/sim_object.hh" + +/// @file +/// This module implements the global system counter and the local per-CPU +/// architected timers as specified by the ARM Generic Timer extension (ARM +/// ARM, Issue C, Chapter 17). + +class Checkpoint; +class BaseGic; + +/// Wrapper around the actual counters and timers of the Generic Timer +/// extension. +class GenericTimer : public SimObject +{ + public: + + /// Global system counter. It is shared by the architected timers. + /// @todo: implement memory-mapped controls + class SystemCounter + { + protected: + /// Counter frequency (as specified by CNTFRQ). + uint64_t _freq; + /// Cached copy of the counter period (inverse of the frequency). + Tick _period; + /// Tick when the counter was reset. + Tick _resetTick; + + public: + /// Ctor. + SystemCounter() + : _freq(0), _period(0), _resetTick(0) + { + setFreq(0x01800000); + } + + /// Returns the current value of the physical counter. + uint64_t value() const + { + if (_freq == 0) + return 0; // Counter is still off. + return (curTick() - _resetTick) / _period; + } + + /// Returns the counter frequency. + uint64_t freq() const { return _freq; } + /// Sets the counter frequency. + /// @param freq frequency in Hz. + void setFreq(uint32_t freq); + + /// Returns the counter period. + Tick period() const { return _period; } + + void serialize(std::ostream &os); + void unserialize(Checkpoint *cp, const std::string §ion); + }; + + /// Per-CPU architected timer. + class ArchTimer + { + protected: + /// Control register. + BitUnion32(ArchTimerCtrl) + Bitfield<0> enable; + Bitfield<1> imask; + Bitfield<2> istatus; + EndBitUnion(ArchTimerCtrl) + + /// Name of this timer. + std::string _name; + /// Pointer to parent class. + GenericTimer *_parent; + /// Pointer to the global system counter. + SystemCounter *_counter; + /// ID of the CPU this timer is attached to. + int _cpuNum; + /// ID of the interrupt to be triggered. + int _intNum; + /// Cached value of the control register ({CNTP/CNTHP/CNTV}_CTL). + ArchTimerCtrl _control; + /// Programmed limit value for the upcounter ({CNTP/CNTHP/CNTV}_CVAL). + uint64_t _counterLimit; + + /// Called when the upcounter reaches the programmed value. + void counterLimitReached(); + EventWrapper<ArchTimer, &ArchTimer::counterLimitReached> + _counterLimitReachedEvent; + + /// Returns the value of the counter which this timer relies on. + uint64_t counterValue() const { return _counter->value(); } + + public: + /// Ctor. + ArchTimer() + : _control(0), _counterLimit(0), _counterLimitReachedEvent(this) + {} + + /// Returns the timer name. + std::string name() const { return _name; } + + /// Returns the CompareValue view of the timer. + uint64_t compareValue() const { return _counterLimit; } + /// Sets the CompareValue view of the timer. + void setCompareValue(uint64_t val); + + /// Returns the TimerValue view of the timer. + uint32_t timerValue() const { return _counterLimit - counterValue(); } + /// Sets the TimerValue view of the timer. + void setTimerValue(uint32_t val); + + /// Sets the control register. + uint32_t control() const { return _control; } + void setControl(uint32_t val); + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + friend class GenericTimer; + }; + + protected: + + static const int CPU_MAX = 8; + + /// Pointer to the GIC, needed to trigger timer interrupts. + BaseGic *_gic; + /// System counter. + SystemCounter _systemCounter; + /// Per-CPU architected timers. + // @todo: this would become a 2-dim. array with Security and Virt. + ArchTimer _archTimers[CPU_MAX]; + + public: + typedef GenericTimerParams Params; + const Params * + params() const + { + return dynamic_cast<const Params *>(_params); + } + + /// Ctor. + GenericTimer(Params *p); + + /// Returns a pointer to the system counter. + SystemCounter *getSystemCounter() { return &_systemCounter; } + + /// Returns a pointer to the architected timer for cpu_id. + ArchTimer *getArchTimer(int cpu_id) { return &_archTimers[cpu_id]; } + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string §ion); +}; + +#endif // __DEV_ARM_GENERIC_TIMER_HH__ diff --git a/src/dev/arm/gic_pl390.cc b/src/dev/arm/gic_pl390.cc index d2a660e88..7fc65b2b7 100644 --- a/src/dev/arm/gic_pl390.cc +++ b/src/dev/arm/gic_pl390.cc @@ -56,7 +56,8 @@ Pl390::Pl390(const Params *p) : BaseGic(p), distAddr(p->dist_addr), cpuAddr(p->cpu_addr), distPioDelay(p->dist_pio_delay), cpuPioDelay(p->cpu_pio_delay), intLatency(p->int_latency), - enabled(false), itLines(p->it_lines), msixRegAddr(p->msix_addr), + enabled(false), itLines(p->it_lines), irqEnable(false), + msixRegAddr(p->msix_addr), msixReg(0x0) { itLinesLog2 = ceilLog2(itLines); diff --git a/src/dev/arm/vgic.cc b/src/dev/arm/vgic.cc new file mode 100644 index 000000000..2faf2030e --- /dev/null +++ b/src/dev/arm/vgic.cc @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Matt Evans + */ + +#include "base/trace.hh" +#include "debug/Checkpoint.hh" +#include "debug/VGIC.hh" +#include "dev/arm/base_gic.hh" +#include "dev/arm/vgic.hh" +#include "dev/terminal.hh" +#include "mem/packet.hh" +#include "mem/packet_access.hh" + +VGic::VGic(const Params *p) + : PioDevice(p), platform(p->platform), gic(p->gic), vcpuAddr(p->vcpu_addr), + hvAddr(p->hv_addr), pioDelay(p->pio_delay), + maintInt(p->ppint) +{ + for (int x = 0; x < VGIC_CPU_MAX; x++) { + postVIntEvent[x] = new PostVIntEvent(x, p->platform); + maintIntPosted[x] = false; + vIntPosted[x] = false; + } + for (int c = 0; c < VGIC_CPU_MAX; c++) { + memset(&vcpuData[c], 0, sizeof(struct vcpuIntData)); + } + assert(sys->numRunningContexts() <= VGIC_CPU_MAX); +} + +Tick +VGic::read(PacketPtr pkt) +{ + Addr addr = pkt->getAddr(); + + if (addr >= vcpuAddr && addr < vcpuAddr + GICV_SIZE) + return readVCpu(pkt); + else if (addr >= hvAddr && addr < hvAddr + GICH_REG_SIZE) + return readCtrl(pkt); + else + panic("Read to unknown address %#x\n", pkt->getAddr()); +} + +Tick +VGic::write(PacketPtr pkt) +{ + Addr addr = pkt->getAddr(); + + if (addr >= vcpuAddr && addr < vcpuAddr + GICV_SIZE) + return writeVCpu(pkt); + else if (addr >= hvAddr && addr < hvAddr + GICH_REG_SIZE) + return writeCtrl(pkt); + else + panic("Write to unknown address %#x\n", pkt->getAddr()); +} + +Tick +VGic::readVCpu(PacketPtr pkt) +{ + Addr daddr = pkt->getAddr() - vcpuAddr; + pkt->allocate(); + + int ctx_id = pkt->req->contextId(); + assert(ctx_id < VGIC_CPU_MAX); + struct vcpuIntData *vid = &vcpuData[ctx_id]; + + DPRINTF(VGIC, "VGIC VCPU read register %#x\n", daddr); + + switch (daddr) { + case GICV_CTLR: + pkt->set<uint32_t>(vid->vctrl); + break; + case GICV_IAR: { + int i = findHighestPendingLR(vid); + if (i < 0 || !vid->vctrl.En) { + pkt->set<uint32_t>(1023); // "No int" marker + } else { + ListReg *lr = &vid->LR[i]; + + pkt->set<uint32_t>(lr->VirtualID | + (((int)lr->CpuID) << 10)); + // We don't support auto-EOI of HW interrupts via real GIC! + // Fortunately, KVM doesn't use this. How about Xen...? Ulp! + if (lr->HW) + panic("VGIC does not support 'HW' List Register feature (LR %#x)!\n", + *lr); + lr->State = LR_ACTIVE; + DPRINTF(VGIC, "Consumed interrupt %d (cpu%d) from LR%d (EOI%d)\n", + lr->VirtualID, lr->CpuID, i, lr->EOI); + } + } break; + default: + panic("VGIC VCPU read of bad address %#x\n", daddr); + } + + updateIntState(ctx_id); + + pkt->makeAtomicResponse(); + return pioDelay; +} + +Tick +VGic::readCtrl(PacketPtr pkt) +{ + Addr daddr = pkt->getAddr() - hvAddr; + pkt->allocate(); + + int ctx_id = pkt->req->contextId(); + + DPRINTF(VGIC, "VGIC HVCtrl read register %#x\n", daddr); + + /* Munge the address: 0-0xfff is the usual space banked by requester CPU. + * Anything > that is 0x200-sized slices of 'per CPU' regs. + */ + if (daddr & ~0x1ff) { + ctx_id = (daddr >> 9); + if (ctx_id > 8) + panic("VGIC: Weird unbanked hv ctrl address %#x!\n", daddr); + daddr &= ~0x1ff; + } + assert(ctx_id < VGIC_CPU_MAX); + struct vcpuIntData *vid = &vcpuData[ctx_id]; + + switch (daddr) { + case GICH_HCR: + pkt->set<uint32_t>(vid->hcr); + break; + + case GICH_VTR: + pkt->set<uint32_t>(0x44000000 | (NUM_LR - 1)); + break; + + case GICH_VMCR: + pkt->set<uint32_t>( + ((uint32_t)vid->VMPriMask << 27) | + ((uint32_t)vid->VMBP << 21) | + ((uint32_t)vid->VMABP << 18) | + ((uint32_t)vid->VEM << 9) | + ((uint32_t)vid->VMCBPR << 4) | + ((uint32_t)vid->VMFiqEn << 3) | + ((uint32_t)vid->VMAckCtl << 2) | + ((uint32_t)vid->VMGrp1En << 1) | + ((uint32_t)vid->VMGrp0En << 0) + ); + break; + + case GICH_MISR: + pkt->set<uint32_t>(getMISR(vid)); + break; + + case GICH_EISR0: + pkt->set<uint32_t>(vid->eisr & 0xffffffff); + break; + + case GICH_EISR1: + pkt->set<uint32_t>(vid->eisr >> 32); + break; + + case GICH_ELSR0: { + uint32_t bm = 0; + for (int i = 0; i < ((NUM_LR < 32) ? NUM_LR : 32); i++) { + if (!vid->LR[i].State) + bm |= 1 << i; + } + pkt->set<uint32_t>(bm); + } break; + + case GICH_ELSR1: { + uint32_t bm = 0; + for (int i = 32; i < NUM_LR; i++) { + if (!vid->LR[i].State) + bm |= 1 << (i-32); + } + pkt->set<uint32_t>(bm); + } break; + + case GICH_APR0: + warn_once("VGIC GICH_APR read!\n"); + pkt->set<uint32_t>(0); + break; + + case GICH_LR0: + case GICH_LR1: + case GICH_LR2: + case GICH_LR3: + pkt->set<uint32_t>(vid->LR[(daddr - GICH_LR0) >> 2]); + break; + + default: + panic("VGIC HVCtrl read of bad address %#x\n", daddr); + } + + pkt->makeAtomicResponse(); + return pioDelay; +} + +Tick +VGic::writeVCpu(PacketPtr pkt) +{ + Addr daddr = pkt->getAddr() - vcpuAddr; + pkt->allocate(); + + int ctx_id = pkt->req->contextId(); + assert(ctx_id < VGIC_CPU_MAX); + struct vcpuIntData *vid = &vcpuData[ctx_id]; + + DPRINTF(VGIC, "VGIC VCPU write register %#x <= %#x\n", daddr, pkt->get<uint32_t>()); + + switch (daddr) { + case GICV_CTLR: + vid->vctrl = pkt->get<uint32_t>(); + break; + case GICV_PMR: + vid->VMPriMask = pkt->get<uint32_t>(); + break; + case GICV_EOIR: { + // We don't handle the split EOI-then-DIR mode. Linux (guest) + // doesn't need it though. + assert(!vid->vctrl.EOImode); + uint32_t w = pkt->get<uint32_t>(); + unsigned int virq = w & 0x3ff; + unsigned int vcpu = (w >> 10) & 7; + int i = findLRForVIRQ(vid, virq, vcpu); + if (i < 0) { + DPRINTF(VGIC, "EOIR: No LR for irq %d(cpu%d)\n", virq, vcpu); + } else { + DPRINTF(VGIC, "EOIR: Found LR%d for irq %d(cpu%d)\n", i, virq, vcpu); + ListReg *lr = &vid->LR[i]; + lr->State = 0; + // Maintenance interrupt -- via eisr -- is flagged when + // LRs have EOI=1 and State=INVALID! + } + } break; + default: + panic("VGIC VCPU write %#x to unk address %#x\n", pkt->get<uint32_t>(), daddr); + } + + // This updates the EISRs and flags IRQs: + updateIntState(ctx_id); + + pkt->makeAtomicResponse(); + return pioDelay; +} + +Tick +VGic::writeCtrl(PacketPtr pkt) +{ + Addr daddr = pkt->getAddr() - hvAddr; + pkt->allocate(); + + int ctx_id = pkt->req->contextId(); + + DPRINTF(VGIC, "VGIC HVCtrl write register %#x <= %#x\n", daddr, pkt->get<uint32_t>()); + + /* Munge the address: 0-0xfff is the usual space banked by requester CPU. + * Anything > that is 0x200-sized slices of 'per CPU' regs. + */ + if (daddr & ~0x1ff) { + ctx_id = (daddr >> 9); + if (ctx_id > 8) + panic("VGIC: Weird unbanked hv ctrl address %#x!\n", daddr); + daddr &= ~0x1ff; + } + assert(ctx_id < VGIC_CPU_MAX); + struct vcpuIntData *vid = &vcpuData[ctx_id]; + + switch (daddr) { + case GICH_HCR: + vid->hcr = pkt->get<uint32_t>(); + // update int state + break; + + case GICH_VMCR: { + uint32_t d = pkt->get<uint32_t>(); + vid->VMPriMask = d >> 27; + vid->VMBP = (d >> 21) & 7; + vid->VMABP = (d >> 18) & 7; + vid->VEM = (d >> 9) & 1; + vid->VMCBPR = (d >> 4) & 1; + vid->VMFiqEn = (d >> 3) & 1; + vid->VMAckCtl = (d >> 2) & 1; + vid->VMGrp1En = (d >> 1) & 1; + vid->VMGrp0En = d & 1; + } break; + + case GICH_APR0: + warn_once("VGIC GICH_APR0 written, ignored\n"); + break; + + case GICH_LR0: + case GICH_LR1: + case GICH_LR2: + case GICH_LR3: + vid->LR[(daddr - GICH_LR0) >> 2] = pkt->get<uint32_t>(); + // update int state + break; + + default: + panic("VGIC HVCtrl write to bad address %#x\n", daddr); + } + + updateIntState(ctx_id); + + pkt->makeAtomicResponse(); + return pioDelay; +} + + +uint32_t +VGic::getMISR(struct vcpuIntData *vid) +{ + return (!!vid->hcr.VGrp1DIE && !vid->VMGrp1En ? 0x80 : 0) | + (!!vid->hcr.VGrp1EIE && vid->VMGrp1En ? 0x40 : 0) | + (!!vid->hcr.VGrp0DIE && !vid->VMGrp0En ? 0x20 : 0) | + (!!vid->hcr.VGrp0EIE && vid->VMGrp0En ? 0x10 : 0) | + (!!vid->hcr.NPIE && !lrPending(vid) ? 0x08 : 0) | + (!!vid->hcr.LRENPIE && vid->hcr.EOICount ? 0x04 : 0) | + (!!vid->hcr.UIE && lrValid(vid) <= 1 ? 0x02 : 0) | + (vid->eisr ? 0x01 : 0); +} + +void +VGic::postVInt(uint32_t cpu, Tick when) +{ + DPRINTF(VGIC, "Posting VIRQ to %d\n", cpu); + if (!(postVIntEvent[cpu]->scheduled())) + eventq->schedule(postVIntEvent[cpu], when); +} + +void +VGic::unPostVInt(uint32_t cpu) +{ + DPRINTF(VGIC, "Unposting VIRQ to %d\n", cpu); + platform->intrctrl->clear(cpu, ArmISA::INT_VIRT_IRQ, 0); +} + +void +VGic::postMaintInt(uint32_t cpu) +{ + DPRINTF(VGIC, "Posting maintenance PPI to GIC/cpu%d\n", cpu); + // Linux DT configures this as Level. + gic->sendPPInt(maintInt, cpu); +} + +void +VGic::unPostMaintInt(uint32_t cpu) +{ + DPRINTF(VGIC, "Unposting maintenance PPI to GIC/cpu%d\n", cpu); + gic->clearPPInt(maintInt, cpu); +} + +/* Update state (in general); something concerned with ctx_id has changed. + * This may raise a maintenance interrupt. + */ +void +VGic::updateIntState(int ctx_id) +{ + // @todo This should update APRs! + + // Build EISR contents: + // (Cached so that regs can read them without messing about again) + struct vcpuIntData *tvid = &vcpuData[ctx_id]; + + tvid->eisr = 0; + for (int i = 0; i < NUM_LR; i++) { + if (!tvid->LR[i].State && tvid->LR[i].EOI) { + tvid->eisr |= 1 << i; + } + } + + assert(sys->numRunningContexts() <= VGIC_CPU_MAX); + for (int i = 0; i < sys->numRunningContexts(); i++) { + struct vcpuIntData *vid = &vcpuData[i]; + // Are any LRs active that weren't before? + if (!vIntPosted[i]) { + if (lrPending(vid) && vid->vctrl.En) { + vIntPosted[i] = true; + postVInt(i, curTick() + 1); + } + } else if (!lrPending(vid)) { + vIntPosted[i] = false; + unPostVInt(i); + } + + // Any maintenance ints to send? + if (!maintIntPosted[i]) { + if (vid->hcr.En && getMISR(vid)) { + maintIntPosted[i] = true; + postMaintInt(i); + } + } else { + if (!vid->hcr.En || !getMISR(vid)) { + unPostMaintInt(i); + maintIntPosted[i] = false; + } + } + } +} + +AddrRangeList +VGic::getAddrRanges() const +{ + AddrRangeList ranges; + ranges.push_back(RangeSize(hvAddr, GICH_REG_SIZE)); + ranges.push_back(RangeSize(vcpuAddr, GICV_SIZE)); + return ranges; +} + +void +VGic::serialize(std::ostream &os) +{ + Tick interrupt_time[VGIC_CPU_MAX]; + for (uint32_t cpu = 0; cpu < VGIC_CPU_MAX; cpu++) { + interrupt_time[cpu] = 0; + if (postVIntEvent[cpu]->scheduled()) { + interrupt_time[cpu] = postVIntEvent[cpu]->when(); + } + } + + DPRINTF(Checkpoint, "Serializing VGIC\n"); + + SERIALIZE_ARRAY(interrupt_time, VGIC_CPU_MAX); + SERIALIZE_ARRAY(maintIntPosted, VGIC_CPU_MAX); + SERIALIZE_ARRAY(vIntPosted, VGIC_CPU_MAX); + SERIALIZE_SCALAR(vcpuAddr); + SERIALIZE_SCALAR(hvAddr); + SERIALIZE_SCALAR(pioDelay); + SERIALIZE_SCALAR(maintInt); + + for (uint32_t cpu = 0; cpu < VGIC_CPU_MAX; cpu++) { + nameOut(os, csprintf("%s.vcpuData%d", name(), cpu)); + uint32_t vctrl_val = vcpuData[cpu].vctrl; + SERIALIZE_SCALAR(vctrl_val); + uint32_t hcr_val = vcpuData[cpu].hcr; + SERIALIZE_SCALAR(hcr_val); + uint64_t eisr_val = vcpuData[cpu].eisr; + SERIALIZE_SCALAR(eisr_val); + uint8_t VMGrp0En_val = vcpuData[cpu].VMGrp0En; + SERIALIZE_SCALAR(VMGrp0En_val); + uint8_t VMGrp1En_val = vcpuData[cpu].VMGrp1En; + SERIALIZE_SCALAR(VMGrp1En_val); + uint8_t VMAckCtl_val = vcpuData[cpu].VMAckCtl; + SERIALIZE_SCALAR(VMAckCtl_val); + uint8_t VMFiqEn_val = vcpuData[cpu].VMFiqEn; + SERIALIZE_SCALAR(VMFiqEn_val); + uint8_t VMCBPR_val = vcpuData[cpu].VMCBPR; + SERIALIZE_SCALAR(VMCBPR_val); + uint8_t VEM_val = vcpuData[cpu].VEM; + SERIALIZE_SCALAR(VEM_val); + uint8_t VMABP_val = vcpuData[cpu].VMABP; + SERIALIZE_SCALAR(VMABP_val); + uint8_t VMBP_val = vcpuData[cpu].VMBP; + SERIALIZE_SCALAR(VMBP_val); + uint8_t VMPriMask_val = vcpuData[cpu].VMPriMask; + SERIALIZE_SCALAR(VMPriMask_val); + + for (int i = 0; i < NUM_LR; i++) { + uint32_t lr = vcpuData[cpu].LR[i]; + nameOut(os, csprintf("%s.vcpuData%d.LR%d", name(), cpu, i)); + SERIALIZE_SCALAR(lr); + } + } +} + +void VGic::unserialize(Checkpoint *cp, const std::string §ion) +{ + DPRINTF(Checkpoint, "Unserializing Arm GIC\n"); + + Tick interrupt_time[VGIC_CPU_MAX]; + UNSERIALIZE_ARRAY(interrupt_time, VGIC_CPU_MAX); + for (uint32_t cpu = 0; cpu < VGIC_CPU_MAX; cpu++) { + if (interrupt_time[cpu]) + schedule(postVIntEvent[cpu], interrupt_time[cpu]); + + uint32_t tmp; + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "vctrl_val", tmp); + vcpuData[cpu].vctrl = tmp; + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "hcr_val", tmp); + vcpuData[cpu].hcr = tmp; + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "eisr_val", vcpuData[cpu].eisr); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMGrp0En_val", vcpuData[cpu].VMGrp0En); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMGrp1En_val", vcpuData[cpu].VMGrp1En); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMAckCtl_val", vcpuData[cpu].VMAckCtl); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMFiqEn_val", vcpuData[cpu].VMFiqEn); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMCBPR_val", vcpuData[cpu].VMCBPR); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VEM_val", vcpuData[cpu].VEM); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMABP_val", vcpuData[cpu].VMABP); + paramIn(cp, csprintf("%s.vcpuData%d", section, cpu), + "VMPriMask_val", vcpuData[cpu].VMPriMask); + + for (int i = 0; i < NUM_LR; i++) { + paramIn(cp, csprintf("%s.vcpuData%d.LR%d", section, cpu, i), + "lr", tmp); + vcpuData[cpu].LR[i] = tmp; + } + } + UNSERIALIZE_ARRAY(maintIntPosted, VGIC_CPU_MAX); + UNSERIALIZE_ARRAY(vIntPosted, VGIC_CPU_MAX); + UNSERIALIZE_SCALAR(vcpuAddr); + UNSERIALIZE_SCALAR(hvAddr); + UNSERIALIZE_SCALAR(pioDelay); + UNSERIALIZE_SCALAR(maintInt); +} + +VGic * +VGicParams::create() +{ + return new VGic(this); +} diff --git a/src/dev/arm/vgic.hh b/src/dev/arm/vgic.hh new file mode 100644 index 000000000..e1c4960e9 --- /dev/null +++ b/src/dev/arm/vgic.hh @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Matt Evans + */ + + +/** @file + * Implementiation of a GIC-400 List Register-based VGIC interface. + * The VGIC is, in this implementation, completely separate from the GIC itself. + * Only a VIRQ line to the CPU and a PPI line to the GIC (for a HV maintenance IRQ) + * is required. + * + * The mode in which the List Registers may flag (via LR.HW) that a hardware EOI + * is to be performed is NOT supported. (This requires tighter integration with + * the GIC.) + */ + +#ifndef __DEV_ARM_VGIC_H__ +#define __DEV_ARM_VGIC_H__ + +#include "base/addr_range.hh" +#include "base/bitunion.hh" +#include "cpu/intr_control.hh" +#include "dev/io_device.hh" +#include "dev/platform.hh" +#include "params/VGic.hh" + +class VGic : public PioDevice +{ + private: + static const int VGIC_CPU_MAX = 256; + static const int NUM_LR = 4; + + static const int GICH_SIZE = 0x200; + static const int GICH_REG_SIZE = 0x2000; + + static const int GICH_HCR = 0x000; + static const int GICH_VTR = 0x004; + static const int GICH_VMCR = 0x008; + static const int GICH_MISR = 0x010; + static const int GICH_EISR0 = 0x020; + static const int GICH_EISR1 = 0x024; + static const int GICH_ELSR0 = 0x030; + static const int GICH_ELSR1 = 0x034; + static const int GICH_APR0 = 0x0f0; + static const int GICH_LR0 = 0x100; + static const int GICH_LR1 = 0x104; + static const int GICH_LR2 = 0x108; + static const int GICH_LR3 = 0x10c; + + static const int GICV_SIZE = 0x2000; + static const int GICV_CTLR = 0x000; + static const int GICV_PMR = 0x004; + static const int GICV_BPR = 0x008; + static const int GICV_IAR = 0x00c; + static const int GICV_EOIR = 0x010; + static const int GICV_RPR = 0x014; + static const int GICV_HPPIR = 0x018; + static const int GICV_ABPR = 0x01c; + static const int GICV_AIAR = 0x020; + static const int GICV_AEOIR = 0x024; + static const int GICV_AHPPIR = 0x028; + static const int GICV_APR0 = 0x0d0; + static const int GICV_IIDR = 0x0fc; + static const int GICV_DIR = 0x1000; + + static const uint32_t LR_PENDING = 1; + static const uint32_t LR_ACTIVE = 2; + + /** Event definition to post interrupt to CPU after a delay + */ + class PostVIntEvent : public Event + { + private: + uint32_t cpu; + Platform *platform; + public: + PostVIntEvent( uint32_t c, Platform* p) + : cpu(c), platform(p) + { } + void process() { platform->intrctrl->post(cpu, ArmISA::INT_VIRT_IRQ, 0);} + const char *description() const { return "Post VInterrupt to CPU"; } + }; + + PostVIntEvent *postVIntEvent[VGIC_CPU_MAX]; + bool maintIntPosted[VGIC_CPU_MAX]; + bool vIntPosted[VGIC_CPU_MAX]; + + Platform *platform; + BaseGic *gic; + + Addr vcpuAddr; + Addr hvAddr; + Tick pioDelay; + int maintInt; + + BitUnion32(ListReg) + Bitfield<31> HW; + Bitfield<30> Grp1; + Bitfield<29,28> State; + Bitfield<27,23> Priority; + Bitfield<19> EOI; + Bitfield<12,10> CpuID; + Bitfield<9,0> VirtualID; + EndBitUnion(ListReg) + + BitUnion32(HCR) + Bitfield<31,27> EOICount; + Bitfield<7> VGrp1DIE; + Bitfield<6> VGrp1EIE; + Bitfield<5> VGrp0DIE; + Bitfield<4> VGrp0EIE; + Bitfield<3> NPIE; + Bitfield<2> LRENPIE; + Bitfield<1> UIE; + Bitfield<0> En; + EndBitUnion(HCR) + + BitUnion32(VCTLR) + Bitfield<9> EOImode; + Bitfield<4> CPBR; + Bitfield<3> FIQEn; + Bitfield<2> AckCtl; + Bitfield<1> EnGrp1; + Bitfield<0> En; // This gets written to enable, not group 1. + EndBitUnion(VCTLR) + + /* State per CPU. EVERYTHING should be in this struct and simply replicated + * N times. + */ + struct vcpuIntData { + ListReg LR[NUM_LR]; + VCTLR vctrl; + + HCR hcr; + uint64_t eisr; + + /* Host info, guest info (should be 100% accessible via GICH_* regs!) */ + uint8_t VMGrp0En; + uint8_t VMGrp1En; + uint8_t VMAckCtl; + uint8_t VMFiqEn; + uint8_t VMCBPR; + uint8_t VEM; + uint8_t VMABP; + uint8_t VMBP; + uint8_t VMPriMask; + }; + + struct vcpuIntData vcpuData[VGIC_CPU_MAX]; + + public: + typedef VGicParams Params; + const Params * + params() const + { + return dynamic_cast<const Params *>(_params); + } + VGic(const Params *p); + + virtual AddrRangeList getAddrRanges() const; + + virtual Tick read(PacketPtr pkt); + virtual Tick write(PacketPtr pkt); + + virtual void serialize(std::ostream &os); + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + private: + Tick readVCpu(PacketPtr pkt); + Tick readCtrl(PacketPtr pkt); + + Tick writeVCpu(PacketPtr pkt); + Tick writeCtrl(PacketPtr pkt); + + void updateIntState(int ctx_id); + uint32_t getMISR(struct vcpuIntData *vid); + void postVInt(uint32_t cpu, Tick when); + void unPostVInt(uint32_t cpu); + void postMaintInt(uint32_t cpu); + void unPostMaintInt(uint32_t cpu); + + unsigned int lrPending(struct vcpuIntData *vid) + { + unsigned int pend = 0; + for (int i = 0; i < NUM_LR; i++) { + if (vid->LR[i].State & LR_PENDING) + pend++; + } + return pend; + } + unsigned int lrValid(struct vcpuIntData *vid) + { + unsigned int valid = 0; + for (int i = 0; i < NUM_LR; i++) { + if (vid->LR[i].State) + valid++; + } + return valid; + } + + /** Returns LR index or -1 if none pending */ + int findHighestPendingLR(struct vcpuIntData *vid) + { + unsigned int prio = 0xff; + int p = -1; + for (int i = 0; i < NUM_LR; i++) { + if ((vid->LR[i].State & LR_PENDING) && (vid->LR[i].Priority < prio)) { + p = i; + prio = vid->LR[i].Priority; + } + } + return p; + } + + int findLRForVIRQ(struct vcpuIntData *vid, int virq, int vcpu) + { + for (int i = 0; i < NUM_LR; i++) { + if (vid->LR[i].State && + vid->LR[i].VirtualID == virq && + vid->LR[i].CpuID == vcpu) + return i; + } + return -1; + } +}; + +#endif diff --git a/src/sim/System.py b/src/sim/System.py index 302e2fa60..95162be89 100644 --- a/src/sim/System.py +++ b/src/sim/System.py @@ -86,4 +86,5 @@ class System(MemObject): readfile = Param.String("", "file to read startup script from") symbolfile = Param.String("", "file to get the symbols from") load_addr_mask = Param.UInt64(0xffffffffff, - "Address to mask loading binaries with"); + "Address to mask loading binaries with") + load_offset = Param.UInt64(0, "Address to offset loading binaries with") diff --git a/src/sim/process.cc b/src/sim/process.cc index 1654ea5c5..ccaac2096 100644 --- a/src/sim/process.cc +++ b/src/sim/process.cc @@ -695,15 +695,22 @@ LiveProcess::create(LiveProcessParams * params) fatal("Unknown/unsupported operating system."); } #elif THE_ISA == ARM_ISA - if (objFile->getArch() != ObjectFile::Arm && - objFile->getArch() != ObjectFile::Thumb) + ObjectFile::Arch arch = objFile->getArch(); + if (arch != ObjectFile::Arm && arch != ObjectFile::Thumb && + arch != ObjectFile::Arm64) fatal("Object file architecture does not match compiled ISA (ARM)."); switch (objFile->getOpSys()) { case ObjectFile::UnknownOpSys: warn("Unknown operating system; assuming Linux."); // fall through case ObjectFile::Linux: - process = new ArmLinuxProcess(params, objFile, objFile->getArch()); + if (arch == ObjectFile::Arm64) { + process = new ArmLinuxProcess64(params, objFile, + objFile->getArch()); + } else { + process = new ArmLinuxProcess32(params, objFile, + objFile->getArch()); + } break; case ObjectFile::LinuxArmOABI: fatal("M5 does not support ARM OABI binaries. Please recompile with an" diff --git a/src/sim/serialize.hh b/src/sim/serialize.hh index 6d4207090..bbf759cf6 100644 --- a/src/sim/serialize.hh +++ b/src/sim/serialize.hh @@ -58,7 +58,7 @@ class EventQueue; * SimObject shouldn't cause the version number to increase, only changes to * existing objects such as serializing/unserializing more state, changing sizes * of serialized arrays, etc. */ -static const uint64_t gem5CheckpointVersion = 0x0000000000000008; +static const uint64_t gem5CheckpointVersion = 0x0000000000000009; template <class T> void paramOut(std::ostream &os, const std::string &name, const T ¶m); diff --git a/src/sim/system.cc b/src/sim/system.cc index 7de483216..e2bf0a3d2 100644 --- a/src/sim/system.cc +++ b/src/sim/system.cc @@ -79,6 +79,7 @@ System::System(Params *p) init_param(p->init_param), physProxy(_systemPort, p->cache_line_size), loadAddrMask(p->load_addr_mask), + loadAddrOffset(p->load_offset), nextPID(0), physmem(name() + ".physmem", p->memories), memoryMode(p->mem_mode), @@ -274,14 +275,15 @@ System::initState() */ if (params()->kernel != "") { // Validate kernel mapping before loading binary - if (!(isMemAddr(kernelStart & loadAddrMask) && - isMemAddr(kernelEnd & loadAddrMask))) { + if (!(isMemAddr((kernelStart & loadAddrMask) + loadAddrOffset) && + isMemAddr((kernelEnd & loadAddrMask) + loadAddrOffset))) { fatal("Kernel is mapped to invalid location (not memory). " - "kernelStart 0x(%x) - kernelEnd 0x(%x)\n", kernelStart, - kernelEnd); + "kernelStart 0x(%x) - kernelEnd 0x(%x) %#x:%#x\n", kernelStart, + kernelEnd, (kernelStart & loadAddrMask) + loadAddrOffset, + (kernelEnd & loadAddrMask) + loadAddrOffset); } // Load program sections into memory - kernel->loadSections(physProxy, loadAddrMask); + kernel->loadSections(physProxy, loadAddrMask, loadAddrOffset); DPRINTF(Loader, "Kernel start = %#x\n", kernelStart); DPRINTF(Loader, "Kernel end = %#x\n", kernelEnd); diff --git a/src/sim/system.hh b/src/sim/system.hh index c8945c8c1..ecef2c4f2 100644 --- a/src/sim/system.hh +++ b/src/sim/system.hh @@ -237,6 +237,13 @@ class System : public MemObject */ Addr loadAddrMask; + /** Offset that should be used for binary/symbol loading. + * This further allows more flexibily than the loadAddrMask allows alone in + * loading kernels and similar. The loadAddrOffset is applied after the + * loadAddrMask. + */ + Addr loadAddrOffset; + protected: uint64_t nextPID; @@ -321,7 +328,7 @@ class System : public MemObject * Called by pseudo_inst to track the number of work items completed by * this system. */ - uint64_t + uint64_t incWorkItemsEnd() { return ++workItemsEnd; @@ -332,13 +339,13 @@ class System : public MemObject * Returns the total number of cpus that have executed work item begin or * ends. */ - int + int markWorkItem(int index) { int count = 0; assert(index < activeCpus.size()); activeCpus[index] = true; - for (std::vector<bool>::iterator i = activeCpus.begin(); + for (std::vector<bool>::iterator i = activeCpus.begin(); i < activeCpus.end(); i++) { if (*i) count++; } |